{ "best_global_step": 49040, "best_metric": 0.1272326409816742, "best_model_checkpoint": "saves_multiple/p-tuning/llama-3-8b-instruct/train_multirc_789_1770179268/checkpoint-49040", "epoch": 20.0, "eval_steps": 6130, "global_step": 122600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008156606851549756, "grad_norm": 225.24703979492188, "learning_rate": 3.262642740619902e-07, "loss": 11.3547, "num_input_tokens_seen": 11392, "step": 5 }, { "epoch": 0.0016313213703099511, "grad_norm": 203.43992614746094, "learning_rate": 7.34094616639478e-07, "loss": 10.4637, "num_input_tokens_seen": 21600, "step": 10 }, { "epoch": 0.0024469820554649264, "grad_norm": 178.73890686035156, "learning_rate": 1.1419249592169658e-06, "loss": 9.9991, "num_input_tokens_seen": 32800, "step": 15 }, { "epoch": 0.0032626427406199023, "grad_norm": 132.00656127929688, "learning_rate": 1.5497553017944535e-06, "loss": 8.5769, "num_input_tokens_seen": 43488, "step": 20 }, { "epoch": 0.004078303425774877, "grad_norm": 116.0261459350586, "learning_rate": 1.957585644371941e-06, "loss": 7.1386, "num_input_tokens_seen": 55072, "step": 25 }, { "epoch": 0.004893964110929853, "grad_norm": 103.69325256347656, "learning_rate": 2.365415986949429e-06, "loss": 5.9154, "num_input_tokens_seen": 65760, "step": 30 }, { "epoch": 0.005709624796084829, "grad_norm": 151.7090301513672, "learning_rate": 2.7732463295269165e-06, "loss": 4.8794, "num_input_tokens_seen": 76928, "step": 35 }, { "epoch": 0.0065252854812398045, "grad_norm": 128.4599609375, "learning_rate": 3.1810766721044044e-06, "loss": 3.5003, "num_input_tokens_seen": 88224, "step": 40 }, { "epoch": 0.00734094616639478, "grad_norm": 65.14027404785156, "learning_rate": 3.5889070146818927e-06, "loss": 2.4161, "num_input_tokens_seen": 98592, "step": 45 }, { "epoch": 0.008156606851549755, "grad_norm": 59.896209716796875, "learning_rate": 3.99673735725938e-06, "loss": 1.8013, "num_input_tokens_seen": 109216, "step": 50 }, { "epoch": 0.00897226753670473, "grad_norm": 71.38336944580078, "learning_rate": 4.404567699836868e-06, "loss": 1.151, "num_input_tokens_seen": 119968, "step": 55 }, { "epoch": 0.009787928221859706, "grad_norm": 30.256752014160156, "learning_rate": 4.812398042414356e-06, "loss": 0.8514, "num_input_tokens_seen": 131584, "step": 60 }, { "epoch": 0.010603588907014683, "grad_norm": 43.357784271240234, "learning_rate": 5.2202283849918435e-06, "loss": 0.5881, "num_input_tokens_seen": 143232, "step": 65 }, { "epoch": 0.011419249592169658, "grad_norm": 30.591232299804688, "learning_rate": 5.628058727569331e-06, "loss": 0.4186, "num_input_tokens_seen": 152992, "step": 70 }, { "epoch": 0.012234910277324634, "grad_norm": 26.471689224243164, "learning_rate": 6.035889070146819e-06, "loss": 0.4526, "num_input_tokens_seen": 165184, "step": 75 }, { "epoch": 0.013050570962479609, "grad_norm": 26.3847713470459, "learning_rate": 6.443719412724307e-06, "loss": 0.4078, "num_input_tokens_seen": 177792, "step": 80 }, { "epoch": 0.013866231647634585, "grad_norm": 23.78069496154785, "learning_rate": 6.851549755301794e-06, "loss": 0.3402, "num_input_tokens_seen": 188608, "step": 85 }, { "epoch": 0.01468189233278956, "grad_norm": 36.0848274230957, "learning_rate": 7.2593800978792825e-06, "loss": 0.7053, "num_input_tokens_seen": 198656, "step": 90 }, { "epoch": 0.015497553017944535, "grad_norm": 18.320499420166016, "learning_rate": 7.66721044045677e-06, "loss": 0.4017, "num_input_tokens_seen": 209888, "step": 95 }, { "epoch": 0.01631321370309951, "grad_norm": 50.101009368896484, "learning_rate": 8.075040783034257e-06, "loss": 0.58, "num_input_tokens_seen": 219904, "step": 100 }, { "epoch": 0.017128874388254486, "grad_norm": 38.38740158081055, "learning_rate": 8.482871125611746e-06, "loss": 0.4827, "num_input_tokens_seen": 231296, "step": 105 }, { "epoch": 0.01794453507340946, "grad_norm": 20.471477508544922, "learning_rate": 8.890701468189234e-06, "loss": 0.4109, "num_input_tokens_seen": 241344, "step": 110 }, { "epoch": 0.018760195758564437, "grad_norm": 13.499199867248535, "learning_rate": 9.298531810766722e-06, "loss": 0.4526, "num_input_tokens_seen": 252160, "step": 115 }, { "epoch": 0.01957585644371941, "grad_norm": 18.684961318969727, "learning_rate": 9.706362153344209e-06, "loss": 0.3783, "num_input_tokens_seen": 263584, "step": 120 }, { "epoch": 0.020391517128874388, "grad_norm": 33.62428665161133, "learning_rate": 1.0114192495921697e-05, "loss": 0.4103, "num_input_tokens_seen": 274592, "step": 125 }, { "epoch": 0.021207177814029365, "grad_norm": 49.0087776184082, "learning_rate": 1.0522022838499184e-05, "loss": 0.5503, "num_input_tokens_seen": 285984, "step": 130 }, { "epoch": 0.02202283849918434, "grad_norm": 22.654041290283203, "learning_rate": 1.0929853181076672e-05, "loss": 0.3487, "num_input_tokens_seen": 297632, "step": 135 }, { "epoch": 0.022838499184339316, "grad_norm": 37.37763977050781, "learning_rate": 1.1337683523654159e-05, "loss": 0.4105, "num_input_tokens_seen": 309792, "step": 140 }, { "epoch": 0.02365415986949429, "grad_norm": 36.20363235473633, "learning_rate": 1.1745513866231649e-05, "loss": 0.3761, "num_input_tokens_seen": 319296, "step": 145 }, { "epoch": 0.024469820554649267, "grad_norm": 47.81282424926758, "learning_rate": 1.2153344208809135e-05, "loss": 0.4026, "num_input_tokens_seen": 329344, "step": 150 }, { "epoch": 0.02528548123980424, "grad_norm": 28.513683319091797, "learning_rate": 1.2561174551386624e-05, "loss": 0.3756, "num_input_tokens_seen": 339712, "step": 155 }, { "epoch": 0.026101141924959218, "grad_norm": 26.848081588745117, "learning_rate": 1.296900489396411e-05, "loss": 0.4408, "num_input_tokens_seen": 350016, "step": 160 }, { "epoch": 0.026916802610114192, "grad_norm": 47.24400329589844, "learning_rate": 1.3376835236541599e-05, "loss": 0.3923, "num_input_tokens_seen": 360704, "step": 165 }, { "epoch": 0.02773246329526917, "grad_norm": 21.93566131591797, "learning_rate": 1.3784665579119085e-05, "loss": 0.4706, "num_input_tokens_seen": 372704, "step": 170 }, { "epoch": 0.028548123980424143, "grad_norm": 69.633056640625, "learning_rate": 1.4192495921696575e-05, "loss": 0.4982, "num_input_tokens_seen": 384384, "step": 175 }, { "epoch": 0.02936378466557912, "grad_norm": 32.12857437133789, "learning_rate": 1.4600326264274062e-05, "loss": 0.4647, "num_input_tokens_seen": 394624, "step": 180 }, { "epoch": 0.030179445350734094, "grad_norm": 24.541458129882812, "learning_rate": 1.500815660685155e-05, "loss": 0.4208, "num_input_tokens_seen": 404768, "step": 185 }, { "epoch": 0.03099510603588907, "grad_norm": 45.62610626220703, "learning_rate": 1.5415986949429037e-05, "loss": 0.3465, "num_input_tokens_seen": 415520, "step": 190 }, { "epoch": 0.03181076672104405, "grad_norm": 10.016119956970215, "learning_rate": 1.5823817292006523e-05, "loss": 0.4518, "num_input_tokens_seen": 426304, "step": 195 }, { "epoch": 0.03262642740619902, "grad_norm": 12.097718238830566, "learning_rate": 1.6231647634584013e-05, "loss": 0.4393, "num_input_tokens_seen": 437408, "step": 200 }, { "epoch": 0.033442088091353996, "grad_norm": 22.63746452331543, "learning_rate": 1.66394779771615e-05, "loss": 0.3453, "num_input_tokens_seen": 446560, "step": 205 }, { "epoch": 0.03425774877650897, "grad_norm": 20.22903823852539, "learning_rate": 1.704730831973899e-05, "loss": 0.4263, "num_input_tokens_seen": 457792, "step": 210 }, { "epoch": 0.03507340946166395, "grad_norm": 15.609079360961914, "learning_rate": 1.7455138662316477e-05, "loss": 0.445, "num_input_tokens_seen": 468224, "step": 215 }, { "epoch": 0.03588907014681892, "grad_norm": 26.14414405822754, "learning_rate": 1.7862969004893963e-05, "loss": 0.5234, "num_input_tokens_seen": 479808, "step": 220 }, { "epoch": 0.0367047308319739, "grad_norm": 39.37902069091797, "learning_rate": 1.8270799347471453e-05, "loss": 0.4522, "num_input_tokens_seen": 491424, "step": 225 }, { "epoch": 0.037520391517128875, "grad_norm": 31.666839599609375, "learning_rate": 1.867862969004894e-05, "loss": 0.3141, "num_input_tokens_seen": 502144, "step": 230 }, { "epoch": 0.03833605220228385, "grad_norm": 18.281274795532227, "learning_rate": 1.908646003262643e-05, "loss": 0.4603, "num_input_tokens_seen": 511808, "step": 235 }, { "epoch": 0.03915171288743882, "grad_norm": 13.595705032348633, "learning_rate": 1.9494290375203913e-05, "loss": 0.3795, "num_input_tokens_seen": 522464, "step": 240 }, { "epoch": 0.0399673735725938, "grad_norm": 52.27533721923828, "learning_rate": 1.9902120717781403e-05, "loss": 0.3493, "num_input_tokens_seen": 531424, "step": 245 }, { "epoch": 0.040783034257748776, "grad_norm": 93.90097045898438, "learning_rate": 2.0309951060358893e-05, "loss": 0.5449, "num_input_tokens_seen": 542912, "step": 250 }, { "epoch": 0.041598694942903754, "grad_norm": 48.24867630004883, "learning_rate": 2.071778140293638e-05, "loss": 0.5247, "num_input_tokens_seen": 554432, "step": 255 }, { "epoch": 0.04241435562805873, "grad_norm": 45.269630432128906, "learning_rate": 2.1125611745513866e-05, "loss": 0.3984, "num_input_tokens_seen": 565056, "step": 260 }, { "epoch": 0.0432300163132137, "grad_norm": 46.97299575805664, "learning_rate": 2.1533442088091353e-05, "loss": 0.4666, "num_input_tokens_seen": 576128, "step": 265 }, { "epoch": 0.04404567699836868, "grad_norm": 22.68583869934082, "learning_rate": 2.1941272430668843e-05, "loss": 0.3858, "num_input_tokens_seen": 586432, "step": 270 }, { "epoch": 0.044861337683523655, "grad_norm": 10.99829387664795, "learning_rate": 2.234910277324633e-05, "loss": 0.4208, "num_input_tokens_seen": 596320, "step": 275 }, { "epoch": 0.04567699836867863, "grad_norm": 11.16822624206543, "learning_rate": 2.2756933115823816e-05, "loss": 0.2968, "num_input_tokens_seen": 607776, "step": 280 }, { "epoch": 0.0464926590538336, "grad_norm": 15.200081825256348, "learning_rate": 2.3164763458401306e-05, "loss": 0.3483, "num_input_tokens_seen": 618336, "step": 285 }, { "epoch": 0.04730831973898858, "grad_norm": 30.689321517944336, "learning_rate": 2.3572593800978793e-05, "loss": 0.4654, "num_input_tokens_seen": 628704, "step": 290 }, { "epoch": 0.04812398042414356, "grad_norm": 16.599830627441406, "learning_rate": 2.3980424143556283e-05, "loss": 0.4205, "num_input_tokens_seen": 640192, "step": 295 }, { "epoch": 0.048939641109298535, "grad_norm": 11.319351196289062, "learning_rate": 2.4388254486133766e-05, "loss": 0.3868, "num_input_tokens_seen": 651840, "step": 300 }, { "epoch": 0.049755301794453505, "grad_norm": 4.523133754730225, "learning_rate": 2.4796084828711256e-05, "loss": 0.371, "num_input_tokens_seen": 661920, "step": 305 }, { "epoch": 0.05057096247960848, "grad_norm": 10.479153633117676, "learning_rate": 2.5203915171288743e-05, "loss": 0.3528, "num_input_tokens_seen": 673344, "step": 310 }, { "epoch": 0.05138662316476346, "grad_norm": 25.705860137939453, "learning_rate": 2.5611745513866233e-05, "loss": 0.3937, "num_input_tokens_seen": 683904, "step": 315 }, { "epoch": 0.052202283849918436, "grad_norm": 25.693862915039062, "learning_rate": 2.6019575856443723e-05, "loss": 0.3907, "num_input_tokens_seen": 693920, "step": 320 }, { "epoch": 0.05301794453507341, "grad_norm": 67.36791229248047, "learning_rate": 2.6427406199021206e-05, "loss": 0.4202, "num_input_tokens_seen": 704800, "step": 325 }, { "epoch": 0.053833605220228384, "grad_norm": 7.713624954223633, "learning_rate": 2.6835236541598696e-05, "loss": 0.4282, "num_input_tokens_seen": 716192, "step": 330 }, { "epoch": 0.05464926590538336, "grad_norm": 10.765233039855957, "learning_rate": 2.7243066884176183e-05, "loss": 0.3346, "num_input_tokens_seen": 728288, "step": 335 }, { "epoch": 0.05546492659053834, "grad_norm": 8.434929847717285, "learning_rate": 2.7650897226753673e-05, "loss": 0.3987, "num_input_tokens_seen": 738016, "step": 340 }, { "epoch": 0.05628058727569331, "grad_norm": 9.873099327087402, "learning_rate": 2.805872756933116e-05, "loss": 0.3736, "num_input_tokens_seen": 747808, "step": 345 }, { "epoch": 0.057096247960848286, "grad_norm": 29.44620132446289, "learning_rate": 2.8466557911908646e-05, "loss": 0.4724, "num_input_tokens_seen": 758208, "step": 350 }, { "epoch": 0.05791190864600326, "grad_norm": 8.276298522949219, "learning_rate": 2.8874388254486136e-05, "loss": 0.4013, "num_input_tokens_seen": 768704, "step": 355 }, { "epoch": 0.05872756933115824, "grad_norm": 24.195348739624023, "learning_rate": 2.9282218597063623e-05, "loss": 0.3244, "num_input_tokens_seen": 779808, "step": 360 }, { "epoch": 0.05954323001631321, "grad_norm": 8.514348983764648, "learning_rate": 2.969004893964111e-05, "loss": 0.3805, "num_input_tokens_seen": 791232, "step": 365 }, { "epoch": 0.06035889070146819, "grad_norm": 9.994996070861816, "learning_rate": 3.0097879282218596e-05, "loss": 0.3633, "num_input_tokens_seen": 801568, "step": 370 }, { "epoch": 0.061174551386623165, "grad_norm": 5.65026330947876, "learning_rate": 3.0505709624796086e-05, "loss": 0.3517, "num_input_tokens_seen": 812736, "step": 375 }, { "epoch": 0.06199021207177814, "grad_norm": 22.769699096679688, "learning_rate": 3.0913539967373576e-05, "loss": 0.4077, "num_input_tokens_seen": 823040, "step": 380 }, { "epoch": 0.06280587275693311, "grad_norm": 9.328766822814941, "learning_rate": 3.132137030995106e-05, "loss": 0.4248, "num_input_tokens_seen": 833568, "step": 385 }, { "epoch": 0.0636215334420881, "grad_norm": 16.415210723876953, "learning_rate": 3.172920065252855e-05, "loss": 0.3755, "num_input_tokens_seen": 843776, "step": 390 }, { "epoch": 0.06443719412724307, "grad_norm": 6.472670078277588, "learning_rate": 3.213703099510604e-05, "loss": 0.3219, "num_input_tokens_seen": 854016, "step": 395 }, { "epoch": 0.06525285481239804, "grad_norm": 4.967156887054443, "learning_rate": 3.254486133768352e-05, "loss": 0.2452, "num_input_tokens_seen": 866304, "step": 400 }, { "epoch": 0.06606851549755302, "grad_norm": 8.40876293182373, "learning_rate": 3.295269168026101e-05, "loss": 0.5797, "num_input_tokens_seen": 876608, "step": 405 }, { "epoch": 0.06688417618270799, "grad_norm": 17.834550857543945, "learning_rate": 3.33605220228385e-05, "loss": 0.4954, "num_input_tokens_seen": 886112, "step": 410 }, { "epoch": 0.06769983686786298, "grad_norm": 5.254931926727295, "learning_rate": 3.3768352365415986e-05, "loss": 0.342, "num_input_tokens_seen": 896128, "step": 415 }, { "epoch": 0.06851549755301795, "grad_norm": 13.259016036987305, "learning_rate": 3.4176182707993476e-05, "loss": 0.3828, "num_input_tokens_seen": 907584, "step": 420 }, { "epoch": 0.06933115823817292, "grad_norm": 15.13353157043457, "learning_rate": 3.458401305057096e-05, "loss": 0.4065, "num_input_tokens_seen": 918592, "step": 425 }, { "epoch": 0.0701468189233279, "grad_norm": 6.152390956878662, "learning_rate": 3.4991843393148456e-05, "loss": 0.3726, "num_input_tokens_seen": 928960, "step": 430 }, { "epoch": 0.07096247960848287, "grad_norm": 1.8378111124038696, "learning_rate": 3.539967373572594e-05, "loss": 0.3943, "num_input_tokens_seen": 940160, "step": 435 }, { "epoch": 0.07177814029363784, "grad_norm": 2.4877896308898926, "learning_rate": 3.580750407830342e-05, "loss": 0.4026, "num_input_tokens_seen": 951040, "step": 440 }, { "epoch": 0.07259380097879282, "grad_norm": 1.8833818435668945, "learning_rate": 3.621533442088092e-05, "loss": 0.3528, "num_input_tokens_seen": 961696, "step": 445 }, { "epoch": 0.0734094616639478, "grad_norm": 3.356362819671631, "learning_rate": 3.66231647634584e-05, "loss": 0.3434, "num_input_tokens_seen": 972640, "step": 450 }, { "epoch": 0.07422512234910278, "grad_norm": 3.093301773071289, "learning_rate": 3.703099510603589e-05, "loss": 0.344, "num_input_tokens_seen": 984128, "step": 455 }, { "epoch": 0.07504078303425775, "grad_norm": 2.75049090385437, "learning_rate": 3.7438825448613375e-05, "loss": 0.3527, "num_input_tokens_seen": 994944, "step": 460 }, { "epoch": 0.07585644371941272, "grad_norm": 1.8288135528564453, "learning_rate": 3.7846655791190865e-05, "loss": 0.3783, "num_input_tokens_seen": 1005632, "step": 465 }, { "epoch": 0.0766721044045677, "grad_norm": 1.9206750392913818, "learning_rate": 3.8254486133768355e-05, "loss": 0.3397, "num_input_tokens_seen": 1016224, "step": 470 }, { "epoch": 0.07748776508972267, "grad_norm": 39.06534194946289, "learning_rate": 3.866231647634584e-05, "loss": 0.3933, "num_input_tokens_seen": 1026752, "step": 475 }, { "epoch": 0.07830342577487764, "grad_norm": 5.172845363616943, "learning_rate": 3.907014681892333e-05, "loss": 0.4824, "num_input_tokens_seen": 1037856, "step": 480 }, { "epoch": 0.07911908646003263, "grad_norm": 8.095596313476562, "learning_rate": 3.947797716150082e-05, "loss": 0.3439, "num_input_tokens_seen": 1048672, "step": 485 }, { "epoch": 0.0799347471451876, "grad_norm": 19.97920036315918, "learning_rate": 3.98858075040783e-05, "loss": 0.7598, "num_input_tokens_seen": 1058400, "step": 490 }, { "epoch": 0.08075040783034258, "grad_norm": 3.1255152225494385, "learning_rate": 4.029363784665579e-05, "loss": 0.3997, "num_input_tokens_seen": 1068544, "step": 495 }, { "epoch": 0.08156606851549755, "grad_norm": 2.6855437755584717, "learning_rate": 4.070146818923328e-05, "loss": 0.3251, "num_input_tokens_seen": 1078848, "step": 500 }, { "epoch": 0.08238172920065252, "grad_norm": 3.594280481338501, "learning_rate": 4.1109298531810765e-05, "loss": 0.3757, "num_input_tokens_seen": 1088032, "step": 505 }, { "epoch": 0.08319738988580751, "grad_norm": 4.66409158706665, "learning_rate": 4.1517128874388255e-05, "loss": 0.3502, "num_input_tokens_seen": 1099616, "step": 510 }, { "epoch": 0.08401305057096248, "grad_norm": 2.976755380630493, "learning_rate": 4.1924959216965745e-05, "loss": 0.3664, "num_input_tokens_seen": 1110880, "step": 515 }, { "epoch": 0.08482871125611746, "grad_norm": 15.449748992919922, "learning_rate": 4.233278955954323e-05, "loss": 0.3915, "num_input_tokens_seen": 1120480, "step": 520 }, { "epoch": 0.08564437194127243, "grad_norm": 6.0880961418151855, "learning_rate": 4.274061990212072e-05, "loss": 0.4047, "num_input_tokens_seen": 1131040, "step": 525 }, { "epoch": 0.0864600326264274, "grad_norm": 10.06917953491211, "learning_rate": 4.314845024469821e-05, "loss": 0.3608, "num_input_tokens_seen": 1142176, "step": 530 }, { "epoch": 0.08727569331158239, "grad_norm": 5.160312652587891, "learning_rate": 4.35562805872757e-05, "loss": 0.5324, "num_input_tokens_seen": 1152064, "step": 535 }, { "epoch": 0.08809135399673736, "grad_norm": 9.738526344299316, "learning_rate": 4.396411092985318e-05, "loss": 0.5299, "num_input_tokens_seen": 1164512, "step": 540 }, { "epoch": 0.08890701468189233, "grad_norm": 1.763839602470398, "learning_rate": 4.4371941272430665e-05, "loss": 0.3698, "num_input_tokens_seen": 1176160, "step": 545 }, { "epoch": 0.08972267536704731, "grad_norm": 4.8534159660339355, "learning_rate": 4.477977161500816e-05, "loss": 0.3421, "num_input_tokens_seen": 1186048, "step": 550 }, { "epoch": 0.09053833605220228, "grad_norm": 1.7418982982635498, "learning_rate": 4.5187601957585645e-05, "loss": 0.3401, "num_input_tokens_seen": 1197088, "step": 555 }, { "epoch": 0.09135399673735727, "grad_norm": 8.804040908813477, "learning_rate": 4.559543230016313e-05, "loss": 0.4435, "num_input_tokens_seen": 1207296, "step": 560 }, { "epoch": 0.09216965742251224, "grad_norm": 2.6876790523529053, "learning_rate": 4.6003262642740625e-05, "loss": 0.3662, "num_input_tokens_seen": 1218688, "step": 565 }, { "epoch": 0.0929853181076672, "grad_norm": 8.875554084777832, "learning_rate": 4.641109298531811e-05, "loss": 0.341, "num_input_tokens_seen": 1228864, "step": 570 }, { "epoch": 0.09380097879282219, "grad_norm": 3.6708247661590576, "learning_rate": 4.68189233278956e-05, "loss": 0.3525, "num_input_tokens_seen": 1240608, "step": 575 }, { "epoch": 0.09461663947797716, "grad_norm": 5.566936492919922, "learning_rate": 4.722675367047308e-05, "loss": 0.3138, "num_input_tokens_seen": 1251136, "step": 580 }, { "epoch": 0.09543230016313213, "grad_norm": 3.727107524871826, "learning_rate": 4.763458401305057e-05, "loss": 0.3379, "num_input_tokens_seen": 1263328, "step": 585 }, { "epoch": 0.09624796084828711, "grad_norm": 1.3581801652908325, "learning_rate": 4.804241435562806e-05, "loss": 0.3495, "num_input_tokens_seen": 1274656, "step": 590 }, { "epoch": 0.09706362153344208, "grad_norm": 1.5037490129470825, "learning_rate": 4.8450244698205544e-05, "loss": 0.3493, "num_input_tokens_seen": 1286016, "step": 595 }, { "epoch": 0.09787928221859707, "grad_norm": 2.7448742389678955, "learning_rate": 4.885807504078304e-05, "loss": 0.3638, "num_input_tokens_seen": 1296032, "step": 600 }, { "epoch": 0.09869494290375204, "grad_norm": 1.2616125345230103, "learning_rate": 4.9265905383360524e-05, "loss": 0.3595, "num_input_tokens_seen": 1307712, "step": 605 }, { "epoch": 0.09951060358890701, "grad_norm": 5.149881362915039, "learning_rate": 4.967373572593801e-05, "loss": 0.3598, "num_input_tokens_seen": 1319552, "step": 610 }, { "epoch": 0.100326264274062, "grad_norm": 2.3276751041412354, "learning_rate": 5.00815660685155e-05, "loss": 0.3367, "num_input_tokens_seen": 1330336, "step": 615 }, { "epoch": 0.10114192495921696, "grad_norm": 11.520888328552246, "learning_rate": 5.048939641109299e-05, "loss": 0.3395, "num_input_tokens_seen": 1341440, "step": 620 }, { "epoch": 0.10195758564437195, "grad_norm": 6.141873359680176, "learning_rate": 5.089722675367047e-05, "loss": 0.3934, "num_input_tokens_seen": 1352352, "step": 625 }, { "epoch": 0.10277324632952692, "grad_norm": 49.95155715942383, "learning_rate": 5.130505709624796e-05, "loss": 0.6138, "num_input_tokens_seen": 1363360, "step": 630 }, { "epoch": 0.10358890701468189, "grad_norm": 5.784404277801514, "learning_rate": 5.171288743882545e-05, "loss": 0.4101, "num_input_tokens_seen": 1372800, "step": 635 }, { "epoch": 0.10440456769983687, "grad_norm": 2.336291790008545, "learning_rate": 5.212071778140294e-05, "loss": 0.4858, "num_input_tokens_seen": 1383424, "step": 640 }, { "epoch": 0.10522022838499184, "grad_norm": 2.0160911083221436, "learning_rate": 5.2528548123980424e-05, "loss": 0.425, "num_input_tokens_seen": 1393536, "step": 645 }, { "epoch": 0.10603588907014681, "grad_norm": 6.756796836853027, "learning_rate": 5.293637846655791e-05, "loss": 0.5216, "num_input_tokens_seen": 1403712, "step": 650 }, { "epoch": 0.1068515497553018, "grad_norm": 1.3891957998275757, "learning_rate": 5.3344208809135404e-05, "loss": 0.3932, "num_input_tokens_seen": 1414368, "step": 655 }, { "epoch": 0.10766721044045677, "grad_norm": 4.341510772705078, "learning_rate": 5.375203915171289e-05, "loss": 0.3126, "num_input_tokens_seen": 1424544, "step": 660 }, { "epoch": 0.10848287112561175, "grad_norm": 1.8400681018829346, "learning_rate": 5.415986949429037e-05, "loss": 0.3431, "num_input_tokens_seen": 1435552, "step": 665 }, { "epoch": 0.10929853181076672, "grad_norm": 3.3328657150268555, "learning_rate": 5.456769983686787e-05, "loss": 0.3079, "num_input_tokens_seen": 1446432, "step": 670 }, { "epoch": 0.11011419249592169, "grad_norm": 4.7521586418151855, "learning_rate": 5.497553017944535e-05, "loss": 0.4253, "num_input_tokens_seen": 1457216, "step": 675 }, { "epoch": 0.11092985318107668, "grad_norm": 5.234409332275391, "learning_rate": 5.538336052202284e-05, "loss": 0.3452, "num_input_tokens_seen": 1467424, "step": 680 }, { "epoch": 0.11174551386623165, "grad_norm": 3.171687126159668, "learning_rate": 5.579119086460033e-05, "loss": 0.3829, "num_input_tokens_seen": 1479072, "step": 685 }, { "epoch": 0.11256117455138662, "grad_norm": 2.48236346244812, "learning_rate": 5.6199021207177814e-05, "loss": 0.3195, "num_input_tokens_seen": 1489792, "step": 690 }, { "epoch": 0.1133768352365416, "grad_norm": 1.310857892036438, "learning_rate": 5.6606851549755304e-05, "loss": 0.3925, "num_input_tokens_seen": 1500192, "step": 695 }, { "epoch": 0.11419249592169657, "grad_norm": 1.6563694477081299, "learning_rate": 5.701468189233279e-05, "loss": 0.3372, "num_input_tokens_seen": 1511040, "step": 700 }, { "epoch": 0.11500815660685156, "grad_norm": 3.357430934906006, "learning_rate": 5.7422512234910284e-05, "loss": 0.3276, "num_input_tokens_seen": 1521920, "step": 705 }, { "epoch": 0.11582381729200653, "grad_norm": 1.1804074048995972, "learning_rate": 5.783034257748777e-05, "loss": 0.2971, "num_input_tokens_seen": 1533792, "step": 710 }, { "epoch": 0.1166394779771615, "grad_norm": 3.9882752895355225, "learning_rate": 5.823817292006525e-05, "loss": 0.3838, "num_input_tokens_seen": 1544032, "step": 715 }, { "epoch": 0.11745513866231648, "grad_norm": 1.3871312141418457, "learning_rate": 5.864600326264275e-05, "loss": 0.3479, "num_input_tokens_seen": 1554528, "step": 720 }, { "epoch": 0.11827079934747145, "grad_norm": 1.7694770097732544, "learning_rate": 5.905383360522023e-05, "loss": 0.3429, "num_input_tokens_seen": 1565888, "step": 725 }, { "epoch": 0.11908646003262642, "grad_norm": 2.2913472652435303, "learning_rate": 5.9461663947797714e-05, "loss": 0.3352, "num_input_tokens_seen": 1576992, "step": 730 }, { "epoch": 0.1199021207177814, "grad_norm": 4.81171178817749, "learning_rate": 5.9869494290375204e-05, "loss": 0.3297, "num_input_tokens_seen": 1587392, "step": 735 }, { "epoch": 0.12071778140293637, "grad_norm": 4.814436912536621, "learning_rate": 6.0277324632952694e-05, "loss": 0.3405, "num_input_tokens_seen": 1598080, "step": 740 }, { "epoch": 0.12153344208809136, "grad_norm": 1.2078404426574707, "learning_rate": 6.0685154975530184e-05, "loss": 0.3328, "num_input_tokens_seen": 1608224, "step": 745 }, { "epoch": 0.12234910277324633, "grad_norm": 1.0516026020050049, "learning_rate": 6.109298531810767e-05, "loss": 0.3524, "num_input_tokens_seen": 1618368, "step": 750 }, { "epoch": 0.1231647634584013, "grad_norm": 8.321268081665039, "learning_rate": 6.150081566068516e-05, "loss": 0.3277, "num_input_tokens_seen": 1630400, "step": 755 }, { "epoch": 0.12398042414355628, "grad_norm": 8.43825912475586, "learning_rate": 6.190864600326265e-05, "loss": 0.545, "num_input_tokens_seen": 1641920, "step": 760 }, { "epoch": 0.12479608482871125, "grad_norm": 8.50485897064209, "learning_rate": 6.231647634584014e-05, "loss": 0.3535, "num_input_tokens_seen": 1653152, "step": 765 }, { "epoch": 0.12561174551386622, "grad_norm": 1.7483623027801514, "learning_rate": 6.272430668841763e-05, "loss": 0.394, "num_input_tokens_seen": 1664160, "step": 770 }, { "epoch": 0.1264274061990212, "grad_norm": 1.5351978540420532, "learning_rate": 6.31321370309951e-05, "loss": 0.3682, "num_input_tokens_seen": 1673536, "step": 775 }, { "epoch": 0.1272430668841762, "grad_norm": 5.330203533172607, "learning_rate": 6.35399673735726e-05, "loss": 0.3227, "num_input_tokens_seen": 1685056, "step": 780 }, { "epoch": 0.12805872756933115, "grad_norm": 18.881032943725586, "learning_rate": 6.394779771615008e-05, "loss": 0.5541, "num_input_tokens_seen": 1696928, "step": 785 }, { "epoch": 0.12887438825448613, "grad_norm": 14.782869338989258, "learning_rate": 6.435562805872756e-05, "loss": 0.4785, "num_input_tokens_seen": 1706496, "step": 790 }, { "epoch": 0.12969004893964112, "grad_norm": 27.132307052612305, "learning_rate": 6.476345840130505e-05, "loss": 0.491, "num_input_tokens_seen": 1716896, "step": 795 }, { "epoch": 0.13050570962479607, "grad_norm": 31.63263511657715, "learning_rate": 6.517128874388255e-05, "loss": 0.5197, "num_input_tokens_seen": 1727296, "step": 800 }, { "epoch": 0.13132137030995106, "grad_norm": 0.1396075040102005, "learning_rate": 6.557911908646004e-05, "loss": 0.3354, "num_input_tokens_seen": 1737888, "step": 805 }, { "epoch": 0.13213703099510604, "grad_norm": 1.6068007946014404, "learning_rate": 6.598694942903752e-05, "loss": 0.4035, "num_input_tokens_seen": 1747488, "step": 810 }, { "epoch": 0.132952691680261, "grad_norm": 8.138457298278809, "learning_rate": 6.639477977161501e-05, "loss": 0.3723, "num_input_tokens_seen": 1757888, "step": 815 }, { "epoch": 0.13376835236541598, "grad_norm": 8.175885200500488, "learning_rate": 6.68026101141925e-05, "loss": 0.3844, "num_input_tokens_seen": 1768928, "step": 820 }, { "epoch": 0.13458401305057097, "grad_norm": 0.02966875024139881, "learning_rate": 6.721044045676998e-05, "loss": 0.156, "num_input_tokens_seen": 1778688, "step": 825 }, { "epoch": 0.13539967373572595, "grad_norm": 3.6089866161346436, "learning_rate": 6.761827079934747e-05, "loss": 0.4139, "num_input_tokens_seen": 1789088, "step": 830 }, { "epoch": 0.1362153344208809, "grad_norm": 3.528244733810425, "learning_rate": 6.802610114192497e-05, "loss": 0.6408, "num_input_tokens_seen": 1799296, "step": 835 }, { "epoch": 0.1370309951060359, "grad_norm": 2.254758358001709, "learning_rate": 6.843393148450245e-05, "loss": 0.2205, "num_input_tokens_seen": 1809376, "step": 840 }, { "epoch": 0.13784665579119088, "grad_norm": 0.03666594624519348, "learning_rate": 6.884176182707994e-05, "loss": 0.1694, "num_input_tokens_seen": 1819936, "step": 845 }, { "epoch": 0.13866231647634583, "grad_norm": 1.3044015169143677, "learning_rate": 6.924959216965743e-05, "loss": 0.4089, "num_input_tokens_seen": 1829984, "step": 850 }, { "epoch": 0.13947797716150082, "grad_norm": 6.272593975067139, "learning_rate": 6.96574225122349e-05, "loss": 0.3633, "num_input_tokens_seen": 1840384, "step": 855 }, { "epoch": 0.1402936378466558, "grad_norm": 0.21787534654140472, "learning_rate": 7.006525285481239e-05, "loss": 0.1944, "num_input_tokens_seen": 1851488, "step": 860 }, { "epoch": 0.14110929853181076, "grad_norm": 7.665430545806885, "learning_rate": 7.047308319738988e-05, "loss": 0.2363, "num_input_tokens_seen": 1863200, "step": 865 }, { "epoch": 0.14192495921696574, "grad_norm": 25.50941276550293, "learning_rate": 7.088091353996739e-05, "loss": 0.4659, "num_input_tokens_seen": 1874048, "step": 870 }, { "epoch": 0.14274061990212072, "grad_norm": 5.501600742340088, "learning_rate": 7.128874388254486e-05, "loss": 0.1329, "num_input_tokens_seen": 1885920, "step": 875 }, { "epoch": 0.14355628058727568, "grad_norm": 4.4628496170043945, "learning_rate": 7.169657422512235e-05, "loss": 0.606, "num_input_tokens_seen": 1895712, "step": 880 }, { "epoch": 0.14437194127243066, "grad_norm": 1.6383246183395386, "learning_rate": 7.210440456769984e-05, "loss": 0.1534, "num_input_tokens_seen": 1906080, "step": 885 }, { "epoch": 0.14518760195758565, "grad_norm": 3.9695870876312256, "learning_rate": 7.251223491027732e-05, "loss": 0.4495, "num_input_tokens_seen": 1916864, "step": 890 }, { "epoch": 0.14600326264274063, "grad_norm": 1.4498566389083862, "learning_rate": 7.292006525285481e-05, "loss": 0.1282, "num_input_tokens_seen": 1927424, "step": 895 }, { "epoch": 0.1468189233278956, "grad_norm": 1.4727164506912231, "learning_rate": 7.332789559543231e-05, "loss": 0.1961, "num_input_tokens_seen": 1938048, "step": 900 }, { "epoch": 0.14763458401305057, "grad_norm": 6.260464191436768, "learning_rate": 7.373572593800979e-05, "loss": 0.078, "num_input_tokens_seen": 1948896, "step": 905 }, { "epoch": 0.14845024469820556, "grad_norm": 10.657343864440918, "learning_rate": 7.414355628058728e-05, "loss": 0.5685, "num_input_tokens_seen": 1959552, "step": 910 }, { "epoch": 0.14926590538336051, "grad_norm": 2.4051079750061035, "learning_rate": 7.455138662316477e-05, "loss": 0.2152, "num_input_tokens_seen": 1971200, "step": 915 }, { "epoch": 0.1500815660685155, "grad_norm": 3.8425323963165283, "learning_rate": 7.495921696574225e-05, "loss": 0.5765, "num_input_tokens_seen": 1982784, "step": 920 }, { "epoch": 0.15089722675367048, "grad_norm": 2.697840929031372, "learning_rate": 7.536704730831974e-05, "loss": 0.1899, "num_input_tokens_seen": 1994624, "step": 925 }, { "epoch": 0.15171288743882544, "grad_norm": 0.7884184122085571, "learning_rate": 7.577487765089723e-05, "loss": 0.2933, "num_input_tokens_seen": 2005440, "step": 930 }, { "epoch": 0.15252854812398042, "grad_norm": 2.8711278438568115, "learning_rate": 7.618270799347473e-05, "loss": 0.1942, "num_input_tokens_seen": 2016544, "step": 935 }, { "epoch": 0.1533442088091354, "grad_norm": 6.576929092407227, "learning_rate": 7.65905383360522e-05, "loss": 0.1792, "num_input_tokens_seen": 2027104, "step": 940 }, { "epoch": 0.15415986949429036, "grad_norm": 2.28462290763855, "learning_rate": 7.69983686786297e-05, "loss": 0.411, "num_input_tokens_seen": 2037568, "step": 945 }, { "epoch": 0.15497553017944535, "grad_norm": 0.7290262579917908, "learning_rate": 7.740619902120719e-05, "loss": 0.0445, "num_input_tokens_seen": 2047520, "step": 950 }, { "epoch": 0.15579119086460033, "grad_norm": 0.8786523342132568, "learning_rate": 7.781402936378466e-05, "loss": 0.3108, "num_input_tokens_seen": 2058400, "step": 955 }, { "epoch": 0.1566068515497553, "grad_norm": 6.360289573669434, "learning_rate": 7.822185970636215e-05, "loss": 0.2333, "num_input_tokens_seen": 2067040, "step": 960 }, { "epoch": 0.15742251223491027, "grad_norm": 0.7241197228431702, "learning_rate": 7.862969004893964e-05, "loss": 0.457, "num_input_tokens_seen": 2077600, "step": 965 }, { "epoch": 0.15823817292006526, "grad_norm": 0.5921363830566406, "learning_rate": 7.903752039151713e-05, "loss": 0.1244, "num_input_tokens_seen": 2088576, "step": 970 }, { "epoch": 0.15905383360522024, "grad_norm": 9.922910690307617, "learning_rate": 7.944535073409462e-05, "loss": 0.4985, "num_input_tokens_seen": 2098816, "step": 975 }, { "epoch": 0.1598694942903752, "grad_norm": 13.124771118164062, "learning_rate": 7.985318107667211e-05, "loss": 0.2373, "num_input_tokens_seen": 2108896, "step": 980 }, { "epoch": 0.16068515497553018, "grad_norm": 0.973936915397644, "learning_rate": 8.026101141924959e-05, "loss": 0.2923, "num_input_tokens_seen": 2120608, "step": 985 }, { "epoch": 0.16150081566068517, "grad_norm": 6.434631824493408, "learning_rate": 8.066884176182708e-05, "loss": 0.162, "num_input_tokens_seen": 2131840, "step": 990 }, { "epoch": 0.16231647634584012, "grad_norm": 5.561419486999512, "learning_rate": 8.107667210440457e-05, "loss": 0.2436, "num_input_tokens_seen": 2142560, "step": 995 }, { "epoch": 0.1631321370309951, "grad_norm": 1.5226362943649292, "learning_rate": 8.148450244698205e-05, "loss": 0.2927, "num_input_tokens_seen": 2153536, "step": 1000 }, { "epoch": 0.1639477977161501, "grad_norm": 3.2873289585113525, "learning_rate": 8.189233278955955e-05, "loss": 0.1093, "num_input_tokens_seen": 2164512, "step": 1005 }, { "epoch": 0.16476345840130505, "grad_norm": 4.852169513702393, "learning_rate": 8.230016313213704e-05, "loss": 0.3023, "num_input_tokens_seen": 2176608, "step": 1010 }, { "epoch": 0.16557911908646003, "grad_norm": 0.18311728537082672, "learning_rate": 8.270799347471453e-05, "loss": 0.0415, "num_input_tokens_seen": 2186144, "step": 1015 }, { "epoch": 0.16639477977161501, "grad_norm": 0.41068387031555176, "learning_rate": 8.3115823817292e-05, "loss": 0.3044, "num_input_tokens_seen": 2195808, "step": 1020 }, { "epoch": 0.16721044045676997, "grad_norm": 3.3082656860351562, "learning_rate": 8.35236541598695e-05, "loss": 0.2742, "num_input_tokens_seen": 2205152, "step": 1025 }, { "epoch": 0.16802610114192496, "grad_norm": 3.495880365371704, "learning_rate": 8.393148450244699e-05, "loss": 0.0925, "num_input_tokens_seen": 2215680, "step": 1030 }, { "epoch": 0.16884176182707994, "grad_norm": 1.1752636432647705, "learning_rate": 8.433931484502446e-05, "loss": 0.1977, "num_input_tokens_seen": 2227264, "step": 1035 }, { "epoch": 0.16965742251223492, "grad_norm": 7.356622219085693, "learning_rate": 8.474714518760197e-05, "loss": 0.2017, "num_input_tokens_seen": 2238816, "step": 1040 }, { "epoch": 0.17047308319738988, "grad_norm": 0.1461697220802307, "learning_rate": 8.515497553017946e-05, "loss": 0.1876, "num_input_tokens_seen": 2249728, "step": 1045 }, { "epoch": 0.17128874388254486, "grad_norm": 0.3809497654438019, "learning_rate": 8.556280587275693e-05, "loss": 0.0872, "num_input_tokens_seen": 2259584, "step": 1050 }, { "epoch": 0.17210440456769985, "grad_norm": 8.350950241088867, "learning_rate": 8.597063621533442e-05, "loss": 0.2098, "num_input_tokens_seen": 2270272, "step": 1055 }, { "epoch": 0.1729200652528548, "grad_norm": 0.8805423974990845, "learning_rate": 8.637846655791191e-05, "loss": 0.1848, "num_input_tokens_seen": 2281088, "step": 1060 }, { "epoch": 0.1737357259380098, "grad_norm": 0.052302416414022446, "learning_rate": 8.678629690048939e-05, "loss": 0.294, "num_input_tokens_seen": 2292736, "step": 1065 }, { "epoch": 0.17455138662316477, "grad_norm": 7.644049644470215, "learning_rate": 8.719412724306688e-05, "loss": 0.1628, "num_input_tokens_seen": 2303680, "step": 1070 }, { "epoch": 0.17536704730831973, "grad_norm": 0.015755848959088326, "learning_rate": 8.760195758564438e-05, "loss": 0.0716, "num_input_tokens_seen": 2314560, "step": 1075 }, { "epoch": 0.1761827079934747, "grad_norm": 1.3401787281036377, "learning_rate": 8.800978792822187e-05, "loss": 0.3154, "num_input_tokens_seen": 2325600, "step": 1080 }, { "epoch": 0.1769983686786297, "grad_norm": 1.2007181644439697, "learning_rate": 8.841761827079935e-05, "loss": 0.1659, "num_input_tokens_seen": 2336192, "step": 1085 }, { "epoch": 0.17781402936378465, "grad_norm": 0.09776181727647781, "learning_rate": 8.882544861337684e-05, "loss": 0.1127, "num_input_tokens_seen": 2345600, "step": 1090 }, { "epoch": 0.17862969004893964, "grad_norm": 5.559935092926025, "learning_rate": 8.923327895595433e-05, "loss": 0.2861, "num_input_tokens_seen": 2355072, "step": 1095 }, { "epoch": 0.17944535073409462, "grad_norm": 4.259070873260498, "learning_rate": 8.96411092985318e-05, "loss": 0.1156, "num_input_tokens_seen": 2366016, "step": 1100 }, { "epoch": 0.1802610114192496, "grad_norm": 1.227489948272705, "learning_rate": 9.00489396411093e-05, "loss": 0.0608, "num_input_tokens_seen": 2377056, "step": 1105 }, { "epoch": 0.18107667210440456, "grad_norm": 2.803602457046509, "learning_rate": 9.04567699836868e-05, "loss": 0.1865, "num_input_tokens_seen": 2388448, "step": 1110 }, { "epoch": 0.18189233278955955, "grad_norm": 16.156307220458984, "learning_rate": 9.086460032626427e-05, "loss": 0.4312, "num_input_tokens_seen": 2398848, "step": 1115 }, { "epoch": 0.18270799347471453, "grad_norm": 8.4832124710083, "learning_rate": 9.127243066884176e-05, "loss": 0.4412, "num_input_tokens_seen": 2408896, "step": 1120 }, { "epoch": 0.1835236541598695, "grad_norm": 0.3227379620075226, "learning_rate": 9.168026101141925e-05, "loss": 0.2213, "num_input_tokens_seen": 2416800, "step": 1125 }, { "epoch": 0.18433931484502447, "grad_norm": 2.39095139503479, "learning_rate": 9.208809135399673e-05, "loss": 0.1337, "num_input_tokens_seen": 2428064, "step": 1130 }, { "epoch": 0.18515497553017946, "grad_norm": 3.1897928714752197, "learning_rate": 9.249592169657422e-05, "loss": 0.2724, "num_input_tokens_seen": 2438592, "step": 1135 }, { "epoch": 0.1859706362153344, "grad_norm": 2.6958370208740234, "learning_rate": 9.290375203915171e-05, "loss": 0.1855, "num_input_tokens_seen": 2449248, "step": 1140 }, { "epoch": 0.1867862969004894, "grad_norm": 2.666459321975708, "learning_rate": 9.33115823817292e-05, "loss": 0.1996, "num_input_tokens_seen": 2459840, "step": 1145 }, { "epoch": 0.18760195758564438, "grad_norm": 0.06045296788215637, "learning_rate": 9.371941272430669e-05, "loss": 0.0328, "num_input_tokens_seen": 2469728, "step": 1150 }, { "epoch": 0.18841761827079934, "grad_norm": 5.968143939971924, "learning_rate": 9.412724306688418e-05, "loss": 0.3352, "num_input_tokens_seen": 2481184, "step": 1155 }, { "epoch": 0.18923327895595432, "grad_norm": 0.9836628437042236, "learning_rate": 9.453507340946167e-05, "loss": 0.1215, "num_input_tokens_seen": 2492832, "step": 1160 }, { "epoch": 0.1900489396411093, "grad_norm": 0.011035557836294174, "learning_rate": 9.494290375203915e-05, "loss": 0.0372, "num_input_tokens_seen": 2503776, "step": 1165 }, { "epoch": 0.19086460032626426, "grad_norm": 3.0490362644195557, "learning_rate": 9.535073409461664e-05, "loss": 0.2486, "num_input_tokens_seen": 2513376, "step": 1170 }, { "epoch": 0.19168026101141925, "grad_norm": 0.7378765940666199, "learning_rate": 9.575856443719413e-05, "loss": 0.0461, "num_input_tokens_seen": 2523232, "step": 1175 }, { "epoch": 0.19249592169657423, "grad_norm": 3.120060682296753, "learning_rate": 9.616639477977162e-05, "loss": 0.3372, "num_input_tokens_seen": 2534272, "step": 1180 }, { "epoch": 0.1933115823817292, "grad_norm": 0.06731845438480377, "learning_rate": 9.657422512234911e-05, "loss": 0.0175, "num_input_tokens_seen": 2544160, "step": 1185 }, { "epoch": 0.19412724306688417, "grad_norm": 3.397254705429077, "learning_rate": 9.69820554649266e-05, "loss": 0.0651, "num_input_tokens_seen": 2554880, "step": 1190 }, { "epoch": 0.19494290375203915, "grad_norm": 0.0616532638669014, "learning_rate": 9.738988580750407e-05, "loss": 0.3518, "num_input_tokens_seen": 2565472, "step": 1195 }, { "epoch": 0.19575856443719414, "grad_norm": 2.4435131549835205, "learning_rate": 9.779771615008156e-05, "loss": 0.2627, "num_input_tokens_seen": 2577312, "step": 1200 }, { "epoch": 0.1965742251223491, "grad_norm": 0.8077999353408813, "learning_rate": 9.820554649265905e-05, "loss": 0.1565, "num_input_tokens_seen": 2587968, "step": 1205 }, { "epoch": 0.19738988580750408, "grad_norm": 1.178731918334961, "learning_rate": 9.861337683523653e-05, "loss": 0.1934, "num_input_tokens_seen": 2598880, "step": 1210 }, { "epoch": 0.19820554649265906, "grad_norm": 1.8899637460708618, "learning_rate": 9.902120717781403e-05, "loss": 0.1097, "num_input_tokens_seen": 2609184, "step": 1215 }, { "epoch": 0.19902120717781402, "grad_norm": 3.3437256813049316, "learning_rate": 9.942903752039152e-05, "loss": 0.1096, "num_input_tokens_seen": 2620000, "step": 1220 }, { "epoch": 0.199836867862969, "grad_norm": 3.202988386154175, "learning_rate": 9.983686786296901e-05, "loss": 0.5351, "num_input_tokens_seen": 2629408, "step": 1225 }, { "epoch": 0.200652528548124, "grad_norm": 3.736741304397583, "learning_rate": 0.00010024469820554649, "loss": 0.162, "num_input_tokens_seen": 2641504, "step": 1230 }, { "epoch": 0.20146818923327894, "grad_norm": 1.1153422594070435, "learning_rate": 0.00010065252854812398, "loss": 0.2415, "num_input_tokens_seen": 2651296, "step": 1235 }, { "epoch": 0.20228384991843393, "grad_norm": 1.7328730821609497, "learning_rate": 0.00010106035889070147, "loss": 0.2872, "num_input_tokens_seen": 2662560, "step": 1240 }, { "epoch": 0.2030995106035889, "grad_norm": 1.131646990776062, "learning_rate": 0.00010146818923327896, "loss": 0.2203, "num_input_tokens_seen": 2674048, "step": 1245 }, { "epoch": 0.2039151712887439, "grad_norm": 0.7288661003112793, "learning_rate": 0.00010187601957585645, "loss": 0.1819, "num_input_tokens_seen": 2684608, "step": 1250 }, { "epoch": 0.20473083197389885, "grad_norm": 0.30604711174964905, "learning_rate": 0.00010228384991843394, "loss": 0.1496, "num_input_tokens_seen": 2696736, "step": 1255 }, { "epoch": 0.20554649265905384, "grad_norm": 1.700582504272461, "learning_rate": 0.00010269168026101142, "loss": 0.355, "num_input_tokens_seen": 2708000, "step": 1260 }, { "epoch": 0.20636215334420882, "grad_norm": 1.3775745630264282, "learning_rate": 0.00010309951060358891, "loss": 0.0891, "num_input_tokens_seen": 2718848, "step": 1265 }, { "epoch": 0.20717781402936378, "grad_norm": 0.18435822427272797, "learning_rate": 0.0001035073409461664, "loss": 0.0824, "num_input_tokens_seen": 2728960, "step": 1270 }, { "epoch": 0.20799347471451876, "grad_norm": 0.41497907042503357, "learning_rate": 0.00010391517128874387, "loss": 0.1226, "num_input_tokens_seen": 2738336, "step": 1275 }, { "epoch": 0.20880913539967375, "grad_norm": 0.39605382084846497, "learning_rate": 0.00010432300163132138, "loss": 0.0141, "num_input_tokens_seen": 2749056, "step": 1280 }, { "epoch": 0.2096247960848287, "grad_norm": 0.18014244735240936, "learning_rate": 0.00010473083197389887, "loss": 0.1778, "num_input_tokens_seen": 2760000, "step": 1285 }, { "epoch": 0.21044045676998369, "grad_norm": 0.8243248462677002, "learning_rate": 0.00010513866231647634, "loss": 0.0881, "num_input_tokens_seen": 2770944, "step": 1290 }, { "epoch": 0.21125611745513867, "grad_norm": 3.0271449089050293, "learning_rate": 0.00010554649265905383, "loss": 0.0973, "num_input_tokens_seen": 2782432, "step": 1295 }, { "epoch": 0.21207177814029363, "grad_norm": 9.047471046447754, "learning_rate": 0.00010595432300163132, "loss": 0.3144, "num_input_tokens_seen": 2793728, "step": 1300 }, { "epoch": 0.2128874388254486, "grad_norm": 0.9865400195121765, "learning_rate": 0.00010636215334420881, "loss": 0.1445, "num_input_tokens_seen": 2805344, "step": 1305 }, { "epoch": 0.2137030995106036, "grad_norm": 4.585295677185059, "learning_rate": 0.00010676998368678629, "loss": 0.1898, "num_input_tokens_seen": 2815072, "step": 1310 }, { "epoch": 0.21451876019575855, "grad_norm": 0.1313437968492508, "learning_rate": 0.0001071778140293638, "loss": 0.1639, "num_input_tokens_seen": 2825376, "step": 1315 }, { "epoch": 0.21533442088091354, "grad_norm": 0.4302562475204468, "learning_rate": 0.00010758564437194128, "loss": 0.1406, "num_input_tokens_seen": 2837024, "step": 1320 }, { "epoch": 0.21615008156606852, "grad_norm": 0.840142011642456, "learning_rate": 0.00010799347471451876, "loss": 0.1113, "num_input_tokens_seen": 2848096, "step": 1325 }, { "epoch": 0.2169657422512235, "grad_norm": 0.38016220927238464, "learning_rate": 0.00010840130505709625, "loss": 0.0427, "num_input_tokens_seen": 2858784, "step": 1330 }, { "epoch": 0.21778140293637846, "grad_norm": 1.6581228971481323, "learning_rate": 0.00010880913539967374, "loss": 0.1362, "num_input_tokens_seen": 2870592, "step": 1335 }, { "epoch": 0.21859706362153344, "grad_norm": 4.163750171661377, "learning_rate": 0.00010921696574225122, "loss": 0.3312, "num_input_tokens_seen": 2881856, "step": 1340 }, { "epoch": 0.21941272430668843, "grad_norm": 6.064782619476318, "learning_rate": 0.0001096247960848287, "loss": 0.2284, "num_input_tokens_seen": 2893312, "step": 1345 }, { "epoch": 0.22022838499184338, "grad_norm": 2.8906497955322266, "learning_rate": 0.00011003262642740621, "loss": 0.2913, "num_input_tokens_seen": 2903040, "step": 1350 }, { "epoch": 0.22104404567699837, "grad_norm": 1.424268364906311, "learning_rate": 0.00011044045676998369, "loss": 0.3758, "num_input_tokens_seen": 2912928, "step": 1355 }, { "epoch": 0.22185970636215335, "grad_norm": 2.6912379264831543, "learning_rate": 0.00011084828711256118, "loss": 0.2257, "num_input_tokens_seen": 2924032, "step": 1360 }, { "epoch": 0.2226753670473083, "grad_norm": 0.8119432330131531, "learning_rate": 0.00011125611745513867, "loss": 0.0945, "num_input_tokens_seen": 2934688, "step": 1365 }, { "epoch": 0.2234910277324633, "grad_norm": 1.040224552154541, "learning_rate": 0.00011166394779771616, "loss": 0.1211, "num_input_tokens_seen": 2946240, "step": 1370 }, { "epoch": 0.22430668841761828, "grad_norm": 4.126725673675537, "learning_rate": 0.00011207177814029363, "loss": 0.1585, "num_input_tokens_seen": 2956800, "step": 1375 }, { "epoch": 0.22512234910277323, "grad_norm": 0.2680872976779938, "learning_rate": 0.00011247960848287112, "loss": 0.19, "num_input_tokens_seen": 2968576, "step": 1380 }, { "epoch": 0.22593800978792822, "grad_norm": 2.690303325653076, "learning_rate": 0.00011288743882544863, "loss": 0.2149, "num_input_tokens_seen": 2978304, "step": 1385 }, { "epoch": 0.2267536704730832, "grad_norm": 0.7881904244422913, "learning_rate": 0.0001132952691680261, "loss": 0.056, "num_input_tokens_seen": 2990112, "step": 1390 }, { "epoch": 0.2275693311582382, "grad_norm": 5.148525714874268, "learning_rate": 0.00011370309951060359, "loss": 0.1142, "num_input_tokens_seen": 3001120, "step": 1395 }, { "epoch": 0.22838499184339314, "grad_norm": 2.8806674480438232, "learning_rate": 0.00011411092985318108, "loss": 0.1339, "num_input_tokens_seen": 3012608, "step": 1400 }, { "epoch": 0.22920065252854813, "grad_norm": 1.245159387588501, "learning_rate": 0.00011451876019575856, "loss": 0.0559, "num_input_tokens_seen": 3024224, "step": 1405 }, { "epoch": 0.2300163132137031, "grad_norm": 7.675414085388184, "learning_rate": 0.00011492659053833605, "loss": 0.2783, "num_input_tokens_seen": 3035552, "step": 1410 }, { "epoch": 0.23083197389885807, "grad_norm": 3.7686970233917236, "learning_rate": 0.00011533442088091354, "loss": 0.3399, "num_input_tokens_seen": 3045600, "step": 1415 }, { "epoch": 0.23164763458401305, "grad_norm": 4.100951671600342, "learning_rate": 0.00011574225122349103, "loss": 0.2427, "num_input_tokens_seen": 3056288, "step": 1420 }, { "epoch": 0.23246329526916804, "grad_norm": 0.05779648944735527, "learning_rate": 0.00011615008156606852, "loss": 0.1509, "num_input_tokens_seen": 3066720, "step": 1425 }, { "epoch": 0.233278955954323, "grad_norm": 5.888890266418457, "learning_rate": 0.00011655791190864601, "loss": 0.159, "num_input_tokens_seen": 3077696, "step": 1430 }, { "epoch": 0.23409461663947798, "grad_norm": 0.07912061363458633, "learning_rate": 0.0001169657422512235, "loss": 0.1878, "num_input_tokens_seen": 3088352, "step": 1435 }, { "epoch": 0.23491027732463296, "grad_norm": 0.09888610988855362, "learning_rate": 0.00011737357259380098, "loss": 0.2082, "num_input_tokens_seen": 3097888, "step": 1440 }, { "epoch": 0.23572593800978792, "grad_norm": 0.3190143406391144, "learning_rate": 0.00011778140293637847, "loss": 0.1028, "num_input_tokens_seen": 3108192, "step": 1445 }, { "epoch": 0.2365415986949429, "grad_norm": 4.030157089233398, "learning_rate": 0.00011818923327895596, "loss": 0.2342, "num_input_tokens_seen": 3119392, "step": 1450 }, { "epoch": 0.23735725938009788, "grad_norm": 0.27254125475883484, "learning_rate": 0.00011859706362153345, "loss": 0.1051, "num_input_tokens_seen": 3129792, "step": 1455 }, { "epoch": 0.23817292006525284, "grad_norm": 0.3463047742843628, "learning_rate": 0.00011900489396411094, "loss": 0.2056, "num_input_tokens_seen": 3141376, "step": 1460 }, { "epoch": 0.23898858075040783, "grad_norm": 0.16229379177093506, "learning_rate": 0.00011941272430668843, "loss": 0.2061, "num_input_tokens_seen": 3152096, "step": 1465 }, { "epoch": 0.2398042414355628, "grad_norm": 2.574392557144165, "learning_rate": 0.0001198205546492659, "loss": 0.1023, "num_input_tokens_seen": 3163232, "step": 1470 }, { "epoch": 0.2406199021207178, "grad_norm": 1.032649040222168, "learning_rate": 0.00012022838499184339, "loss": 0.0502, "num_input_tokens_seen": 3174496, "step": 1475 }, { "epoch": 0.24143556280587275, "grad_norm": 2.671555757522583, "learning_rate": 0.00012063621533442088, "loss": 0.415, "num_input_tokens_seen": 3184704, "step": 1480 }, { "epoch": 0.24225122349102773, "grad_norm": 1.5858745574951172, "learning_rate": 0.00012104404567699836, "loss": 0.1778, "num_input_tokens_seen": 3195616, "step": 1485 }, { "epoch": 0.24306688417618272, "grad_norm": 0.2738098204135895, "learning_rate": 0.00012145187601957586, "loss": 0.2038, "num_input_tokens_seen": 3207232, "step": 1490 }, { "epoch": 0.24388254486133767, "grad_norm": 1.6844745874404907, "learning_rate": 0.00012185970636215335, "loss": 0.125, "num_input_tokens_seen": 3218272, "step": 1495 }, { "epoch": 0.24469820554649266, "grad_norm": 0.5701693892478943, "learning_rate": 0.00012226753670473083, "loss": 0.1601, "num_input_tokens_seen": 3229312, "step": 1500 }, { "epoch": 0.24551386623164764, "grad_norm": 2.59881329536438, "learning_rate": 0.00012267536704730833, "loss": 0.4606, "num_input_tokens_seen": 3240064, "step": 1505 }, { "epoch": 0.2463295269168026, "grad_norm": 3.780829906463623, "learning_rate": 0.0001230831973898858, "loss": 0.3162, "num_input_tokens_seen": 3250592, "step": 1510 }, { "epoch": 0.24714518760195758, "grad_norm": 0.39131540060043335, "learning_rate": 0.0001234910277324633, "loss": 0.1866, "num_input_tokens_seen": 3260768, "step": 1515 }, { "epoch": 0.24796084828711257, "grad_norm": 0.8658497929573059, "learning_rate": 0.0001238988580750408, "loss": 0.0933, "num_input_tokens_seen": 3271488, "step": 1520 }, { "epoch": 0.24877650897226752, "grad_norm": 2.4224164485931396, "learning_rate": 0.00012430668841761827, "loss": 0.3204, "num_input_tokens_seen": 3281888, "step": 1525 }, { "epoch": 0.2495921696574225, "grad_norm": 0.13757432997226715, "learning_rate": 0.00012471451876019577, "loss": 0.038, "num_input_tokens_seen": 3291488, "step": 1530 }, { "epoch": 0.25040783034257746, "grad_norm": 0.6713064312934875, "learning_rate": 0.00012512234910277325, "loss": 0.1798, "num_input_tokens_seen": 3301888, "step": 1535 }, { "epoch": 0.25122349102773245, "grad_norm": 0.3556808531284332, "learning_rate": 0.00012553017944535072, "loss": 0.176, "num_input_tokens_seen": 3312960, "step": 1540 }, { "epoch": 0.25203915171288743, "grad_norm": 2.722989797592163, "learning_rate": 0.00012593800978792823, "loss": 0.1732, "num_input_tokens_seen": 3324320, "step": 1545 }, { "epoch": 0.2528548123980424, "grad_norm": 3.545426845550537, "learning_rate": 0.0001263458401305057, "loss": 0.258, "num_input_tokens_seen": 3333920, "step": 1550 }, { "epoch": 0.2536704730831974, "grad_norm": 1.3655180931091309, "learning_rate": 0.0001267536704730832, "loss": 0.1254, "num_input_tokens_seen": 3344416, "step": 1555 }, { "epoch": 0.2544861337683524, "grad_norm": 1.262924313545227, "learning_rate": 0.00012716150081566068, "loss": 0.0874, "num_input_tokens_seen": 3354368, "step": 1560 }, { "epoch": 0.2553017944535073, "grad_norm": 3.9419617652893066, "learning_rate": 0.00012756933115823819, "loss": 0.3548, "num_input_tokens_seen": 3364544, "step": 1565 }, { "epoch": 0.2561174551386623, "grad_norm": 0.7948105335235596, "learning_rate": 0.00012797716150081566, "loss": 0.1258, "num_input_tokens_seen": 3375936, "step": 1570 }, { "epoch": 0.2569331158238173, "grad_norm": 0.36828044056892395, "learning_rate": 0.00012838499184339314, "loss": 0.1248, "num_input_tokens_seen": 3388064, "step": 1575 }, { "epoch": 0.25774877650897227, "grad_norm": 1.8976014852523804, "learning_rate": 0.00012879282218597064, "loss": 0.2627, "num_input_tokens_seen": 3399296, "step": 1580 }, { "epoch": 0.25856443719412725, "grad_norm": 0.25936049222946167, "learning_rate": 0.00012920065252854812, "loss": 0.2919, "num_input_tokens_seen": 3410752, "step": 1585 }, { "epoch": 0.25938009787928223, "grad_norm": 0.9021464586257935, "learning_rate": 0.00012960848287112562, "loss": 0.2308, "num_input_tokens_seen": 3420672, "step": 1590 }, { "epoch": 0.2601957585644372, "grad_norm": 0.45900869369506836, "learning_rate": 0.0001300163132137031, "loss": 0.1616, "num_input_tokens_seen": 3430944, "step": 1595 }, { "epoch": 0.26101141924959215, "grad_norm": 2.875729560852051, "learning_rate": 0.0001304241435562806, "loss": 0.3885, "num_input_tokens_seen": 3443264, "step": 1600 }, { "epoch": 0.26182707993474713, "grad_norm": 0.2579473853111267, "learning_rate": 0.00013083197389885805, "loss": 0.222, "num_input_tokens_seen": 3454528, "step": 1605 }, { "epoch": 0.2626427406199021, "grad_norm": 2.6276164054870605, "learning_rate": 0.00013123980424143555, "loss": 0.3094, "num_input_tokens_seen": 3465664, "step": 1610 }, { "epoch": 0.2634584013050571, "grad_norm": 1.3047376871109009, "learning_rate": 0.00013164763458401306, "loss": 0.1175, "num_input_tokens_seen": 3476800, "step": 1615 }, { "epoch": 0.2642740619902121, "grad_norm": 1.1109704971313477, "learning_rate": 0.00013205546492659053, "loss": 0.1968, "num_input_tokens_seen": 3486848, "step": 1620 }, { "epoch": 0.26508972267536707, "grad_norm": 0.40321484208106995, "learning_rate": 0.00013246329526916804, "loss": 0.2308, "num_input_tokens_seen": 3498080, "step": 1625 }, { "epoch": 0.265905383360522, "grad_norm": 0.3324151337146759, "learning_rate": 0.00013287112561174552, "loss": 0.0756, "num_input_tokens_seen": 3508992, "step": 1630 }, { "epoch": 0.266721044045677, "grad_norm": 2.2158138751983643, "learning_rate": 0.00013327895595432302, "loss": 0.1261, "num_input_tokens_seen": 3520128, "step": 1635 }, { "epoch": 0.26753670473083196, "grad_norm": 1.0002572536468506, "learning_rate": 0.00013368678629690047, "loss": 0.1765, "num_input_tokens_seen": 3531680, "step": 1640 }, { "epoch": 0.26835236541598695, "grad_norm": 3.018091917037964, "learning_rate": 0.00013409461663947797, "loss": 0.2285, "num_input_tokens_seen": 3543488, "step": 1645 }, { "epoch": 0.26916802610114193, "grad_norm": 1.290290117263794, "learning_rate": 0.00013450244698205548, "loss": 0.1233, "num_input_tokens_seen": 3553984, "step": 1650 }, { "epoch": 0.2699836867862969, "grad_norm": 0.458624929189682, "learning_rate": 0.00013491027732463295, "loss": 0.2995, "num_input_tokens_seen": 3563712, "step": 1655 }, { "epoch": 0.2707993474714519, "grad_norm": 0.47744181752204895, "learning_rate": 0.00013531810766721046, "loss": 0.2922, "num_input_tokens_seen": 3574080, "step": 1660 }, { "epoch": 0.27161500815660683, "grad_norm": 0.6224934458732605, "learning_rate": 0.00013572593800978793, "loss": 0.1648, "num_input_tokens_seen": 3585376, "step": 1665 }, { "epoch": 0.2724306688417618, "grad_norm": 4.258433818817139, "learning_rate": 0.0001361337683523654, "loss": 0.258, "num_input_tokens_seen": 3596864, "step": 1670 }, { "epoch": 0.2732463295269168, "grad_norm": 1.5842293500900269, "learning_rate": 0.00013654159869494288, "loss": 0.408, "num_input_tokens_seen": 3608864, "step": 1675 }, { "epoch": 0.2740619902120718, "grad_norm": 0.4656829237937927, "learning_rate": 0.0001369494290375204, "loss": 0.0778, "num_input_tokens_seen": 3619584, "step": 1680 }, { "epoch": 0.27487765089722677, "grad_norm": 0.31724807620048523, "learning_rate": 0.0001373572593800979, "loss": 0.0695, "num_input_tokens_seen": 3630304, "step": 1685 }, { "epoch": 0.27569331158238175, "grad_norm": 2.0466132164001465, "learning_rate": 0.00013776508972267537, "loss": 0.0941, "num_input_tokens_seen": 3641696, "step": 1690 }, { "epoch": 0.2765089722675367, "grad_norm": 1.6225666999816895, "learning_rate": 0.00013817292006525287, "loss": 0.1952, "num_input_tokens_seen": 3652256, "step": 1695 }, { "epoch": 0.27732463295269166, "grad_norm": 0.5463118553161621, "learning_rate": 0.00013858075040783035, "loss": 0.1884, "num_input_tokens_seen": 3661408, "step": 1700 }, { "epoch": 0.27814029363784665, "grad_norm": 2.030285358428955, "learning_rate": 0.00013898858075040782, "loss": 0.204, "num_input_tokens_seen": 3672128, "step": 1705 }, { "epoch": 0.27895595432300163, "grad_norm": 3.7042081356048584, "learning_rate": 0.00013939641109298533, "loss": 0.1761, "num_input_tokens_seen": 3682720, "step": 1710 }, { "epoch": 0.2797716150081566, "grad_norm": 1.0897419452667236, "learning_rate": 0.0001398042414355628, "loss": 0.1543, "num_input_tokens_seen": 3694336, "step": 1715 }, { "epoch": 0.2805872756933116, "grad_norm": 2.6066136360168457, "learning_rate": 0.0001402120717781403, "loss": 0.0795, "num_input_tokens_seen": 3705600, "step": 1720 }, { "epoch": 0.2814029363784666, "grad_norm": 0.07373997569084167, "learning_rate": 0.00014061990212071778, "loss": 0.0896, "num_input_tokens_seen": 3716800, "step": 1725 }, { "epoch": 0.2822185970636215, "grad_norm": 0.785860538482666, "learning_rate": 0.0001410277324632953, "loss": 0.145, "num_input_tokens_seen": 3727616, "step": 1730 }, { "epoch": 0.2830342577487765, "grad_norm": 3.2250185012817383, "learning_rate": 0.00014143556280587274, "loss": 0.3106, "num_input_tokens_seen": 3737440, "step": 1735 }, { "epoch": 0.2838499184339315, "grad_norm": 1.9634822607040405, "learning_rate": 0.00014184339314845024, "loss": 0.3055, "num_input_tokens_seen": 3748096, "step": 1740 }, { "epoch": 0.28466557911908646, "grad_norm": 0.15452267229557037, "learning_rate": 0.00014225122349102774, "loss": 0.0721, "num_input_tokens_seen": 3759360, "step": 1745 }, { "epoch": 0.28548123980424145, "grad_norm": 0.2591032087802887, "learning_rate": 0.00014265905383360522, "loss": 0.0791, "num_input_tokens_seen": 3769120, "step": 1750 }, { "epoch": 0.28629690048939643, "grad_norm": 0.490597128868103, "learning_rate": 0.00014306688417618272, "loss": 0.1199, "num_input_tokens_seen": 3779424, "step": 1755 }, { "epoch": 0.28711256117455136, "grad_norm": 0.6109466552734375, "learning_rate": 0.0001434747145187602, "loss": 0.0907, "num_input_tokens_seen": 3790080, "step": 1760 }, { "epoch": 0.28792822185970635, "grad_norm": 1.4278334379196167, "learning_rate": 0.0001438825448613377, "loss": 0.0646, "num_input_tokens_seen": 3800800, "step": 1765 }, { "epoch": 0.28874388254486133, "grad_norm": 0.010350944474339485, "learning_rate": 0.00014429037520391515, "loss": 0.2522, "num_input_tokens_seen": 3811616, "step": 1770 }, { "epoch": 0.2895595432300163, "grad_norm": 0.5677800178527832, "learning_rate": 0.00014469820554649266, "loss": 0.0561, "num_input_tokens_seen": 3822880, "step": 1775 }, { "epoch": 0.2903752039151713, "grad_norm": 1.6626840829849243, "learning_rate": 0.00014510603588907016, "loss": 0.2993, "num_input_tokens_seen": 3833856, "step": 1780 }, { "epoch": 0.2911908646003263, "grad_norm": 0.14165183901786804, "learning_rate": 0.00014551386623164764, "loss": 0.0578, "num_input_tokens_seen": 3845120, "step": 1785 }, { "epoch": 0.29200652528548127, "grad_norm": 0.3040257394313812, "learning_rate": 0.00014592169657422514, "loss": 0.2752, "num_input_tokens_seen": 3856224, "step": 1790 }, { "epoch": 0.2928221859706362, "grad_norm": 0.6070979237556458, "learning_rate": 0.00014632952691680262, "loss": 0.2509, "num_input_tokens_seen": 3866528, "step": 1795 }, { "epoch": 0.2936378466557912, "grad_norm": 0.41614770889282227, "learning_rate": 0.0001467373572593801, "loss": 0.1615, "num_input_tokens_seen": 3875904, "step": 1800 }, { "epoch": 0.29445350734094616, "grad_norm": 0.5180200338363647, "learning_rate": 0.00014714518760195757, "loss": 0.1327, "num_input_tokens_seen": 3886240, "step": 1805 }, { "epoch": 0.29526916802610115, "grad_norm": 0.7735000252723694, "learning_rate": 0.00014755301794453507, "loss": 0.2538, "num_input_tokens_seen": 3897376, "step": 1810 }, { "epoch": 0.29608482871125613, "grad_norm": 0.2597328722476959, "learning_rate": 0.00014796084828711258, "loss": 0.2062, "num_input_tokens_seen": 3909216, "step": 1815 }, { "epoch": 0.2969004893964111, "grad_norm": 0.5922500491142273, "learning_rate": 0.00014836867862969005, "loss": 0.2494, "num_input_tokens_seen": 3919808, "step": 1820 }, { "epoch": 0.29771615008156604, "grad_norm": 1.6240063905715942, "learning_rate": 0.00014877650897226756, "loss": 0.1827, "num_input_tokens_seen": 3930400, "step": 1825 }, { "epoch": 0.29853181076672103, "grad_norm": 0.9891195297241211, "learning_rate": 0.00014918433931484503, "loss": 0.1535, "num_input_tokens_seen": 3941952, "step": 1830 }, { "epoch": 0.299347471451876, "grad_norm": 3.0769333839416504, "learning_rate": 0.0001495921696574225, "loss": 0.2735, "num_input_tokens_seen": 3951552, "step": 1835 }, { "epoch": 0.300163132137031, "grad_norm": 0.09723358601331711, "learning_rate": 0.00015, "loss": 0.0824, "num_input_tokens_seen": 3961248, "step": 1840 }, { "epoch": 0.300978792822186, "grad_norm": 0.5110810399055481, "learning_rate": 0.0001504078303425775, "loss": 0.1103, "num_input_tokens_seen": 3971808, "step": 1845 }, { "epoch": 0.30179445350734097, "grad_norm": 1.3086401224136353, "learning_rate": 0.000150815660685155, "loss": 0.0943, "num_input_tokens_seen": 3983200, "step": 1850 }, { "epoch": 0.30261011419249595, "grad_norm": 0.4401291608810425, "learning_rate": 0.00015122349102773247, "loss": 0.1114, "num_input_tokens_seen": 3993856, "step": 1855 }, { "epoch": 0.3034257748776509, "grad_norm": 0.33483627438545227, "learning_rate": 0.00015163132137030997, "loss": 0.3137, "num_input_tokens_seen": 4005120, "step": 1860 }, { "epoch": 0.30424143556280586, "grad_norm": 1.8863903284072876, "learning_rate": 0.00015203915171288742, "loss": 0.2798, "num_input_tokens_seen": 4015840, "step": 1865 }, { "epoch": 0.30505709624796085, "grad_norm": 1.9507595300674438, "learning_rate": 0.00015244698205546493, "loss": 0.3128, "num_input_tokens_seen": 4025600, "step": 1870 }, { "epoch": 0.30587275693311583, "grad_norm": 0.3814392387866974, "learning_rate": 0.0001528548123980424, "loss": 0.1439, "num_input_tokens_seen": 4036576, "step": 1875 }, { "epoch": 0.3066884176182708, "grad_norm": 1.7550230026245117, "learning_rate": 0.0001532626427406199, "loss": 0.0901, "num_input_tokens_seen": 4047328, "step": 1880 }, { "epoch": 0.3075040783034258, "grad_norm": 0.14621619880199432, "learning_rate": 0.0001536704730831974, "loss": 0.0206, "num_input_tokens_seen": 4059776, "step": 1885 }, { "epoch": 0.3083197389885807, "grad_norm": 0.01841830462217331, "learning_rate": 0.0001540783034257749, "loss": 0.0661, "num_input_tokens_seen": 4070528, "step": 1890 }, { "epoch": 0.3091353996737357, "grad_norm": 2.643216848373413, "learning_rate": 0.00015448613376835236, "loss": 0.2221, "num_input_tokens_seen": 4082432, "step": 1895 }, { "epoch": 0.3099510603588907, "grad_norm": 3.4676759243011475, "learning_rate": 0.00015489396411092984, "loss": 0.1928, "num_input_tokens_seen": 4092800, "step": 1900 }, { "epoch": 0.3107667210440457, "grad_norm": 0.19743825495243073, "learning_rate": 0.00015530179445350734, "loss": 0.1525, "num_input_tokens_seen": 4103296, "step": 1905 }, { "epoch": 0.31158238172920066, "grad_norm": 1.268446683883667, "learning_rate": 0.00015570962479608482, "loss": 0.1171, "num_input_tokens_seen": 4114368, "step": 1910 }, { "epoch": 0.31239804241435565, "grad_norm": 0.7631101608276367, "learning_rate": 0.00015611745513866232, "loss": 0.0849, "num_input_tokens_seen": 4124096, "step": 1915 }, { "epoch": 0.3132137030995106, "grad_norm": 4.268880367279053, "learning_rate": 0.00015652528548123983, "loss": 0.1902, "num_input_tokens_seen": 4134592, "step": 1920 }, { "epoch": 0.31402936378466556, "grad_norm": 0.1221299096941948, "learning_rate": 0.0001569331158238173, "loss": 0.229, "num_input_tokens_seen": 4144064, "step": 1925 }, { "epoch": 0.31484502446982054, "grad_norm": 0.36698073148727417, "learning_rate": 0.00015734094616639478, "loss": 0.0977, "num_input_tokens_seen": 4155904, "step": 1930 }, { "epoch": 0.31566068515497553, "grad_norm": 0.5234968066215515, "learning_rate": 0.00015774877650897226, "loss": 0.2501, "num_input_tokens_seen": 4167520, "step": 1935 }, { "epoch": 0.3164763458401305, "grad_norm": 2.6157760620117188, "learning_rate": 0.00015815660685154976, "loss": 0.2083, "num_input_tokens_seen": 4178048, "step": 1940 }, { "epoch": 0.3172920065252855, "grad_norm": 0.22130391001701355, "learning_rate": 0.00015856443719412724, "loss": 0.068, "num_input_tokens_seen": 4189472, "step": 1945 }, { "epoch": 0.3181076672104405, "grad_norm": 2.8134090900421143, "learning_rate": 0.00015897226753670474, "loss": 0.063, "num_input_tokens_seen": 4201024, "step": 1950 }, { "epoch": 0.3189233278955954, "grad_norm": 0.05283171683549881, "learning_rate": 0.00015938009787928224, "loss": 0.2493, "num_input_tokens_seen": 4211936, "step": 1955 }, { "epoch": 0.3197389885807504, "grad_norm": 0.16477589309215546, "learning_rate": 0.0001597879282218597, "loss": 0.0709, "num_input_tokens_seen": 4222016, "step": 1960 }, { "epoch": 0.3205546492659054, "grad_norm": 2.775834798812866, "learning_rate": 0.0001601957585644372, "loss": 0.1217, "num_input_tokens_seen": 4233376, "step": 1965 }, { "epoch": 0.32137030995106036, "grad_norm": 2.882181167602539, "learning_rate": 0.00016060358890701467, "loss": 0.3029, "num_input_tokens_seen": 4243904, "step": 1970 }, { "epoch": 0.32218597063621535, "grad_norm": 0.08096791058778763, "learning_rate": 0.00016101141924959218, "loss": 0.0284, "num_input_tokens_seen": 4254400, "step": 1975 }, { "epoch": 0.32300163132137033, "grad_norm": 0.46380338072776794, "learning_rate": 0.00016141924959216965, "loss": 0.1601, "num_input_tokens_seen": 4264832, "step": 1980 }, { "epoch": 0.32381729200652526, "grad_norm": 3.6591830253601074, "learning_rate": 0.00016182707993474716, "loss": 0.2942, "num_input_tokens_seen": 4275744, "step": 1985 }, { "epoch": 0.32463295269168024, "grad_norm": 0.1005164384841919, "learning_rate": 0.00016223491027732466, "loss": 0.0562, "num_input_tokens_seen": 4286272, "step": 1990 }, { "epoch": 0.3254486133768352, "grad_norm": 0.3072299659252167, "learning_rate": 0.0001626427406199021, "loss": 0.247, "num_input_tokens_seen": 4297344, "step": 1995 }, { "epoch": 0.3262642740619902, "grad_norm": 1.8554059267044067, "learning_rate": 0.0001630505709624796, "loss": 0.0576, "num_input_tokens_seen": 4307680, "step": 2000 }, { "epoch": 0.3270799347471452, "grad_norm": 0.07843183726072311, "learning_rate": 0.0001634584013050571, "loss": 0.3006, "num_input_tokens_seen": 4318880, "step": 2005 }, { "epoch": 0.3278955954323002, "grad_norm": 4.445835590362549, "learning_rate": 0.0001638662316476346, "loss": 0.1157, "num_input_tokens_seen": 4330080, "step": 2010 }, { "epoch": 0.32871125611745516, "grad_norm": 1.0905181169509888, "learning_rate": 0.00016427406199021207, "loss": 0.0804, "num_input_tokens_seen": 4341472, "step": 2015 }, { "epoch": 0.3295269168026101, "grad_norm": 0.09669951349496841, "learning_rate": 0.00016468189233278957, "loss": 0.071, "num_input_tokens_seen": 4352544, "step": 2020 }, { "epoch": 0.3303425774877651, "grad_norm": 0.22773349285125732, "learning_rate": 0.00016508972267536705, "loss": 0.087, "num_input_tokens_seen": 4363648, "step": 2025 }, { "epoch": 0.33115823817292006, "grad_norm": 5.904660224914551, "learning_rate": 0.00016549755301794453, "loss": 0.1167, "num_input_tokens_seen": 4373856, "step": 2030 }, { "epoch": 0.33197389885807504, "grad_norm": 6.573551654815674, "learning_rate": 0.00016590538336052203, "loss": 0.1439, "num_input_tokens_seen": 4384224, "step": 2035 }, { "epoch": 0.33278955954323003, "grad_norm": 4.354398250579834, "learning_rate": 0.0001663132137030995, "loss": 0.1117, "num_input_tokens_seen": 4395904, "step": 2040 }, { "epoch": 0.333605220228385, "grad_norm": 1.0218340158462524, "learning_rate": 0.000166721044045677, "loss": 0.3701, "num_input_tokens_seen": 4407072, "step": 2045 }, { "epoch": 0.33442088091353994, "grad_norm": 0.2953657805919647, "learning_rate": 0.00016712887438825449, "loss": 0.1502, "num_input_tokens_seen": 4417056, "step": 2050 }, { "epoch": 0.3352365415986949, "grad_norm": 0.04800691083073616, "learning_rate": 0.000167536704730832, "loss": 0.0794, "num_input_tokens_seen": 4427904, "step": 2055 }, { "epoch": 0.3360522022838499, "grad_norm": 0.07293276488780975, "learning_rate": 0.00016794453507340947, "loss": 0.0821, "num_input_tokens_seen": 4439520, "step": 2060 }, { "epoch": 0.3368678629690049, "grad_norm": 1.900673270225525, "learning_rate": 0.00016835236541598694, "loss": 0.3694, "num_input_tokens_seen": 4449728, "step": 2065 }, { "epoch": 0.3376835236541599, "grad_norm": 0.8468218445777893, "learning_rate": 0.00016876019575856445, "loss": 0.2244, "num_input_tokens_seen": 4460224, "step": 2070 }, { "epoch": 0.33849918433931486, "grad_norm": 0.5177363157272339, "learning_rate": 0.00016916802610114192, "loss": 0.1043, "num_input_tokens_seen": 4468960, "step": 2075 }, { "epoch": 0.33931484502446985, "grad_norm": 0.1556384563446045, "learning_rate": 0.00016957585644371943, "loss": 0.0652, "num_input_tokens_seen": 4478880, "step": 2080 }, { "epoch": 0.3401305057096248, "grad_norm": 0.6240658760070801, "learning_rate": 0.0001699836867862969, "loss": 0.0885, "num_input_tokens_seen": 4489920, "step": 2085 }, { "epoch": 0.34094616639477976, "grad_norm": 3.0557613372802734, "learning_rate": 0.00017039151712887438, "loss": 0.0852, "num_input_tokens_seen": 4499136, "step": 2090 }, { "epoch": 0.34176182707993474, "grad_norm": 0.8113916516304016, "learning_rate": 0.00017079934747145188, "loss": 0.166, "num_input_tokens_seen": 4511072, "step": 2095 }, { "epoch": 0.3425774877650897, "grad_norm": 0.240762397646904, "learning_rate": 0.00017120717781402936, "loss": 0.1644, "num_input_tokens_seen": 4521760, "step": 2100 }, { "epoch": 0.3433931484502447, "grad_norm": 0.1695231944322586, "learning_rate": 0.00017161500815660686, "loss": 0.2156, "num_input_tokens_seen": 4532288, "step": 2105 }, { "epoch": 0.3442088091353997, "grad_norm": 1.8336451053619385, "learning_rate": 0.00017202283849918434, "loss": 0.0938, "num_input_tokens_seen": 4543552, "step": 2110 }, { "epoch": 0.3450244698205546, "grad_norm": 0.1630079448223114, "learning_rate": 0.00017243066884176184, "loss": 0.2471, "num_input_tokens_seen": 4554656, "step": 2115 }, { "epoch": 0.3458401305057096, "grad_norm": 1.4173747301101685, "learning_rate": 0.00017283849918433932, "loss": 0.1447, "num_input_tokens_seen": 4565536, "step": 2120 }, { "epoch": 0.3466557911908646, "grad_norm": 0.03745684400200844, "learning_rate": 0.0001732463295269168, "loss": 0.0281, "num_input_tokens_seen": 4576864, "step": 2125 }, { "epoch": 0.3474714518760196, "grad_norm": 4.119412899017334, "learning_rate": 0.0001736541598694943, "loss": 0.392, "num_input_tokens_seen": 4586624, "step": 2130 }, { "epoch": 0.34828711256117456, "grad_norm": 0.09954573959112167, "learning_rate": 0.00017406199021207178, "loss": 0.0884, "num_input_tokens_seen": 4597824, "step": 2135 }, { "epoch": 0.34910277324632955, "grad_norm": 0.9733045101165771, "learning_rate": 0.00017446982055464928, "loss": 0.322, "num_input_tokens_seen": 4609856, "step": 2140 }, { "epoch": 0.34991843393148453, "grad_norm": 0.3934627175331116, "learning_rate": 0.00017487765089722676, "loss": 0.2221, "num_input_tokens_seen": 4621248, "step": 2145 }, { "epoch": 0.35073409461663946, "grad_norm": 0.3459859788417816, "learning_rate": 0.00017528548123980426, "loss": 0.0979, "num_input_tokens_seen": 4631872, "step": 2150 }, { "epoch": 0.35154975530179444, "grad_norm": 1.1433534622192383, "learning_rate": 0.0001756933115823817, "loss": 0.16, "num_input_tokens_seen": 4641632, "step": 2155 }, { "epoch": 0.3523654159869494, "grad_norm": 0.08755321800708771, "learning_rate": 0.0001761011419249592, "loss": 0.1141, "num_input_tokens_seen": 4653664, "step": 2160 }, { "epoch": 0.3531810766721044, "grad_norm": 1.749463677406311, "learning_rate": 0.00017650897226753672, "loss": 0.109, "num_input_tokens_seen": 4664064, "step": 2165 }, { "epoch": 0.3539967373572594, "grad_norm": 1.9594931602478027, "learning_rate": 0.0001769168026101142, "loss": 0.2849, "num_input_tokens_seen": 4676512, "step": 2170 }, { "epoch": 0.3548123980424144, "grad_norm": 0.034678272902965546, "learning_rate": 0.0001773246329526917, "loss": 0.27, "num_input_tokens_seen": 4687008, "step": 2175 }, { "epoch": 0.3556280587275693, "grad_norm": 0.08964840322732925, "learning_rate": 0.00017773246329526917, "loss": 0.1797, "num_input_tokens_seen": 4696512, "step": 2180 }, { "epoch": 0.3564437194127243, "grad_norm": 3.4402599334716797, "learning_rate": 0.00017814029363784668, "loss": 0.1343, "num_input_tokens_seen": 4706304, "step": 2185 }, { "epoch": 0.3572593800978793, "grad_norm": 1.8494372367858887, "learning_rate": 0.00017854812398042412, "loss": 0.1944, "num_input_tokens_seen": 4717216, "step": 2190 }, { "epoch": 0.35807504078303426, "grad_norm": 0.5094508528709412, "learning_rate": 0.00017895595432300163, "loss": 0.0752, "num_input_tokens_seen": 4728480, "step": 2195 }, { "epoch": 0.35889070146818924, "grad_norm": 0.46606236696243286, "learning_rate": 0.00017936378466557913, "loss": 0.1852, "num_input_tokens_seen": 4740704, "step": 2200 }, { "epoch": 0.35970636215334423, "grad_norm": 0.8652101755142212, "learning_rate": 0.0001797716150081566, "loss": 0.1737, "num_input_tokens_seen": 4752064, "step": 2205 }, { "epoch": 0.3605220228384992, "grad_norm": 1.02736234664917, "learning_rate": 0.0001801794453507341, "loss": 0.2175, "num_input_tokens_seen": 4763360, "step": 2210 }, { "epoch": 0.36133768352365414, "grad_norm": 0.8342076539993286, "learning_rate": 0.0001805872756933116, "loss": 0.1874, "num_input_tokens_seen": 4773920, "step": 2215 }, { "epoch": 0.3621533442088091, "grad_norm": 0.6151922345161438, "learning_rate": 0.00018099510603588906, "loss": 0.1649, "num_input_tokens_seen": 4784512, "step": 2220 }, { "epoch": 0.3629690048939641, "grad_norm": 0.7995598316192627, "learning_rate": 0.00018140293637846654, "loss": 0.1312, "num_input_tokens_seen": 4794944, "step": 2225 }, { "epoch": 0.3637846655791191, "grad_norm": 0.6298816204071045, "learning_rate": 0.00018181076672104404, "loss": 0.2568, "num_input_tokens_seen": 4806336, "step": 2230 }, { "epoch": 0.3646003262642741, "grad_norm": 0.801241397857666, "learning_rate": 0.00018221859706362155, "loss": 0.1809, "num_input_tokens_seen": 4815648, "step": 2235 }, { "epoch": 0.36541598694942906, "grad_norm": 1.7616490125656128, "learning_rate": 0.00018262642740619902, "loss": 0.1136, "num_input_tokens_seen": 4825952, "step": 2240 }, { "epoch": 0.366231647634584, "grad_norm": 0.9669632315635681, "learning_rate": 0.00018303425774877653, "loss": 0.0854, "num_input_tokens_seen": 4837888, "step": 2245 }, { "epoch": 0.367047308319739, "grad_norm": 0.5803656578063965, "learning_rate": 0.00018344208809135398, "loss": 0.1566, "num_input_tokens_seen": 4848576, "step": 2250 }, { "epoch": 0.36786296900489396, "grad_norm": 4.13840389251709, "learning_rate": 0.00018384991843393148, "loss": 0.3967, "num_input_tokens_seen": 4858272, "step": 2255 }, { "epoch": 0.36867862969004894, "grad_norm": 4.007920742034912, "learning_rate": 0.00018425774877650896, "loss": 0.3009, "num_input_tokens_seen": 4868480, "step": 2260 }, { "epoch": 0.3694942903752039, "grad_norm": 0.9734199047088623, "learning_rate": 0.00018466557911908646, "loss": 0.1409, "num_input_tokens_seen": 4879136, "step": 2265 }, { "epoch": 0.3703099510603589, "grad_norm": 1.5065789222717285, "learning_rate": 0.00018507340946166396, "loss": 0.1857, "num_input_tokens_seen": 4891392, "step": 2270 }, { "epoch": 0.37112561174551384, "grad_norm": 0.5240089893341064, "learning_rate": 0.00018548123980424144, "loss": 0.1847, "num_input_tokens_seen": 4901280, "step": 2275 }, { "epoch": 0.3719412724306688, "grad_norm": 0.9392548203468323, "learning_rate": 0.00018588907014681894, "loss": 0.1447, "num_input_tokens_seen": 4913056, "step": 2280 }, { "epoch": 0.3727569331158238, "grad_norm": 0.13809747993946075, "learning_rate": 0.0001862969004893964, "loss": 0.0391, "num_input_tokens_seen": 4924928, "step": 2285 }, { "epoch": 0.3735725938009788, "grad_norm": 2.564167022705078, "learning_rate": 0.0001867047308319739, "loss": 0.1365, "num_input_tokens_seen": 4935712, "step": 2290 }, { "epoch": 0.3743882544861338, "grad_norm": 1.2993137836456299, "learning_rate": 0.0001871125611745514, "loss": 0.1246, "num_input_tokens_seen": 4947456, "step": 2295 }, { "epoch": 0.37520391517128876, "grad_norm": 0.20826207101345062, "learning_rate": 0.00018752039151712888, "loss": 0.3083, "num_input_tokens_seen": 4957280, "step": 2300 }, { "epoch": 0.37601957585644374, "grad_norm": 1.6804625988006592, "learning_rate": 0.00018792822185970638, "loss": 0.2095, "num_input_tokens_seen": 4968608, "step": 2305 }, { "epoch": 0.3768352365415987, "grad_norm": 0.22404664754867554, "learning_rate": 0.00018833605220228386, "loss": 0.242, "num_input_tokens_seen": 4979872, "step": 2310 }, { "epoch": 0.37765089722675366, "grad_norm": 0.23048748075962067, "learning_rate": 0.00018874388254486133, "loss": 0.165, "num_input_tokens_seen": 4990688, "step": 2315 }, { "epoch": 0.37846655791190864, "grad_norm": 0.20122453570365906, "learning_rate": 0.0001891517128874388, "loss": 0.214, "num_input_tokens_seen": 5001184, "step": 2320 }, { "epoch": 0.3792822185970636, "grad_norm": 0.654986560344696, "learning_rate": 0.00018955954323001631, "loss": 0.1079, "num_input_tokens_seen": 5013440, "step": 2325 }, { "epoch": 0.3800978792822186, "grad_norm": 0.33664757013320923, "learning_rate": 0.00018996737357259382, "loss": 0.0953, "num_input_tokens_seen": 5023840, "step": 2330 }, { "epoch": 0.3809135399673736, "grad_norm": 2.271440029144287, "learning_rate": 0.0001903752039151713, "loss": 0.2607, "num_input_tokens_seen": 5035232, "step": 2335 }, { "epoch": 0.3817292006525285, "grad_norm": 0.39979860186576843, "learning_rate": 0.0001907830342577488, "loss": 0.3077, "num_input_tokens_seen": 5046848, "step": 2340 }, { "epoch": 0.3825448613376835, "grad_norm": 0.9261636137962341, "learning_rate": 0.00019119086460032627, "loss": 0.3214, "num_input_tokens_seen": 5058720, "step": 2345 }, { "epoch": 0.3833605220228385, "grad_norm": 0.6979706287384033, "learning_rate": 0.00019159869494290375, "loss": 0.118, "num_input_tokens_seen": 5069184, "step": 2350 }, { "epoch": 0.3841761827079935, "grad_norm": 0.2516748905181885, "learning_rate": 0.00019200652528548123, "loss": 0.1562, "num_input_tokens_seen": 5080000, "step": 2355 }, { "epoch": 0.38499184339314846, "grad_norm": 0.6635876297950745, "learning_rate": 0.00019241435562805873, "loss": 0.1031, "num_input_tokens_seen": 5090656, "step": 2360 }, { "epoch": 0.38580750407830344, "grad_norm": 0.6046097874641418, "learning_rate": 0.00019282218597063623, "loss": 0.0264, "num_input_tokens_seen": 5100864, "step": 2365 }, { "epoch": 0.3866231647634584, "grad_norm": 0.6527726650238037, "learning_rate": 0.0001932300163132137, "loss": 0.1176, "num_input_tokens_seen": 5112448, "step": 2370 }, { "epoch": 0.38743882544861336, "grad_norm": 1.493836760520935, "learning_rate": 0.00019363784665579121, "loss": 0.0975, "num_input_tokens_seen": 5123520, "step": 2375 }, { "epoch": 0.38825448613376834, "grad_norm": 0.2501254677772522, "learning_rate": 0.00019404567699836866, "loss": 0.3802, "num_input_tokens_seen": 5133504, "step": 2380 }, { "epoch": 0.3890701468189233, "grad_norm": 0.2545013427734375, "learning_rate": 0.00019445350734094617, "loss": 0.0508, "num_input_tokens_seen": 5144256, "step": 2385 }, { "epoch": 0.3898858075040783, "grad_norm": 1.2192929983139038, "learning_rate": 0.00019486133768352364, "loss": 0.2604, "num_input_tokens_seen": 5154688, "step": 2390 }, { "epoch": 0.3907014681892333, "grad_norm": 0.141750767827034, "learning_rate": 0.00019526916802610115, "loss": 0.0533, "num_input_tokens_seen": 5164384, "step": 2395 }, { "epoch": 0.3915171288743883, "grad_norm": 1.3745176792144775, "learning_rate": 0.00019567699836867865, "loss": 0.154, "num_input_tokens_seen": 5175392, "step": 2400 }, { "epoch": 0.3923327895595432, "grad_norm": 0.5329376459121704, "learning_rate": 0.00019608482871125613, "loss": 0.0784, "num_input_tokens_seen": 5185632, "step": 2405 }, { "epoch": 0.3931484502446982, "grad_norm": 0.038644712418317795, "learning_rate": 0.00019649265905383363, "loss": 0.0978, "num_input_tokens_seen": 5194560, "step": 2410 }, { "epoch": 0.3939641109298532, "grad_norm": 0.1533105969429016, "learning_rate": 0.00019690048939641108, "loss": 0.0367, "num_input_tokens_seen": 5204128, "step": 2415 }, { "epoch": 0.39477977161500816, "grad_norm": 0.07670613378286362, "learning_rate": 0.00019730831973898858, "loss": 0.0366, "num_input_tokens_seen": 5215520, "step": 2420 }, { "epoch": 0.39559543230016314, "grad_norm": 0.01313743181526661, "learning_rate": 0.00019771615008156606, "loss": 0.0208, "num_input_tokens_seen": 5226880, "step": 2425 }, { "epoch": 0.3964110929853181, "grad_norm": 1.977046012878418, "learning_rate": 0.00019812398042414356, "loss": 0.5258, "num_input_tokens_seen": 5237760, "step": 2430 }, { "epoch": 0.3972267536704731, "grad_norm": 0.18579933047294617, "learning_rate": 0.00019853181076672107, "loss": 0.2563, "num_input_tokens_seen": 5248960, "step": 2435 }, { "epoch": 0.39804241435562804, "grad_norm": 2.878202199935913, "learning_rate": 0.00019893964110929854, "loss": 0.298, "num_input_tokens_seen": 5260544, "step": 2440 }, { "epoch": 0.398858075040783, "grad_norm": 0.40211886167526245, "learning_rate": 0.00019934747145187602, "loss": 0.1867, "num_input_tokens_seen": 5270368, "step": 2445 }, { "epoch": 0.399673735725938, "grad_norm": 0.3385109603404999, "learning_rate": 0.0001997553017944535, "loss": 0.0819, "num_input_tokens_seen": 5279328, "step": 2450 }, { "epoch": 0.400489396411093, "grad_norm": 0.4556884169578552, "learning_rate": 0.000200163132137031, "loss": 0.1187, "num_input_tokens_seen": 5290688, "step": 2455 }, { "epoch": 0.401305057096248, "grad_norm": 0.7633600234985352, "learning_rate": 0.00020057096247960848, "loss": 0.2029, "num_input_tokens_seen": 5301760, "step": 2460 }, { "epoch": 0.40212071778140296, "grad_norm": 0.4559105336666107, "learning_rate": 0.00020097879282218598, "loss": 0.1144, "num_input_tokens_seen": 5313216, "step": 2465 }, { "epoch": 0.4029363784665579, "grad_norm": 1.1692734956741333, "learning_rate": 0.00020138662316476348, "loss": 0.2065, "num_input_tokens_seen": 5324160, "step": 2470 }, { "epoch": 0.40375203915171287, "grad_norm": 0.08323055505752563, "learning_rate": 0.00020179445350734096, "loss": 0.0174, "num_input_tokens_seen": 5336256, "step": 2475 }, { "epoch": 0.40456769983686786, "grad_norm": 0.11795398592948914, "learning_rate": 0.00020220228384991844, "loss": 0.1213, "num_input_tokens_seen": 5346464, "step": 2480 }, { "epoch": 0.40538336052202284, "grad_norm": 0.09392614662647247, "learning_rate": 0.0002026101141924959, "loss": 0.0611, "num_input_tokens_seen": 5357984, "step": 2485 }, { "epoch": 0.4061990212071778, "grad_norm": 1.4573779106140137, "learning_rate": 0.00020301794453507342, "loss": 0.1298, "num_input_tokens_seen": 5368032, "step": 2490 }, { "epoch": 0.4070146818923328, "grad_norm": 0.9301354885101318, "learning_rate": 0.0002034257748776509, "loss": 0.2651, "num_input_tokens_seen": 5378496, "step": 2495 }, { "epoch": 0.4078303425774878, "grad_norm": 0.060127660632133484, "learning_rate": 0.0002038336052202284, "loss": 0.1009, "num_input_tokens_seen": 5388000, "step": 2500 }, { "epoch": 0.4086460032626427, "grad_norm": 0.4317387640476227, "learning_rate": 0.0002042414355628059, "loss": 0.147, "num_input_tokens_seen": 5399456, "step": 2505 }, { "epoch": 0.4094616639477977, "grad_norm": 0.13027925789356232, "learning_rate": 0.00020464926590538335, "loss": 0.0456, "num_input_tokens_seen": 5410880, "step": 2510 }, { "epoch": 0.4102773246329527, "grad_norm": 3.7308425903320312, "learning_rate": 0.00020505709624796085, "loss": 0.4372, "num_input_tokens_seen": 5421824, "step": 2515 }, { "epoch": 0.4110929853181077, "grad_norm": 0.08798616379499435, "learning_rate": 0.00020546492659053833, "loss": 0.0874, "num_input_tokens_seen": 5432512, "step": 2520 }, { "epoch": 0.41190864600326266, "grad_norm": 0.22174157202243805, "learning_rate": 0.00020587275693311583, "loss": 0.0762, "num_input_tokens_seen": 5442240, "step": 2525 }, { "epoch": 0.41272430668841764, "grad_norm": 0.6614753007888794, "learning_rate": 0.0002062805872756933, "loss": 0.2842, "num_input_tokens_seen": 5454400, "step": 2530 }, { "epoch": 0.41353996737357257, "grad_norm": 2.1791892051696777, "learning_rate": 0.0002066884176182708, "loss": 0.2093, "num_input_tokens_seen": 5464000, "step": 2535 }, { "epoch": 0.41435562805872755, "grad_norm": 0.3171369135379791, "learning_rate": 0.00020709624796084832, "loss": 0.1258, "num_input_tokens_seen": 5474592, "step": 2540 }, { "epoch": 0.41517128874388254, "grad_norm": 1.5475813150405884, "learning_rate": 0.00020750407830342577, "loss": 0.1588, "num_input_tokens_seen": 5485728, "step": 2545 }, { "epoch": 0.4159869494290375, "grad_norm": 0.596198558807373, "learning_rate": 0.00020791190864600327, "loss": 0.1429, "num_input_tokens_seen": 5496544, "step": 2550 }, { "epoch": 0.4168026101141925, "grad_norm": 1.2663733959197998, "learning_rate": 0.00020831973898858075, "loss": 0.1585, "num_input_tokens_seen": 5506048, "step": 2555 }, { "epoch": 0.4176182707993475, "grad_norm": 2.4509189128875732, "learning_rate": 0.00020872756933115825, "loss": 0.1421, "num_input_tokens_seen": 5517312, "step": 2560 }, { "epoch": 0.4184339314845024, "grad_norm": 0.409859836101532, "learning_rate": 0.00020913539967373573, "loss": 0.1036, "num_input_tokens_seen": 5529024, "step": 2565 }, { "epoch": 0.4192495921696574, "grad_norm": 0.6036291122436523, "learning_rate": 0.00020954323001631323, "loss": 0.1874, "num_input_tokens_seen": 5540000, "step": 2570 }, { "epoch": 0.4200652528548124, "grad_norm": 0.5448421239852905, "learning_rate": 0.0002099510603588907, "loss": 0.2111, "num_input_tokens_seen": 5551968, "step": 2575 }, { "epoch": 0.42088091353996737, "grad_norm": 1.4477311372756958, "learning_rate": 0.00021035889070146818, "loss": 0.1716, "num_input_tokens_seen": 5562528, "step": 2580 }, { "epoch": 0.42169657422512236, "grad_norm": 0.35881543159484863, "learning_rate": 0.00021076672104404569, "loss": 0.1325, "num_input_tokens_seen": 5571136, "step": 2585 }, { "epoch": 0.42251223491027734, "grad_norm": 0.6012031435966492, "learning_rate": 0.00021117455138662316, "loss": 0.099, "num_input_tokens_seen": 5582496, "step": 2590 }, { "epoch": 0.4233278955954323, "grad_norm": 0.40353846549987793, "learning_rate": 0.00021158238172920067, "loss": 0.1689, "num_input_tokens_seen": 5592704, "step": 2595 }, { "epoch": 0.42414355628058725, "grad_norm": 0.15947870910167694, "learning_rate": 0.00021199021207177814, "loss": 0.1365, "num_input_tokens_seen": 5603680, "step": 2600 }, { "epoch": 0.42495921696574224, "grad_norm": 0.014439741149544716, "learning_rate": 0.00021239804241435562, "loss": 0.0842, "num_input_tokens_seen": 5613888, "step": 2605 }, { "epoch": 0.4257748776508972, "grad_norm": 1.1247490644454956, "learning_rate": 0.00021280587275693312, "loss": 0.2804, "num_input_tokens_seen": 5623872, "step": 2610 }, { "epoch": 0.4265905383360522, "grad_norm": 0.47334226965904236, "learning_rate": 0.0002132137030995106, "loss": 0.2241, "num_input_tokens_seen": 5632768, "step": 2615 }, { "epoch": 0.4274061990212072, "grad_norm": 0.20727881789207458, "learning_rate": 0.0002136215334420881, "loss": 0.0871, "num_input_tokens_seen": 5643456, "step": 2620 }, { "epoch": 0.4282218597063622, "grad_norm": 0.5498055815696716, "learning_rate": 0.00021402936378466558, "loss": 0.1776, "num_input_tokens_seen": 5655040, "step": 2625 }, { "epoch": 0.4290375203915171, "grad_norm": 0.9614197611808777, "learning_rate": 0.00021443719412724308, "loss": 0.2229, "num_input_tokens_seen": 5666592, "step": 2630 }, { "epoch": 0.4298531810766721, "grad_norm": 1.0824109315872192, "learning_rate": 0.00021484502446982056, "loss": 0.1308, "num_input_tokens_seen": 5677952, "step": 2635 }, { "epoch": 0.43066884176182707, "grad_norm": 0.2755020260810852, "learning_rate": 0.00021525285481239804, "loss": 0.1492, "num_input_tokens_seen": 5688736, "step": 2640 }, { "epoch": 0.43148450244698205, "grad_norm": 0.5831357836723328, "learning_rate": 0.00021566068515497554, "loss": 0.1088, "num_input_tokens_seen": 5698976, "step": 2645 }, { "epoch": 0.43230016313213704, "grad_norm": 1.5977224111557007, "learning_rate": 0.00021606851549755302, "loss": 0.1708, "num_input_tokens_seen": 5708960, "step": 2650 }, { "epoch": 0.433115823817292, "grad_norm": 0.6231358647346497, "learning_rate": 0.00021647634584013052, "loss": 0.0429, "num_input_tokens_seen": 5720672, "step": 2655 }, { "epoch": 0.433931484502447, "grad_norm": 0.7535364627838135, "learning_rate": 0.000216884176182708, "loss": 0.1002, "num_input_tokens_seen": 5731168, "step": 2660 }, { "epoch": 0.43474714518760194, "grad_norm": 1.0252430438995361, "learning_rate": 0.0002172920065252855, "loss": 0.2121, "num_input_tokens_seen": 5741536, "step": 2665 }, { "epoch": 0.4355628058727569, "grad_norm": 1.6408336162567139, "learning_rate": 0.00021769983686786295, "loss": 0.1324, "num_input_tokens_seen": 5750816, "step": 2670 }, { "epoch": 0.4363784665579119, "grad_norm": 0.70741868019104, "learning_rate": 0.00021810766721044045, "loss": 0.1161, "num_input_tokens_seen": 5762656, "step": 2675 }, { "epoch": 0.4371941272430669, "grad_norm": 0.9057618975639343, "learning_rate": 0.00021851549755301796, "loss": 0.0742, "num_input_tokens_seen": 5774464, "step": 2680 }, { "epoch": 0.43800978792822187, "grad_norm": 4.467531204223633, "learning_rate": 0.00021892332789559543, "loss": 0.2548, "num_input_tokens_seen": 5784288, "step": 2685 }, { "epoch": 0.43882544861337686, "grad_norm": 2.6299984455108643, "learning_rate": 0.00021933115823817294, "loss": 0.2562, "num_input_tokens_seen": 5795424, "step": 2690 }, { "epoch": 0.4396411092985318, "grad_norm": 1.830719232559204, "learning_rate": 0.0002197389885807504, "loss": 0.3032, "num_input_tokens_seen": 5806720, "step": 2695 }, { "epoch": 0.44045676998368677, "grad_norm": 0.06908020377159119, "learning_rate": 0.00022014681892332792, "loss": 0.0262, "num_input_tokens_seen": 5817664, "step": 2700 }, { "epoch": 0.44127243066884175, "grad_norm": 0.10535155981779099, "learning_rate": 0.00022055464926590536, "loss": 0.1952, "num_input_tokens_seen": 5825984, "step": 2705 }, { "epoch": 0.44208809135399674, "grad_norm": 0.7356063723564148, "learning_rate": 0.00022096247960848287, "loss": 0.2932, "num_input_tokens_seen": 5836000, "step": 2710 }, { "epoch": 0.4429037520391517, "grad_norm": 0.4010334610939026, "learning_rate": 0.00022137030995106037, "loss": 0.0671, "num_input_tokens_seen": 5847200, "step": 2715 }, { "epoch": 0.4437194127243067, "grad_norm": 0.04338255897164345, "learning_rate": 0.00022177814029363785, "loss": 0.0736, "num_input_tokens_seen": 5858592, "step": 2720 }, { "epoch": 0.4445350734094617, "grad_norm": 0.6875229477882385, "learning_rate": 0.00022218597063621535, "loss": 0.1125, "num_input_tokens_seen": 5869152, "step": 2725 }, { "epoch": 0.4453507340946166, "grad_norm": 1.8077421188354492, "learning_rate": 0.00022259380097879283, "loss": 0.1094, "num_input_tokens_seen": 5880640, "step": 2730 }, { "epoch": 0.4461663947797716, "grad_norm": 0.037293288856744766, "learning_rate": 0.0002230016313213703, "loss": 0.0334, "num_input_tokens_seen": 5891392, "step": 2735 }, { "epoch": 0.4469820554649266, "grad_norm": 0.5385783910751343, "learning_rate": 0.00022340946166394778, "loss": 0.1456, "num_input_tokens_seen": 5902112, "step": 2740 }, { "epoch": 0.44779771615008157, "grad_norm": 0.13887055218219757, "learning_rate": 0.00022381729200652529, "loss": 0.1901, "num_input_tokens_seen": 5912928, "step": 2745 }, { "epoch": 0.44861337683523655, "grad_norm": 0.7888278365135193, "learning_rate": 0.0002242251223491028, "loss": 0.1294, "num_input_tokens_seen": 5923680, "step": 2750 }, { "epoch": 0.44942903752039154, "grad_norm": 1.8163173198699951, "learning_rate": 0.00022463295269168027, "loss": 0.2117, "num_input_tokens_seen": 5934272, "step": 2755 }, { "epoch": 0.45024469820554647, "grad_norm": 0.6608736515045166, "learning_rate": 0.00022504078303425777, "loss": 0.1259, "num_input_tokens_seen": 5945632, "step": 2760 }, { "epoch": 0.45106035889070145, "grad_norm": 1.5118393898010254, "learning_rate": 0.00022544861337683525, "loss": 0.2826, "num_input_tokens_seen": 5957056, "step": 2765 }, { "epoch": 0.45187601957585644, "grad_norm": 1.0713101625442505, "learning_rate": 0.00022585644371941272, "loss": 0.1269, "num_input_tokens_seen": 5968576, "step": 2770 }, { "epoch": 0.4526916802610114, "grad_norm": 0.7051620483398438, "learning_rate": 0.0002262642740619902, "loss": 0.1739, "num_input_tokens_seen": 5978368, "step": 2775 }, { "epoch": 0.4535073409461664, "grad_norm": 1.0678281784057617, "learning_rate": 0.0002266721044045677, "loss": 0.1633, "num_input_tokens_seen": 5988416, "step": 2780 }, { "epoch": 0.4543230016313214, "grad_norm": 0.42176443338394165, "learning_rate": 0.0002270799347471452, "loss": 0.1514, "num_input_tokens_seen": 5999264, "step": 2785 }, { "epoch": 0.4551386623164764, "grad_norm": 1.083578109741211, "learning_rate": 0.00022748776508972268, "loss": 0.1853, "num_input_tokens_seen": 6010208, "step": 2790 }, { "epoch": 0.4559543230016313, "grad_norm": 0.6855237483978271, "learning_rate": 0.00022789559543230019, "loss": 0.126, "num_input_tokens_seen": 6020704, "step": 2795 }, { "epoch": 0.4567699836867863, "grad_norm": 0.16399504244327545, "learning_rate": 0.00022830342577487763, "loss": 0.1163, "num_input_tokens_seen": 6032640, "step": 2800 }, { "epoch": 0.45758564437194127, "grad_norm": 2.2313852310180664, "learning_rate": 0.00022871125611745514, "loss": 0.0934, "num_input_tokens_seen": 6043424, "step": 2805 }, { "epoch": 0.45840130505709625, "grad_norm": 0.15434958040714264, "learning_rate": 0.00022911908646003261, "loss": 0.0901, "num_input_tokens_seen": 6053280, "step": 2810 }, { "epoch": 0.45921696574225124, "grad_norm": 1.2341140508651733, "learning_rate": 0.00022952691680261012, "loss": 0.1589, "num_input_tokens_seen": 6064896, "step": 2815 }, { "epoch": 0.4600326264274062, "grad_norm": 0.04948023334145546, "learning_rate": 0.00022993474714518762, "loss": 0.1659, "num_input_tokens_seen": 6075360, "step": 2820 }, { "epoch": 0.46084828711256115, "grad_norm": 0.054622214287519455, "learning_rate": 0.0002303425774877651, "loss": 0.056, "num_input_tokens_seen": 6087776, "step": 2825 }, { "epoch": 0.46166394779771613, "grad_norm": 1.5951756238937378, "learning_rate": 0.0002307504078303426, "loss": 0.1424, "num_input_tokens_seen": 6097856, "step": 2830 }, { "epoch": 0.4624796084828711, "grad_norm": 0.7541900873184204, "learning_rate": 0.00023115823817292005, "loss": 0.2233, "num_input_tokens_seen": 6109248, "step": 2835 }, { "epoch": 0.4632952691680261, "grad_norm": 0.4579843580722809, "learning_rate": 0.00023156606851549755, "loss": 0.0721, "num_input_tokens_seen": 6120192, "step": 2840 }, { "epoch": 0.4641109298531811, "grad_norm": 0.6424131393432617, "learning_rate": 0.00023197389885807503, "loss": 0.3416, "num_input_tokens_seen": 6130624, "step": 2845 }, { "epoch": 0.46492659053833607, "grad_norm": 0.32970502972602844, "learning_rate": 0.00023238172920065253, "loss": 0.1382, "num_input_tokens_seen": 6141280, "step": 2850 }, { "epoch": 0.46574225122349105, "grad_norm": 1.3430696725845337, "learning_rate": 0.00023278955954323004, "loss": 0.1088, "num_input_tokens_seen": 6152864, "step": 2855 }, { "epoch": 0.466557911908646, "grad_norm": 0.1254258006811142, "learning_rate": 0.00023319738988580751, "loss": 0.2134, "num_input_tokens_seen": 6162592, "step": 2860 }, { "epoch": 0.46737357259380097, "grad_norm": 0.7237457036972046, "learning_rate": 0.000233605220228385, "loss": 0.1347, "num_input_tokens_seen": 6172512, "step": 2865 }, { "epoch": 0.46818923327895595, "grad_norm": 0.5483401417732239, "learning_rate": 0.00023401305057096247, "loss": 0.1743, "num_input_tokens_seen": 6183520, "step": 2870 }, { "epoch": 0.46900489396411094, "grad_norm": 0.2760385572910309, "learning_rate": 0.00023442088091353997, "loss": 0.1051, "num_input_tokens_seen": 6195040, "step": 2875 }, { "epoch": 0.4698205546492659, "grad_norm": 0.7289674282073975, "learning_rate": 0.00023482871125611747, "loss": 0.0834, "num_input_tokens_seen": 6205088, "step": 2880 }, { "epoch": 0.4706362153344209, "grad_norm": 0.03860371559858322, "learning_rate": 0.00023523654159869495, "loss": 0.069, "num_input_tokens_seen": 6216416, "step": 2885 }, { "epoch": 0.47145187601957583, "grad_norm": 0.6149327754974365, "learning_rate": 0.00023564437194127245, "loss": 0.1547, "num_input_tokens_seen": 6227584, "step": 2890 }, { "epoch": 0.4722675367047308, "grad_norm": 0.02546052262187004, "learning_rate": 0.00023605220228384993, "loss": 0.0881, "num_input_tokens_seen": 6238848, "step": 2895 }, { "epoch": 0.4730831973898858, "grad_norm": 0.7591150403022766, "learning_rate": 0.0002364600326264274, "loss": 0.1208, "num_input_tokens_seen": 6250016, "step": 2900 }, { "epoch": 0.4738988580750408, "grad_norm": 0.5803149342536926, "learning_rate": 0.00023686786296900488, "loss": 0.0669, "num_input_tokens_seen": 6261376, "step": 2905 }, { "epoch": 0.47471451876019577, "grad_norm": 0.5137251019477844, "learning_rate": 0.0002372756933115824, "loss": 0.1055, "num_input_tokens_seen": 6272864, "step": 2910 }, { "epoch": 0.47553017944535075, "grad_norm": 0.08403351157903671, "learning_rate": 0.0002376835236541599, "loss": 0.108, "num_input_tokens_seen": 6283840, "step": 2915 }, { "epoch": 0.4763458401305057, "grad_norm": 0.5208342671394348, "learning_rate": 0.00023809135399673737, "loss": 0.1864, "num_input_tokens_seen": 6295424, "step": 2920 }, { "epoch": 0.47716150081566067, "grad_norm": 0.6363890767097473, "learning_rate": 0.00023849918433931487, "loss": 0.2148, "num_input_tokens_seen": 6306784, "step": 2925 }, { "epoch": 0.47797716150081565, "grad_norm": 0.5094349980354309, "learning_rate": 0.00023890701468189232, "loss": 0.0901, "num_input_tokens_seen": 6318880, "step": 2930 }, { "epoch": 0.47879282218597063, "grad_norm": 0.27168846130371094, "learning_rate": 0.00023931484502446982, "loss": 0.0564, "num_input_tokens_seen": 6329344, "step": 2935 }, { "epoch": 0.4796084828711256, "grad_norm": 0.1775134950876236, "learning_rate": 0.0002397226753670473, "loss": 0.1518, "num_input_tokens_seen": 6340448, "step": 2940 }, { "epoch": 0.4804241435562806, "grad_norm": 0.4747180938720703, "learning_rate": 0.0002401305057096248, "loss": 0.1863, "num_input_tokens_seen": 6351648, "step": 2945 }, { "epoch": 0.4812398042414356, "grad_norm": 0.8763179779052734, "learning_rate": 0.0002405383360522023, "loss": 0.1855, "num_input_tokens_seen": 6362592, "step": 2950 }, { "epoch": 0.4820554649265905, "grad_norm": 1.1362411975860596, "learning_rate": 0.00024094616639477978, "loss": 0.1269, "num_input_tokens_seen": 6373056, "step": 2955 }, { "epoch": 0.4828711256117455, "grad_norm": 0.11866175383329391, "learning_rate": 0.00024135399673735726, "loss": 0.1259, "num_input_tokens_seen": 6383904, "step": 2960 }, { "epoch": 0.4836867862969005, "grad_norm": 0.6890610456466675, "learning_rate": 0.00024176182707993474, "loss": 0.2175, "num_input_tokens_seen": 6394400, "step": 2965 }, { "epoch": 0.48450244698205547, "grad_norm": 1.5552113056182861, "learning_rate": 0.00024216965742251224, "loss": 0.2393, "num_input_tokens_seen": 6405184, "step": 2970 }, { "epoch": 0.48531810766721045, "grad_norm": 0.4742637872695923, "learning_rate": 0.00024257748776508972, "loss": 0.1732, "num_input_tokens_seen": 6416224, "step": 2975 }, { "epoch": 0.48613376835236544, "grad_norm": 0.164584219455719, "learning_rate": 0.00024298531810766722, "loss": 0.0479, "num_input_tokens_seen": 6426944, "step": 2980 }, { "epoch": 0.48694942903752036, "grad_norm": 0.6174113750457764, "learning_rate": 0.00024339314845024472, "loss": 0.1402, "num_input_tokens_seen": 6438944, "step": 2985 }, { "epoch": 0.48776508972267535, "grad_norm": 1.3825546503067017, "learning_rate": 0.0002438009787928222, "loss": 0.2307, "num_input_tokens_seen": 6449632, "step": 2990 }, { "epoch": 0.48858075040783033, "grad_norm": 0.4968127906322479, "learning_rate": 0.0002442088091353997, "loss": 0.1938, "num_input_tokens_seen": 6460576, "step": 2995 }, { "epoch": 0.4893964110929853, "grad_norm": 0.18922187387943268, "learning_rate": 0.00024461663947797715, "loss": 0.0725, "num_input_tokens_seen": 6470336, "step": 3000 }, { "epoch": 0.4902120717781403, "grad_norm": 0.8117813467979431, "learning_rate": 0.00024502446982055463, "loss": 0.1319, "num_input_tokens_seen": 6481568, "step": 3005 }, { "epoch": 0.4910277324632953, "grad_norm": 1.5314102172851562, "learning_rate": 0.00024543230016313216, "loss": 0.1996, "num_input_tokens_seen": 6491616, "step": 3010 }, { "epoch": 0.49184339314845027, "grad_norm": 0.09085095673799515, "learning_rate": 0.00024584013050570964, "loss": 0.1072, "num_input_tokens_seen": 6502624, "step": 3015 }, { "epoch": 0.4926590538336052, "grad_norm": 3.5468132495880127, "learning_rate": 0.0002462479608482871, "loss": 0.2685, "num_input_tokens_seen": 6513824, "step": 3020 }, { "epoch": 0.4934747145187602, "grad_norm": 0.05333206057548523, "learning_rate": 0.0002466557911908646, "loss": 0.3263, "num_input_tokens_seen": 6524896, "step": 3025 }, { "epoch": 0.49429037520391517, "grad_norm": 0.6107070446014404, "learning_rate": 0.00024706362153344207, "loss": 0.0618, "num_input_tokens_seen": 6535968, "step": 3030 }, { "epoch": 0.49510603588907015, "grad_norm": 0.07345915585756302, "learning_rate": 0.0002474714518760196, "loss": 0.1213, "num_input_tokens_seen": 6545920, "step": 3035 }, { "epoch": 0.49592169657422513, "grad_norm": 0.08001483231782913, "learning_rate": 0.0002478792822185971, "loss": 0.2505, "num_input_tokens_seen": 6557056, "step": 3040 }, { "epoch": 0.4967373572593801, "grad_norm": 0.3009280562400818, "learning_rate": 0.00024828711256117455, "loss": 0.1734, "num_input_tokens_seen": 6568160, "step": 3045 }, { "epoch": 0.49755301794453505, "grad_norm": 0.2792985141277313, "learning_rate": 0.000248694942903752, "loss": 0.1591, "num_input_tokens_seen": 6579456, "step": 3050 }, { "epoch": 0.49836867862969003, "grad_norm": 1.3214781284332275, "learning_rate": 0.00024910277324632956, "loss": 0.1591, "num_input_tokens_seen": 6591776, "step": 3055 }, { "epoch": 0.499184339314845, "grad_norm": 1.0001438856124878, "learning_rate": 0.00024951060358890703, "loss": 0.0915, "num_input_tokens_seen": 6601792, "step": 3060 }, { "epoch": 0.5, "grad_norm": 1.075109839439392, "learning_rate": 0.0002499184339314845, "loss": 0.149, "num_input_tokens_seen": 6612416, "step": 3065 }, { "epoch": 0.5008156606851549, "grad_norm": 0.27178657054901123, "learning_rate": 0.00025032626427406204, "loss": 0.0751, "num_input_tokens_seen": 6623616, "step": 3070 }, { "epoch": 0.50163132137031, "grad_norm": 0.6926761269569397, "learning_rate": 0.00025073409461663946, "loss": 0.1677, "num_input_tokens_seen": 6633504, "step": 3075 }, { "epoch": 0.5024469820554649, "grad_norm": 0.36228397488594055, "learning_rate": 0.00025114192495921694, "loss": 0.2522, "num_input_tokens_seen": 6644736, "step": 3080 }, { "epoch": 0.5032626427406199, "grad_norm": 1.0155096054077148, "learning_rate": 0.00025154975530179447, "loss": 0.111, "num_input_tokens_seen": 6656000, "step": 3085 }, { "epoch": 0.5040783034257749, "grad_norm": 1.4222882986068726, "learning_rate": 0.00025195758564437195, "loss": 0.2714, "num_input_tokens_seen": 6666752, "step": 3090 }, { "epoch": 0.5048939641109299, "grad_norm": 1.6422041654586792, "learning_rate": 0.0002523654159869495, "loss": 0.1777, "num_input_tokens_seen": 6677760, "step": 3095 }, { "epoch": 0.5057096247960848, "grad_norm": 0.2527022063732147, "learning_rate": 0.0002527732463295269, "loss": 0.1413, "num_input_tokens_seen": 6688128, "step": 3100 }, { "epoch": 0.5065252854812398, "grad_norm": 0.222728431224823, "learning_rate": 0.0002531810766721044, "loss": 0.076, "num_input_tokens_seen": 6698272, "step": 3105 }, { "epoch": 0.5073409461663948, "grad_norm": 0.956292986869812, "learning_rate": 0.0002535889070146819, "loss": 0.2175, "num_input_tokens_seen": 6708736, "step": 3110 }, { "epoch": 0.5081566068515497, "grad_norm": 0.059738703072071075, "learning_rate": 0.0002539967373572594, "loss": 0.0127, "num_input_tokens_seen": 6719744, "step": 3115 }, { "epoch": 0.5089722675367048, "grad_norm": 0.4819056987762451, "learning_rate": 0.00025440456769983686, "loss": 0.0689, "num_input_tokens_seen": 6730592, "step": 3120 }, { "epoch": 0.5097879282218597, "grad_norm": 0.37790700793266296, "learning_rate": 0.00025481239804241434, "loss": 0.1239, "num_input_tokens_seen": 6740320, "step": 3125 }, { "epoch": 0.5106035889070146, "grad_norm": 0.6084521412849426, "learning_rate": 0.00025522022838499187, "loss": 0.0783, "num_input_tokens_seen": 6751168, "step": 3130 }, { "epoch": 0.5114192495921697, "grad_norm": 0.12591323256492615, "learning_rate": 0.00025562805872756934, "loss": 0.0489, "num_input_tokens_seen": 6761952, "step": 3135 }, { "epoch": 0.5122349102773246, "grad_norm": 0.24676240980625153, "learning_rate": 0.0002560358890701468, "loss": 0.0919, "num_input_tokens_seen": 6772544, "step": 3140 }, { "epoch": 0.5130505709624796, "grad_norm": 0.8799636363983154, "learning_rate": 0.0002564437194127243, "loss": 0.1022, "num_input_tokens_seen": 6783008, "step": 3145 }, { "epoch": 0.5138662316476346, "grad_norm": 0.18100491166114807, "learning_rate": 0.00025685154975530177, "loss": 0.2984, "num_input_tokens_seen": 6794368, "step": 3150 }, { "epoch": 0.5146818923327896, "grad_norm": 0.9455678462982178, "learning_rate": 0.0002572593800978793, "loss": 0.0761, "num_input_tokens_seen": 6805536, "step": 3155 }, { "epoch": 0.5154975530179445, "grad_norm": 0.9998900294303894, "learning_rate": 0.0002576672104404568, "loss": 0.0617, "num_input_tokens_seen": 6815456, "step": 3160 }, { "epoch": 0.5163132137030995, "grad_norm": 0.031957633793354034, "learning_rate": 0.0002580750407830343, "loss": 0.1914, "num_input_tokens_seen": 6826176, "step": 3165 }, { "epoch": 0.5171288743882545, "grad_norm": 1.3518096208572388, "learning_rate": 0.00025848287112561173, "loss": 0.0934, "num_input_tokens_seen": 6835264, "step": 3170 }, { "epoch": 0.5179445350734094, "grad_norm": 0.4357157349586487, "learning_rate": 0.0002588907014681892, "loss": 0.0796, "num_input_tokens_seen": 6845248, "step": 3175 }, { "epoch": 0.5187601957585645, "grad_norm": 0.04730428010225296, "learning_rate": 0.00025929853181076674, "loss": 0.1536, "num_input_tokens_seen": 6856992, "step": 3180 }, { "epoch": 0.5195758564437194, "grad_norm": 0.06421244144439697, "learning_rate": 0.0002597063621533442, "loss": 0.2632, "num_input_tokens_seen": 6866208, "step": 3185 }, { "epoch": 0.5203915171288744, "grad_norm": 0.7773410081863403, "learning_rate": 0.00026011419249592175, "loss": 0.2067, "num_input_tokens_seen": 6877152, "step": 3190 }, { "epoch": 0.5212071778140294, "grad_norm": 0.414943128824234, "learning_rate": 0.00026052202283849917, "loss": 0.1865, "num_input_tokens_seen": 6888128, "step": 3195 }, { "epoch": 0.5220228384991843, "grad_norm": 1.175167441368103, "learning_rate": 0.0002609298531810767, "loss": 0.2406, "num_input_tokens_seen": 6899648, "step": 3200 }, { "epoch": 0.5228384991843393, "grad_norm": 0.16294938325881958, "learning_rate": 0.0002613376835236542, "loss": 0.1442, "num_input_tokens_seen": 6910528, "step": 3205 }, { "epoch": 0.5236541598694943, "grad_norm": 0.9901951551437378, "learning_rate": 0.00026174551386623165, "loss": 0.0793, "num_input_tokens_seen": 6921312, "step": 3210 }, { "epoch": 0.5244698205546493, "grad_norm": 0.05271201580762863, "learning_rate": 0.00026215334420880913, "loss": 0.1157, "num_input_tokens_seen": 6932160, "step": 3215 }, { "epoch": 0.5252854812398042, "grad_norm": 0.4538674056529999, "learning_rate": 0.0002625611745513866, "loss": 0.1275, "num_input_tokens_seen": 6942016, "step": 3220 }, { "epoch": 0.5261011419249593, "grad_norm": 1.5808172225952148, "learning_rate": 0.00026296900489396414, "loss": 0.1901, "num_input_tokens_seen": 6952608, "step": 3225 }, { "epoch": 0.5269168026101142, "grad_norm": 0.2532864809036255, "learning_rate": 0.0002633768352365416, "loss": 0.0235, "num_input_tokens_seen": 6963840, "step": 3230 }, { "epoch": 0.5277324632952691, "grad_norm": 0.3280082643032074, "learning_rate": 0.0002637846655791191, "loss": 0.1728, "num_input_tokens_seen": 6973600, "step": 3235 }, { "epoch": 0.5285481239804242, "grad_norm": 0.21578635275363922, "learning_rate": 0.00026419249592169657, "loss": 0.0855, "num_input_tokens_seen": 6984416, "step": 3240 }, { "epoch": 0.5293637846655791, "grad_norm": 0.08244451135396957, "learning_rate": 0.00026460032626427404, "loss": 0.2195, "num_input_tokens_seen": 6994656, "step": 3245 }, { "epoch": 0.5301794453507341, "grad_norm": 0.5651997327804565, "learning_rate": 0.00026500815660685157, "loss": 0.1278, "num_input_tokens_seen": 7004928, "step": 3250 }, { "epoch": 0.5309951060358891, "grad_norm": 0.12888485193252563, "learning_rate": 0.00026541598694942905, "loss": 0.2201, "num_input_tokens_seen": 7015904, "step": 3255 }, { "epoch": 0.531810766721044, "grad_norm": 0.19950301945209503, "learning_rate": 0.0002658238172920066, "loss": 0.1536, "num_input_tokens_seen": 7026048, "step": 3260 }, { "epoch": 0.532626427406199, "grad_norm": 0.6350084543228149, "learning_rate": 0.000266231647634584, "loss": 0.2835, "num_input_tokens_seen": 7038048, "step": 3265 }, { "epoch": 0.533442088091354, "grad_norm": 0.3020959794521332, "learning_rate": 0.0002666394779771615, "loss": 0.1184, "num_input_tokens_seen": 7049344, "step": 3270 }, { "epoch": 0.534257748776509, "grad_norm": 0.20168974995613098, "learning_rate": 0.000267047308319739, "loss": 0.2246, "num_input_tokens_seen": 7060480, "step": 3275 }, { "epoch": 0.5350734094616639, "grad_norm": 0.2019917219877243, "learning_rate": 0.0002674551386623165, "loss": 0.0739, "num_input_tokens_seen": 7071616, "step": 3280 }, { "epoch": 0.535889070146819, "grad_norm": 0.09606939554214478, "learning_rate": 0.00026786296900489396, "loss": 0.1221, "num_input_tokens_seen": 7081280, "step": 3285 }, { "epoch": 0.5367047308319739, "grad_norm": 0.1847175508737564, "learning_rate": 0.00026827079934747144, "loss": 0.1922, "num_input_tokens_seen": 7092352, "step": 3290 }, { "epoch": 0.5375203915171288, "grad_norm": 0.31789740920066833, "learning_rate": 0.00026867862969004897, "loss": 0.236, "num_input_tokens_seen": 7103008, "step": 3295 }, { "epoch": 0.5383360522022839, "grad_norm": 0.9549635052680969, "learning_rate": 0.00026908646003262645, "loss": 0.3184, "num_input_tokens_seen": 7114112, "step": 3300 }, { "epoch": 0.5391517128874388, "grad_norm": 0.2503586709499359, "learning_rate": 0.0002694942903752039, "loss": 0.0859, "num_input_tokens_seen": 7123424, "step": 3305 }, { "epoch": 0.5399673735725938, "grad_norm": 0.39938780665397644, "learning_rate": 0.0002699021207177814, "loss": 0.1606, "num_input_tokens_seen": 7134432, "step": 3310 }, { "epoch": 0.5407830342577488, "grad_norm": 1.5056469440460205, "learning_rate": 0.0002703099510603589, "loss": 0.2477, "num_input_tokens_seen": 7145376, "step": 3315 }, { "epoch": 0.5415986949429038, "grad_norm": 1.525967001914978, "learning_rate": 0.0002707177814029364, "loss": 0.3393, "num_input_tokens_seen": 7155296, "step": 3320 }, { "epoch": 0.5424143556280587, "grad_norm": 1.1547868251800537, "learning_rate": 0.0002711256117455139, "loss": 0.1702, "num_input_tokens_seen": 7165568, "step": 3325 }, { "epoch": 0.5432300163132137, "grad_norm": 0.500525712966919, "learning_rate": 0.0002715334420880914, "loss": 0.194, "num_input_tokens_seen": 7176064, "step": 3330 }, { "epoch": 0.5440456769983687, "grad_norm": 0.4505944848060608, "learning_rate": 0.00027194127243066883, "loss": 0.1732, "num_input_tokens_seen": 7186688, "step": 3335 }, { "epoch": 0.5448613376835236, "grad_norm": 0.35665813088417053, "learning_rate": 0.0002723491027732463, "loss": 0.0789, "num_input_tokens_seen": 7198304, "step": 3340 }, { "epoch": 0.5456769983686787, "grad_norm": 1.4375234842300415, "learning_rate": 0.00027275693311582384, "loss": 0.1965, "num_input_tokens_seen": 7209792, "step": 3345 }, { "epoch": 0.5464926590538336, "grad_norm": 0.17616061866283417, "learning_rate": 0.0002731647634584013, "loss": 0.1926, "num_input_tokens_seen": 7220352, "step": 3350 }, { "epoch": 0.5473083197389886, "grad_norm": 0.6746667623519897, "learning_rate": 0.0002735725938009788, "loss": 0.1586, "num_input_tokens_seen": 7231264, "step": 3355 }, { "epoch": 0.5481239804241436, "grad_norm": 0.6101897954940796, "learning_rate": 0.00027398042414355627, "loss": 0.0688, "num_input_tokens_seen": 7242208, "step": 3360 }, { "epoch": 0.5489396411092985, "grad_norm": 1.0861992835998535, "learning_rate": 0.00027438825448613375, "loss": 0.191, "num_input_tokens_seen": 7254112, "step": 3365 }, { "epoch": 0.5497553017944535, "grad_norm": 0.8900039196014404, "learning_rate": 0.0002747960848287113, "loss": 0.2213, "num_input_tokens_seen": 7264000, "step": 3370 }, { "epoch": 0.5505709624796085, "grad_norm": 0.15940077602863312, "learning_rate": 0.00027520391517128875, "loss": 0.1057, "num_input_tokens_seen": 7275456, "step": 3375 }, { "epoch": 0.5513866231647635, "grad_norm": 0.38259458541870117, "learning_rate": 0.00027561174551386623, "loss": 0.1261, "num_input_tokens_seen": 7285824, "step": 3380 }, { "epoch": 0.5522022838499184, "grad_norm": 0.3563072383403778, "learning_rate": 0.0002760195758564437, "loss": 0.0997, "num_input_tokens_seen": 7297440, "step": 3385 }, { "epoch": 0.5530179445350734, "grad_norm": 0.05661514773964882, "learning_rate": 0.00027642740619902124, "loss": 0.0725, "num_input_tokens_seen": 7309504, "step": 3390 }, { "epoch": 0.5538336052202284, "grad_norm": 0.049832262098789215, "learning_rate": 0.0002768352365415987, "loss": 0.0136, "num_input_tokens_seen": 7319520, "step": 3395 }, { "epoch": 0.5546492659053833, "grad_norm": 1.6844738721847534, "learning_rate": 0.0002772430668841762, "loss": 0.3233, "num_input_tokens_seen": 7329792, "step": 3400 }, { "epoch": 0.5554649265905384, "grad_norm": 0.03910444304347038, "learning_rate": 0.00027765089722675367, "loss": 0.0232, "num_input_tokens_seen": 7340192, "step": 3405 }, { "epoch": 0.5562805872756933, "grad_norm": 1.7018063068389893, "learning_rate": 0.00027805872756933114, "loss": 0.2212, "num_input_tokens_seen": 7351520, "step": 3410 }, { "epoch": 0.5570962479608483, "grad_norm": 0.8899598717689514, "learning_rate": 0.0002784665579119087, "loss": 0.1034, "num_input_tokens_seen": 7362048, "step": 3415 }, { "epoch": 0.5579119086460033, "grad_norm": 0.10086727142333984, "learning_rate": 0.00027887438825448615, "loss": 0.082, "num_input_tokens_seen": 7372672, "step": 3420 }, { "epoch": 0.5587275693311582, "grad_norm": 0.13564975559711456, "learning_rate": 0.00027928221859706363, "loss": 0.1875, "num_input_tokens_seen": 7382784, "step": 3425 }, { "epoch": 0.5595432300163132, "grad_norm": 0.4356967508792877, "learning_rate": 0.0002796900489396411, "loss": 0.1575, "num_input_tokens_seen": 7394240, "step": 3430 }, { "epoch": 0.5603588907014682, "grad_norm": 0.5186691284179688, "learning_rate": 0.0002800978792822186, "loss": 0.2958, "num_input_tokens_seen": 7404128, "step": 3435 }, { "epoch": 0.5611745513866232, "grad_norm": 0.3353058099746704, "learning_rate": 0.0002805057096247961, "loss": 0.219, "num_input_tokens_seen": 7414976, "step": 3440 }, { "epoch": 0.5619902120717781, "grad_norm": 0.6912668943405151, "learning_rate": 0.0002809135399673736, "loss": 0.2148, "num_input_tokens_seen": 7425120, "step": 3445 }, { "epoch": 0.5628058727569332, "grad_norm": 0.5514436364173889, "learning_rate": 0.00028132137030995106, "loss": 0.202, "num_input_tokens_seen": 7436544, "step": 3450 }, { "epoch": 0.5636215334420881, "grad_norm": 0.5081999897956848, "learning_rate": 0.00028172920065252854, "loss": 0.1299, "num_input_tokens_seen": 7446400, "step": 3455 }, { "epoch": 0.564437194127243, "grad_norm": 0.3306224048137665, "learning_rate": 0.000282137030995106, "loss": 0.1607, "num_input_tokens_seen": 7457344, "step": 3460 }, { "epoch": 0.5652528548123981, "grad_norm": 0.30108895897865295, "learning_rate": 0.00028254486133768355, "loss": 0.1295, "num_input_tokens_seen": 7467648, "step": 3465 }, { "epoch": 0.566068515497553, "grad_norm": 0.2580389678478241, "learning_rate": 0.000282952691680261, "loss": 0.0642, "num_input_tokens_seen": 7478720, "step": 3470 }, { "epoch": 0.566884176182708, "grad_norm": 0.8788881897926331, "learning_rate": 0.0002833605220228385, "loss": 0.0877, "num_input_tokens_seen": 7489344, "step": 3475 }, { "epoch": 0.567699836867863, "grad_norm": 1.1964645385742188, "learning_rate": 0.000283768352365416, "loss": 0.1434, "num_input_tokens_seen": 7499488, "step": 3480 }, { "epoch": 0.5685154975530179, "grad_norm": 1.6125121116638184, "learning_rate": 0.0002841761827079935, "loss": 0.0637, "num_input_tokens_seen": 7508640, "step": 3485 }, { "epoch": 0.5693311582381729, "grad_norm": 1.5338795185089111, "learning_rate": 0.000284584013050571, "loss": 0.1689, "num_input_tokens_seen": 7520096, "step": 3490 }, { "epoch": 0.5701468189233279, "grad_norm": 0.34203585982322693, "learning_rate": 0.0002849918433931484, "loss": 0.1886, "num_input_tokens_seen": 7532032, "step": 3495 }, { "epoch": 0.5709624796084829, "grad_norm": 0.23759399354457855, "learning_rate": 0.00028539967373572594, "loss": 0.0808, "num_input_tokens_seen": 7543456, "step": 3500 }, { "epoch": 0.5717781402936378, "grad_norm": 0.2889970541000366, "learning_rate": 0.0002858075040783034, "loss": 0.208, "num_input_tokens_seen": 7554176, "step": 3505 }, { "epoch": 0.5725938009787929, "grad_norm": 0.33958619832992554, "learning_rate": 0.00028621533442088094, "loss": 0.1452, "num_input_tokens_seen": 7565600, "step": 3510 }, { "epoch": 0.5734094616639478, "grad_norm": 0.7189654111862183, "learning_rate": 0.0002866231647634584, "loss": 0.1832, "num_input_tokens_seen": 7575712, "step": 3515 }, { "epoch": 0.5742251223491027, "grad_norm": 0.6336202025413513, "learning_rate": 0.0002870309951060359, "loss": 0.1294, "num_input_tokens_seen": 7585888, "step": 3520 }, { "epoch": 0.5750407830342578, "grad_norm": 0.7952791452407837, "learning_rate": 0.0002874388254486134, "loss": 0.2646, "num_input_tokens_seen": 7595680, "step": 3525 }, { "epoch": 0.5758564437194127, "grad_norm": 0.9022399187088013, "learning_rate": 0.00028784665579119085, "loss": 0.2425, "num_input_tokens_seen": 7608448, "step": 3530 }, { "epoch": 0.5766721044045677, "grad_norm": 0.4907342791557312, "learning_rate": 0.0002882544861337684, "loss": 0.1082, "num_input_tokens_seen": 7618528, "step": 3535 }, { "epoch": 0.5774877650897227, "grad_norm": 0.24955563247203827, "learning_rate": 0.00028866231647634586, "loss": 0.0918, "num_input_tokens_seen": 7629088, "step": 3540 }, { "epoch": 0.5783034257748777, "grad_norm": 0.280049204826355, "learning_rate": 0.00028907014681892333, "loss": 0.1056, "num_input_tokens_seen": 7639584, "step": 3545 }, { "epoch": 0.5791190864600326, "grad_norm": 0.24541613459587097, "learning_rate": 0.0002894779771615008, "loss": 0.0918, "num_input_tokens_seen": 7650368, "step": 3550 }, { "epoch": 0.5799347471451876, "grad_norm": 2.348043203353882, "learning_rate": 0.00028988580750407834, "loss": 0.281, "num_input_tokens_seen": 7660960, "step": 3555 }, { "epoch": 0.5807504078303426, "grad_norm": 0.8432106375694275, "learning_rate": 0.0002902936378466558, "loss": 0.2382, "num_input_tokens_seen": 7672352, "step": 3560 }, { "epoch": 0.5815660685154975, "grad_norm": 0.265280157327652, "learning_rate": 0.00029070146818923324, "loss": 0.0838, "num_input_tokens_seen": 7684192, "step": 3565 }, { "epoch": 0.5823817292006526, "grad_norm": 0.45557701587677, "learning_rate": 0.00029110929853181077, "loss": 0.1517, "num_input_tokens_seen": 7694848, "step": 3570 }, { "epoch": 0.5831973898858075, "grad_norm": 0.47193533182144165, "learning_rate": 0.00029151712887438825, "loss": 0.1684, "num_input_tokens_seen": 7705888, "step": 3575 }, { "epoch": 0.5840130505709625, "grad_norm": 0.9776928424835205, "learning_rate": 0.0002919249592169658, "loss": 0.3031, "num_input_tokens_seen": 7716800, "step": 3580 }, { "epoch": 0.5848287112561175, "grad_norm": 0.9561527967453003, "learning_rate": 0.00029233278955954325, "loss": 0.2411, "num_input_tokens_seen": 7727168, "step": 3585 }, { "epoch": 0.5856443719412724, "grad_norm": 0.11132800579071045, "learning_rate": 0.0002927406199021207, "loss": 0.0866, "num_input_tokens_seen": 7738816, "step": 3590 }, { "epoch": 0.5864600326264274, "grad_norm": 0.8205469250679016, "learning_rate": 0.0002931484502446982, "loss": 0.1813, "num_input_tokens_seen": 7749440, "step": 3595 }, { "epoch": 0.5872756933115824, "grad_norm": 0.11846394091844559, "learning_rate": 0.0002935562805872757, "loss": 0.0762, "num_input_tokens_seen": 7759488, "step": 3600 }, { "epoch": 0.5880913539967374, "grad_norm": 0.6823602318763733, "learning_rate": 0.0002939641109298532, "loss": 0.155, "num_input_tokens_seen": 7770144, "step": 3605 }, { "epoch": 0.5889070146818923, "grad_norm": 1.1177737712860107, "learning_rate": 0.0002943719412724307, "loss": 0.2291, "num_input_tokens_seen": 7781024, "step": 3610 }, { "epoch": 0.5897226753670473, "grad_norm": 0.3329390585422516, "learning_rate": 0.00029477977161500817, "loss": 0.1907, "num_input_tokens_seen": 7792416, "step": 3615 }, { "epoch": 0.5905383360522023, "grad_norm": 0.16965115070343018, "learning_rate": 0.00029518760195758564, "loss": 0.3433, "num_input_tokens_seen": 7804064, "step": 3620 }, { "epoch": 0.5913539967373572, "grad_norm": 0.34064531326293945, "learning_rate": 0.0002955954323001631, "loss": 0.1509, "num_input_tokens_seen": 7815552, "step": 3625 }, { "epoch": 0.5921696574225123, "grad_norm": 0.23483170568943024, "learning_rate": 0.00029600326264274065, "loss": 0.2641, "num_input_tokens_seen": 7825728, "step": 3630 }, { "epoch": 0.5929853181076672, "grad_norm": 0.3151450753211975, "learning_rate": 0.00029641109298531807, "loss": 0.1879, "num_input_tokens_seen": 7835168, "step": 3635 }, { "epoch": 0.5938009787928222, "grad_norm": 0.4631659984588623, "learning_rate": 0.0002968189233278956, "loss": 0.192, "num_input_tokens_seen": 7845600, "step": 3640 }, { "epoch": 0.5946166394779772, "grad_norm": 0.27379775047302246, "learning_rate": 0.0002972267536704731, "loss": 0.1622, "num_input_tokens_seen": 7856864, "step": 3645 }, { "epoch": 0.5954323001631321, "grad_norm": 0.2607029378414154, "learning_rate": 0.0002976345840130506, "loss": 0.1943, "num_input_tokens_seen": 7868096, "step": 3650 }, { "epoch": 0.5962479608482871, "grad_norm": 0.18975073099136353, "learning_rate": 0.0002980424143556281, "loss": 0.2097, "num_input_tokens_seen": 7878976, "step": 3655 }, { "epoch": 0.5970636215334421, "grad_norm": 0.9271031022071838, "learning_rate": 0.0002984502446982055, "loss": 0.1909, "num_input_tokens_seen": 7890016, "step": 3660 }, { "epoch": 0.5978792822185971, "grad_norm": 0.1685420721769333, "learning_rate": 0.00029885807504078304, "loss": 0.1568, "num_input_tokens_seen": 7900576, "step": 3665 }, { "epoch": 0.598694942903752, "grad_norm": 0.11282738298177719, "learning_rate": 0.0002992659053833605, "loss": 0.0786, "num_input_tokens_seen": 7909984, "step": 3670 }, { "epoch": 0.5995106035889071, "grad_norm": 0.7605422139167786, "learning_rate": 0.00029967373572593805, "loss": 0.1558, "num_input_tokens_seen": 7920384, "step": 3675 }, { "epoch": 0.600326264274062, "grad_norm": 1.06393563747406, "learning_rate": 0.0003000815660685155, "loss": 0.1612, "num_input_tokens_seen": 7931904, "step": 3680 }, { "epoch": 0.6011419249592169, "grad_norm": 0.4068537652492523, "learning_rate": 0.000300489396411093, "loss": 0.1137, "num_input_tokens_seen": 7942528, "step": 3685 }, { "epoch": 0.601957585644372, "grad_norm": 0.2602817416191101, "learning_rate": 0.0003008972267536705, "loss": 0.0354, "num_input_tokens_seen": 7954208, "step": 3690 }, { "epoch": 0.6027732463295269, "grad_norm": 0.03028041124343872, "learning_rate": 0.00030130505709624795, "loss": 0.1495, "num_input_tokens_seen": 7966144, "step": 3695 }, { "epoch": 0.6035889070146819, "grad_norm": 0.1580314040184021, "learning_rate": 0.0003017128874388255, "loss": 0.0374, "num_input_tokens_seen": 7976768, "step": 3700 }, { "epoch": 0.6044045676998369, "grad_norm": 0.6732978224754333, "learning_rate": 0.0003021207177814029, "loss": 0.1501, "num_input_tokens_seen": 7986912, "step": 3705 }, { "epoch": 0.6052202283849919, "grad_norm": 0.019835198298096657, "learning_rate": 0.00030252854812398044, "loss": 0.0407, "num_input_tokens_seen": 7998304, "step": 3710 }, { "epoch": 0.6060358890701468, "grad_norm": 0.5346843004226685, "learning_rate": 0.0003029363784665579, "loss": 0.1234, "num_input_tokens_seen": 8009120, "step": 3715 }, { "epoch": 0.6068515497553018, "grad_norm": 1.5775010585784912, "learning_rate": 0.0003033442088091354, "loss": 0.2867, "num_input_tokens_seen": 8019040, "step": 3720 }, { "epoch": 0.6076672104404568, "grad_norm": 0.4077892005443573, "learning_rate": 0.0003037520391517129, "loss": 0.0487, "num_input_tokens_seen": 8028672, "step": 3725 }, { "epoch": 0.6084828711256117, "grad_norm": 0.47888579964637756, "learning_rate": 0.00030415986949429034, "loss": 0.0656, "num_input_tokens_seen": 8039616, "step": 3730 }, { "epoch": 0.6092985318107668, "grad_norm": 1.7638473510742188, "learning_rate": 0.00030456769983686787, "loss": 0.1721, "num_input_tokens_seen": 8050816, "step": 3735 }, { "epoch": 0.6101141924959217, "grad_norm": 0.4324726462364197, "learning_rate": 0.00030497553017944535, "loss": 0.3066, "num_input_tokens_seen": 8061760, "step": 3740 }, { "epoch": 0.6109298531810766, "grad_norm": 0.6523988246917725, "learning_rate": 0.0003053833605220229, "loss": 0.0546, "num_input_tokens_seen": 8072352, "step": 3745 }, { "epoch": 0.6117455138662317, "grad_norm": 0.08624584972858429, "learning_rate": 0.00030579119086460036, "loss": 0.1529, "num_input_tokens_seen": 8082688, "step": 3750 }, { "epoch": 0.6125611745513866, "grad_norm": 0.11276664584875107, "learning_rate": 0.0003061990212071778, "loss": 0.0846, "num_input_tokens_seen": 8093344, "step": 3755 }, { "epoch": 0.6133768352365416, "grad_norm": 1.6305509805679321, "learning_rate": 0.0003066068515497553, "loss": 0.0747, "num_input_tokens_seen": 8104480, "step": 3760 }, { "epoch": 0.6141924959216966, "grad_norm": 0.03908117488026619, "learning_rate": 0.0003070146818923328, "loss": 0.0809, "num_input_tokens_seen": 8115584, "step": 3765 }, { "epoch": 0.6150081566068516, "grad_norm": 1.7164299488067627, "learning_rate": 0.0003074225122349103, "loss": 0.1173, "num_input_tokens_seen": 8124896, "step": 3770 }, { "epoch": 0.6158238172920065, "grad_norm": 2.2437431812286377, "learning_rate": 0.00030783034257748774, "loss": 0.1011, "num_input_tokens_seen": 8135904, "step": 3775 }, { "epoch": 0.6166394779771615, "grad_norm": 2.1172220706939697, "learning_rate": 0.00030823817292006527, "loss": 0.2308, "num_input_tokens_seen": 8147104, "step": 3780 }, { "epoch": 0.6174551386623165, "grad_norm": 0.4117894470691681, "learning_rate": 0.00030864600326264275, "loss": 0.1095, "num_input_tokens_seen": 8157952, "step": 3785 }, { "epoch": 0.6182707993474714, "grad_norm": 0.20860959589481354, "learning_rate": 0.0003090538336052202, "loss": 0.0387, "num_input_tokens_seen": 8167872, "step": 3790 }, { "epoch": 0.6190864600326265, "grad_norm": 0.5499510765075684, "learning_rate": 0.00030946166394779775, "loss": 0.1577, "num_input_tokens_seen": 8179296, "step": 3795 }, { "epoch": 0.6199021207177814, "grad_norm": 2.767158031463623, "learning_rate": 0.0003098694942903752, "loss": 0.2245, "num_input_tokens_seen": 8189600, "step": 3800 }, { "epoch": 0.6207177814029364, "grad_norm": 2.7224180698394775, "learning_rate": 0.0003102773246329527, "loss": 0.1985, "num_input_tokens_seen": 8201152, "step": 3805 }, { "epoch": 0.6215334420880914, "grad_norm": 1.150516152381897, "learning_rate": 0.0003106851549755302, "loss": 0.0887, "num_input_tokens_seen": 8212416, "step": 3810 }, { "epoch": 0.6223491027732463, "grad_norm": 0.1676824390888214, "learning_rate": 0.00031109298531810766, "loss": 0.0362, "num_input_tokens_seen": 8222240, "step": 3815 }, { "epoch": 0.6231647634584013, "grad_norm": 0.25078436732292175, "learning_rate": 0.0003115008156606852, "loss": 0.0096, "num_input_tokens_seen": 8230720, "step": 3820 }, { "epoch": 0.6239804241435563, "grad_norm": 2.132411003112793, "learning_rate": 0.0003119086460032626, "loss": 0.136, "num_input_tokens_seen": 8241472, "step": 3825 }, { "epoch": 0.6247960848287113, "grad_norm": 0.023053472861647606, "learning_rate": 0.00031231647634584014, "loss": 0.0199, "num_input_tokens_seen": 8252512, "step": 3830 }, { "epoch": 0.6256117455138662, "grad_norm": 0.27660995721817017, "learning_rate": 0.0003127243066884176, "loss": 0.3942, "num_input_tokens_seen": 8263552, "step": 3835 }, { "epoch": 0.6264274061990212, "grad_norm": 1.2623909711837769, "learning_rate": 0.00031313213703099515, "loss": 0.0994, "num_input_tokens_seen": 8273600, "step": 3840 }, { "epoch": 0.6272430668841762, "grad_norm": 0.27877020835876465, "learning_rate": 0.0003135399673735726, "loss": 0.2788, "num_input_tokens_seen": 8285504, "step": 3845 }, { "epoch": 0.6280587275693311, "grad_norm": 0.39705127477645874, "learning_rate": 0.00031394779771615005, "loss": 0.1541, "num_input_tokens_seen": 8296160, "step": 3850 }, { "epoch": 0.6288743882544862, "grad_norm": 0.3460569381713867, "learning_rate": 0.0003143556280587276, "loss": 0.1403, "num_input_tokens_seen": 8307648, "step": 3855 }, { "epoch": 0.6296900489396411, "grad_norm": 0.09723559021949768, "learning_rate": 0.00031476345840130506, "loss": 0.152, "num_input_tokens_seen": 8316608, "step": 3860 }, { "epoch": 0.6305057096247961, "grad_norm": 0.6525206565856934, "learning_rate": 0.0003151712887438826, "loss": 0.1583, "num_input_tokens_seen": 8327904, "step": 3865 }, { "epoch": 0.6313213703099511, "grad_norm": 0.7041473984718323, "learning_rate": 0.00031557911908646, "loss": 0.1108, "num_input_tokens_seen": 8338368, "step": 3870 }, { "epoch": 0.632137030995106, "grad_norm": 0.19396144151687622, "learning_rate": 0.00031598694942903754, "loss": 0.1457, "num_input_tokens_seen": 8349056, "step": 3875 }, { "epoch": 0.632952691680261, "grad_norm": 0.3284074366092682, "learning_rate": 0.000316394779771615, "loss": 0.1117, "num_input_tokens_seen": 8358624, "step": 3880 }, { "epoch": 0.633768352365416, "grad_norm": 2.0825276374816895, "learning_rate": 0.0003168026101141925, "loss": 0.1612, "num_input_tokens_seen": 8367712, "step": 3885 }, { "epoch": 0.634584013050571, "grad_norm": 0.5668126344680786, "learning_rate": 0.00031721044045677, "loss": 0.2583, "num_input_tokens_seen": 8378176, "step": 3890 }, { "epoch": 0.6353996737357259, "grad_norm": 0.08867685496807098, "learning_rate": 0.00031761827079934744, "loss": 0.1216, "num_input_tokens_seen": 8389632, "step": 3895 }, { "epoch": 0.636215334420881, "grad_norm": 0.936852216720581, "learning_rate": 0.000318026101141925, "loss": 0.1999, "num_input_tokens_seen": 8401728, "step": 3900 }, { "epoch": 0.6370309951060359, "grad_norm": 0.20681942999362946, "learning_rate": 0.00031843393148450245, "loss": 0.1531, "num_input_tokens_seen": 8412896, "step": 3905 }, { "epoch": 0.6378466557911908, "grad_norm": 0.06956861168146133, "learning_rate": 0.00031884176182708, "loss": 0.1993, "num_input_tokens_seen": 8423968, "step": 3910 }, { "epoch": 0.6386623164763459, "grad_norm": 0.2920743525028229, "learning_rate": 0.00031924959216965746, "loss": 0.2535, "num_input_tokens_seen": 8435232, "step": 3915 }, { "epoch": 0.6394779771615008, "grad_norm": 0.27511319518089294, "learning_rate": 0.0003196574225122349, "loss": 0.1267, "num_input_tokens_seen": 8444000, "step": 3920 }, { "epoch": 0.6402936378466558, "grad_norm": 0.04231177642941475, "learning_rate": 0.0003200652528548124, "loss": 0.0841, "num_input_tokens_seen": 8455616, "step": 3925 }, { "epoch": 0.6411092985318108, "grad_norm": 0.6883819103240967, "learning_rate": 0.0003204730831973899, "loss": 0.2088, "num_input_tokens_seen": 8466048, "step": 3930 }, { "epoch": 0.6419249592169658, "grad_norm": 0.17316776514053345, "learning_rate": 0.0003208809135399674, "loss": 0.3211, "num_input_tokens_seen": 8477088, "step": 3935 }, { "epoch": 0.6427406199021207, "grad_norm": 0.17017319798469543, "learning_rate": 0.00032128874388254484, "loss": 0.132, "num_input_tokens_seen": 8488544, "step": 3940 }, { "epoch": 0.6435562805872757, "grad_norm": 0.28344351053237915, "learning_rate": 0.0003216965742251223, "loss": 0.1451, "num_input_tokens_seen": 8499328, "step": 3945 }, { "epoch": 0.6443719412724307, "grad_norm": 0.5187141299247742, "learning_rate": 0.00032210440456769985, "loss": 0.2614, "num_input_tokens_seen": 8509920, "step": 3950 }, { "epoch": 0.6451876019575856, "grad_norm": 0.1652027815580368, "learning_rate": 0.0003225122349102773, "loss": 0.1886, "num_input_tokens_seen": 8522336, "step": 3955 }, { "epoch": 0.6460032626427407, "grad_norm": 0.09093683212995529, "learning_rate": 0.00032292006525285486, "loss": 0.0962, "num_input_tokens_seen": 8531968, "step": 3960 }, { "epoch": 0.6468189233278956, "grad_norm": 0.14665372669696808, "learning_rate": 0.0003233278955954323, "loss": 0.111, "num_input_tokens_seen": 8542464, "step": 3965 }, { "epoch": 0.6476345840130505, "grad_norm": 0.39484143257141113, "learning_rate": 0.0003237357259380098, "loss": 0.0827, "num_input_tokens_seen": 8553632, "step": 3970 }, { "epoch": 0.6484502446982056, "grad_norm": 0.47278642654418945, "learning_rate": 0.0003241435562805873, "loss": 0.139, "num_input_tokens_seen": 8565600, "step": 3975 }, { "epoch": 0.6492659053833605, "grad_norm": 0.07566369324922562, "learning_rate": 0.00032455138662316476, "loss": 0.0466, "num_input_tokens_seen": 8574720, "step": 3980 }, { "epoch": 0.6500815660685155, "grad_norm": 0.19841063022613525, "learning_rate": 0.0003249592169657423, "loss": 0.0978, "num_input_tokens_seen": 8584672, "step": 3985 }, { "epoch": 0.6508972267536705, "grad_norm": 0.9803478717803955, "learning_rate": 0.0003253670473083197, "loss": 0.2408, "num_input_tokens_seen": 8594624, "step": 3990 }, { "epoch": 0.6517128874388255, "grad_norm": 0.8381606340408325, "learning_rate": 0.00032577487765089724, "loss": 0.0668, "num_input_tokens_seen": 8606048, "step": 3995 }, { "epoch": 0.6525285481239804, "grad_norm": 0.8982602953910828, "learning_rate": 0.0003261827079934747, "loss": 0.074, "num_input_tokens_seen": 8617568, "step": 4000 }, { "epoch": 0.6533442088091354, "grad_norm": 0.11641210317611694, "learning_rate": 0.00032659053833605225, "loss": 0.0749, "num_input_tokens_seen": 8630624, "step": 4005 }, { "epoch": 0.6541598694942904, "grad_norm": 0.5643419623374939, "learning_rate": 0.0003269983686786297, "loss": 0.215, "num_input_tokens_seen": 8641248, "step": 4010 }, { "epoch": 0.6549755301794453, "grad_norm": 0.24934491515159607, "learning_rate": 0.00032740619902120715, "loss": 0.1645, "num_input_tokens_seen": 8652160, "step": 4015 }, { "epoch": 0.6557911908646004, "grad_norm": 0.27979713678359985, "learning_rate": 0.0003278140293637847, "loss": 0.0633, "num_input_tokens_seen": 8663936, "step": 4020 }, { "epoch": 0.6566068515497553, "grad_norm": 0.14422214031219482, "learning_rate": 0.00032822185970636216, "loss": 0.0496, "num_input_tokens_seen": 8674304, "step": 4025 }, { "epoch": 0.6574225122349103, "grad_norm": 0.6436803340911865, "learning_rate": 0.0003286296900489397, "loss": 0.1935, "num_input_tokens_seen": 8684480, "step": 4030 }, { "epoch": 0.6582381729200653, "grad_norm": 0.19135278463363647, "learning_rate": 0.0003290375203915171, "loss": 0.2801, "num_input_tokens_seen": 8695072, "step": 4035 }, { "epoch": 0.6590538336052202, "grad_norm": 0.5646078586578369, "learning_rate": 0.00032944535073409464, "loss": 0.1045, "num_input_tokens_seen": 8704320, "step": 4040 }, { "epoch": 0.6598694942903752, "grad_norm": 0.9213577508926392, "learning_rate": 0.0003298531810766721, "loss": 0.2945, "num_input_tokens_seen": 8716288, "step": 4045 }, { "epoch": 0.6606851549755302, "grad_norm": 0.5820468068122864, "learning_rate": 0.0003302610114192496, "loss": 0.22, "num_input_tokens_seen": 8727008, "step": 4050 }, { "epoch": 0.6615008156606852, "grad_norm": 0.5371736288070679, "learning_rate": 0.0003306688417618271, "loss": 0.1604, "num_input_tokens_seen": 8736096, "step": 4055 }, { "epoch": 0.6623164763458401, "grad_norm": 0.7975150346755981, "learning_rate": 0.00033107667210440455, "loss": 0.2312, "num_input_tokens_seen": 8746240, "step": 4060 }, { "epoch": 0.6631321370309952, "grad_norm": 0.14706020057201385, "learning_rate": 0.0003314845024469821, "loss": 0.1022, "num_input_tokens_seen": 8756512, "step": 4065 }, { "epoch": 0.6639477977161501, "grad_norm": 0.4020451307296753, "learning_rate": 0.00033189233278955955, "loss": 0.1899, "num_input_tokens_seen": 8767072, "step": 4070 }, { "epoch": 0.664763458401305, "grad_norm": 0.46675869822502136, "learning_rate": 0.00033230016313213703, "loss": 0.2085, "num_input_tokens_seen": 8777600, "step": 4075 }, { "epoch": 0.6655791190864601, "grad_norm": 0.3698190748691559, "learning_rate": 0.0003327079934747145, "loss": 0.1027, "num_input_tokens_seen": 8786624, "step": 4080 }, { "epoch": 0.666394779771615, "grad_norm": 0.4964704215526581, "learning_rate": 0.000333115823817292, "loss": 0.0924, "num_input_tokens_seen": 8797344, "step": 4085 }, { "epoch": 0.66721044045677, "grad_norm": 0.34048619866371155, "learning_rate": 0.0003335236541598695, "loss": 0.0652, "num_input_tokens_seen": 8808288, "step": 4090 }, { "epoch": 0.668026101141925, "grad_norm": 0.07945513725280762, "learning_rate": 0.000333931484502447, "loss": 0.2428, "num_input_tokens_seen": 8818912, "step": 4095 }, { "epoch": 0.6688417618270799, "grad_norm": 0.3367163836956024, "learning_rate": 0.0003343393148450245, "loss": 0.0618, "num_input_tokens_seen": 8829920, "step": 4100 }, { "epoch": 0.6696574225122349, "grad_norm": 0.5875438451766968, "learning_rate": 0.00033474714518760194, "loss": 0.087, "num_input_tokens_seen": 8840672, "step": 4105 }, { "epoch": 0.6704730831973899, "grad_norm": 0.07312188297510147, "learning_rate": 0.0003351549755301794, "loss": 0.0397, "num_input_tokens_seen": 8852128, "step": 4110 }, { "epoch": 0.6712887438825449, "grad_norm": 0.27596715092658997, "learning_rate": 0.00033556280587275695, "loss": 0.0704, "num_input_tokens_seen": 8862816, "step": 4115 }, { "epoch": 0.6721044045676998, "grad_norm": 0.31641918420791626, "learning_rate": 0.0003359706362153344, "loss": 0.0788, "num_input_tokens_seen": 8874240, "step": 4120 }, { "epoch": 0.6729200652528549, "grad_norm": 1.4014451503753662, "learning_rate": 0.00033637846655791196, "loss": 0.1143, "num_input_tokens_seen": 8885344, "step": 4125 }, { "epoch": 0.6737357259380098, "grad_norm": 0.31792235374450684, "learning_rate": 0.0003367862969004894, "loss": 0.1263, "num_input_tokens_seen": 8896928, "step": 4130 }, { "epoch": 0.6745513866231647, "grad_norm": 0.8473581075668335, "learning_rate": 0.0003371941272430669, "loss": 0.1641, "num_input_tokens_seen": 8908000, "step": 4135 }, { "epoch": 0.6753670473083198, "grad_norm": 0.6628263592720032, "learning_rate": 0.0003376019575856444, "loss": 0.0487, "num_input_tokens_seen": 8920288, "step": 4140 }, { "epoch": 0.6761827079934747, "grad_norm": 0.07878489047288895, "learning_rate": 0.00033800978792822186, "loss": 0.067, "num_input_tokens_seen": 8930912, "step": 4145 }, { "epoch": 0.6769983686786297, "grad_norm": 0.31783169507980347, "learning_rate": 0.00033841761827079934, "loss": 0.0253, "num_input_tokens_seen": 8942112, "step": 4150 }, { "epoch": 0.6778140293637847, "grad_norm": 0.02353835664689541, "learning_rate": 0.0003388254486133768, "loss": 0.0554, "num_input_tokens_seen": 8953376, "step": 4155 }, { "epoch": 0.6786296900489397, "grad_norm": 0.026670947670936584, "learning_rate": 0.00033923327895595435, "loss": 0.0283, "num_input_tokens_seen": 8964608, "step": 4160 }, { "epoch": 0.6794453507340946, "grad_norm": 0.3699934184551239, "learning_rate": 0.0003396411092985318, "loss": 0.0756, "num_input_tokens_seen": 8976352, "step": 4165 }, { "epoch": 0.6802610114192496, "grad_norm": 0.9279840588569641, "learning_rate": 0.0003400489396411093, "loss": 0.1136, "num_input_tokens_seen": 8986688, "step": 4170 }, { "epoch": 0.6810766721044046, "grad_norm": 1.0043160915374756, "learning_rate": 0.0003404567699836868, "loss": 0.2385, "num_input_tokens_seen": 8997216, "step": 4175 }, { "epoch": 0.6818923327895595, "grad_norm": 0.34436988830566406, "learning_rate": 0.00034086460032626425, "loss": 0.2652, "num_input_tokens_seen": 9007104, "step": 4180 }, { "epoch": 0.6827079934747146, "grad_norm": 0.12794186174869537, "learning_rate": 0.0003412724306688418, "loss": 0.2116, "num_input_tokens_seen": 9018304, "step": 4185 }, { "epoch": 0.6835236541598695, "grad_norm": 0.3557484447956085, "learning_rate": 0.00034168026101141926, "loss": 0.1831, "num_input_tokens_seen": 9029152, "step": 4190 }, { "epoch": 0.6843393148450244, "grad_norm": 0.20856566727161407, "learning_rate": 0.0003420880913539968, "loss": 0.2004, "num_input_tokens_seen": 9039872, "step": 4195 }, { "epoch": 0.6851549755301795, "grad_norm": 0.43478646874427795, "learning_rate": 0.0003424959216965742, "loss": 0.1934, "num_input_tokens_seen": 9050272, "step": 4200 }, { "epoch": 0.6859706362153344, "grad_norm": 0.5003605484962463, "learning_rate": 0.0003429037520391517, "loss": 0.1099, "num_input_tokens_seen": 9059808, "step": 4205 }, { "epoch": 0.6867862969004894, "grad_norm": 0.18900039792060852, "learning_rate": 0.0003433115823817292, "loss": 0.0781, "num_input_tokens_seen": 9070592, "step": 4210 }, { "epoch": 0.6876019575856444, "grad_norm": 0.34280556440353394, "learning_rate": 0.0003437194127243067, "loss": 0.1849, "num_input_tokens_seen": 9080576, "step": 4215 }, { "epoch": 0.6884176182707994, "grad_norm": 0.6202793717384338, "learning_rate": 0.00034412724306688417, "loss": 0.0922, "num_input_tokens_seen": 9090848, "step": 4220 }, { "epoch": 0.6892332789559543, "grad_norm": 0.03071651980280876, "learning_rate": 0.00034453507340946165, "loss": 0.1457, "num_input_tokens_seen": 9101568, "step": 4225 }, { "epoch": 0.6900489396411092, "grad_norm": 0.3402135670185089, "learning_rate": 0.0003449429037520392, "loss": 0.1056, "num_input_tokens_seen": 9112800, "step": 4230 }, { "epoch": 0.6908646003262643, "grad_norm": 0.1439531296491623, "learning_rate": 0.00034535073409461666, "loss": 0.066, "num_input_tokens_seen": 9123008, "step": 4235 }, { "epoch": 0.6916802610114192, "grad_norm": 0.6293258666992188, "learning_rate": 0.00034575856443719413, "loss": 0.1575, "num_input_tokens_seen": 9134240, "step": 4240 }, { "epoch": 0.6924959216965743, "grad_norm": 0.06901185214519501, "learning_rate": 0.0003461663947797716, "loss": 0.065, "num_input_tokens_seen": 9143904, "step": 4245 }, { "epoch": 0.6933115823817292, "grad_norm": 0.6850996017456055, "learning_rate": 0.0003465742251223491, "loss": 0.2417, "num_input_tokens_seen": 9154656, "step": 4250 }, { "epoch": 0.6941272430668842, "grad_norm": 1.2691036462783813, "learning_rate": 0.0003469820554649266, "loss": 0.1672, "num_input_tokens_seen": 9164384, "step": 4255 }, { "epoch": 0.6949429037520392, "grad_norm": 0.16534829139709473, "learning_rate": 0.0003473898858075041, "loss": 0.0763, "num_input_tokens_seen": 9174528, "step": 4260 }, { "epoch": 0.6957585644371941, "grad_norm": 0.2578425109386444, "learning_rate": 0.0003477977161500816, "loss": 0.059, "num_input_tokens_seen": 9185600, "step": 4265 }, { "epoch": 0.6965742251223491, "grad_norm": 0.06344756484031677, "learning_rate": 0.00034820554649265905, "loss": 0.0882, "num_input_tokens_seen": 9194592, "step": 4270 }, { "epoch": 0.697389885807504, "grad_norm": 0.4837712049484253, "learning_rate": 0.0003486133768352365, "loss": 0.0979, "num_input_tokens_seen": 9205184, "step": 4275 }, { "epoch": 0.6982055464926591, "grad_norm": 0.1504904180765152, "learning_rate": 0.00034902120717781405, "loss": 0.0944, "num_input_tokens_seen": 9215072, "step": 4280 }, { "epoch": 0.699021207177814, "grad_norm": 0.21719999611377716, "learning_rate": 0.00034942903752039153, "loss": 0.2687, "num_input_tokens_seen": 9226432, "step": 4285 }, { "epoch": 0.6998368678629691, "grad_norm": 0.5605940222740173, "learning_rate": 0.000349836867862969, "loss": 0.0898, "num_input_tokens_seen": 9237216, "step": 4290 }, { "epoch": 0.700652528548124, "grad_norm": 0.19678156077861786, "learning_rate": 0.0003502446982055465, "loss": 0.1674, "num_input_tokens_seen": 9249248, "step": 4295 }, { "epoch": 0.7014681892332789, "grad_norm": 0.2982306480407715, "learning_rate": 0.00035065252854812396, "loss": 0.1337, "num_input_tokens_seen": 9258624, "step": 4300 }, { "epoch": 0.702283849918434, "grad_norm": 1.1554383039474487, "learning_rate": 0.0003510603588907015, "loss": 0.165, "num_input_tokens_seen": 9269216, "step": 4305 }, { "epoch": 0.7030995106035889, "grad_norm": 0.5933398604393005, "learning_rate": 0.00035146818923327897, "loss": 0.2717, "num_input_tokens_seen": 9280960, "step": 4310 }, { "epoch": 0.7039151712887439, "grad_norm": 0.35528403520584106, "learning_rate": 0.00035187601957585644, "loss": 0.1377, "num_input_tokens_seen": 9290464, "step": 4315 }, { "epoch": 0.7047308319738989, "grad_norm": 0.3967028856277466, "learning_rate": 0.0003522838499184339, "loss": 0.141, "num_input_tokens_seen": 9301664, "step": 4320 }, { "epoch": 0.7055464926590538, "grad_norm": 1.0400866270065308, "learning_rate": 0.00035269168026101145, "loss": 0.1999, "num_input_tokens_seen": 9313504, "step": 4325 }, { "epoch": 0.7063621533442088, "grad_norm": 0.10210098326206207, "learning_rate": 0.0003530995106035889, "loss": 0.1196, "num_input_tokens_seen": 9324832, "step": 4330 }, { "epoch": 0.7071778140293637, "grad_norm": 0.11643468588590622, "learning_rate": 0.0003535073409461664, "loss": 0.1524, "num_input_tokens_seen": 9336416, "step": 4335 }, { "epoch": 0.7079934747145188, "grad_norm": 0.11834976077079773, "learning_rate": 0.0003539151712887439, "loss": 0.1673, "num_input_tokens_seen": 9346976, "step": 4340 }, { "epoch": 0.7088091353996737, "grad_norm": 0.06556948274374008, "learning_rate": 0.00035432300163132136, "loss": 0.1179, "num_input_tokens_seen": 9356352, "step": 4345 }, { "epoch": 0.7096247960848288, "grad_norm": 0.0435061901807785, "learning_rate": 0.0003547308319738989, "loss": 0.1291, "num_input_tokens_seen": 9366688, "step": 4350 }, { "epoch": 0.7104404567699837, "grad_norm": 0.428297221660614, "learning_rate": 0.00035513866231647636, "loss": 0.3298, "num_input_tokens_seen": 9377856, "step": 4355 }, { "epoch": 0.7112561174551386, "grad_norm": 0.2171289026737213, "learning_rate": 0.0003555464926590539, "loss": 0.113, "num_input_tokens_seen": 9387584, "step": 4360 }, { "epoch": 0.7120717781402937, "grad_norm": 0.729459285736084, "learning_rate": 0.0003559543230016313, "loss": 0.1555, "num_input_tokens_seen": 9398624, "step": 4365 }, { "epoch": 0.7128874388254486, "grad_norm": 0.29332059621810913, "learning_rate": 0.0003563621533442088, "loss": 0.1106, "num_input_tokens_seen": 9410112, "step": 4370 }, { "epoch": 0.7137030995106036, "grad_norm": 0.17363320291042328, "learning_rate": 0.0003567699836867863, "loss": 0.141, "num_input_tokens_seen": 9420928, "step": 4375 }, { "epoch": 0.7145187601957586, "grad_norm": 0.10434304922819138, "learning_rate": 0.0003571778140293638, "loss": 0.0952, "num_input_tokens_seen": 9432032, "step": 4380 }, { "epoch": 0.7153344208809136, "grad_norm": 0.19470295310020447, "learning_rate": 0.0003575856443719413, "loss": 0.0531, "num_input_tokens_seen": 9442176, "step": 4385 }, { "epoch": 0.7161500815660685, "grad_norm": 1.0483896732330322, "learning_rate": 0.00035799347471451875, "loss": 0.1509, "num_input_tokens_seen": 9452512, "step": 4390 }, { "epoch": 0.7169657422512234, "grad_norm": 0.2265237271785736, "learning_rate": 0.0003584013050570963, "loss": 0.1036, "num_input_tokens_seen": 9462944, "step": 4395 }, { "epoch": 0.7177814029363785, "grad_norm": 0.1263553351163864, "learning_rate": 0.00035880913539967376, "loss": 0.2013, "num_input_tokens_seen": 9473184, "step": 4400 }, { "epoch": 0.7185970636215334, "grad_norm": 0.042648423463106155, "learning_rate": 0.00035921696574225124, "loss": 0.2927, "num_input_tokens_seen": 9485024, "step": 4405 }, { "epoch": 0.7194127243066885, "grad_norm": 0.2698555588722229, "learning_rate": 0.0003596247960848287, "loss": 0.1626, "num_input_tokens_seen": 9496960, "step": 4410 }, { "epoch": 0.7202283849918434, "grad_norm": 0.48428159952163696, "learning_rate": 0.0003600326264274062, "loss": 0.1434, "num_input_tokens_seen": 9508064, "step": 4415 }, { "epoch": 0.7210440456769984, "grad_norm": 0.0639631375670433, "learning_rate": 0.0003604404567699837, "loss": 0.0917, "num_input_tokens_seen": 9518976, "step": 4420 }, { "epoch": 0.7218597063621534, "grad_norm": 0.2182617336511612, "learning_rate": 0.0003608482871125612, "loss": 0.148, "num_input_tokens_seen": 9530496, "step": 4425 }, { "epoch": 0.7226753670473083, "grad_norm": 0.509828507900238, "learning_rate": 0.0003612561174551386, "loss": 0.167, "num_input_tokens_seen": 9540480, "step": 4430 }, { "epoch": 0.7234910277324633, "grad_norm": 0.1462268829345703, "learning_rate": 0.00036166394779771615, "loss": 0.0392, "num_input_tokens_seen": 9551520, "step": 4435 }, { "epoch": 0.7243066884176182, "grad_norm": 0.3573182225227356, "learning_rate": 0.0003620717781402936, "loss": 0.1142, "num_input_tokens_seen": 9562464, "step": 4440 }, { "epoch": 0.7251223491027733, "grad_norm": 0.25398215651512146, "learning_rate": 0.00036247960848287116, "loss": 0.1328, "num_input_tokens_seen": 9573248, "step": 4445 }, { "epoch": 0.7259380097879282, "grad_norm": 0.04449588432908058, "learning_rate": 0.00036288743882544863, "loss": 0.1725, "num_input_tokens_seen": 9586240, "step": 4450 }, { "epoch": 0.7267536704730831, "grad_norm": 0.2143605649471283, "learning_rate": 0.0003632952691680261, "loss": 0.1215, "num_input_tokens_seen": 9597824, "step": 4455 }, { "epoch": 0.7275693311582382, "grad_norm": 0.1213168352842331, "learning_rate": 0.0003637030995106036, "loss": 0.093, "num_input_tokens_seen": 9608832, "step": 4460 }, { "epoch": 0.7283849918433931, "grad_norm": 0.8928304314613342, "learning_rate": 0.00036411092985318106, "loss": 0.1558, "num_input_tokens_seen": 9620352, "step": 4465 }, { "epoch": 0.7292006525285482, "grad_norm": 0.3600549101829529, "learning_rate": 0.0003645187601957586, "loss": 0.1229, "num_input_tokens_seen": 9630912, "step": 4470 }, { "epoch": 0.7300163132137031, "grad_norm": 0.03388327360153198, "learning_rate": 0.00036492659053833607, "loss": 0.0749, "num_input_tokens_seen": 9641184, "step": 4475 }, { "epoch": 0.7308319738988581, "grad_norm": 0.7607952952384949, "learning_rate": 0.00036533442088091354, "loss": 0.0672, "num_input_tokens_seen": 9653664, "step": 4480 }, { "epoch": 0.731647634584013, "grad_norm": 0.17047080397605896, "learning_rate": 0.000365742251223491, "loss": 0.1597, "num_input_tokens_seen": 9665600, "step": 4485 }, { "epoch": 0.732463295269168, "grad_norm": 0.14180999994277954, "learning_rate": 0.00036615008156606855, "loss": 0.0348, "num_input_tokens_seen": 9676096, "step": 4490 }, { "epoch": 0.733278955954323, "grad_norm": 0.030971821397542953, "learning_rate": 0.00036655791190864603, "loss": 0.0678, "num_input_tokens_seen": 9687968, "step": 4495 }, { "epoch": 0.734094616639478, "grad_norm": 0.6471733450889587, "learning_rate": 0.0003669657422512235, "loss": 0.1814, "num_input_tokens_seen": 9699968, "step": 4500 }, { "epoch": 0.734910277324633, "grad_norm": 0.3938194811344147, "learning_rate": 0.000367373572593801, "loss": 0.0859, "num_input_tokens_seen": 9711136, "step": 4505 }, { "epoch": 0.7357259380097879, "grad_norm": 0.022863615304231644, "learning_rate": 0.00036778140293637846, "loss": 0.1161, "num_input_tokens_seen": 9722528, "step": 4510 }, { "epoch": 0.736541598694943, "grad_norm": 0.3047546148300171, "learning_rate": 0.000368189233278956, "loss": 0.0754, "num_input_tokens_seen": 9732736, "step": 4515 }, { "epoch": 0.7373572593800979, "grad_norm": 0.14214926958084106, "learning_rate": 0.00036859706362153346, "loss": 0.0707, "num_input_tokens_seen": 9742752, "step": 4520 }, { "epoch": 0.7381729200652528, "grad_norm": 0.8614373803138733, "learning_rate": 0.0003690048939641109, "loss": 0.3203, "num_input_tokens_seen": 9753536, "step": 4525 }, { "epoch": 0.7389885807504079, "grad_norm": 1.4844826459884644, "learning_rate": 0.0003694127243066884, "loss": 0.2591, "num_input_tokens_seen": 9764416, "step": 4530 }, { "epoch": 0.7398042414355628, "grad_norm": 0.08910467475652695, "learning_rate": 0.0003698205546492659, "loss": 0.0372, "num_input_tokens_seen": 9774784, "step": 4535 }, { "epoch": 0.7406199021207178, "grad_norm": 0.030194317921996117, "learning_rate": 0.0003702283849918434, "loss": 0.0418, "num_input_tokens_seen": 9785312, "step": 4540 }, { "epoch": 0.7414355628058727, "grad_norm": 0.3318450152873993, "learning_rate": 0.0003706362153344209, "loss": 0.1948, "num_input_tokens_seen": 9796832, "step": 4545 }, { "epoch": 0.7422512234910277, "grad_norm": 0.11203014105558395, "learning_rate": 0.0003710440456769984, "loss": 0.0186, "num_input_tokens_seen": 9808736, "step": 4550 }, { "epoch": 0.7430668841761827, "grad_norm": 0.11433467268943787, "learning_rate": 0.00037145187601957585, "loss": 0.235, "num_input_tokens_seen": 9820160, "step": 4555 }, { "epoch": 0.7438825448613376, "grad_norm": 0.4989280104637146, "learning_rate": 0.00037185970636215333, "loss": 0.1056, "num_input_tokens_seen": 9831488, "step": 4560 }, { "epoch": 0.7446982055464927, "grad_norm": 0.7271753549575806, "learning_rate": 0.00037226753670473086, "loss": 0.2397, "num_input_tokens_seen": 9841312, "step": 4565 }, { "epoch": 0.7455138662316476, "grad_norm": 0.2024511992931366, "learning_rate": 0.00037267536704730834, "loss": 0.092, "num_input_tokens_seen": 9853440, "step": 4570 }, { "epoch": 0.7463295269168027, "grad_norm": 0.8066221475601196, "learning_rate": 0.0003730831973898858, "loss": 0.2831, "num_input_tokens_seen": 9864896, "step": 4575 }, { "epoch": 0.7471451876019576, "grad_norm": 0.4931485950946808, "learning_rate": 0.0003734910277324633, "loss": 0.1695, "num_input_tokens_seen": 9875040, "step": 4580 }, { "epoch": 0.7479608482871125, "grad_norm": 0.24353817105293274, "learning_rate": 0.0003738988580750408, "loss": 0.1245, "num_input_tokens_seen": 9885344, "step": 4585 }, { "epoch": 0.7487765089722676, "grad_norm": 0.557340681552887, "learning_rate": 0.0003743066884176183, "loss": 0.0886, "num_input_tokens_seen": 9896448, "step": 4590 }, { "epoch": 0.7495921696574225, "grad_norm": 0.509061872959137, "learning_rate": 0.0003747145187601957, "loss": 0.2602, "num_input_tokens_seen": 9907424, "step": 4595 }, { "epoch": 0.7504078303425775, "grad_norm": 0.5898471474647522, "learning_rate": 0.00037512234910277325, "loss": 0.1599, "num_input_tokens_seen": 9917568, "step": 4600 }, { "epoch": 0.7512234910277324, "grad_norm": 1.632683515548706, "learning_rate": 0.00037553017944535073, "loss": 0.244, "num_input_tokens_seen": 9926880, "step": 4605 }, { "epoch": 0.7520391517128875, "grad_norm": 0.3811141550540924, "learning_rate": 0.00037593800978792826, "loss": 0.0901, "num_input_tokens_seen": 9938080, "step": 4610 }, { "epoch": 0.7528548123980424, "grad_norm": 0.29251471161842346, "learning_rate": 0.00037634584013050573, "loss": 0.1127, "num_input_tokens_seen": 9949536, "step": 4615 }, { "epoch": 0.7536704730831973, "grad_norm": 0.49268490076065063, "learning_rate": 0.0003767536704730832, "loss": 0.1044, "num_input_tokens_seen": 9959968, "step": 4620 }, { "epoch": 0.7544861337683524, "grad_norm": 0.12247805297374725, "learning_rate": 0.0003771615008156607, "loss": 0.0989, "num_input_tokens_seen": 9969760, "step": 4625 }, { "epoch": 0.7553017944535073, "grad_norm": 0.30606722831726074, "learning_rate": 0.00037756933115823816, "loss": 0.1447, "num_input_tokens_seen": 9980864, "step": 4630 }, { "epoch": 0.7561174551386624, "grad_norm": 0.6882331371307373, "learning_rate": 0.0003779771615008157, "loss": 0.388, "num_input_tokens_seen": 9991456, "step": 4635 }, { "epoch": 0.7569331158238173, "grad_norm": 0.08077465742826462, "learning_rate": 0.00037838499184339317, "loss": 0.0682, "num_input_tokens_seen": 10001952, "step": 4640 }, { "epoch": 0.7577487765089723, "grad_norm": 0.2678249180316925, "learning_rate": 0.00037879282218597065, "loss": 0.1091, "num_input_tokens_seen": 10012896, "step": 4645 }, { "epoch": 0.7585644371941273, "grad_norm": 0.4471552073955536, "learning_rate": 0.0003792006525285481, "loss": 0.0986, "num_input_tokens_seen": 10025088, "step": 4650 }, { "epoch": 0.7593800978792822, "grad_norm": 0.38659900426864624, "learning_rate": 0.0003796084828711256, "loss": 0.0647, "num_input_tokens_seen": 10036128, "step": 4655 }, { "epoch": 0.7601957585644372, "grad_norm": 0.1630130112171173, "learning_rate": 0.00038001631321370313, "loss": 0.2057, "num_input_tokens_seen": 10046144, "step": 4660 }, { "epoch": 0.7610114192495921, "grad_norm": 0.07900086790323257, "learning_rate": 0.00038042414355628055, "loss": 0.1557, "num_input_tokens_seen": 10057568, "step": 4665 }, { "epoch": 0.7618270799347472, "grad_norm": 0.523443877696991, "learning_rate": 0.0003808319738988581, "loss": 0.0934, "num_input_tokens_seen": 10067648, "step": 4670 }, { "epoch": 0.7626427406199021, "grad_norm": 0.1451176404953003, "learning_rate": 0.00038123980424143556, "loss": 0.3009, "num_input_tokens_seen": 10077312, "step": 4675 }, { "epoch": 0.763458401305057, "grad_norm": 0.1085764691233635, "learning_rate": 0.0003816476345840131, "loss": 0.068, "num_input_tokens_seen": 10088480, "step": 4680 }, { "epoch": 0.7642740619902121, "grad_norm": 0.12196294218301773, "learning_rate": 0.00038205546492659057, "loss": 0.1024, "num_input_tokens_seen": 10098560, "step": 4685 }, { "epoch": 0.765089722675367, "grad_norm": 0.34060388803482056, "learning_rate": 0.000382463295269168, "loss": 0.1151, "num_input_tokens_seen": 10110272, "step": 4690 }, { "epoch": 0.765905383360522, "grad_norm": 0.12083670496940613, "learning_rate": 0.0003828711256117455, "loss": 0.1769, "num_input_tokens_seen": 10121600, "step": 4695 }, { "epoch": 0.766721044045677, "grad_norm": 0.273940771818161, "learning_rate": 0.000383278955954323, "loss": 0.1379, "num_input_tokens_seen": 10132256, "step": 4700 }, { "epoch": 0.767536704730832, "grad_norm": 0.037194617092609406, "learning_rate": 0.00038368678629690053, "loss": 0.0719, "num_input_tokens_seen": 10143168, "step": 4705 }, { "epoch": 0.768352365415987, "grad_norm": 0.06494946032762527, "learning_rate": 0.000384094616639478, "loss": 0.1785, "num_input_tokens_seen": 10153152, "step": 4710 }, { "epoch": 0.7691680261011419, "grad_norm": 0.08680374920368195, "learning_rate": 0.0003845024469820555, "loss": 0.1235, "num_input_tokens_seen": 10164832, "step": 4715 }, { "epoch": 0.7699836867862969, "grad_norm": 0.409417986869812, "learning_rate": 0.00038491027732463296, "loss": 0.1305, "num_input_tokens_seen": 10174880, "step": 4720 }, { "epoch": 0.7707993474714518, "grad_norm": 0.5706987380981445, "learning_rate": 0.00038531810766721043, "loss": 0.1932, "num_input_tokens_seen": 10185568, "step": 4725 }, { "epoch": 0.7716150081566069, "grad_norm": 0.04017696529626846, "learning_rate": 0.00038572593800978796, "loss": 0.0444, "num_input_tokens_seen": 10196320, "step": 4730 }, { "epoch": 0.7724306688417618, "grad_norm": 0.05707043781876564, "learning_rate": 0.0003861337683523654, "loss": 0.0093, "num_input_tokens_seen": 10207136, "step": 4735 }, { "epoch": 0.7732463295269169, "grad_norm": 0.6065145134925842, "learning_rate": 0.0003865415986949429, "loss": 0.0963, "num_input_tokens_seen": 10218400, "step": 4740 }, { "epoch": 0.7740619902120718, "grad_norm": 1.244797945022583, "learning_rate": 0.0003869494290375204, "loss": 0.1794, "num_input_tokens_seen": 10228160, "step": 4745 }, { "epoch": 0.7748776508972267, "grad_norm": 0.11901545524597168, "learning_rate": 0.0003873572593800979, "loss": 0.0878, "num_input_tokens_seen": 10239264, "step": 4750 }, { "epoch": 0.7756933115823818, "grad_norm": 0.3620312511920929, "learning_rate": 0.0003877650897226754, "loss": 0.0841, "num_input_tokens_seen": 10250176, "step": 4755 }, { "epoch": 0.7765089722675367, "grad_norm": 0.03342365846037865, "learning_rate": 0.0003881729200652528, "loss": 0.0735, "num_input_tokens_seen": 10259712, "step": 4760 }, { "epoch": 0.7773246329526917, "grad_norm": 0.6217851042747498, "learning_rate": 0.00038858075040783035, "loss": 0.048, "num_input_tokens_seen": 10270240, "step": 4765 }, { "epoch": 0.7781402936378466, "grad_norm": 0.04555325582623482, "learning_rate": 0.00038898858075040783, "loss": 0.141, "num_input_tokens_seen": 10282240, "step": 4770 }, { "epoch": 0.7789559543230016, "grad_norm": 0.1516691893339157, "learning_rate": 0.00038939641109298536, "loss": 0.0693, "num_input_tokens_seen": 10293504, "step": 4775 }, { "epoch": 0.7797716150081566, "grad_norm": 0.8429942727088928, "learning_rate": 0.00038980424143556284, "loss": 0.1045, "num_input_tokens_seen": 10305504, "step": 4780 }, { "epoch": 0.7805872756933115, "grad_norm": 2.857786178588867, "learning_rate": 0.00039021207177814026, "loss": 0.1063, "num_input_tokens_seen": 10316352, "step": 4785 }, { "epoch": 0.7814029363784666, "grad_norm": 0.1270354837179184, "learning_rate": 0.0003906199021207178, "loss": 0.2191, "num_input_tokens_seen": 10327040, "step": 4790 }, { "epoch": 0.7822185970636215, "grad_norm": 0.11081287264823914, "learning_rate": 0.00039102773246329527, "loss": 0.1663, "num_input_tokens_seen": 10338624, "step": 4795 }, { "epoch": 0.7830342577487766, "grad_norm": 0.19142858684062958, "learning_rate": 0.0003914355628058728, "loss": 0.1574, "num_input_tokens_seen": 10350272, "step": 4800 }, { "epoch": 0.7838499184339315, "grad_norm": 0.3290252089500427, "learning_rate": 0.0003918433931484502, "loss": 0.137, "num_input_tokens_seen": 10360576, "step": 4805 }, { "epoch": 0.7846655791190864, "grad_norm": 0.2330428510904312, "learning_rate": 0.00039225122349102775, "loss": 0.1487, "num_input_tokens_seen": 10370208, "step": 4810 }, { "epoch": 0.7854812398042414, "grad_norm": 0.5706175565719604, "learning_rate": 0.0003926590538336052, "loss": 0.1058, "num_input_tokens_seen": 10380512, "step": 4815 }, { "epoch": 0.7862969004893964, "grad_norm": 0.5336688160896301, "learning_rate": 0.0003930668841761827, "loss": 0.0916, "num_input_tokens_seen": 10390976, "step": 4820 }, { "epoch": 0.7871125611745514, "grad_norm": 0.0918072760105133, "learning_rate": 0.00039347471451876023, "loss": 0.1384, "num_input_tokens_seen": 10401248, "step": 4825 }, { "epoch": 0.7879282218597063, "grad_norm": 0.1732764095067978, "learning_rate": 0.00039388254486133766, "loss": 0.0754, "num_input_tokens_seen": 10412736, "step": 4830 }, { "epoch": 0.7887438825448614, "grad_norm": 0.7121074795722961, "learning_rate": 0.0003942903752039152, "loss": 0.0747, "num_input_tokens_seen": 10423200, "step": 4835 }, { "epoch": 0.7895595432300163, "grad_norm": 0.2947513163089752, "learning_rate": 0.00039469820554649266, "loss": 0.1236, "num_input_tokens_seen": 10434048, "step": 4840 }, { "epoch": 0.7903752039151712, "grad_norm": 0.14916078746318817, "learning_rate": 0.0003951060358890702, "loss": 0.1979, "num_input_tokens_seen": 10445696, "step": 4845 }, { "epoch": 0.7911908646003263, "grad_norm": 0.021900305524468422, "learning_rate": 0.00039551386623164767, "loss": 0.0841, "num_input_tokens_seen": 10457536, "step": 4850 }, { "epoch": 0.7920065252854812, "grad_norm": 0.09651156514883041, "learning_rate": 0.0003959216965742251, "loss": 0.3185, "num_input_tokens_seen": 10468256, "step": 4855 }, { "epoch": 0.7928221859706363, "grad_norm": 0.6483337879180908, "learning_rate": 0.0003963295269168026, "loss": 0.1628, "num_input_tokens_seen": 10480384, "step": 4860 }, { "epoch": 0.7936378466557912, "grad_norm": 0.5290756225585938, "learning_rate": 0.0003967373572593801, "loss": 0.0692, "num_input_tokens_seen": 10491232, "step": 4865 }, { "epoch": 0.7944535073409462, "grad_norm": 0.17572399973869324, "learning_rate": 0.00039714518760195763, "loss": 0.092, "num_input_tokens_seen": 10500960, "step": 4870 }, { "epoch": 0.7952691680261011, "grad_norm": 0.4714900255203247, "learning_rate": 0.00039755301794453505, "loss": 0.2619, "num_input_tokens_seen": 10511744, "step": 4875 }, { "epoch": 0.7960848287112561, "grad_norm": 0.12875710427761078, "learning_rate": 0.00039796084828711253, "loss": 0.1029, "num_input_tokens_seen": 10523488, "step": 4880 }, { "epoch": 0.7969004893964111, "grad_norm": 0.8379729986190796, "learning_rate": 0.00039836867862969006, "loss": 0.3998, "num_input_tokens_seen": 10533152, "step": 4885 }, { "epoch": 0.797716150081566, "grad_norm": 0.11603305488824844, "learning_rate": 0.00039877650897226754, "loss": 0.0658, "num_input_tokens_seen": 10542976, "step": 4890 }, { "epoch": 0.7985318107667211, "grad_norm": 0.25434279441833496, "learning_rate": 0.00039918433931484507, "loss": 0.0931, "num_input_tokens_seen": 10553824, "step": 4895 }, { "epoch": 0.799347471451876, "grad_norm": 0.2415354698896408, "learning_rate": 0.0003995921696574225, "loss": 0.0945, "num_input_tokens_seen": 10564256, "step": 4900 }, { "epoch": 0.8001631321370309, "grad_norm": 0.10237784683704376, "learning_rate": 0.0004, "loss": 0.0592, "num_input_tokens_seen": 10575328, "step": 4905 }, { "epoch": 0.800978792822186, "grad_norm": 0.8274763226509094, "learning_rate": 0.0004004078303425775, "loss": 0.0692, "num_input_tokens_seen": 10585856, "step": 4910 }, { "epoch": 0.8017944535073409, "grad_norm": 0.034058745950460434, "learning_rate": 0.00040081566068515497, "loss": 0.1149, "num_input_tokens_seen": 10596384, "step": 4915 }, { "epoch": 0.802610114192496, "grad_norm": 0.2413230538368225, "learning_rate": 0.0004012234910277325, "loss": 0.1119, "num_input_tokens_seen": 10606656, "step": 4920 }, { "epoch": 0.8034257748776509, "grad_norm": 0.8900132775306702, "learning_rate": 0.0004016313213703099, "loss": 0.2942, "num_input_tokens_seen": 10616768, "step": 4925 }, { "epoch": 0.8042414355628059, "grad_norm": 0.43382081389427185, "learning_rate": 0.00040203915171288746, "loss": 0.0918, "num_input_tokens_seen": 10628672, "step": 4930 }, { "epoch": 0.8050570962479608, "grad_norm": 0.6863502264022827, "learning_rate": 0.00040244698205546493, "loss": 0.1551, "num_input_tokens_seen": 10639840, "step": 4935 }, { "epoch": 0.8058727569331158, "grad_norm": 0.6283614039421082, "learning_rate": 0.00040285481239804246, "loss": 0.1899, "num_input_tokens_seen": 10649664, "step": 4940 }, { "epoch": 0.8066884176182708, "grad_norm": 0.1262378990650177, "learning_rate": 0.0004032626427406199, "loss": 0.0863, "num_input_tokens_seen": 10660736, "step": 4945 }, { "epoch": 0.8075040783034257, "grad_norm": 1.1567460298538208, "learning_rate": 0.00040367047308319736, "loss": 0.1936, "num_input_tokens_seen": 10671200, "step": 4950 }, { "epoch": 0.8083197389885808, "grad_norm": 0.28196683526039124, "learning_rate": 0.0004040783034257749, "loss": 0.187, "num_input_tokens_seen": 10683360, "step": 4955 }, { "epoch": 0.8091353996737357, "grad_norm": 0.09794008731842041, "learning_rate": 0.00040448613376835237, "loss": 0.0692, "num_input_tokens_seen": 10694368, "step": 4960 }, { "epoch": 0.8099510603588908, "grad_norm": 0.23722635209560394, "learning_rate": 0.0004048939641109299, "loss": 0.1261, "num_input_tokens_seen": 10704800, "step": 4965 }, { "epoch": 0.8107667210440457, "grad_norm": 0.03923754394054413, "learning_rate": 0.0004053017944535073, "loss": 0.0668, "num_input_tokens_seen": 10715232, "step": 4970 }, { "epoch": 0.8115823817292006, "grad_norm": 0.18244393169879913, "learning_rate": 0.00040570962479608485, "loss": 0.1288, "num_input_tokens_seen": 10725504, "step": 4975 }, { "epoch": 0.8123980424143556, "grad_norm": 0.03919116407632828, "learning_rate": 0.00040611745513866233, "loss": 0.0194, "num_input_tokens_seen": 10735648, "step": 4980 }, { "epoch": 0.8132137030995106, "grad_norm": 0.6502984166145325, "learning_rate": 0.0004065252854812398, "loss": 0.1544, "num_input_tokens_seen": 10746848, "step": 4985 }, { "epoch": 0.8140293637846656, "grad_norm": 0.7698884010314941, "learning_rate": 0.00040693311582381734, "loss": 0.2579, "num_input_tokens_seen": 10758080, "step": 4990 }, { "epoch": 0.8148450244698205, "grad_norm": 0.09423910826444626, "learning_rate": 0.00040734094616639476, "loss": 0.1021, "num_input_tokens_seen": 10769120, "step": 4995 }, { "epoch": 0.8156606851549756, "grad_norm": 0.05368998646736145, "learning_rate": 0.0004077487765089723, "loss": 0.0539, "num_input_tokens_seen": 10779552, "step": 5000 }, { "epoch": 0.8164763458401305, "grad_norm": 0.05466476082801819, "learning_rate": 0.00040815660685154977, "loss": 0.0742, "num_input_tokens_seen": 10790048, "step": 5005 }, { "epoch": 0.8172920065252854, "grad_norm": 0.11027330160140991, "learning_rate": 0.00040856443719412724, "loss": 0.1864, "num_input_tokens_seen": 10801632, "step": 5010 }, { "epoch": 0.8181076672104405, "grad_norm": 0.2415141463279724, "learning_rate": 0.00040897226753670477, "loss": 0.0768, "num_input_tokens_seen": 10813216, "step": 5015 }, { "epoch": 0.8189233278955954, "grad_norm": 0.020572278648614883, "learning_rate": 0.0004093800978792822, "loss": 0.0702, "num_input_tokens_seen": 10824384, "step": 5020 }, { "epoch": 0.8197389885807504, "grad_norm": 0.046128783375024796, "learning_rate": 0.0004097879282218597, "loss": 0.0452, "num_input_tokens_seen": 10836000, "step": 5025 }, { "epoch": 0.8205546492659054, "grad_norm": 0.2570292055606842, "learning_rate": 0.0004101957585644372, "loss": 0.0974, "num_input_tokens_seen": 10846816, "step": 5030 }, { "epoch": 0.8213703099510603, "grad_norm": 0.1017511859536171, "learning_rate": 0.00041060358890701473, "loss": 0.1483, "num_input_tokens_seen": 10857664, "step": 5035 }, { "epoch": 0.8221859706362153, "grad_norm": 0.13589723408222198, "learning_rate": 0.00041101141924959215, "loss": 0.271, "num_input_tokens_seen": 10868576, "step": 5040 }, { "epoch": 0.8230016313213703, "grad_norm": 0.9280490875244141, "learning_rate": 0.00041141924959216963, "loss": 0.1531, "num_input_tokens_seen": 10880704, "step": 5045 }, { "epoch": 0.8238172920065253, "grad_norm": 0.8153361678123474, "learning_rate": 0.00041182707993474716, "loss": 0.1817, "num_input_tokens_seen": 10890560, "step": 5050 }, { "epoch": 0.8246329526916802, "grad_norm": 0.2815099358558655, "learning_rate": 0.00041223491027732464, "loss": 0.2101, "num_input_tokens_seen": 10902080, "step": 5055 }, { "epoch": 0.8254486133768353, "grad_norm": 0.18435829877853394, "learning_rate": 0.00041264274061990217, "loss": 0.1982, "num_input_tokens_seen": 10912608, "step": 5060 }, { "epoch": 0.8262642740619902, "grad_norm": 0.33697816729545593, "learning_rate": 0.0004130505709624796, "loss": 0.1255, "num_input_tokens_seen": 10923264, "step": 5065 }, { "epoch": 0.8270799347471451, "grad_norm": 0.47781243920326233, "learning_rate": 0.0004134584013050571, "loss": 0.0537, "num_input_tokens_seen": 10934144, "step": 5070 }, { "epoch": 0.8278955954323002, "grad_norm": 0.11506246775388718, "learning_rate": 0.0004138662316476346, "loss": 0.0537, "num_input_tokens_seen": 10946112, "step": 5075 }, { "epoch": 0.8287112561174551, "grad_norm": 0.6900161504745483, "learning_rate": 0.0004142740619902121, "loss": 0.1229, "num_input_tokens_seen": 10957184, "step": 5080 }, { "epoch": 0.8295269168026101, "grad_norm": 0.04726710915565491, "learning_rate": 0.0004146818923327896, "loss": 0.0505, "num_input_tokens_seen": 10967296, "step": 5085 }, { "epoch": 0.8303425774877651, "grad_norm": 0.40265870094299316, "learning_rate": 0.00041508972267536703, "loss": 0.1987, "num_input_tokens_seen": 10977376, "step": 5090 }, { "epoch": 0.8311582381729201, "grad_norm": 0.08369828760623932, "learning_rate": 0.00041549755301794456, "loss": 0.1032, "num_input_tokens_seen": 10988128, "step": 5095 }, { "epoch": 0.831973898858075, "grad_norm": 0.03819310665130615, "learning_rate": 0.00041590538336052203, "loss": 0.138, "num_input_tokens_seen": 10999872, "step": 5100 }, { "epoch": 0.83278955954323, "grad_norm": 0.6052684783935547, "learning_rate": 0.00041631321370309957, "loss": 0.0579, "num_input_tokens_seen": 11010432, "step": 5105 }, { "epoch": 0.833605220228385, "grad_norm": 1.024788498878479, "learning_rate": 0.000416721044045677, "loss": 0.177, "num_input_tokens_seen": 11020032, "step": 5110 }, { "epoch": 0.8344208809135399, "grad_norm": 0.3897826671600342, "learning_rate": 0.00041712887438825446, "loss": 0.0617, "num_input_tokens_seen": 11029248, "step": 5115 }, { "epoch": 0.835236541598695, "grad_norm": 0.08794666081666946, "learning_rate": 0.000417536704730832, "loss": 0.1728, "num_input_tokens_seen": 11040832, "step": 5120 }, { "epoch": 0.8360522022838499, "grad_norm": 0.13968785107135773, "learning_rate": 0.00041794453507340947, "loss": 0.1185, "num_input_tokens_seen": 11052416, "step": 5125 }, { "epoch": 0.8368678629690048, "grad_norm": 0.04266409948468208, "learning_rate": 0.000418352365415987, "loss": 0.0381, "num_input_tokens_seen": 11063008, "step": 5130 }, { "epoch": 0.8376835236541599, "grad_norm": 0.0758720114827156, "learning_rate": 0.0004187601957585644, "loss": 0.1289, "num_input_tokens_seen": 11072928, "step": 5135 }, { "epoch": 0.8384991843393148, "grad_norm": 0.3611088991165161, "learning_rate": 0.0004191680261011419, "loss": 0.3274, "num_input_tokens_seen": 11082208, "step": 5140 }, { "epoch": 0.8393148450244698, "grad_norm": 0.24977266788482666, "learning_rate": 0.00041957585644371943, "loss": 0.1149, "num_input_tokens_seen": 11093152, "step": 5145 }, { "epoch": 0.8401305057096248, "grad_norm": 0.29513368010520935, "learning_rate": 0.0004199836867862969, "loss": 0.0979, "num_input_tokens_seen": 11104192, "step": 5150 }, { "epoch": 0.8409461663947798, "grad_norm": 0.17832569777965546, "learning_rate": 0.00042039151712887444, "loss": 0.0975, "num_input_tokens_seen": 11115008, "step": 5155 }, { "epoch": 0.8417618270799347, "grad_norm": 0.08540595322847366, "learning_rate": 0.00042079934747145186, "loss": 0.2326, "num_input_tokens_seen": 11125440, "step": 5160 }, { "epoch": 0.8425774877650897, "grad_norm": 0.2732952833175659, "learning_rate": 0.0004212071778140294, "loss": 0.107, "num_input_tokens_seen": 11135776, "step": 5165 }, { "epoch": 0.8433931484502447, "grad_norm": 0.3477567434310913, "learning_rate": 0.00042161500815660687, "loss": 0.2004, "num_input_tokens_seen": 11147456, "step": 5170 }, { "epoch": 0.8442088091353996, "grad_norm": 0.08770038187503815, "learning_rate": 0.00042202283849918434, "loss": 0.109, "num_input_tokens_seen": 11157920, "step": 5175 }, { "epoch": 0.8450244698205547, "grad_norm": 0.401098370552063, "learning_rate": 0.0004224306688417618, "loss": 0.1176, "num_input_tokens_seen": 11169344, "step": 5180 }, { "epoch": 0.8458401305057096, "grad_norm": 0.031747378408908844, "learning_rate": 0.0004228384991843393, "loss": 0.0882, "num_input_tokens_seen": 11180000, "step": 5185 }, { "epoch": 0.8466557911908646, "grad_norm": 0.0354638434946537, "learning_rate": 0.00042324632952691683, "loss": 0.0337, "num_input_tokens_seen": 11189760, "step": 5190 }, { "epoch": 0.8474714518760196, "grad_norm": 0.18015307188034058, "learning_rate": 0.0004236541598694943, "loss": 0.0769, "num_input_tokens_seen": 11200736, "step": 5195 }, { "epoch": 0.8482871125611745, "grad_norm": 0.10148556530475616, "learning_rate": 0.00042406199021207183, "loss": 0.1326, "num_input_tokens_seen": 11211616, "step": 5200 }, { "epoch": 0.8491027732463295, "grad_norm": 0.43281859159469604, "learning_rate": 0.00042446982055464926, "loss": 0.1558, "num_input_tokens_seen": 11219840, "step": 5205 }, { "epoch": 0.8499184339314845, "grad_norm": 0.6457136273384094, "learning_rate": 0.00042487765089722673, "loss": 0.049, "num_input_tokens_seen": 11230432, "step": 5210 }, { "epoch": 0.8507340946166395, "grad_norm": 0.3206690549850464, "learning_rate": 0.00042528548123980426, "loss": 0.1233, "num_input_tokens_seen": 11240960, "step": 5215 }, { "epoch": 0.8515497553017944, "grad_norm": 0.228666752576828, "learning_rate": 0.00042569331158238174, "loss": 0.047, "num_input_tokens_seen": 11251872, "step": 5220 }, { "epoch": 0.8523654159869495, "grad_norm": 0.6908738017082214, "learning_rate": 0.00042610114192495927, "loss": 0.1377, "num_input_tokens_seen": 11263808, "step": 5225 }, { "epoch": 0.8531810766721044, "grad_norm": 0.0605216845870018, "learning_rate": 0.0004265089722675367, "loss": 0.0717, "num_input_tokens_seen": 11273632, "step": 5230 }, { "epoch": 0.8539967373572593, "grad_norm": 0.01348411850631237, "learning_rate": 0.00042691680261011417, "loss": 0.2641, "num_input_tokens_seen": 11284672, "step": 5235 }, { "epoch": 0.8548123980424144, "grad_norm": 0.5068708658218384, "learning_rate": 0.0004273246329526917, "loss": 0.155, "num_input_tokens_seen": 11296096, "step": 5240 }, { "epoch": 0.8556280587275693, "grad_norm": 0.0990958958864212, "learning_rate": 0.0004277324632952692, "loss": 0.1024, "num_input_tokens_seen": 11305248, "step": 5245 }, { "epoch": 0.8564437194127243, "grad_norm": 0.44223034381866455, "learning_rate": 0.00042814029363784665, "loss": 0.0936, "num_input_tokens_seen": 11315680, "step": 5250 }, { "epoch": 0.8572593800978793, "grad_norm": 0.09479458630084991, "learning_rate": 0.00042854812398042413, "loss": 0.0919, "num_input_tokens_seen": 11325440, "step": 5255 }, { "epoch": 0.8580750407830342, "grad_norm": 0.539115846157074, "learning_rate": 0.00042895595432300166, "loss": 0.0813, "num_input_tokens_seen": 11336096, "step": 5260 }, { "epoch": 0.8588907014681892, "grad_norm": 0.13355320692062378, "learning_rate": 0.00042936378466557914, "loss": 0.0352, "num_input_tokens_seen": 11346176, "step": 5265 }, { "epoch": 0.8597063621533442, "grad_norm": 0.4405490458011627, "learning_rate": 0.0004297716150081566, "loss": 0.1626, "num_input_tokens_seen": 11356448, "step": 5270 }, { "epoch": 0.8605220228384992, "grad_norm": 1.2932687997817993, "learning_rate": 0.0004301794453507341, "loss": 0.1587, "num_input_tokens_seen": 11367264, "step": 5275 }, { "epoch": 0.8613376835236541, "grad_norm": 0.10941825062036514, "learning_rate": 0.00043058727569331157, "loss": 0.3339, "num_input_tokens_seen": 11378400, "step": 5280 }, { "epoch": 0.8621533442088092, "grad_norm": 0.5706648826599121, "learning_rate": 0.0004309951060358891, "loss": 0.1442, "num_input_tokens_seen": 11389632, "step": 5285 }, { "epoch": 0.8629690048939641, "grad_norm": 0.33230066299438477, "learning_rate": 0.0004314029363784666, "loss": 0.1843, "num_input_tokens_seen": 11401088, "step": 5290 }, { "epoch": 0.863784665579119, "grad_norm": 0.3841840624809265, "learning_rate": 0.0004318107667210441, "loss": 0.3139, "num_input_tokens_seen": 11411712, "step": 5295 }, { "epoch": 0.8646003262642741, "grad_norm": 0.21775907278060913, "learning_rate": 0.0004322185970636215, "loss": 0.1966, "num_input_tokens_seen": 11421376, "step": 5300 }, { "epoch": 0.865415986949429, "grad_norm": 0.3515683710575104, "learning_rate": 0.000432626427406199, "loss": 0.1127, "num_input_tokens_seen": 11431904, "step": 5305 }, { "epoch": 0.866231647634584, "grad_norm": 0.25546693801879883, "learning_rate": 0.00043303425774877653, "loss": 0.1467, "num_input_tokens_seen": 11443936, "step": 5310 }, { "epoch": 0.867047308319739, "grad_norm": 0.23093919456005096, "learning_rate": 0.000433442088091354, "loss": 0.1263, "num_input_tokens_seen": 11454496, "step": 5315 }, { "epoch": 0.867862969004894, "grad_norm": 0.2206592559814453, "learning_rate": 0.0004338499184339315, "loss": 0.2493, "num_input_tokens_seen": 11464928, "step": 5320 }, { "epoch": 0.8686786296900489, "grad_norm": 0.13648030161857605, "learning_rate": 0.00043425774877650896, "loss": 0.1437, "num_input_tokens_seen": 11476064, "step": 5325 }, { "epoch": 0.8694942903752039, "grad_norm": 0.3498726487159729, "learning_rate": 0.0004346655791190865, "loss": 0.1652, "num_input_tokens_seen": 11487136, "step": 5330 }, { "epoch": 0.8703099510603589, "grad_norm": 0.04798632115125656, "learning_rate": 0.00043507340946166397, "loss": 0.1335, "num_input_tokens_seen": 11498464, "step": 5335 }, { "epoch": 0.8711256117455138, "grad_norm": 0.46741530299186707, "learning_rate": 0.00043548123980424145, "loss": 0.1519, "num_input_tokens_seen": 11510112, "step": 5340 }, { "epoch": 0.8719412724306689, "grad_norm": 0.2959222197532654, "learning_rate": 0.0004358890701468189, "loss": 0.1673, "num_input_tokens_seen": 11520160, "step": 5345 }, { "epoch": 0.8727569331158238, "grad_norm": 0.31529924273490906, "learning_rate": 0.0004362969004893964, "loss": 0.1056, "num_input_tokens_seen": 11530944, "step": 5350 }, { "epoch": 0.8735725938009788, "grad_norm": 0.26503002643585205, "learning_rate": 0.00043670473083197393, "loss": 0.0908, "num_input_tokens_seen": 11542144, "step": 5355 }, { "epoch": 0.8743882544861338, "grad_norm": 0.07838082313537598, "learning_rate": 0.0004371125611745514, "loss": 0.0377, "num_input_tokens_seen": 11553184, "step": 5360 }, { "epoch": 0.8752039151712887, "grad_norm": 0.4865676760673523, "learning_rate": 0.0004375203915171289, "loss": 0.2317, "num_input_tokens_seen": 11563520, "step": 5365 }, { "epoch": 0.8760195758564437, "grad_norm": 0.02210296504199505, "learning_rate": 0.00043792822185970636, "loss": 0.1313, "num_input_tokens_seen": 11575584, "step": 5370 }, { "epoch": 0.8768352365415987, "grad_norm": 0.2743991017341614, "learning_rate": 0.00043833605220228384, "loss": 0.0653, "num_input_tokens_seen": 11588352, "step": 5375 }, { "epoch": 0.8776508972267537, "grad_norm": 1.6387585401535034, "learning_rate": 0.00043874388254486137, "loss": 0.1479, "num_input_tokens_seen": 11599968, "step": 5380 }, { "epoch": 0.8784665579119086, "grad_norm": 0.07770185172557831, "learning_rate": 0.00043915171288743884, "loss": 0.0635, "num_input_tokens_seen": 11610592, "step": 5385 }, { "epoch": 0.8792822185970636, "grad_norm": 0.6279473304748535, "learning_rate": 0.0004395595432300163, "loss": 0.2526, "num_input_tokens_seen": 11620864, "step": 5390 }, { "epoch": 0.8800978792822186, "grad_norm": 0.2085501104593277, "learning_rate": 0.0004399673735725938, "loss": 0.1189, "num_input_tokens_seen": 11631808, "step": 5395 }, { "epoch": 0.8809135399673735, "grad_norm": 0.7576339244842529, "learning_rate": 0.00044037520391517127, "loss": 0.1091, "num_input_tokens_seen": 11642688, "step": 5400 }, { "epoch": 0.8817292006525286, "grad_norm": 0.13995878398418427, "learning_rate": 0.0004407830342577488, "loss": 0.1211, "num_input_tokens_seen": 11653760, "step": 5405 }, { "epoch": 0.8825448613376835, "grad_norm": 0.19204126298427582, "learning_rate": 0.0004411908646003263, "loss": 0.0644, "num_input_tokens_seen": 11663936, "step": 5410 }, { "epoch": 0.8833605220228385, "grad_norm": 0.09137711673974991, "learning_rate": 0.00044159869494290376, "loss": 0.279, "num_input_tokens_seen": 11674432, "step": 5415 }, { "epoch": 0.8841761827079935, "grad_norm": 0.5936753749847412, "learning_rate": 0.00044200652528548123, "loss": 0.2478, "num_input_tokens_seen": 11684480, "step": 5420 }, { "epoch": 0.8849918433931484, "grad_norm": 0.1626511961221695, "learning_rate": 0.00044241435562805876, "loss": 0.0841, "num_input_tokens_seen": 11694528, "step": 5425 }, { "epoch": 0.8858075040783034, "grad_norm": 0.15717485547065735, "learning_rate": 0.00044282218597063624, "loss": 0.1001, "num_input_tokens_seen": 11704608, "step": 5430 }, { "epoch": 0.8866231647634584, "grad_norm": 0.47965124249458313, "learning_rate": 0.0004432300163132137, "loss": 0.1468, "num_input_tokens_seen": 11715872, "step": 5435 }, { "epoch": 0.8874388254486134, "grad_norm": 1.353268027305603, "learning_rate": 0.0004436378466557912, "loss": 0.1849, "num_input_tokens_seen": 11727424, "step": 5440 }, { "epoch": 0.8882544861337683, "grad_norm": 0.14924632012844086, "learning_rate": 0.00044404567699836867, "loss": 0.1452, "num_input_tokens_seen": 11739104, "step": 5445 }, { "epoch": 0.8890701468189234, "grad_norm": 0.14718888700008392, "learning_rate": 0.0004444535073409462, "loss": 0.0375, "num_input_tokens_seen": 11749888, "step": 5450 }, { "epoch": 0.8898858075040783, "grad_norm": 0.3804473876953125, "learning_rate": 0.0004448613376835237, "loss": 0.171, "num_input_tokens_seen": 11760096, "step": 5455 }, { "epoch": 0.8907014681892332, "grad_norm": 0.05034136772155762, "learning_rate": 0.0004452691680261011, "loss": 0.122, "num_input_tokens_seen": 11770336, "step": 5460 }, { "epoch": 0.8915171288743883, "grad_norm": 0.37900790572166443, "learning_rate": 0.00044567699836867863, "loss": 0.1052, "num_input_tokens_seen": 11781536, "step": 5465 }, { "epoch": 0.8923327895595432, "grad_norm": 0.3298618197441101, "learning_rate": 0.0004460848287112561, "loss": 0.1307, "num_input_tokens_seen": 11791616, "step": 5470 }, { "epoch": 0.8931484502446982, "grad_norm": 0.5720271468162537, "learning_rate": 0.00044649265905383364, "loss": 0.1129, "num_input_tokens_seen": 11802912, "step": 5475 }, { "epoch": 0.8939641109298532, "grad_norm": 0.15244188904762268, "learning_rate": 0.0004469004893964111, "loss": 0.175, "num_input_tokens_seen": 11812864, "step": 5480 }, { "epoch": 0.8947797716150081, "grad_norm": 0.05999916419386864, "learning_rate": 0.0004473083197389886, "loss": 0.0761, "num_input_tokens_seen": 11824064, "step": 5485 }, { "epoch": 0.8955954323001631, "grad_norm": 0.06615840643644333, "learning_rate": 0.00044771615008156607, "loss": 0.0791, "num_input_tokens_seen": 11835680, "step": 5490 }, { "epoch": 0.8964110929853181, "grad_norm": 0.29329225420951843, "learning_rate": 0.00044812398042414354, "loss": 0.2228, "num_input_tokens_seen": 11846784, "step": 5495 }, { "epoch": 0.8972267536704731, "grad_norm": 0.6449689269065857, "learning_rate": 0.00044853181076672107, "loss": 0.1726, "num_input_tokens_seen": 11856608, "step": 5500 }, { "epoch": 0.898042414355628, "grad_norm": 0.2263220101594925, "learning_rate": 0.00044893964110929855, "loss": 0.0714, "num_input_tokens_seen": 11868384, "step": 5505 }, { "epoch": 0.8988580750407831, "grad_norm": 0.2446340024471283, "learning_rate": 0.000449347471451876, "loss": 0.1793, "num_input_tokens_seen": 11879744, "step": 5510 }, { "epoch": 0.899673735725938, "grad_norm": 1.1096633672714233, "learning_rate": 0.0004497553017944535, "loss": 0.3513, "num_input_tokens_seen": 11890368, "step": 5515 }, { "epoch": 0.9004893964110929, "grad_norm": 0.16337978839874268, "learning_rate": 0.00045016313213703103, "loss": 0.1517, "num_input_tokens_seen": 11902272, "step": 5520 }, { "epoch": 0.901305057096248, "grad_norm": 0.3567659258842468, "learning_rate": 0.0004505709624796085, "loss": 0.159, "num_input_tokens_seen": 11914208, "step": 5525 }, { "epoch": 0.9021207177814029, "grad_norm": 0.1499713659286499, "learning_rate": 0.00045097879282218593, "loss": 0.0737, "num_input_tokens_seen": 11923776, "step": 5530 }, { "epoch": 0.9029363784665579, "grad_norm": 0.14773689210414886, "learning_rate": 0.00045138662316476346, "loss": 0.0889, "num_input_tokens_seen": 11934848, "step": 5535 }, { "epoch": 0.9037520391517129, "grad_norm": 0.1151997298002243, "learning_rate": 0.00045179445350734094, "loss": 0.2026, "num_input_tokens_seen": 11945664, "step": 5540 }, { "epoch": 0.9045676998368679, "grad_norm": 0.22526738047599792, "learning_rate": 0.00045220228384991847, "loss": 0.1415, "num_input_tokens_seen": 11956832, "step": 5545 }, { "epoch": 0.9053833605220228, "grad_norm": 0.4921008348464966, "learning_rate": 0.00045261011419249595, "loss": 0.2493, "num_input_tokens_seen": 11967232, "step": 5550 }, { "epoch": 0.9061990212071778, "grad_norm": 0.06042308360338211, "learning_rate": 0.0004530179445350734, "loss": 0.1225, "num_input_tokens_seen": 11976896, "step": 5555 }, { "epoch": 0.9070146818923328, "grad_norm": 0.13890796899795532, "learning_rate": 0.0004534257748776509, "loss": 0.0834, "num_input_tokens_seen": 11988672, "step": 5560 }, { "epoch": 0.9078303425774877, "grad_norm": 0.604111909866333, "learning_rate": 0.0004538336052202284, "loss": 0.1898, "num_input_tokens_seen": 12001248, "step": 5565 }, { "epoch": 0.9086460032626428, "grad_norm": 0.06061052903532982, "learning_rate": 0.0004542414355628059, "loss": 0.213, "num_input_tokens_seen": 12012640, "step": 5570 }, { "epoch": 0.9094616639477977, "grad_norm": 0.11370633542537689, "learning_rate": 0.0004546492659053834, "loss": 0.1925, "num_input_tokens_seen": 12024416, "step": 5575 }, { "epoch": 0.9102773246329527, "grad_norm": 0.1720307618379593, "learning_rate": 0.00045505709624796086, "loss": 0.1332, "num_input_tokens_seen": 12034112, "step": 5580 }, { "epoch": 0.9110929853181077, "grad_norm": 0.06955350935459137, "learning_rate": 0.00045546492659053833, "loss": 0.0699, "num_input_tokens_seen": 12044864, "step": 5585 }, { "epoch": 0.9119086460032626, "grad_norm": 0.09173255413770676, "learning_rate": 0.0004558727569331158, "loss": 0.0888, "num_input_tokens_seen": 12056928, "step": 5590 }, { "epoch": 0.9127243066884176, "grad_norm": 0.5297556519508362, "learning_rate": 0.00045628058727569334, "loss": 0.1323, "num_input_tokens_seen": 12068352, "step": 5595 }, { "epoch": 0.9135399673735726, "grad_norm": 0.0541716031730175, "learning_rate": 0.00045668841761827076, "loss": 0.2512, "num_input_tokens_seen": 12077696, "step": 5600 }, { "epoch": 0.9143556280587276, "grad_norm": 0.046800531446933746, "learning_rate": 0.0004570962479608483, "loss": 0.0825, "num_input_tokens_seen": 12088320, "step": 5605 }, { "epoch": 0.9151712887438825, "grad_norm": 0.04836220294237137, "learning_rate": 0.00045750407830342577, "loss": 0.1146, "num_input_tokens_seen": 12099232, "step": 5610 }, { "epoch": 0.9159869494290375, "grad_norm": 0.4446544647216797, "learning_rate": 0.0004579119086460033, "loss": 0.1699, "num_input_tokens_seen": 12109632, "step": 5615 }, { "epoch": 0.9168026101141925, "grad_norm": 0.035620883107185364, "learning_rate": 0.0004583197389885808, "loss": 0.1234, "num_input_tokens_seen": 12121088, "step": 5620 }, { "epoch": 0.9176182707993474, "grad_norm": 0.15215860307216644, "learning_rate": 0.0004587275693311582, "loss": 0.0983, "num_input_tokens_seen": 12132352, "step": 5625 }, { "epoch": 0.9184339314845025, "grad_norm": 0.7195250988006592, "learning_rate": 0.00045913539967373573, "loss": 0.1707, "num_input_tokens_seen": 12143104, "step": 5630 }, { "epoch": 0.9192495921696574, "grad_norm": 0.21093983948230743, "learning_rate": 0.0004595432300163132, "loss": 0.047, "num_input_tokens_seen": 12154112, "step": 5635 }, { "epoch": 0.9200652528548124, "grad_norm": 0.129089817404747, "learning_rate": 0.00045995106035889074, "loss": 0.1243, "num_input_tokens_seen": 12163168, "step": 5640 }, { "epoch": 0.9208809135399674, "grad_norm": 0.17322193086147308, "learning_rate": 0.0004603588907014682, "loss": 0.1786, "num_input_tokens_seen": 12173696, "step": 5645 }, { "epoch": 0.9216965742251223, "grad_norm": 0.03901953995227814, "learning_rate": 0.0004607667210440457, "loss": 0.1085, "num_input_tokens_seen": 12185344, "step": 5650 }, { "epoch": 0.9225122349102773, "grad_norm": 0.83636075258255, "learning_rate": 0.00046117455138662317, "loss": 0.139, "num_input_tokens_seen": 12194816, "step": 5655 }, { "epoch": 0.9233278955954323, "grad_norm": 0.3192429840564728, "learning_rate": 0.00046158238172920064, "loss": 0.0533, "num_input_tokens_seen": 12206080, "step": 5660 }, { "epoch": 0.9241435562805873, "grad_norm": 0.02505807764828205, "learning_rate": 0.0004619902120717782, "loss": 0.1454, "num_input_tokens_seen": 12216096, "step": 5665 }, { "epoch": 0.9249592169657422, "grad_norm": 0.022757360711693764, "learning_rate": 0.0004623980424143556, "loss": 0.0651, "num_input_tokens_seen": 12227200, "step": 5670 }, { "epoch": 0.9257748776508973, "grad_norm": 0.07959464937448502, "learning_rate": 0.00046280587275693313, "loss": 0.17, "num_input_tokens_seen": 12237984, "step": 5675 }, { "epoch": 0.9265905383360522, "grad_norm": 0.5698276162147522, "learning_rate": 0.0004632137030995106, "loss": 0.0855, "num_input_tokens_seen": 12249632, "step": 5680 }, { "epoch": 0.9274061990212071, "grad_norm": 0.26433131098747253, "learning_rate": 0.00046362153344208813, "loss": 0.2577, "num_input_tokens_seen": 12262400, "step": 5685 }, { "epoch": 0.9282218597063622, "grad_norm": 0.04673366621136665, "learning_rate": 0.0004640293637846656, "loss": 0.0804, "num_input_tokens_seen": 12272928, "step": 5690 }, { "epoch": 0.9290375203915171, "grad_norm": 0.2298906147480011, "learning_rate": 0.00046443719412724303, "loss": 0.1526, "num_input_tokens_seen": 12283264, "step": 5695 }, { "epoch": 0.9298531810766721, "grad_norm": 0.1987387090921402, "learning_rate": 0.00046484502446982056, "loss": 0.0571, "num_input_tokens_seen": 12293440, "step": 5700 }, { "epoch": 0.9306688417618271, "grad_norm": 0.2803383469581604, "learning_rate": 0.00046525285481239804, "loss": 0.1459, "num_input_tokens_seen": 12304352, "step": 5705 }, { "epoch": 0.9314845024469821, "grad_norm": 0.08396600931882858, "learning_rate": 0.00046566068515497557, "loss": 0.1305, "num_input_tokens_seen": 12316416, "step": 5710 }, { "epoch": 0.932300163132137, "grad_norm": 0.29063013195991516, "learning_rate": 0.00046606851549755305, "loss": 0.0822, "num_input_tokens_seen": 12327424, "step": 5715 }, { "epoch": 0.933115823817292, "grad_norm": 0.0450286902487278, "learning_rate": 0.00046647634584013047, "loss": 0.0988, "num_input_tokens_seen": 12338144, "step": 5720 }, { "epoch": 0.933931484502447, "grad_norm": 0.40084439516067505, "learning_rate": 0.000466884176182708, "loss": 0.1092, "num_input_tokens_seen": 12349344, "step": 5725 }, { "epoch": 0.9347471451876019, "grad_norm": 0.09930945932865143, "learning_rate": 0.0004672920065252855, "loss": 0.1831, "num_input_tokens_seen": 12360256, "step": 5730 }, { "epoch": 0.935562805872757, "grad_norm": 1.05381178855896, "learning_rate": 0.000467699836867863, "loss": 0.1584, "num_input_tokens_seen": 12371328, "step": 5735 }, { "epoch": 0.9363784665579119, "grad_norm": 0.21676240861415863, "learning_rate": 0.0004681076672104405, "loss": 0.108, "num_input_tokens_seen": 12382176, "step": 5740 }, { "epoch": 0.9371941272430668, "grad_norm": 0.2925373315811157, "learning_rate": 0.00046851549755301796, "loss": 0.0776, "num_input_tokens_seen": 12393408, "step": 5745 }, { "epoch": 0.9380097879282219, "grad_norm": 0.17163704335689545, "learning_rate": 0.00046892332789559544, "loss": 0.1537, "num_input_tokens_seen": 12403680, "step": 5750 }, { "epoch": 0.9388254486133768, "grad_norm": 0.15320199728012085, "learning_rate": 0.0004693311582381729, "loss": 0.0628, "num_input_tokens_seen": 12415360, "step": 5755 }, { "epoch": 0.9396411092985318, "grad_norm": 0.06198349967598915, "learning_rate": 0.00046973898858075044, "loss": 0.1163, "num_input_tokens_seen": 12426176, "step": 5760 }, { "epoch": 0.9404567699836868, "grad_norm": 0.5587122440338135, "learning_rate": 0.00047014681892332787, "loss": 0.1656, "num_input_tokens_seen": 12438048, "step": 5765 }, { "epoch": 0.9412724306688418, "grad_norm": 0.05173993110656738, "learning_rate": 0.0004705546492659054, "loss": 0.0669, "num_input_tokens_seen": 12448352, "step": 5770 }, { "epoch": 0.9420880913539967, "grad_norm": 0.8225920796394348, "learning_rate": 0.0004709624796084829, "loss": 0.1327, "num_input_tokens_seen": 12460640, "step": 5775 }, { "epoch": 0.9429037520391517, "grad_norm": 0.852797269821167, "learning_rate": 0.0004713703099510604, "loss": 0.2069, "num_input_tokens_seen": 12471712, "step": 5780 }, { "epoch": 0.9437194127243067, "grad_norm": 0.019185606390237808, "learning_rate": 0.0004717781402936379, "loss": 0.0356, "num_input_tokens_seen": 12484032, "step": 5785 }, { "epoch": 0.9445350734094616, "grad_norm": 0.48908740282058716, "learning_rate": 0.0004721859706362153, "loss": 0.1098, "num_input_tokens_seen": 12493760, "step": 5790 }, { "epoch": 0.9453507340946167, "grad_norm": 0.6392738819122314, "learning_rate": 0.00047259380097879283, "loss": 0.0687, "num_input_tokens_seen": 12504736, "step": 5795 }, { "epoch": 0.9461663947797716, "grad_norm": 0.061548106372356415, "learning_rate": 0.0004730016313213703, "loss": 0.1452, "num_input_tokens_seen": 12515968, "step": 5800 }, { "epoch": 0.9469820554649266, "grad_norm": 0.04611099883913994, "learning_rate": 0.00047340946166394784, "loss": 0.027, "num_input_tokens_seen": 12528032, "step": 5805 }, { "epoch": 0.9477977161500816, "grad_norm": 0.4504237771034241, "learning_rate": 0.0004738172920065253, "loss": 0.2559, "num_input_tokens_seen": 12540640, "step": 5810 }, { "epoch": 0.9486133768352365, "grad_norm": 0.9349026679992676, "learning_rate": 0.00047422512234910274, "loss": 0.3538, "num_input_tokens_seen": 12551104, "step": 5815 }, { "epoch": 0.9494290375203915, "grad_norm": 0.41508322954177856, "learning_rate": 0.00047463295269168027, "loss": 0.1886, "num_input_tokens_seen": 12562592, "step": 5820 }, { "epoch": 0.9502446982055465, "grad_norm": 0.2731354534626007, "learning_rate": 0.00047504078303425775, "loss": 0.2279, "num_input_tokens_seen": 12574304, "step": 5825 }, { "epoch": 0.9510603588907015, "grad_norm": 0.4910064935684204, "learning_rate": 0.0004754486133768353, "loss": 0.1509, "num_input_tokens_seen": 12584448, "step": 5830 }, { "epoch": 0.9518760195758564, "grad_norm": 0.1669609099626541, "learning_rate": 0.0004758564437194127, "loss": 0.141, "num_input_tokens_seen": 12594880, "step": 5835 }, { "epoch": 0.9526916802610114, "grad_norm": 0.21898479759693146, "learning_rate": 0.00047626427406199023, "loss": 0.1649, "num_input_tokens_seen": 12605152, "step": 5840 }, { "epoch": 0.9535073409461664, "grad_norm": 0.3176557719707489, "learning_rate": 0.0004766721044045677, "loss": 0.1757, "num_input_tokens_seen": 12614368, "step": 5845 }, { "epoch": 0.9543230016313213, "grad_norm": 0.4478508234024048, "learning_rate": 0.0004770799347471452, "loss": 0.0906, "num_input_tokens_seen": 12624544, "step": 5850 }, { "epoch": 0.9551386623164764, "grad_norm": 0.18135344982147217, "learning_rate": 0.0004774877650897227, "loss": 0.1237, "num_input_tokens_seen": 12636448, "step": 5855 }, { "epoch": 0.9559543230016313, "grad_norm": 0.22620588541030884, "learning_rate": 0.00047789559543230014, "loss": 0.1327, "num_input_tokens_seen": 12647584, "step": 5860 }, { "epoch": 0.9567699836867863, "grad_norm": 0.6646113991737366, "learning_rate": 0.00047830342577487767, "loss": 0.0694, "num_input_tokens_seen": 12659392, "step": 5865 }, { "epoch": 0.9575856443719413, "grad_norm": 0.2929508090019226, "learning_rate": 0.00047871125611745514, "loss": 0.0659, "num_input_tokens_seen": 12669824, "step": 5870 }, { "epoch": 0.9584013050570962, "grad_norm": 0.42098620533943176, "learning_rate": 0.0004791190864600327, "loss": 0.075, "num_input_tokens_seen": 12681408, "step": 5875 }, { "epoch": 0.9592169657422512, "grad_norm": 0.4520011842250824, "learning_rate": 0.00047952691680261015, "loss": 0.1221, "num_input_tokens_seen": 12692608, "step": 5880 }, { "epoch": 0.9600326264274062, "grad_norm": 0.02840247005224228, "learning_rate": 0.00047993474714518757, "loss": 0.0343, "num_input_tokens_seen": 12703040, "step": 5885 }, { "epoch": 0.9608482871125612, "grad_norm": 0.46804285049438477, "learning_rate": 0.0004803425774877651, "loss": 0.1228, "num_input_tokens_seen": 12712000, "step": 5890 }, { "epoch": 0.9616639477977161, "grad_norm": 0.031320132315158844, "learning_rate": 0.0004807504078303426, "loss": 0.0274, "num_input_tokens_seen": 12723328, "step": 5895 }, { "epoch": 0.9624796084828712, "grad_norm": 0.510845422744751, "learning_rate": 0.0004811582381729201, "loss": 0.0823, "num_input_tokens_seen": 12734336, "step": 5900 }, { "epoch": 0.9632952691680261, "grad_norm": 0.1931709498167038, "learning_rate": 0.00048156606851549753, "loss": 0.0421, "num_input_tokens_seen": 12745440, "step": 5905 }, { "epoch": 0.964110929853181, "grad_norm": 0.3709653913974762, "learning_rate": 0.00048197389885807506, "loss": 0.1297, "num_input_tokens_seen": 12756128, "step": 5910 }, { "epoch": 0.9649265905383361, "grad_norm": 0.023738659918308258, "learning_rate": 0.00048238172920065254, "loss": 0.0662, "num_input_tokens_seen": 12767456, "step": 5915 }, { "epoch": 0.965742251223491, "grad_norm": 0.6917053461074829, "learning_rate": 0.00048278955954323, "loss": 0.0703, "num_input_tokens_seen": 12778688, "step": 5920 }, { "epoch": 0.966557911908646, "grad_norm": 0.05164829269051552, "learning_rate": 0.00048319738988580755, "loss": 0.0063, "num_input_tokens_seen": 12788800, "step": 5925 }, { "epoch": 0.967373572593801, "grad_norm": 0.053878605365753174, "learning_rate": 0.00048360522022838497, "loss": 0.3838, "num_input_tokens_seen": 12800832, "step": 5930 }, { "epoch": 0.968189233278956, "grad_norm": 0.10178588330745697, "learning_rate": 0.0004840130505709625, "loss": 0.1772, "num_input_tokens_seen": 12811744, "step": 5935 }, { "epoch": 0.9690048939641109, "grad_norm": 0.08851680904626846, "learning_rate": 0.00048442088091354, "loss": 0.0671, "num_input_tokens_seen": 12823744, "step": 5940 }, { "epoch": 0.9698205546492659, "grad_norm": 0.16539376974105835, "learning_rate": 0.00048482871125611745, "loss": 0.1384, "num_input_tokens_seen": 12833920, "step": 5945 }, { "epoch": 0.9706362153344209, "grad_norm": 0.12968455255031586, "learning_rate": 0.000485236541598695, "loss": 0.0846, "num_input_tokens_seen": 12844320, "step": 5950 }, { "epoch": 0.9714518760195758, "grad_norm": 0.062151502817869186, "learning_rate": 0.0004856443719412724, "loss": 0.0722, "num_input_tokens_seen": 12854400, "step": 5955 }, { "epoch": 0.9722675367047309, "grad_norm": 0.06598660349845886, "learning_rate": 0.00048605220228384994, "loss": 0.0929, "num_input_tokens_seen": 12864800, "step": 5960 }, { "epoch": 0.9730831973898858, "grad_norm": 0.26300644874572754, "learning_rate": 0.0004864600326264274, "loss": 0.1232, "num_input_tokens_seen": 12874624, "step": 5965 }, { "epoch": 0.9738988580750407, "grad_norm": 0.17284080386161804, "learning_rate": 0.00048686786296900494, "loss": 0.0719, "num_input_tokens_seen": 12886016, "step": 5970 }, { "epoch": 0.9747145187601958, "grad_norm": 0.21855513751506805, "learning_rate": 0.00048727569331158237, "loss": 0.3294, "num_input_tokens_seen": 12897088, "step": 5975 }, { "epoch": 0.9755301794453507, "grad_norm": 0.4430790841579437, "learning_rate": 0.00048768352365415984, "loss": 0.0588, "num_input_tokens_seen": 12908064, "step": 5980 }, { "epoch": 0.9763458401305057, "grad_norm": 0.7205919027328491, "learning_rate": 0.00048809135399673737, "loss": 0.3515, "num_input_tokens_seen": 12918304, "step": 5985 }, { "epoch": 0.9771615008156607, "grad_norm": 0.0197359137237072, "learning_rate": 0.0004884991843393148, "loss": 0.0677, "num_input_tokens_seen": 12928576, "step": 5990 }, { "epoch": 0.9779771615008157, "grad_norm": 0.051976609975099564, "learning_rate": 0.0004889070146818923, "loss": 0.095, "num_input_tokens_seen": 12938080, "step": 5995 }, { "epoch": 0.9787928221859706, "grad_norm": 0.3981427252292633, "learning_rate": 0.0004893148450244698, "loss": 0.0951, "num_input_tokens_seen": 12948480, "step": 6000 }, { "epoch": 0.9796084828711256, "grad_norm": 0.39857617020606995, "learning_rate": 0.0004897226753670474, "loss": 0.0736, "num_input_tokens_seen": 12958464, "step": 6005 }, { "epoch": 0.9804241435562806, "grad_norm": 0.12100239843130112, "learning_rate": 0.0004901305057096248, "loss": 0.0357, "num_input_tokens_seen": 12969696, "step": 6010 }, { "epoch": 0.9812398042414355, "grad_norm": 0.11612671613693237, "learning_rate": 0.0004905383360522022, "loss": 0.1405, "num_input_tokens_seen": 12980384, "step": 6015 }, { "epoch": 0.9820554649265906, "grad_norm": 0.4690355360507965, "learning_rate": 0.0004909461663947798, "loss": 0.1106, "num_input_tokens_seen": 12991968, "step": 6020 }, { "epoch": 0.9828711256117455, "grad_norm": 0.39885714650154114, "learning_rate": 0.0004913539967373573, "loss": 0.0736, "num_input_tokens_seen": 13001504, "step": 6025 }, { "epoch": 0.9836867862969005, "grad_norm": 0.09816106408834457, "learning_rate": 0.0004917618270799348, "loss": 0.1233, "num_input_tokens_seen": 13013184, "step": 6030 }, { "epoch": 0.9845024469820555, "grad_norm": 0.6472817659378052, "learning_rate": 0.0004921696574225122, "loss": 0.3017, "num_input_tokens_seen": 13024160, "step": 6035 }, { "epoch": 0.9853181076672104, "grad_norm": 0.5460825562477112, "learning_rate": 0.0004925774877650897, "loss": 0.1273, "num_input_tokens_seen": 13035904, "step": 6040 }, { "epoch": 0.9861337683523654, "grad_norm": 0.15521462261676788, "learning_rate": 0.0004929853181076672, "loss": 0.067, "num_input_tokens_seen": 13046656, "step": 6045 }, { "epoch": 0.9869494290375204, "grad_norm": 0.18014229834079742, "learning_rate": 0.0004933931484502447, "loss": 0.1247, "num_input_tokens_seen": 13057216, "step": 6050 }, { "epoch": 0.9877650897226754, "grad_norm": 0.07037936896085739, "learning_rate": 0.0004938009787928223, "loss": 0.0704, "num_input_tokens_seen": 13068192, "step": 6055 }, { "epoch": 0.9885807504078303, "grad_norm": 0.2725774049758911, "learning_rate": 0.0004942088091353996, "loss": 0.1099, "num_input_tokens_seen": 13078784, "step": 6060 }, { "epoch": 0.9893964110929854, "grad_norm": 0.4601075053215027, "learning_rate": 0.0004946166394779772, "loss": 0.2392, "num_input_tokens_seen": 13089984, "step": 6065 }, { "epoch": 0.9902120717781403, "grad_norm": 0.167992502450943, "learning_rate": 0.0004950244698205547, "loss": 0.1449, "num_input_tokens_seen": 13100640, "step": 6070 }, { "epoch": 0.9910277324632952, "grad_norm": 0.3866632878780365, "learning_rate": 0.0004954323001631322, "loss": 0.0834, "num_input_tokens_seen": 13113024, "step": 6075 }, { "epoch": 0.9918433931484503, "grad_norm": 0.05210854858160019, "learning_rate": 0.0004958401305057096, "loss": 0.0372, "num_input_tokens_seen": 13125280, "step": 6080 }, { "epoch": 0.9926590538336052, "grad_norm": 0.06647510826587677, "learning_rate": 0.0004962479608482871, "loss": 0.1048, "num_input_tokens_seen": 13135488, "step": 6085 }, { "epoch": 0.9934747145187602, "grad_norm": 0.13753701746463776, "learning_rate": 0.0004966557911908646, "loss": 0.0978, "num_input_tokens_seen": 13146112, "step": 6090 }, { "epoch": 0.9942903752039152, "grad_norm": 0.6358346939086914, "learning_rate": 0.0004970636215334421, "loss": 0.1751, "num_input_tokens_seen": 13157984, "step": 6095 }, { "epoch": 0.9951060358890701, "grad_norm": 0.2948353886604309, "learning_rate": 0.0004974714518760197, "loss": 0.0898, "num_input_tokens_seen": 13169248, "step": 6100 }, { "epoch": 0.9959216965742251, "grad_norm": 0.0644797533750534, "learning_rate": 0.000497879282218597, "loss": 0.0528, "num_input_tokens_seen": 13179072, "step": 6105 }, { "epoch": 0.9967373572593801, "grad_norm": 0.19211618602275848, "learning_rate": 0.0004982871125611745, "loss": 0.1887, "num_input_tokens_seen": 13189824, "step": 6110 }, { "epoch": 0.9975530179445351, "grad_norm": 0.06717045605182648, "learning_rate": 0.0004986949429037521, "loss": 0.0847, "num_input_tokens_seen": 13199744, "step": 6115 }, { "epoch": 0.99836867862969, "grad_norm": 0.07792261242866516, "learning_rate": 0.0004991027732463296, "loss": 0.1448, "num_input_tokens_seen": 13210080, "step": 6120 }, { "epoch": 0.9991843393148451, "grad_norm": 0.09759276360273361, "learning_rate": 0.000499510603588907, "loss": 0.0392, "num_input_tokens_seen": 13220320, "step": 6125 }, { "epoch": 1.0, "grad_norm": 0.03415796160697937, "learning_rate": 0.0004999184339314845, "loss": 0.2743, "num_input_tokens_seen": 13229504, "step": 6130 }, { "epoch": 1.0, "eval_loss": 0.14463983476161957, "eval_runtime": 104.6056, "eval_samples_per_second": 26.05, "eval_steps_per_second": 6.52, "num_input_tokens_seen": 13229504, "step": 6130 }, { "epoch": 1.000815660685155, "grad_norm": 0.22669054567813873, "learning_rate": 0.000500326264274062, "loss": 0.1043, "num_input_tokens_seen": 13239456, "step": 6135 }, { "epoch": 1.0016313213703099, "grad_norm": 0.1711520105600357, "learning_rate": 0.0005007340946166395, "loss": 0.1121, "num_input_tokens_seen": 13250080, "step": 6140 }, { "epoch": 1.002446982055465, "grad_norm": 0.2307485193014145, "learning_rate": 0.0005011419249592169, "loss": 0.0923, "num_input_tokens_seen": 13261728, "step": 6145 }, { "epoch": 1.00326264274062, "grad_norm": 0.22946693003177643, "learning_rate": 0.0005015497553017944, "loss": 0.1817, "num_input_tokens_seen": 13272096, "step": 6150 }, { "epoch": 1.004078303425775, "grad_norm": 0.06394845247268677, "learning_rate": 0.000501957585644372, "loss": 0.1227, "num_input_tokens_seen": 13283808, "step": 6155 }, { "epoch": 1.0048939641109298, "grad_norm": 0.1974440962076187, "learning_rate": 0.0005023654159869494, "loss": 0.0407, "num_input_tokens_seen": 13294656, "step": 6160 }, { "epoch": 1.0057096247960848, "grad_norm": 0.26497170329093933, "learning_rate": 0.000502773246329527, "loss": 0.0837, "num_input_tokens_seen": 13305408, "step": 6165 }, { "epoch": 1.0065252854812399, "grad_norm": 0.6019001007080078, "learning_rate": 0.0005031810766721044, "loss": 0.2081, "num_input_tokens_seen": 13316128, "step": 6170 }, { "epoch": 1.0073409461663947, "grad_norm": 0.4275592863559723, "learning_rate": 0.0005035889070146818, "loss": 0.1707, "num_input_tokens_seen": 13328160, "step": 6175 }, { "epoch": 1.0081566068515497, "grad_norm": 0.5141977071762085, "learning_rate": 0.0005039967373572594, "loss": 0.2066, "num_input_tokens_seen": 13338464, "step": 6180 }, { "epoch": 1.0089722675367048, "grad_norm": 0.08926641196012497, "learning_rate": 0.0005044045676998369, "loss": 0.0773, "num_input_tokens_seen": 13349088, "step": 6185 }, { "epoch": 1.0097879282218598, "grad_norm": 0.23107115924358368, "learning_rate": 0.0005048123980424144, "loss": 0.1325, "num_input_tokens_seen": 13359904, "step": 6190 }, { "epoch": 1.0106035889070146, "grad_norm": 0.07850232720375061, "learning_rate": 0.0005052202283849918, "loss": 0.0864, "num_input_tokens_seen": 13371904, "step": 6195 }, { "epoch": 1.0114192495921697, "grad_norm": 0.1566961705684662, "learning_rate": 0.0005056280587275693, "loss": 0.0558, "num_input_tokens_seen": 13382528, "step": 6200 }, { "epoch": 1.0122349102773247, "grad_norm": 0.031418174505233765, "learning_rate": 0.0005060358890701469, "loss": 0.0989, "num_input_tokens_seen": 13394304, "step": 6205 }, { "epoch": 1.0130505709624795, "grad_norm": 0.36920976638793945, "learning_rate": 0.0005064437194127242, "loss": 0.1261, "num_input_tokens_seen": 13405824, "step": 6210 }, { "epoch": 1.0138662316476346, "grad_norm": 0.22534415125846863, "learning_rate": 0.0005068515497553018, "loss": 0.1797, "num_input_tokens_seen": 13415776, "step": 6215 }, { "epoch": 1.0146818923327896, "grad_norm": 0.08058171719312668, "learning_rate": 0.0005072593800978793, "loss": 0.2495, "num_input_tokens_seen": 13426432, "step": 6220 }, { "epoch": 1.0154975530179446, "grad_norm": 0.10286976397037506, "learning_rate": 0.0005076672104404568, "loss": 0.0436, "num_input_tokens_seen": 13437088, "step": 6225 }, { "epoch": 1.0163132137030995, "grad_norm": 0.3965021073818207, "learning_rate": 0.0005080750407830343, "loss": 0.1188, "num_input_tokens_seen": 13447936, "step": 6230 }, { "epoch": 1.0171288743882545, "grad_norm": 0.5830568075180054, "learning_rate": 0.0005084828711256117, "loss": 0.11, "num_input_tokens_seen": 13458752, "step": 6235 }, { "epoch": 1.0179445350734095, "grad_norm": 0.19195236265659332, "learning_rate": 0.0005088907014681893, "loss": 0.1437, "num_input_tokens_seen": 13470208, "step": 6240 }, { "epoch": 1.0187601957585644, "grad_norm": 0.24721567332744598, "learning_rate": 0.0005092985318107667, "loss": 0.0806, "num_input_tokens_seen": 13480736, "step": 6245 }, { "epoch": 1.0195758564437194, "grad_norm": 0.3341149389743805, "learning_rate": 0.0005097063621533442, "loss": 0.0496, "num_input_tokens_seen": 13490560, "step": 6250 }, { "epoch": 1.0203915171288744, "grad_norm": 0.6549698710441589, "learning_rate": 0.0005101141924959218, "loss": 0.1717, "num_input_tokens_seen": 13501280, "step": 6255 }, { "epoch": 1.0212071778140293, "grad_norm": 0.12289687991142273, "learning_rate": 0.0005105220228384992, "loss": 0.0765, "num_input_tokens_seen": 13511776, "step": 6260 }, { "epoch": 1.0220228384991843, "grad_norm": 1.0222145318984985, "learning_rate": 0.0005109298531810767, "loss": 0.107, "num_input_tokens_seen": 13523168, "step": 6265 }, { "epoch": 1.0228384991843393, "grad_norm": 0.6536434292793274, "learning_rate": 0.0005113376835236542, "loss": 0.1357, "num_input_tokens_seen": 13534304, "step": 6270 }, { "epoch": 1.0236541598694944, "grad_norm": 0.06413447111845016, "learning_rate": 0.0005117455138662317, "loss": 0.0783, "num_input_tokens_seen": 13544800, "step": 6275 }, { "epoch": 1.0244698205546492, "grad_norm": 0.3327781856060028, "learning_rate": 0.0005121533442088091, "loss": 0.0447, "num_input_tokens_seen": 13555264, "step": 6280 }, { "epoch": 1.0252854812398042, "grad_norm": 0.07131163030862808, "learning_rate": 0.0005125611745513866, "loss": 0.0858, "num_input_tokens_seen": 13564672, "step": 6285 }, { "epoch": 1.0261011419249593, "grad_norm": 0.025200894102454185, "learning_rate": 0.0005129690048939642, "loss": 0.0245, "num_input_tokens_seen": 13575552, "step": 6290 }, { "epoch": 1.026916802610114, "grad_norm": 0.12716683745384216, "learning_rate": 0.0005133768352365417, "loss": 0.1274, "num_input_tokens_seen": 13586784, "step": 6295 }, { "epoch": 1.0277324632952691, "grad_norm": 0.18335126340389252, "learning_rate": 0.000513784665579119, "loss": 0.136, "num_input_tokens_seen": 13597312, "step": 6300 }, { "epoch": 1.0285481239804242, "grad_norm": 0.6576940417289734, "learning_rate": 0.0005141924959216966, "loss": 0.1592, "num_input_tokens_seen": 13607648, "step": 6305 }, { "epoch": 1.0293637846655792, "grad_norm": 0.35685011744499207, "learning_rate": 0.0005146003262642741, "loss": 0.0195, "num_input_tokens_seen": 13618048, "step": 6310 }, { "epoch": 1.030179445350734, "grad_norm": 0.1736626774072647, "learning_rate": 0.0005150081566068515, "loss": 0.2939, "num_input_tokens_seen": 13628576, "step": 6315 }, { "epoch": 1.030995106035889, "grad_norm": 0.03455007076263428, "learning_rate": 0.000515415986949429, "loss": 0.0784, "num_input_tokens_seen": 13640448, "step": 6320 }, { "epoch": 1.031810766721044, "grad_norm": 0.02044091187417507, "learning_rate": 0.0005158238172920065, "loss": 0.1542, "num_input_tokens_seen": 13652096, "step": 6325 }, { "epoch": 1.032626427406199, "grad_norm": 0.0856921374797821, "learning_rate": 0.0005162316476345841, "loss": 0.1368, "num_input_tokens_seen": 13661664, "step": 6330 }, { "epoch": 1.033442088091354, "grad_norm": 0.13601349294185638, "learning_rate": 0.0005166394779771615, "loss": 0.0764, "num_input_tokens_seen": 13673344, "step": 6335 }, { "epoch": 1.034257748776509, "grad_norm": 0.12700077891349792, "learning_rate": 0.000517047308319739, "loss": 0.2048, "num_input_tokens_seen": 13685184, "step": 6340 }, { "epoch": 1.035073409461664, "grad_norm": 0.1718670278787613, "learning_rate": 0.0005174551386623165, "loss": 0.0835, "num_input_tokens_seen": 13696416, "step": 6345 }, { "epoch": 1.0358890701468189, "grad_norm": 0.1252424269914627, "learning_rate": 0.0005178629690048939, "loss": 0.1014, "num_input_tokens_seen": 13706048, "step": 6350 }, { "epoch": 1.036704730831974, "grad_norm": 0.7100284099578857, "learning_rate": 0.0005182707993474715, "loss": 0.2015, "num_input_tokens_seen": 13716992, "step": 6355 }, { "epoch": 1.037520391517129, "grad_norm": 0.30915582180023193, "learning_rate": 0.000518678629690049, "loss": 0.0396, "num_input_tokens_seen": 13725600, "step": 6360 }, { "epoch": 1.0383360522022838, "grad_norm": 0.09551280736923218, "learning_rate": 0.0005190864600326263, "loss": 0.1022, "num_input_tokens_seen": 13736288, "step": 6365 }, { "epoch": 1.0391517128874388, "grad_norm": 0.1935325711965561, "learning_rate": 0.0005194942903752039, "loss": 0.1329, "num_input_tokens_seen": 13747936, "step": 6370 }, { "epoch": 1.0399673735725938, "grad_norm": 0.2071111798286438, "learning_rate": 0.0005199021207177814, "loss": 0.0932, "num_input_tokens_seen": 13758304, "step": 6375 }, { "epoch": 1.0407830342577489, "grad_norm": 0.038355812430381775, "learning_rate": 0.000520309951060359, "loss": 0.0644, "num_input_tokens_seen": 13769952, "step": 6380 }, { "epoch": 1.0415986949429037, "grad_norm": 0.21142078936100006, "learning_rate": 0.0005207177814029364, "loss": 0.2768, "num_input_tokens_seen": 13780320, "step": 6385 }, { "epoch": 1.0424143556280587, "grad_norm": 0.05320408195257187, "learning_rate": 0.0005211256117455138, "loss": 0.0379, "num_input_tokens_seen": 13790912, "step": 6390 }, { "epoch": 1.0432300163132138, "grad_norm": 0.9048792719841003, "learning_rate": 0.0005215334420880914, "loss": 0.2107, "num_input_tokens_seen": 13801248, "step": 6395 }, { "epoch": 1.0440456769983686, "grad_norm": 0.6645633578300476, "learning_rate": 0.0005219412724306688, "loss": 0.3812, "num_input_tokens_seen": 13812672, "step": 6400 }, { "epoch": 1.0448613376835236, "grad_norm": 0.04563363641500473, "learning_rate": 0.0005223491027732464, "loss": 0.1391, "num_input_tokens_seen": 13824064, "step": 6405 }, { "epoch": 1.0456769983686787, "grad_norm": 0.31707751750946045, "learning_rate": 0.0005227569331158238, "loss": 0.0982, "num_input_tokens_seen": 13834720, "step": 6410 }, { "epoch": 1.0464926590538337, "grad_norm": 0.5346210598945618, "learning_rate": 0.0005231647634584013, "loss": 0.1642, "num_input_tokens_seen": 13845504, "step": 6415 }, { "epoch": 1.0473083197389885, "grad_norm": 0.13633306324481964, "learning_rate": 0.0005235725938009788, "loss": 0.1766, "num_input_tokens_seen": 13856032, "step": 6420 }, { "epoch": 1.0481239804241436, "grad_norm": 0.1982078105211258, "learning_rate": 0.0005239804241435563, "loss": 0.0703, "num_input_tokens_seen": 13868672, "step": 6425 }, { "epoch": 1.0489396411092986, "grad_norm": 0.46533918380737305, "learning_rate": 0.0005243882544861339, "loss": 0.1527, "num_input_tokens_seen": 13879488, "step": 6430 }, { "epoch": 1.0497553017944534, "grad_norm": 0.4247722625732422, "learning_rate": 0.0005247960848287112, "loss": 0.1553, "num_input_tokens_seen": 13889888, "step": 6435 }, { "epoch": 1.0505709624796085, "grad_norm": 0.15412858128547668, "learning_rate": 0.0005252039151712887, "loss": 0.0301, "num_input_tokens_seen": 13901216, "step": 6440 }, { "epoch": 1.0513866231647635, "grad_norm": 0.24920067191123962, "learning_rate": 0.0005256117455138663, "loss": 0.0651, "num_input_tokens_seen": 13913952, "step": 6445 }, { "epoch": 1.0522022838499185, "grad_norm": 0.6052981019020081, "learning_rate": 0.0005260195758564438, "loss": 0.12, "num_input_tokens_seen": 13924320, "step": 6450 }, { "epoch": 1.0530179445350734, "grad_norm": 0.8095594644546509, "learning_rate": 0.0005264274061990211, "loss": 0.2165, "num_input_tokens_seen": 13935264, "step": 6455 }, { "epoch": 1.0538336052202284, "grad_norm": 0.4418445825576782, "learning_rate": 0.0005268352365415987, "loss": 0.0902, "num_input_tokens_seen": 13946080, "step": 6460 }, { "epoch": 1.0546492659053834, "grad_norm": 0.659645676612854, "learning_rate": 0.0005272430668841762, "loss": 0.2164, "num_input_tokens_seen": 13956768, "step": 6465 }, { "epoch": 1.0554649265905383, "grad_norm": 0.7405608296394348, "learning_rate": 0.0005276508972267537, "loss": 0.3611, "num_input_tokens_seen": 13967360, "step": 6470 }, { "epoch": 1.0562805872756933, "grad_norm": 0.040661633014678955, "learning_rate": 0.0005280587275693311, "loss": 0.0952, "num_input_tokens_seen": 13978368, "step": 6475 }, { "epoch": 1.0570962479608483, "grad_norm": 0.0482853427529335, "learning_rate": 0.0005284665579119086, "loss": 0.2657, "num_input_tokens_seen": 13989248, "step": 6480 }, { "epoch": 1.0579119086460032, "grad_norm": 0.11091592162847519, "learning_rate": 0.0005288743882544862, "loss": 0.0876, "num_input_tokens_seen": 13998880, "step": 6485 }, { "epoch": 1.0587275693311582, "grad_norm": 0.18542087078094482, "learning_rate": 0.0005292822185970636, "loss": 0.1027, "num_input_tokens_seen": 14009504, "step": 6490 }, { "epoch": 1.0595432300163132, "grad_norm": 0.4077613353729248, "learning_rate": 0.0005296900489396412, "loss": 0.1686, "num_input_tokens_seen": 14020704, "step": 6495 }, { "epoch": 1.0603588907014683, "grad_norm": 0.8985447883605957, "learning_rate": 0.0005300978792822186, "loss": 0.3167, "num_input_tokens_seen": 14031168, "step": 6500 }, { "epoch": 1.061174551386623, "grad_norm": 0.36803948879241943, "learning_rate": 0.000530505709624796, "loss": 0.1024, "num_input_tokens_seen": 14041984, "step": 6505 }, { "epoch": 1.0619902120717781, "grad_norm": 0.44054582715034485, "learning_rate": 0.0005309135399673736, "loss": 0.2073, "num_input_tokens_seen": 14052032, "step": 6510 }, { "epoch": 1.0628058727569332, "grad_norm": 0.16428962349891663, "learning_rate": 0.0005313213703099511, "loss": 0.0868, "num_input_tokens_seen": 14064384, "step": 6515 }, { "epoch": 1.0636215334420882, "grad_norm": 0.0772414281964302, "learning_rate": 0.0005317292006525287, "loss": 0.1381, "num_input_tokens_seen": 14075936, "step": 6520 }, { "epoch": 1.064437194127243, "grad_norm": 0.1781790852546692, "learning_rate": 0.000532137030995106, "loss": 0.239, "num_input_tokens_seen": 14086368, "step": 6525 }, { "epoch": 1.065252854812398, "grad_norm": 0.46900323033332825, "learning_rate": 0.0005325448613376835, "loss": 0.1426, "num_input_tokens_seen": 14098016, "step": 6530 }, { "epoch": 1.066068515497553, "grad_norm": 0.18812048435211182, "learning_rate": 0.0005329526916802611, "loss": 0.1078, "num_input_tokens_seen": 14109024, "step": 6535 }, { "epoch": 1.066884176182708, "grad_norm": 0.18054918944835663, "learning_rate": 0.0005333605220228385, "loss": 0.0835, "num_input_tokens_seen": 14119712, "step": 6540 }, { "epoch": 1.067699836867863, "grad_norm": 0.18441586196422577, "learning_rate": 0.000533768352365416, "loss": 0.1403, "num_input_tokens_seen": 14130688, "step": 6545 }, { "epoch": 1.068515497553018, "grad_norm": 0.40736836194992065, "learning_rate": 0.0005341761827079935, "loss": 0.1994, "num_input_tokens_seen": 14140224, "step": 6550 }, { "epoch": 1.0693311582381728, "grad_norm": 0.09616490453481674, "learning_rate": 0.000534584013050571, "loss": 0.2173, "num_input_tokens_seen": 14151744, "step": 6555 }, { "epoch": 1.0701468189233279, "grad_norm": 1.0035185813903809, "learning_rate": 0.0005349918433931485, "loss": 0.1948, "num_input_tokens_seen": 14162656, "step": 6560 }, { "epoch": 1.070962479608483, "grad_norm": 0.05002962797880173, "learning_rate": 0.0005353996737357259, "loss": 0.0564, "num_input_tokens_seen": 14173888, "step": 6565 }, { "epoch": 1.071778140293638, "grad_norm": 0.15767160058021545, "learning_rate": 0.0005358075040783035, "loss": 0.0606, "num_input_tokens_seen": 14184224, "step": 6570 }, { "epoch": 1.0725938009787928, "grad_norm": 0.17425937950611115, "learning_rate": 0.0005362153344208809, "loss": 0.1063, "num_input_tokens_seen": 14194112, "step": 6575 }, { "epoch": 1.0734094616639478, "grad_norm": 0.5236384272575378, "learning_rate": 0.0005366231647634584, "loss": 0.1767, "num_input_tokens_seen": 14205440, "step": 6580 }, { "epoch": 1.0742251223491028, "grad_norm": 0.16773928701877594, "learning_rate": 0.000537030995106036, "loss": 0.0889, "num_input_tokens_seen": 14216512, "step": 6585 }, { "epoch": 1.0750407830342577, "grad_norm": 0.47968098521232605, "learning_rate": 0.0005374388254486133, "loss": 0.1446, "num_input_tokens_seen": 14227424, "step": 6590 }, { "epoch": 1.0758564437194127, "grad_norm": 0.7088977098464966, "learning_rate": 0.0005378466557911908, "loss": 0.1473, "num_input_tokens_seen": 14239008, "step": 6595 }, { "epoch": 1.0766721044045677, "grad_norm": 1.0960109233856201, "learning_rate": 0.0005382544861337684, "loss": 0.2205, "num_input_tokens_seen": 14249888, "step": 6600 }, { "epoch": 1.0774877650897228, "grad_norm": 0.25585508346557617, "learning_rate": 0.0005386623164763459, "loss": 0.1402, "num_input_tokens_seen": 14260736, "step": 6605 }, { "epoch": 1.0783034257748776, "grad_norm": 0.5164852738380432, "learning_rate": 0.0005390701468189233, "loss": 0.1318, "num_input_tokens_seen": 14271552, "step": 6610 }, { "epoch": 1.0791190864600326, "grad_norm": 0.5099876523017883, "learning_rate": 0.0005394779771615008, "loss": 0.1169, "num_input_tokens_seen": 14282272, "step": 6615 }, { "epoch": 1.0799347471451877, "grad_norm": 0.24204134941101074, "learning_rate": 0.0005398858075040783, "loss": 0.1316, "num_input_tokens_seen": 14293728, "step": 6620 }, { "epoch": 1.0807504078303425, "grad_norm": 0.11602164804935455, "learning_rate": 0.0005402936378466558, "loss": 0.1218, "num_input_tokens_seen": 14305024, "step": 6625 }, { "epoch": 1.0815660685154975, "grad_norm": 0.2398003488779068, "learning_rate": 0.0005407014681892332, "loss": 0.1352, "num_input_tokens_seen": 14315712, "step": 6630 }, { "epoch": 1.0823817292006526, "grad_norm": 0.2515527009963989, "learning_rate": 0.0005411092985318108, "loss": 0.1144, "num_input_tokens_seen": 14325856, "step": 6635 }, { "epoch": 1.0831973898858076, "grad_norm": 0.3223993480205536, "learning_rate": 0.0005415171288743883, "loss": 0.0923, "num_input_tokens_seen": 14336960, "step": 6640 }, { "epoch": 1.0840130505709624, "grad_norm": 0.06253989785909653, "learning_rate": 0.0005419249592169657, "loss": 0.097, "num_input_tokens_seen": 14347680, "step": 6645 }, { "epoch": 1.0848287112561175, "grad_norm": 0.20289258658885956, "learning_rate": 0.0005423327895595433, "loss": 0.0629, "num_input_tokens_seen": 14358496, "step": 6650 }, { "epoch": 1.0856443719412725, "grad_norm": 0.7146836519241333, "learning_rate": 0.0005427406199021207, "loss": 0.1659, "num_input_tokens_seen": 14369056, "step": 6655 }, { "epoch": 1.0864600326264273, "grad_norm": 0.09261279553174973, "learning_rate": 0.0005431484502446982, "loss": 0.1233, "num_input_tokens_seen": 14379904, "step": 6660 }, { "epoch": 1.0872756933115824, "grad_norm": 0.15627171099185944, "learning_rate": 0.0005435562805872757, "loss": 0.3493, "num_input_tokens_seen": 14391296, "step": 6665 }, { "epoch": 1.0880913539967374, "grad_norm": 0.5361870527267456, "learning_rate": 0.0005439641109298532, "loss": 0.1007, "num_input_tokens_seen": 14403104, "step": 6670 }, { "epoch": 1.0889070146818924, "grad_norm": 0.16372109949588776, "learning_rate": 0.0005443719412724307, "loss": 0.0647, "num_input_tokens_seen": 14414272, "step": 6675 }, { "epoch": 1.0897226753670473, "grad_norm": 1.0343587398529053, "learning_rate": 0.0005447797716150081, "loss": 0.1939, "num_input_tokens_seen": 14424864, "step": 6680 }, { "epoch": 1.0905383360522023, "grad_norm": 0.19086553156375885, "learning_rate": 0.0005451876019575857, "loss": 0.235, "num_input_tokens_seen": 14434784, "step": 6685 }, { "epoch": 1.0913539967373573, "grad_norm": 0.05324350297451019, "learning_rate": 0.0005455954323001632, "loss": 0.0772, "num_input_tokens_seen": 14445056, "step": 6690 }, { "epoch": 1.0921696574225122, "grad_norm": 0.0572771355509758, "learning_rate": 0.0005460032626427405, "loss": 0.1575, "num_input_tokens_seen": 14454752, "step": 6695 }, { "epoch": 1.0929853181076672, "grad_norm": 0.03882576897740364, "learning_rate": 0.0005464110929853181, "loss": 0.0506, "num_input_tokens_seen": 14465344, "step": 6700 }, { "epoch": 1.0938009787928222, "grad_norm": 0.06413883715867996, "learning_rate": 0.0005468189233278956, "loss": 0.1586, "num_input_tokens_seen": 14474912, "step": 6705 }, { "epoch": 1.094616639477977, "grad_norm": 0.21612389385700226, "learning_rate": 0.0005472267536704732, "loss": 0.1344, "num_input_tokens_seen": 14485344, "step": 6710 }, { "epoch": 1.095432300163132, "grad_norm": 0.1150800958275795, "learning_rate": 0.0005476345840130506, "loss": 0.298, "num_input_tokens_seen": 14496576, "step": 6715 }, { "epoch": 1.0962479608482871, "grad_norm": 0.03761327639222145, "learning_rate": 0.000548042414355628, "loss": 0.131, "num_input_tokens_seen": 14507488, "step": 6720 }, { "epoch": 1.0970636215334422, "grad_norm": 0.03287499397993088, "learning_rate": 0.0005484502446982056, "loss": 0.0815, "num_input_tokens_seen": 14518080, "step": 6725 }, { "epoch": 1.097879282218597, "grad_norm": 0.3721981942653656, "learning_rate": 0.000548858075040783, "loss": 0.1544, "num_input_tokens_seen": 14527872, "step": 6730 }, { "epoch": 1.098694942903752, "grad_norm": 0.2912892997264862, "learning_rate": 0.0005492659053833605, "loss": 0.131, "num_input_tokens_seen": 14539296, "step": 6735 }, { "epoch": 1.099510603588907, "grad_norm": 0.07880754768848419, "learning_rate": 0.000549673735725938, "loss": 0.0845, "num_input_tokens_seen": 14549824, "step": 6740 }, { "epoch": 1.100326264274062, "grad_norm": 0.09879589825868607, "learning_rate": 0.0005500815660685155, "loss": 0.0719, "num_input_tokens_seen": 14559488, "step": 6745 }, { "epoch": 1.101141924959217, "grad_norm": 0.2666039764881134, "learning_rate": 0.000550489396411093, "loss": 0.0994, "num_input_tokens_seen": 14570272, "step": 6750 }, { "epoch": 1.101957585644372, "grad_norm": 0.2310534417629242, "learning_rate": 0.0005508972267536705, "loss": 0.1306, "num_input_tokens_seen": 14580512, "step": 6755 }, { "epoch": 1.102773246329527, "grad_norm": 0.039583489298820496, "learning_rate": 0.000551305057096248, "loss": 0.0409, "num_input_tokens_seen": 14591648, "step": 6760 }, { "epoch": 1.1035889070146818, "grad_norm": 0.07190904021263123, "learning_rate": 0.0005517128874388254, "loss": 0.0439, "num_input_tokens_seen": 14602016, "step": 6765 }, { "epoch": 1.1044045676998369, "grad_norm": 0.05018070712685585, "learning_rate": 0.0005521207177814029, "loss": 0.0843, "num_input_tokens_seen": 14613664, "step": 6770 }, { "epoch": 1.105220228384992, "grad_norm": 0.2662052810192108, "learning_rate": 0.0005525285481239805, "loss": 0.2355, "num_input_tokens_seen": 14624288, "step": 6775 }, { "epoch": 1.1060358890701467, "grad_norm": 0.1371532529592514, "learning_rate": 0.000552936378466558, "loss": 0.0592, "num_input_tokens_seen": 14635200, "step": 6780 }, { "epoch": 1.1068515497553018, "grad_norm": 0.49722447991371155, "learning_rate": 0.0005533442088091353, "loss": 0.1246, "num_input_tokens_seen": 14645472, "step": 6785 }, { "epoch": 1.1076672104404568, "grad_norm": 0.05029119923710823, "learning_rate": 0.0005537520391517129, "loss": 0.0512, "num_input_tokens_seen": 14656384, "step": 6790 }, { "epoch": 1.1084828711256118, "grad_norm": 0.8334574103355408, "learning_rate": 0.0005541598694942904, "loss": 0.2471, "num_input_tokens_seen": 14666496, "step": 6795 }, { "epoch": 1.1092985318107667, "grad_norm": 0.2712143361568451, "learning_rate": 0.0005545676998368679, "loss": 0.166, "num_input_tokens_seen": 14677760, "step": 6800 }, { "epoch": 1.1101141924959217, "grad_norm": 0.049193721264600754, "learning_rate": 0.0005549755301794454, "loss": 0.0884, "num_input_tokens_seen": 14688512, "step": 6805 }, { "epoch": 1.1109298531810767, "grad_norm": 0.1833087056875229, "learning_rate": 0.0005553833605220228, "loss": 0.142, "num_input_tokens_seen": 14699392, "step": 6810 }, { "epoch": 1.1117455138662315, "grad_norm": 0.3210817277431488, "learning_rate": 0.0005557911908646003, "loss": 0.2046, "num_input_tokens_seen": 14711072, "step": 6815 }, { "epoch": 1.1125611745513866, "grad_norm": 0.04551200196146965, "learning_rate": 0.0005561990212071778, "loss": 0.0536, "num_input_tokens_seen": 14721856, "step": 6820 }, { "epoch": 1.1133768352365416, "grad_norm": 0.06565210968255997, "learning_rate": 0.0005566068515497554, "loss": 0.1706, "num_input_tokens_seen": 14732704, "step": 6825 }, { "epoch": 1.1141924959216967, "grad_norm": 0.18691854178905487, "learning_rate": 0.0005570146818923328, "loss": 0.1857, "num_input_tokens_seen": 14743488, "step": 6830 }, { "epoch": 1.1150081566068515, "grad_norm": 0.2810910940170288, "learning_rate": 0.0005574225122349102, "loss": 0.0497, "num_input_tokens_seen": 14754944, "step": 6835 }, { "epoch": 1.1158238172920065, "grad_norm": 0.31971752643585205, "learning_rate": 0.0005578303425774878, "loss": 0.201, "num_input_tokens_seen": 14766400, "step": 6840 }, { "epoch": 1.1166394779771616, "grad_norm": 0.2691393494606018, "learning_rate": 0.0005582381729200653, "loss": 0.1623, "num_input_tokens_seen": 14776192, "step": 6845 }, { "epoch": 1.1174551386623164, "grad_norm": 0.14212316274642944, "learning_rate": 0.0005586460032626428, "loss": 0.1038, "num_input_tokens_seen": 14787328, "step": 6850 }, { "epoch": 1.1182707993474714, "grad_norm": 0.7805279493331909, "learning_rate": 0.0005590538336052202, "loss": 0.118, "num_input_tokens_seen": 14798976, "step": 6855 }, { "epoch": 1.1190864600326265, "grad_norm": 0.0862787738442421, "learning_rate": 0.0005594616639477977, "loss": 0.1175, "num_input_tokens_seen": 14810304, "step": 6860 }, { "epoch": 1.1199021207177815, "grad_norm": 0.13895173370838165, "learning_rate": 0.0005598694942903753, "loss": 0.216, "num_input_tokens_seen": 14822144, "step": 6865 }, { "epoch": 1.1207177814029363, "grad_norm": 0.1773628294467926, "learning_rate": 0.0005602773246329527, "loss": 0.0587, "num_input_tokens_seen": 14831776, "step": 6870 }, { "epoch": 1.1215334420880914, "grad_norm": 0.12145256251096725, "learning_rate": 0.0005606851549755301, "loss": 0.1121, "num_input_tokens_seen": 14842976, "step": 6875 }, { "epoch": 1.1223491027732464, "grad_norm": 0.06835485249757767, "learning_rate": 0.0005610929853181077, "loss": 0.031, "num_input_tokens_seen": 14855648, "step": 6880 }, { "epoch": 1.1231647634584012, "grad_norm": 0.1296158879995346, "learning_rate": 0.0005615008156606851, "loss": 0.39, "num_input_tokens_seen": 14866656, "step": 6885 }, { "epoch": 1.1239804241435563, "grad_norm": 0.6122809052467346, "learning_rate": 0.0005619086460032627, "loss": 0.2375, "num_input_tokens_seen": 14877120, "step": 6890 }, { "epoch": 1.1247960848287113, "grad_norm": 0.4462684988975525, "learning_rate": 0.0005623164763458401, "loss": 0.2148, "num_input_tokens_seen": 14887488, "step": 6895 }, { "epoch": 1.1256117455138663, "grad_norm": 0.2977810502052307, "learning_rate": 0.0005627243066884176, "loss": 0.1558, "num_input_tokens_seen": 14898400, "step": 6900 }, { "epoch": 1.1264274061990212, "grad_norm": 0.3989168405532837, "learning_rate": 0.0005631321370309951, "loss": 0.1493, "num_input_tokens_seen": 14910048, "step": 6905 }, { "epoch": 1.1272430668841762, "grad_norm": 0.15037447214126587, "learning_rate": 0.0005635399673735726, "loss": 0.2427, "num_input_tokens_seen": 14920640, "step": 6910 }, { "epoch": 1.1280587275693312, "grad_norm": 0.3434789180755615, "learning_rate": 0.0005639477977161502, "loss": 0.1769, "num_input_tokens_seen": 14931936, "step": 6915 }, { "epoch": 1.128874388254486, "grad_norm": 0.226259246468544, "learning_rate": 0.0005643556280587275, "loss": 0.0927, "num_input_tokens_seen": 14942432, "step": 6920 }, { "epoch": 1.129690048939641, "grad_norm": 0.35819897055625916, "learning_rate": 0.000564763458401305, "loss": 0.2403, "num_input_tokens_seen": 14953152, "step": 6925 }, { "epoch": 1.1305057096247961, "grad_norm": 0.40049198269844055, "learning_rate": 0.0005651712887438826, "loss": 0.2433, "num_input_tokens_seen": 14963072, "step": 6930 }, { "epoch": 1.131321370309951, "grad_norm": 0.11748334020376205, "learning_rate": 0.0005655791190864601, "loss": 0.1484, "num_input_tokens_seen": 14974176, "step": 6935 }, { "epoch": 1.132137030995106, "grad_norm": 0.20054025948047638, "learning_rate": 0.0005659869494290375, "loss": 0.1368, "num_input_tokens_seen": 14984096, "step": 6940 }, { "epoch": 1.132952691680261, "grad_norm": 0.3618835508823395, "learning_rate": 0.000566394779771615, "loss": 0.1122, "num_input_tokens_seen": 14994752, "step": 6945 }, { "epoch": 1.133768352365416, "grad_norm": 0.2190181463956833, "learning_rate": 0.0005668026101141925, "loss": 0.1526, "num_input_tokens_seen": 15004224, "step": 6950 }, { "epoch": 1.1345840130505709, "grad_norm": 0.06052050739526749, "learning_rate": 0.00056721044045677, "loss": 0.1837, "num_input_tokens_seen": 15014848, "step": 6955 }, { "epoch": 1.135399673735726, "grad_norm": 0.2666686773300171, "learning_rate": 0.0005676182707993474, "loss": 0.1458, "num_input_tokens_seen": 15024768, "step": 6960 }, { "epoch": 1.136215334420881, "grad_norm": 0.30106160044670105, "learning_rate": 0.000568026101141925, "loss": 0.0586, "num_input_tokens_seen": 15036160, "step": 6965 }, { "epoch": 1.137030995106036, "grad_norm": 0.016872631385922432, "learning_rate": 0.0005684339314845025, "loss": 0.1999, "num_input_tokens_seen": 15048064, "step": 6970 }, { "epoch": 1.1378466557911908, "grad_norm": 0.02604484371840954, "learning_rate": 0.0005688417618270799, "loss": 0.0886, "num_input_tokens_seen": 15059328, "step": 6975 }, { "epoch": 1.1386623164763459, "grad_norm": 0.03142395615577698, "learning_rate": 0.0005692495921696575, "loss": 0.0586, "num_input_tokens_seen": 15070944, "step": 6980 }, { "epoch": 1.139477977161501, "grad_norm": 0.06263827532529831, "learning_rate": 0.0005696574225122349, "loss": 0.1178, "num_input_tokens_seen": 15082464, "step": 6985 }, { "epoch": 1.1402936378466557, "grad_norm": 0.021224992349743843, "learning_rate": 0.0005700652528548124, "loss": 0.0818, "num_input_tokens_seen": 15092256, "step": 6990 }, { "epoch": 1.1411092985318108, "grad_norm": 0.36169159412384033, "learning_rate": 0.0005704730831973899, "loss": 0.1942, "num_input_tokens_seen": 15102752, "step": 6995 }, { "epoch": 1.1419249592169658, "grad_norm": 0.2751993238925934, "learning_rate": 0.0005708809135399674, "loss": 0.126, "num_input_tokens_seen": 15113536, "step": 7000 }, { "epoch": 1.1427406199021206, "grad_norm": 0.022923478856682777, "learning_rate": 0.000571288743882545, "loss": 0.1238, "num_input_tokens_seen": 15123584, "step": 7005 }, { "epoch": 1.1435562805872757, "grad_norm": 0.045900750905275345, "learning_rate": 0.0005716965742251223, "loss": 0.0622, "num_input_tokens_seen": 15134304, "step": 7010 }, { "epoch": 1.1443719412724307, "grad_norm": 0.8374168872833252, "learning_rate": 0.0005721044045676999, "loss": 0.1505, "num_input_tokens_seen": 15145376, "step": 7015 }, { "epoch": 1.1451876019575857, "grad_norm": 0.112776979804039, "learning_rate": 0.0005725122349102774, "loss": 0.1575, "num_input_tokens_seen": 15157504, "step": 7020 }, { "epoch": 1.1460032626427405, "grad_norm": 0.0169258750975132, "learning_rate": 0.0005729200652528548, "loss": 0.0759, "num_input_tokens_seen": 15166912, "step": 7025 }, { "epoch": 1.1468189233278956, "grad_norm": 0.3099250793457031, "learning_rate": 0.0005733278955954323, "loss": 0.0559, "num_input_tokens_seen": 15177856, "step": 7030 }, { "epoch": 1.1476345840130506, "grad_norm": 0.1315883845090866, "learning_rate": 0.0005737357259380098, "loss": 0.1174, "num_input_tokens_seen": 15188160, "step": 7035 }, { "epoch": 1.1484502446982057, "grad_norm": 0.10545194149017334, "learning_rate": 0.0005741435562805873, "loss": 0.1168, "num_input_tokens_seen": 15198752, "step": 7040 }, { "epoch": 1.1492659053833605, "grad_norm": 0.2460419088602066, "learning_rate": 0.0005745513866231648, "loss": 0.1042, "num_input_tokens_seen": 15208896, "step": 7045 }, { "epoch": 1.1500815660685155, "grad_norm": 0.05671518296003342, "learning_rate": 0.0005749592169657422, "loss": 0.1513, "num_input_tokens_seen": 15220160, "step": 7050 }, { "epoch": 1.1508972267536706, "grad_norm": 0.2793235778808594, "learning_rate": 0.0005753670473083198, "loss": 0.0446, "num_input_tokens_seen": 15230048, "step": 7055 }, { "epoch": 1.1517128874388254, "grad_norm": 0.6316943168640137, "learning_rate": 0.0005757748776508972, "loss": 0.1533, "num_input_tokens_seen": 15240928, "step": 7060 }, { "epoch": 1.1525285481239804, "grad_norm": 0.03701157122850418, "learning_rate": 0.0005761827079934747, "loss": 0.0972, "num_input_tokens_seen": 15251808, "step": 7065 }, { "epoch": 1.1533442088091355, "grad_norm": 1.0601210594177246, "learning_rate": 0.0005765905383360523, "loss": 0.0891, "num_input_tokens_seen": 15263008, "step": 7070 }, { "epoch": 1.1541598694942903, "grad_norm": 0.44687551259994507, "learning_rate": 0.0005769983686786296, "loss": 0.0852, "num_input_tokens_seen": 15273504, "step": 7075 }, { "epoch": 1.1549755301794453, "grad_norm": 0.07929537445306778, "learning_rate": 0.0005774061990212072, "loss": 0.3562, "num_input_tokens_seen": 15283808, "step": 7080 }, { "epoch": 1.1557911908646004, "grad_norm": 0.08705031871795654, "learning_rate": 0.0005778140293637847, "loss": 0.2972, "num_input_tokens_seen": 15293888, "step": 7085 }, { "epoch": 1.1566068515497552, "grad_norm": 0.3091745674610138, "learning_rate": 0.0005782218597063622, "loss": 0.1283, "num_input_tokens_seen": 15305152, "step": 7090 }, { "epoch": 1.1574225122349102, "grad_norm": 0.3442016839981079, "learning_rate": 0.0005786296900489396, "loss": 0.1219, "num_input_tokens_seen": 15314560, "step": 7095 }, { "epoch": 1.1582381729200653, "grad_norm": 0.08858685940504074, "learning_rate": 0.0005790375203915171, "loss": 0.1612, "num_input_tokens_seen": 15324736, "step": 7100 }, { "epoch": 1.1590538336052203, "grad_norm": 0.13399852812290192, "learning_rate": 0.0005794453507340947, "loss": 0.044, "num_input_tokens_seen": 15335872, "step": 7105 }, { "epoch": 1.1598694942903751, "grad_norm": 0.13570302724838257, "learning_rate": 0.0005798531810766721, "loss": 0.251, "num_input_tokens_seen": 15345824, "step": 7110 }, { "epoch": 1.1606851549755302, "grad_norm": 0.12019189447164536, "learning_rate": 0.0005802610114192495, "loss": 0.0604, "num_input_tokens_seen": 15358112, "step": 7115 }, { "epoch": 1.1615008156606852, "grad_norm": 0.10372965037822723, "learning_rate": 0.0005806688417618271, "loss": 0.1339, "num_input_tokens_seen": 15368256, "step": 7120 }, { "epoch": 1.1623164763458402, "grad_norm": 0.04068510979413986, "learning_rate": 0.0005810766721044046, "loss": 0.0593, "num_input_tokens_seen": 15378912, "step": 7125 }, { "epoch": 1.163132137030995, "grad_norm": 0.5916391015052795, "learning_rate": 0.0005814845024469821, "loss": 0.1282, "num_input_tokens_seen": 15390720, "step": 7130 }, { "epoch": 1.16394779771615, "grad_norm": 0.11436335742473602, "learning_rate": 0.0005818923327895596, "loss": 0.0587, "num_input_tokens_seen": 15402208, "step": 7135 }, { "epoch": 1.1647634584013051, "grad_norm": 0.190968856215477, "learning_rate": 0.000582300163132137, "loss": 0.0673, "num_input_tokens_seen": 15411936, "step": 7140 }, { "epoch": 1.16557911908646, "grad_norm": 0.23346631228923798, "learning_rate": 0.0005827079934747145, "loss": 0.1022, "num_input_tokens_seen": 15421952, "step": 7145 }, { "epoch": 1.166394779771615, "grad_norm": 1.4498358964920044, "learning_rate": 0.000583115823817292, "loss": 0.2133, "num_input_tokens_seen": 15432576, "step": 7150 }, { "epoch": 1.16721044045677, "grad_norm": 0.028827494010329247, "learning_rate": 0.0005835236541598696, "loss": 0.0297, "num_input_tokens_seen": 15444096, "step": 7155 }, { "epoch": 1.1680261011419248, "grad_norm": 0.4379870295524597, "learning_rate": 0.000583931484502447, "loss": 0.1067, "num_input_tokens_seen": 15454656, "step": 7160 }, { "epoch": 1.1688417618270799, "grad_norm": 0.08589452505111694, "learning_rate": 0.0005843393148450244, "loss": 0.1385, "num_input_tokens_seen": 15465696, "step": 7165 }, { "epoch": 1.169657422512235, "grad_norm": 0.15669237077236176, "learning_rate": 0.000584747145187602, "loss": 0.081, "num_input_tokens_seen": 15476352, "step": 7170 }, { "epoch": 1.17047308319739, "grad_norm": 0.35354354977607727, "learning_rate": 0.0005851549755301795, "loss": 0.0732, "num_input_tokens_seen": 15487488, "step": 7175 }, { "epoch": 1.1712887438825448, "grad_norm": 0.06681516766548157, "learning_rate": 0.0005855628058727568, "loss": 0.185, "num_input_tokens_seen": 15495840, "step": 7180 }, { "epoch": 1.1721044045676998, "grad_norm": 0.40308696031570435, "learning_rate": 0.0005859706362153344, "loss": 0.1749, "num_input_tokens_seen": 15505120, "step": 7185 }, { "epoch": 1.1729200652528549, "grad_norm": 0.0307419765740633, "learning_rate": 0.0005863784665579119, "loss": 0.0946, "num_input_tokens_seen": 15515520, "step": 7190 }, { "epoch": 1.17373572593801, "grad_norm": 0.3210947811603546, "learning_rate": 0.0005867862969004895, "loss": 0.0689, "num_input_tokens_seen": 15525888, "step": 7195 }, { "epoch": 1.1745513866231647, "grad_norm": 0.1669154316186905, "learning_rate": 0.0005871941272430669, "loss": 0.1308, "num_input_tokens_seen": 15536064, "step": 7200 }, { "epoch": 1.1753670473083198, "grad_norm": 0.01549902930855751, "learning_rate": 0.0005876019575856443, "loss": 0.0212, "num_input_tokens_seen": 15547776, "step": 7205 }, { "epoch": 1.1761827079934748, "grad_norm": 0.6862303614616394, "learning_rate": 0.0005880097879282219, "loss": 0.2062, "num_input_tokens_seen": 15558208, "step": 7210 }, { "epoch": 1.1769983686786296, "grad_norm": 0.16380605101585388, "learning_rate": 0.0005884176182707993, "loss": 0.0502, "num_input_tokens_seen": 15568288, "step": 7215 }, { "epoch": 1.1778140293637847, "grad_norm": 0.3492911159992218, "learning_rate": 0.0005888254486133769, "loss": 0.1071, "num_input_tokens_seen": 15579232, "step": 7220 }, { "epoch": 1.1786296900489397, "grad_norm": 0.19409194588661194, "learning_rate": 0.0005892332789559544, "loss": 0.1736, "num_input_tokens_seen": 15590272, "step": 7225 }, { "epoch": 1.1794453507340945, "grad_norm": 0.20215953886508942, "learning_rate": 0.0005896411092985318, "loss": 0.057, "num_input_tokens_seen": 15601248, "step": 7230 }, { "epoch": 1.1802610114192496, "grad_norm": 0.0567198321223259, "learning_rate": 0.0005900489396411093, "loss": 0.0653, "num_input_tokens_seen": 15611072, "step": 7235 }, { "epoch": 1.1810766721044046, "grad_norm": 0.20337219536304474, "learning_rate": 0.0005904567699836868, "loss": 0.061, "num_input_tokens_seen": 15621920, "step": 7240 }, { "epoch": 1.1818923327895596, "grad_norm": 0.6545301675796509, "learning_rate": 0.0005908646003262644, "loss": 0.1216, "num_input_tokens_seen": 15633664, "step": 7245 }, { "epoch": 1.1827079934747144, "grad_norm": 0.08207481354475021, "learning_rate": 0.0005912724306688417, "loss": 0.1306, "num_input_tokens_seen": 15645248, "step": 7250 }, { "epoch": 1.1835236541598695, "grad_norm": 0.3701433539390564, "learning_rate": 0.0005916802610114192, "loss": 0.0551, "num_input_tokens_seen": 15656000, "step": 7255 }, { "epoch": 1.1843393148450245, "grad_norm": 0.21617154777050018, "learning_rate": 0.0005920880913539968, "loss": 0.1865, "num_input_tokens_seen": 15667264, "step": 7260 }, { "epoch": 1.1851549755301796, "grad_norm": 0.11689115315675735, "learning_rate": 0.0005924959216965743, "loss": 0.108, "num_input_tokens_seen": 15679648, "step": 7265 }, { "epoch": 1.1859706362153344, "grad_norm": 0.0657183974981308, "learning_rate": 0.0005929037520391517, "loss": 0.1638, "num_input_tokens_seen": 15690656, "step": 7270 }, { "epoch": 1.1867862969004894, "grad_norm": 0.3638814091682434, "learning_rate": 0.0005933115823817292, "loss": 0.2662, "num_input_tokens_seen": 15701600, "step": 7275 }, { "epoch": 1.1876019575856445, "grad_norm": 0.2791052758693695, "learning_rate": 0.0005937194127243067, "loss": 0.1778, "num_input_tokens_seen": 15713312, "step": 7280 }, { "epoch": 1.1884176182707993, "grad_norm": 0.05845532566308975, "learning_rate": 0.0005941272430668842, "loss": 0.08, "num_input_tokens_seen": 15723424, "step": 7285 }, { "epoch": 1.1892332789559543, "grad_norm": 0.28739508986473083, "learning_rate": 0.0005945350734094617, "loss": 0.0961, "num_input_tokens_seen": 15734848, "step": 7290 }, { "epoch": 1.1900489396411094, "grad_norm": 0.05517510324716568, "learning_rate": 0.0005949429037520392, "loss": 0.0469, "num_input_tokens_seen": 15745216, "step": 7295 }, { "epoch": 1.1908646003262642, "grad_norm": 0.02466857247054577, "learning_rate": 0.0005953507340946166, "loss": 0.0641, "num_input_tokens_seen": 15756320, "step": 7300 }, { "epoch": 1.1916802610114192, "grad_norm": 0.2366456538438797, "learning_rate": 0.0005957585644371941, "loss": 0.2608, "num_input_tokens_seen": 15767584, "step": 7305 }, { "epoch": 1.1924959216965743, "grad_norm": 0.31980112195014954, "learning_rate": 0.0005961663947797717, "loss": 0.1194, "num_input_tokens_seen": 15778592, "step": 7310 }, { "epoch": 1.1933115823817293, "grad_norm": 0.13604313135147095, "learning_rate": 0.0005965742251223491, "loss": 0.3305, "num_input_tokens_seen": 15789952, "step": 7315 }, { "epoch": 1.1941272430668841, "grad_norm": 0.28785666823387146, "learning_rate": 0.0005969820554649265, "loss": 0.1873, "num_input_tokens_seen": 15801760, "step": 7320 }, { "epoch": 1.1949429037520392, "grad_norm": 0.33594733476638794, "learning_rate": 0.0005973898858075041, "loss": 0.1818, "num_input_tokens_seen": 15812800, "step": 7325 }, { "epoch": 1.1957585644371942, "grad_norm": 0.1183103695511818, "learning_rate": 0.0005977977161500816, "loss": 0.1725, "num_input_tokens_seen": 15823584, "step": 7330 }, { "epoch": 1.196574225122349, "grad_norm": 0.0766448900103569, "learning_rate": 0.000598205546492659, "loss": 0.1112, "num_input_tokens_seen": 15835200, "step": 7335 }, { "epoch": 1.197389885807504, "grad_norm": 0.28974631428718567, "learning_rate": 0.0005986133768352365, "loss": 0.1505, "num_input_tokens_seen": 15847200, "step": 7340 }, { "epoch": 1.198205546492659, "grad_norm": 0.16131766140460968, "learning_rate": 0.000599021207177814, "loss": 0.1877, "num_input_tokens_seen": 15857408, "step": 7345 }, { "epoch": 1.1990212071778141, "grad_norm": 0.229786217212677, "learning_rate": 0.0005994290375203916, "loss": 0.0782, "num_input_tokens_seen": 15868896, "step": 7350 }, { "epoch": 1.199836867862969, "grad_norm": 0.061700619757175446, "learning_rate": 0.000599836867862969, "loss": 0.1983, "num_input_tokens_seen": 15879296, "step": 7355 }, { "epoch": 1.200652528548124, "grad_norm": 0.10842744261026382, "learning_rate": 0.0006002446982055465, "loss": 0.1543, "num_input_tokens_seen": 15889312, "step": 7360 }, { "epoch": 1.201468189233279, "grad_norm": 0.041350334882736206, "learning_rate": 0.000600652528548124, "loss": 0.1513, "num_input_tokens_seen": 15899648, "step": 7365 }, { "epoch": 1.2022838499184338, "grad_norm": 0.2646767199039459, "learning_rate": 0.0006010603588907014, "loss": 0.3678, "num_input_tokens_seen": 15912192, "step": 7370 }, { "epoch": 1.2030995106035889, "grad_norm": 0.025090012699365616, "learning_rate": 0.000601468189233279, "loss": 0.0829, "num_input_tokens_seen": 15922912, "step": 7375 }, { "epoch": 1.203915171288744, "grad_norm": 0.34381338953971863, "learning_rate": 0.0006018760195758564, "loss": 0.139, "num_input_tokens_seen": 15933696, "step": 7380 }, { "epoch": 1.2047308319738987, "grad_norm": 0.10733022540807724, "learning_rate": 0.000602283849918434, "loss": 0.1384, "num_input_tokens_seen": 15945184, "step": 7385 }, { "epoch": 1.2055464926590538, "grad_norm": 0.1600278615951538, "learning_rate": 0.0006026916802610114, "loss": 0.0644, "num_input_tokens_seen": 15956576, "step": 7390 }, { "epoch": 1.2063621533442088, "grad_norm": 0.24554097652435303, "learning_rate": 0.0006030995106035889, "loss": 0.1247, "num_input_tokens_seen": 15965632, "step": 7395 }, { "epoch": 1.2071778140293639, "grad_norm": 0.11978096514940262, "learning_rate": 0.0006035073409461665, "loss": 0.0664, "num_input_tokens_seen": 15975424, "step": 7400 }, { "epoch": 1.2079934747145187, "grad_norm": 0.020593300461769104, "learning_rate": 0.0006039151712887438, "loss": 0.0749, "num_input_tokens_seen": 15986176, "step": 7405 }, { "epoch": 1.2088091353996737, "grad_norm": 0.5599163770675659, "learning_rate": 0.0006043230016313214, "loss": 0.2294, "num_input_tokens_seen": 15996384, "step": 7410 }, { "epoch": 1.2096247960848288, "grad_norm": 0.23017872869968414, "learning_rate": 0.0006047308319738989, "loss": 0.161, "num_input_tokens_seen": 16006432, "step": 7415 }, { "epoch": 1.2104404567699838, "grad_norm": 0.25191521644592285, "learning_rate": 0.0006051386623164764, "loss": 0.149, "num_input_tokens_seen": 16017856, "step": 7420 }, { "epoch": 1.2112561174551386, "grad_norm": 0.07550939172506332, "learning_rate": 0.0006055464926590538, "loss": 0.0837, "num_input_tokens_seen": 16029248, "step": 7425 }, { "epoch": 1.2120717781402937, "grad_norm": 0.34057366847991943, "learning_rate": 0.0006059543230016313, "loss": 0.1281, "num_input_tokens_seen": 16039968, "step": 7430 }, { "epoch": 1.2128874388254487, "grad_norm": 0.4355306923389435, "learning_rate": 0.0006063621533442089, "loss": 0.1738, "num_input_tokens_seen": 16051840, "step": 7435 }, { "epoch": 1.2137030995106035, "grad_norm": 0.025203794240951538, "learning_rate": 0.0006067699836867863, "loss": 0.0799, "num_input_tokens_seen": 16063456, "step": 7440 }, { "epoch": 1.2145187601957586, "grad_norm": 0.09719034284353256, "learning_rate": 0.0006071778140293637, "loss": 0.1426, "num_input_tokens_seen": 16074304, "step": 7445 }, { "epoch": 1.2153344208809136, "grad_norm": 0.12060704082250595, "learning_rate": 0.0006075856443719413, "loss": 0.1688, "num_input_tokens_seen": 16085536, "step": 7450 }, { "epoch": 1.2161500815660684, "grad_norm": 0.058434948325157166, "learning_rate": 0.0006079934747145188, "loss": 0.0655, "num_input_tokens_seen": 16095488, "step": 7455 }, { "epoch": 1.2169657422512234, "grad_norm": 0.024283265694975853, "learning_rate": 0.0006084013050570962, "loss": 0.1991, "num_input_tokens_seen": 16105024, "step": 7460 }, { "epoch": 1.2177814029363785, "grad_norm": 0.18784849345684052, "learning_rate": 0.0006088091353996738, "loss": 0.0857, "num_input_tokens_seen": 16114816, "step": 7465 }, { "epoch": 1.2185970636215335, "grad_norm": 0.06543607264757156, "learning_rate": 0.0006092169657422512, "loss": 0.261, "num_input_tokens_seen": 16125632, "step": 7470 }, { "epoch": 1.2194127243066883, "grad_norm": 0.14007136225700378, "learning_rate": 0.0006096247960848287, "loss": 0.1246, "num_input_tokens_seen": 16136736, "step": 7475 }, { "epoch": 1.2202283849918434, "grad_norm": 0.33850812911987305, "learning_rate": 0.0006100326264274062, "loss": 0.0903, "num_input_tokens_seen": 16147904, "step": 7480 }, { "epoch": 1.2210440456769984, "grad_norm": 0.013136421330273151, "learning_rate": 0.0006104404567699837, "loss": 0.0679, "num_input_tokens_seen": 16158112, "step": 7485 }, { "epoch": 1.2218597063621535, "grad_norm": 0.160769984126091, "learning_rate": 0.0006108482871125613, "loss": 0.1271, "num_input_tokens_seen": 16168192, "step": 7490 }, { "epoch": 1.2226753670473083, "grad_norm": 0.24859391152858734, "learning_rate": 0.0006112561174551386, "loss": 0.1329, "num_input_tokens_seen": 16178624, "step": 7495 }, { "epoch": 1.2234910277324633, "grad_norm": 0.07763948291540146, "learning_rate": 0.0006116639477977162, "loss": 0.1081, "num_input_tokens_seen": 16189632, "step": 7500 }, { "epoch": 1.2243066884176184, "grad_norm": 0.035184815526008606, "learning_rate": 0.0006120717781402937, "loss": 0.0621, "num_input_tokens_seen": 16199904, "step": 7505 }, { "epoch": 1.2251223491027732, "grad_norm": 0.03430217504501343, "learning_rate": 0.000612479608482871, "loss": 0.1427, "num_input_tokens_seen": 16210016, "step": 7510 }, { "epoch": 1.2259380097879282, "grad_norm": 0.10288599133491516, "learning_rate": 0.0006128874388254486, "loss": 0.1893, "num_input_tokens_seen": 16220896, "step": 7515 }, { "epoch": 1.2267536704730833, "grad_norm": 0.48503223061561584, "learning_rate": 0.0006132952691680261, "loss": 0.2211, "num_input_tokens_seen": 16231616, "step": 7520 }, { "epoch": 1.227569331158238, "grad_norm": 0.1194513738155365, "learning_rate": 0.0006137030995106036, "loss": 0.0981, "num_input_tokens_seen": 16243040, "step": 7525 }, { "epoch": 1.2283849918433931, "grad_norm": 0.43219807744026184, "learning_rate": 0.0006141109298531811, "loss": 0.209, "num_input_tokens_seen": 16255200, "step": 7530 }, { "epoch": 1.2292006525285482, "grad_norm": 0.10171283036470413, "learning_rate": 0.0006145187601957585, "loss": 0.1783, "num_input_tokens_seen": 16264064, "step": 7535 }, { "epoch": 1.2300163132137032, "grad_norm": 0.030886897817254066, "learning_rate": 0.0006149265905383361, "loss": 0.1554, "num_input_tokens_seen": 16273600, "step": 7540 }, { "epoch": 1.230831973898858, "grad_norm": 0.25258806347846985, "learning_rate": 0.0006153344208809135, "loss": 0.2125, "num_input_tokens_seen": 16284000, "step": 7545 }, { "epoch": 1.231647634584013, "grad_norm": 0.2240561991930008, "learning_rate": 0.0006157422512234911, "loss": 0.1463, "num_input_tokens_seen": 16294528, "step": 7550 }, { "epoch": 1.232463295269168, "grad_norm": 0.11328648775815964, "learning_rate": 0.0006161500815660686, "loss": 0.1073, "num_input_tokens_seen": 16305856, "step": 7555 }, { "epoch": 1.233278955954323, "grad_norm": 0.02403135783970356, "learning_rate": 0.0006165579119086459, "loss": 0.0834, "num_input_tokens_seen": 16316256, "step": 7560 }, { "epoch": 1.234094616639478, "grad_norm": 0.07137010991573334, "learning_rate": 0.0006169657422512235, "loss": 0.1792, "num_input_tokens_seen": 16327808, "step": 7565 }, { "epoch": 1.234910277324633, "grad_norm": 0.11252482980489731, "learning_rate": 0.000617373572593801, "loss": 0.1482, "num_input_tokens_seen": 16338848, "step": 7570 }, { "epoch": 1.235725938009788, "grad_norm": 0.05320006608963013, "learning_rate": 0.0006177814029363786, "loss": 0.0715, "num_input_tokens_seen": 16349408, "step": 7575 }, { "epoch": 1.2365415986949428, "grad_norm": 0.13407686352729797, "learning_rate": 0.0006181892332789559, "loss": 0.173, "num_input_tokens_seen": 16359552, "step": 7580 }, { "epoch": 1.2373572593800979, "grad_norm": 0.03154631704092026, "learning_rate": 0.0006185970636215334, "loss": 0.1521, "num_input_tokens_seen": 16370496, "step": 7585 }, { "epoch": 1.238172920065253, "grad_norm": 0.035099178552627563, "learning_rate": 0.000619004893964111, "loss": 0.0434, "num_input_tokens_seen": 16382880, "step": 7590 }, { "epoch": 1.2389885807504077, "grad_norm": 0.06128397583961487, "learning_rate": 0.0006194127243066884, "loss": 0.1764, "num_input_tokens_seen": 16394272, "step": 7595 }, { "epoch": 1.2398042414355628, "grad_norm": 0.13042569160461426, "learning_rate": 0.000619820554649266, "loss": 0.0805, "num_input_tokens_seen": 16405376, "step": 7600 }, { "epoch": 1.2406199021207178, "grad_norm": 0.26769134402275085, "learning_rate": 0.0006202283849918434, "loss": 0.3112, "num_input_tokens_seen": 16415648, "step": 7605 }, { "epoch": 1.2414355628058726, "grad_norm": 0.08796115219593048, "learning_rate": 0.0006206362153344209, "loss": 0.0568, "num_input_tokens_seen": 16424416, "step": 7610 }, { "epoch": 1.2422512234910277, "grad_norm": 0.24431444704532623, "learning_rate": 0.0006210440456769984, "loss": 0.2087, "num_input_tokens_seen": 16434496, "step": 7615 }, { "epoch": 1.2430668841761827, "grad_norm": 0.2015565186738968, "learning_rate": 0.0006214518760195759, "loss": 0.1354, "num_input_tokens_seen": 16445376, "step": 7620 }, { "epoch": 1.2438825448613378, "grad_norm": 0.08070756494998932, "learning_rate": 0.0006218597063621533, "loss": 0.0652, "num_input_tokens_seen": 16457024, "step": 7625 }, { "epoch": 1.2446982055464926, "grad_norm": 0.2412647008895874, "learning_rate": 0.0006222675367047308, "loss": 0.064, "num_input_tokens_seen": 16468032, "step": 7630 }, { "epoch": 1.2455138662316476, "grad_norm": 0.06323897838592529, "learning_rate": 0.0006226753670473083, "loss": 0.1068, "num_input_tokens_seen": 16478432, "step": 7635 }, { "epoch": 1.2463295269168027, "grad_norm": 0.10753598809242249, "learning_rate": 0.0006230831973898859, "loss": 0.091, "num_input_tokens_seen": 16488640, "step": 7640 }, { "epoch": 1.2471451876019577, "grad_norm": 0.12017099559307098, "learning_rate": 0.0006234910277324634, "loss": 0.0816, "num_input_tokens_seen": 16499648, "step": 7645 }, { "epoch": 1.2479608482871125, "grad_norm": 0.07643944770097733, "learning_rate": 0.0006238988580750407, "loss": 0.0612, "num_input_tokens_seen": 16509472, "step": 7650 }, { "epoch": 1.2487765089722676, "grad_norm": 0.22984029352664948, "learning_rate": 0.0006243066884176183, "loss": 0.0638, "num_input_tokens_seen": 16521376, "step": 7655 }, { "epoch": 1.2495921696574226, "grad_norm": 0.22520215809345245, "learning_rate": 0.0006247145187601958, "loss": 0.3796, "num_input_tokens_seen": 16531904, "step": 7660 }, { "epoch": 1.2504078303425774, "grad_norm": 0.3779231905937195, "learning_rate": 0.0006251223491027733, "loss": 0.1013, "num_input_tokens_seen": 16542304, "step": 7665 }, { "epoch": 1.2512234910277324, "grad_norm": 0.11960883438587189, "learning_rate": 0.0006255301794453507, "loss": 0.1395, "num_input_tokens_seen": 16553024, "step": 7670 }, { "epoch": 1.2520391517128875, "grad_norm": 0.10543225705623627, "learning_rate": 0.0006259380097879282, "loss": 0.0996, "num_input_tokens_seen": 16564416, "step": 7675 }, { "epoch": 1.2528548123980423, "grad_norm": 0.16587293148040771, "learning_rate": 0.0006263458401305058, "loss": 0.149, "num_input_tokens_seen": 16575296, "step": 7680 }, { "epoch": 1.2536704730831973, "grad_norm": 0.11914610117673874, "learning_rate": 0.0006267536704730832, "loss": 0.039, "num_input_tokens_seen": 16586656, "step": 7685 }, { "epoch": 1.2544861337683524, "grad_norm": 0.04753011465072632, "learning_rate": 0.0006271615008156607, "loss": 0.141, "num_input_tokens_seen": 16598400, "step": 7690 }, { "epoch": 1.2553017944535072, "grad_norm": 0.22128072381019592, "learning_rate": 0.0006275693311582382, "loss": 0.2197, "num_input_tokens_seen": 16608928, "step": 7695 }, { "epoch": 1.2561174551386622, "grad_norm": 0.0887017697095871, "learning_rate": 0.0006279771615008156, "loss": 0.0612, "num_input_tokens_seen": 16618784, "step": 7700 }, { "epoch": 1.2569331158238173, "grad_norm": 0.43083927035331726, "learning_rate": 0.0006283849918433932, "loss": 0.1195, "num_input_tokens_seen": 16629696, "step": 7705 }, { "epoch": 1.2577487765089723, "grad_norm": 0.18577629327774048, "learning_rate": 0.0006287928221859707, "loss": 0.1363, "num_input_tokens_seen": 16640736, "step": 7710 }, { "epoch": 1.2585644371941274, "grad_norm": 0.06808710098266602, "learning_rate": 0.0006292006525285482, "loss": 0.2029, "num_input_tokens_seen": 16651232, "step": 7715 }, { "epoch": 1.2593800978792822, "grad_norm": 0.20633861422538757, "learning_rate": 0.0006296084828711256, "loss": 0.1681, "num_input_tokens_seen": 16660544, "step": 7720 }, { "epoch": 1.2601957585644372, "grad_norm": 0.1369388848543167, "learning_rate": 0.0006300163132137031, "loss": 0.14, "num_input_tokens_seen": 16670112, "step": 7725 }, { "epoch": 1.2610114192495923, "grad_norm": 0.19147621095180511, "learning_rate": 0.0006304241435562807, "loss": 0.15, "num_input_tokens_seen": 16680032, "step": 7730 }, { "epoch": 1.261827079934747, "grad_norm": 0.06094831973314285, "learning_rate": 0.000630831973898858, "loss": 0.1574, "num_input_tokens_seen": 16690496, "step": 7735 }, { "epoch": 1.2626427406199021, "grad_norm": 0.14175234735012054, "learning_rate": 0.0006312398042414356, "loss": 0.2441, "num_input_tokens_seen": 16701056, "step": 7740 }, { "epoch": 1.2634584013050572, "grad_norm": 0.050266288220882416, "learning_rate": 0.0006316476345840131, "loss": 0.1392, "num_input_tokens_seen": 16712288, "step": 7745 }, { "epoch": 1.264274061990212, "grad_norm": 0.08054324239492416, "learning_rate": 0.0006320554649265906, "loss": 0.1089, "num_input_tokens_seen": 16722528, "step": 7750 }, { "epoch": 1.265089722675367, "grad_norm": 0.3782789409160614, "learning_rate": 0.000632463295269168, "loss": 0.111, "num_input_tokens_seen": 16733184, "step": 7755 }, { "epoch": 1.265905383360522, "grad_norm": 0.07526574283838272, "learning_rate": 0.0006328711256117455, "loss": 0.0651, "num_input_tokens_seen": 16743712, "step": 7760 }, { "epoch": 1.2667210440456769, "grad_norm": 0.04037458077073097, "learning_rate": 0.000633278955954323, "loss": 0.1042, "num_input_tokens_seen": 16753536, "step": 7765 }, { "epoch": 1.267536704730832, "grad_norm": 0.5374637842178345, "learning_rate": 0.0006336867862969005, "loss": 0.1638, "num_input_tokens_seen": 16763904, "step": 7770 }, { "epoch": 1.268352365415987, "grad_norm": 0.2916713356971741, "learning_rate": 0.000634094616639478, "loss": 0.1654, "num_input_tokens_seen": 16774944, "step": 7775 }, { "epoch": 1.269168026101142, "grad_norm": 0.5164362192153931, "learning_rate": 0.0006345024469820555, "loss": 0.2068, "num_input_tokens_seen": 16786752, "step": 7780 }, { "epoch": 1.269983686786297, "grad_norm": 0.09310194104909897, "learning_rate": 0.0006349102773246329, "loss": 0.1144, "num_input_tokens_seen": 16797120, "step": 7785 }, { "epoch": 1.2707993474714518, "grad_norm": 0.2878625988960266, "learning_rate": 0.0006353181076672104, "loss": 0.0434, "num_input_tokens_seen": 16808000, "step": 7790 }, { "epoch": 1.2716150081566069, "grad_norm": 0.4719869792461395, "learning_rate": 0.000635725938009788, "loss": 0.1103, "num_input_tokens_seen": 16818016, "step": 7795 }, { "epoch": 1.272430668841762, "grad_norm": 0.04505976289510727, "learning_rate": 0.0006361337683523654, "loss": 0.077, "num_input_tokens_seen": 16829056, "step": 7800 }, { "epoch": 1.2732463295269167, "grad_norm": 0.024388156831264496, "learning_rate": 0.0006365415986949429, "loss": 0.1077, "num_input_tokens_seen": 16840672, "step": 7805 }, { "epoch": 1.2740619902120718, "grad_norm": 0.4880041182041168, "learning_rate": 0.0006369494290375204, "loss": 0.1448, "num_input_tokens_seen": 16850176, "step": 7810 }, { "epoch": 1.2748776508972268, "grad_norm": 0.1726599782705307, "learning_rate": 0.0006373572593800979, "loss": 0.0336, "num_input_tokens_seen": 16861600, "step": 7815 }, { "epoch": 1.2756933115823816, "grad_norm": 0.16897621750831604, "learning_rate": 0.0006377650897226754, "loss": 0.1536, "num_input_tokens_seen": 16871968, "step": 7820 }, { "epoch": 1.2765089722675367, "grad_norm": 0.07816699147224426, "learning_rate": 0.0006381729200652528, "loss": 0.0236, "num_input_tokens_seen": 16882496, "step": 7825 }, { "epoch": 1.2773246329526917, "grad_norm": 0.025296438485383987, "learning_rate": 0.0006385807504078304, "loss": 0.058, "num_input_tokens_seen": 16892672, "step": 7830 }, { "epoch": 1.2781402936378465, "grad_norm": 0.4938370883464813, "learning_rate": 0.0006389885807504079, "loss": 0.2244, "num_input_tokens_seen": 16904320, "step": 7835 }, { "epoch": 1.2789559543230016, "grad_norm": 0.5541727542877197, "learning_rate": 0.0006393964110929853, "loss": 0.1312, "num_input_tokens_seen": 16914144, "step": 7840 }, { "epoch": 1.2797716150081566, "grad_norm": 0.15682333707809448, "learning_rate": 0.0006398042414355628, "loss": 0.1559, "num_input_tokens_seen": 16925376, "step": 7845 }, { "epoch": 1.2805872756933117, "grad_norm": 0.09988781809806824, "learning_rate": 0.0006402120717781403, "loss": 0.0982, "num_input_tokens_seen": 16937056, "step": 7850 }, { "epoch": 1.2814029363784667, "grad_norm": 0.5827205181121826, "learning_rate": 0.0006406199021207178, "loss": 0.1499, "num_input_tokens_seen": 16947328, "step": 7855 }, { "epoch": 1.2822185970636215, "grad_norm": 0.22042497992515564, "learning_rate": 0.0006410277324632953, "loss": 0.124, "num_input_tokens_seen": 16957248, "step": 7860 }, { "epoch": 1.2830342577487766, "grad_norm": 0.14786145091056824, "learning_rate": 0.0006414355628058727, "loss": 0.0466, "num_input_tokens_seen": 16967136, "step": 7865 }, { "epoch": 1.2838499184339316, "grad_norm": 0.025133898481726646, "learning_rate": 0.0006418433931484503, "loss": 0.019, "num_input_tokens_seen": 16977632, "step": 7870 }, { "epoch": 1.2846655791190864, "grad_norm": 0.09242935478687286, "learning_rate": 0.0006422512234910277, "loss": 0.0517, "num_input_tokens_seen": 16988160, "step": 7875 }, { "epoch": 1.2854812398042414, "grad_norm": 0.390905499458313, "learning_rate": 0.0006426590538336053, "loss": 0.2289, "num_input_tokens_seen": 16999328, "step": 7880 }, { "epoch": 1.2862969004893965, "grad_norm": 0.06836092472076416, "learning_rate": 0.0006430668841761828, "loss": 0.035, "num_input_tokens_seen": 17011168, "step": 7885 }, { "epoch": 1.2871125611745513, "grad_norm": 0.049555547535419464, "learning_rate": 0.0006434747145187601, "loss": 0.1018, "num_input_tokens_seen": 17022496, "step": 7890 }, { "epoch": 1.2879282218597063, "grad_norm": 0.03462597727775574, "learning_rate": 0.0006438825448613377, "loss": 0.1101, "num_input_tokens_seen": 17031520, "step": 7895 }, { "epoch": 1.2887438825448614, "grad_norm": 0.0627148374915123, "learning_rate": 0.0006442903752039152, "loss": 0.1473, "num_input_tokens_seen": 17043488, "step": 7900 }, { "epoch": 1.2895595432300162, "grad_norm": 0.09450127929449081, "learning_rate": 0.0006446982055464927, "loss": 0.1608, "num_input_tokens_seen": 17052832, "step": 7905 }, { "epoch": 1.2903752039151712, "grad_norm": 0.14406728744506836, "learning_rate": 0.0006451060358890701, "loss": 0.1011, "num_input_tokens_seen": 17063232, "step": 7910 }, { "epoch": 1.2911908646003263, "grad_norm": 0.27636778354644775, "learning_rate": 0.0006455138662316476, "loss": 0.1524, "num_input_tokens_seen": 17073152, "step": 7915 }, { "epoch": 1.2920065252854813, "grad_norm": 0.11189326643943787, "learning_rate": 0.0006459216965742252, "loss": 0.2106, "num_input_tokens_seen": 17084192, "step": 7920 }, { "epoch": 1.2928221859706361, "grad_norm": 0.19050921499729156, "learning_rate": 0.0006463295269168026, "loss": 0.1131, "num_input_tokens_seen": 17095744, "step": 7925 }, { "epoch": 1.2936378466557912, "grad_norm": 0.19964060187339783, "learning_rate": 0.00064673735725938, "loss": 0.1004, "num_input_tokens_seen": 17106048, "step": 7930 }, { "epoch": 1.2944535073409462, "grad_norm": 0.029773879796266556, "learning_rate": 0.0006471451876019576, "loss": 0.0451, "num_input_tokens_seen": 17117600, "step": 7935 }, { "epoch": 1.2952691680261013, "grad_norm": 0.18368203938007355, "learning_rate": 0.0006475530179445351, "loss": 0.131, "num_input_tokens_seen": 17128288, "step": 7940 }, { "epoch": 1.296084828711256, "grad_norm": 0.36438411474227905, "learning_rate": 0.0006479608482871126, "loss": 0.1585, "num_input_tokens_seen": 17139264, "step": 7945 }, { "epoch": 1.2969004893964111, "grad_norm": 0.5437729954719543, "learning_rate": 0.0006483686786296901, "loss": 0.1037, "num_input_tokens_seen": 17149792, "step": 7950 }, { "epoch": 1.2977161500815662, "grad_norm": 0.13218331336975098, "learning_rate": 0.0006487765089722675, "loss": 0.041, "num_input_tokens_seen": 17160736, "step": 7955 }, { "epoch": 1.298531810766721, "grad_norm": 0.464276522397995, "learning_rate": 0.000649184339314845, "loss": 0.1942, "num_input_tokens_seen": 17171520, "step": 7960 }, { "epoch": 1.299347471451876, "grad_norm": 0.24800249934196472, "learning_rate": 0.0006495921696574225, "loss": 0.2342, "num_input_tokens_seen": 17182208, "step": 7965 }, { "epoch": 1.300163132137031, "grad_norm": 0.15870343148708344, "learning_rate": 0.0006500000000000001, "loss": 0.052, "num_input_tokens_seen": 17193152, "step": 7970 }, { "epoch": 1.3009787928221859, "grad_norm": 0.11960924416780472, "learning_rate": 0.0006504078303425776, "loss": 0.04, "num_input_tokens_seen": 17203968, "step": 7975 }, { "epoch": 1.301794453507341, "grad_norm": 0.17720720171928406, "learning_rate": 0.0006508156606851549, "loss": 0.053, "num_input_tokens_seen": 17215264, "step": 7980 }, { "epoch": 1.302610114192496, "grad_norm": 0.14831577241420746, "learning_rate": 0.0006512234910277325, "loss": 0.0419, "num_input_tokens_seen": 17226368, "step": 7985 }, { "epoch": 1.3034257748776508, "grad_norm": 0.5955997109413147, "learning_rate": 0.00065163132137031, "loss": 0.1687, "num_input_tokens_seen": 17236384, "step": 7990 }, { "epoch": 1.3042414355628058, "grad_norm": 0.028910916298627853, "learning_rate": 0.0006520391517128875, "loss": 0.1409, "num_input_tokens_seen": 17247648, "step": 7995 }, { "epoch": 1.3050570962479608, "grad_norm": 0.4046897888183594, "learning_rate": 0.0006524469820554649, "loss": 0.2976, "num_input_tokens_seen": 17258912, "step": 8000 }, { "epoch": 1.3058727569331159, "grad_norm": 0.4455738961696625, "learning_rate": 0.0006528548123980424, "loss": 0.0464, "num_input_tokens_seen": 17269472, "step": 8005 }, { "epoch": 1.306688417618271, "grad_norm": 0.09905628859996796, "learning_rate": 0.0006532626427406199, "loss": 0.1039, "num_input_tokens_seen": 17279328, "step": 8010 }, { "epoch": 1.3075040783034257, "grad_norm": 0.17130684852600098, "learning_rate": 0.0006536704730831974, "loss": 0.176, "num_input_tokens_seen": 17290208, "step": 8015 }, { "epoch": 1.3083197389885808, "grad_norm": 0.06475099921226501, "learning_rate": 0.000654078303425775, "loss": 0.0664, "num_input_tokens_seen": 17299680, "step": 8020 }, { "epoch": 1.3091353996737358, "grad_norm": 0.20851196348667145, "learning_rate": 0.0006544861337683524, "loss": 0.1362, "num_input_tokens_seen": 17310592, "step": 8025 }, { "epoch": 1.3099510603588906, "grad_norm": 0.0579352043569088, "learning_rate": 0.0006548939641109298, "loss": 0.1495, "num_input_tokens_seen": 17321920, "step": 8030 }, { "epoch": 1.3107667210440457, "grad_norm": 0.35480427742004395, "learning_rate": 0.0006553017944535074, "loss": 0.1244, "num_input_tokens_seen": 17332928, "step": 8035 }, { "epoch": 1.3115823817292007, "grad_norm": 0.015755582600831985, "learning_rate": 0.0006557096247960849, "loss": 0.1367, "num_input_tokens_seen": 17342464, "step": 8040 }, { "epoch": 1.3123980424143555, "grad_norm": 0.031894516199827194, "learning_rate": 0.0006561174551386622, "loss": 0.1105, "num_input_tokens_seen": 17353440, "step": 8045 }, { "epoch": 1.3132137030995106, "grad_norm": 0.026898646727204323, "learning_rate": 0.0006565252854812398, "loss": 0.1453, "num_input_tokens_seen": 17364224, "step": 8050 }, { "epoch": 1.3140293637846656, "grad_norm": 0.3691110610961914, "learning_rate": 0.0006569331158238173, "loss": 0.1111, "num_input_tokens_seen": 17375360, "step": 8055 }, { "epoch": 1.3148450244698204, "grad_norm": 0.5418222546577454, "learning_rate": 0.0006573409461663949, "loss": 0.1226, "num_input_tokens_seen": 17386528, "step": 8060 }, { "epoch": 1.3156606851549755, "grad_norm": 0.2650986909866333, "learning_rate": 0.0006577487765089722, "loss": 0.0979, "num_input_tokens_seen": 17396608, "step": 8065 }, { "epoch": 1.3164763458401305, "grad_norm": 0.5021603107452393, "learning_rate": 0.0006581566068515497, "loss": 0.1621, "num_input_tokens_seen": 17407520, "step": 8070 }, { "epoch": 1.3172920065252856, "grad_norm": 0.11378972977399826, "learning_rate": 0.0006585644371941273, "loss": 0.1594, "num_input_tokens_seen": 17418912, "step": 8075 }, { "epoch": 1.3181076672104406, "grad_norm": 0.3335539400577545, "learning_rate": 0.0006589722675367047, "loss": 0.2771, "num_input_tokens_seen": 17429696, "step": 8080 }, { "epoch": 1.3189233278955954, "grad_norm": 0.04663245007395744, "learning_rate": 0.0006593800978792823, "loss": 0.072, "num_input_tokens_seen": 17440736, "step": 8085 }, { "epoch": 1.3197389885807504, "grad_norm": 0.5012274384498596, "learning_rate": 0.0006597879282218597, "loss": 0.1518, "num_input_tokens_seen": 17450624, "step": 8090 }, { "epoch": 1.3205546492659055, "grad_norm": 0.027161110192537308, "learning_rate": 0.0006601957585644372, "loss": 0.045, "num_input_tokens_seen": 17460704, "step": 8095 }, { "epoch": 1.3213703099510603, "grad_norm": 0.14079618453979492, "learning_rate": 0.0006606035889070147, "loss": 0.1353, "num_input_tokens_seen": 17469344, "step": 8100 }, { "epoch": 1.3221859706362153, "grad_norm": 0.07207119464874268, "learning_rate": 0.0006610114192495922, "loss": 0.1585, "num_input_tokens_seen": 17479360, "step": 8105 }, { "epoch": 1.3230016313213704, "grad_norm": 0.16957060992717743, "learning_rate": 0.0006614192495921697, "loss": 0.1118, "num_input_tokens_seen": 17490368, "step": 8110 }, { "epoch": 1.3238172920065252, "grad_norm": 0.14331547915935516, "learning_rate": 0.0006618270799347471, "loss": 0.103, "num_input_tokens_seen": 17501216, "step": 8115 }, { "epoch": 1.3246329526916802, "grad_norm": 0.42282775044441223, "learning_rate": 0.0006622349102773246, "loss": 0.1276, "num_input_tokens_seen": 17512608, "step": 8120 }, { "epoch": 1.3254486133768353, "grad_norm": 0.296787828207016, "learning_rate": 0.0006626427406199022, "loss": 0.0514, "num_input_tokens_seen": 17523296, "step": 8125 }, { "epoch": 1.32626427406199, "grad_norm": 0.5302480459213257, "learning_rate": 0.0006630505709624797, "loss": 0.0805, "num_input_tokens_seen": 17535584, "step": 8130 }, { "epoch": 1.3270799347471451, "grad_norm": 0.16484972834587097, "learning_rate": 0.0006634584013050571, "loss": 0.1524, "num_input_tokens_seen": 17545760, "step": 8135 }, { "epoch": 1.3278955954323002, "grad_norm": 1.141710638999939, "learning_rate": 0.0006638662316476346, "loss": 0.0932, "num_input_tokens_seen": 17557824, "step": 8140 }, { "epoch": 1.3287112561174552, "grad_norm": 0.07456161081790924, "learning_rate": 0.0006642740619902121, "loss": 0.0703, "num_input_tokens_seen": 17568544, "step": 8145 }, { "epoch": 1.32952691680261, "grad_norm": 0.07217202335596085, "learning_rate": 0.0006646818923327896, "loss": 0.0692, "num_input_tokens_seen": 17578752, "step": 8150 }, { "epoch": 1.330342577487765, "grad_norm": 0.11335808038711548, "learning_rate": 0.000665089722675367, "loss": 0.0672, "num_input_tokens_seen": 17590016, "step": 8155 }, { "epoch": 1.3311582381729201, "grad_norm": 0.01930646039545536, "learning_rate": 0.0006654975530179446, "loss": 0.1185, "num_input_tokens_seen": 17601024, "step": 8160 }, { "epoch": 1.3319738988580752, "grad_norm": 0.7293866872787476, "learning_rate": 0.0006659053833605221, "loss": 0.1618, "num_input_tokens_seen": 17612704, "step": 8165 }, { "epoch": 1.33278955954323, "grad_norm": 0.1737803965806961, "learning_rate": 0.0006663132137030995, "loss": 0.1404, "num_input_tokens_seen": 17624352, "step": 8170 }, { "epoch": 1.333605220228385, "grad_norm": 0.2746480703353882, "learning_rate": 0.000666721044045677, "loss": 0.0848, "num_input_tokens_seen": 17635648, "step": 8175 }, { "epoch": 1.33442088091354, "grad_norm": 0.2406052052974701, "learning_rate": 0.0006671288743882545, "loss": 0.157, "num_input_tokens_seen": 17646816, "step": 8180 }, { "epoch": 1.3352365415986949, "grad_norm": 0.2070399522781372, "learning_rate": 0.0006675367047308319, "loss": 0.1039, "num_input_tokens_seen": 17658560, "step": 8185 }, { "epoch": 1.33605220228385, "grad_norm": 0.07368674874305725, "learning_rate": 0.0006679445350734095, "loss": 0.1204, "num_input_tokens_seen": 17668704, "step": 8190 }, { "epoch": 1.336867862969005, "grad_norm": 0.18350110948085785, "learning_rate": 0.000668352365415987, "loss": 0.1511, "num_input_tokens_seen": 17680352, "step": 8195 }, { "epoch": 1.3376835236541598, "grad_norm": 0.03614252805709839, "learning_rate": 0.0006687601957585645, "loss": 0.1651, "num_input_tokens_seen": 17692480, "step": 8200 }, { "epoch": 1.3384991843393148, "grad_norm": 0.41711729764938354, "learning_rate": 0.0006691680261011419, "loss": 0.0891, "num_input_tokens_seen": 17703616, "step": 8205 }, { "epoch": 1.3393148450244698, "grad_norm": 1.3330495357513428, "learning_rate": 0.0006695758564437194, "loss": 0.1856, "num_input_tokens_seen": 17714112, "step": 8210 }, { "epoch": 1.3401305057096247, "grad_norm": 0.5402922630310059, "learning_rate": 0.000669983686786297, "loss": 0.211, "num_input_tokens_seen": 17725216, "step": 8215 }, { "epoch": 1.3409461663947797, "grad_norm": 0.31854021549224854, "learning_rate": 0.0006703915171288743, "loss": 0.0899, "num_input_tokens_seen": 17736768, "step": 8220 }, { "epoch": 1.3417618270799347, "grad_norm": 0.04605162516236305, "learning_rate": 0.0006707993474714519, "loss": 0.3071, "num_input_tokens_seen": 17747264, "step": 8225 }, { "epoch": 1.3425774877650898, "grad_norm": 0.3851441442966461, "learning_rate": 0.0006712071778140294, "loss": 0.1368, "num_input_tokens_seen": 17758720, "step": 8230 }, { "epoch": 1.3433931484502448, "grad_norm": 0.2544032037258148, "learning_rate": 0.0006716150081566068, "loss": 0.1754, "num_input_tokens_seen": 17769632, "step": 8235 }, { "epoch": 1.3442088091353996, "grad_norm": 0.3576222360134125, "learning_rate": 0.0006720228384991843, "loss": 0.1323, "num_input_tokens_seen": 17780896, "step": 8240 }, { "epoch": 1.3450244698205547, "grad_norm": 0.2332669347524643, "learning_rate": 0.0006724306688417618, "loss": 0.1215, "num_input_tokens_seen": 17792608, "step": 8245 }, { "epoch": 1.3458401305057097, "grad_norm": 0.07991407811641693, "learning_rate": 0.0006728384991843394, "loss": 0.0674, "num_input_tokens_seen": 17803488, "step": 8250 }, { "epoch": 1.3466557911908645, "grad_norm": 0.4569050967693329, "learning_rate": 0.0006732463295269168, "loss": 0.1047, "num_input_tokens_seen": 17813536, "step": 8255 }, { "epoch": 1.3474714518760196, "grad_norm": 0.04510252922773361, "learning_rate": 0.0006736541598694943, "loss": 0.0256, "num_input_tokens_seen": 17824768, "step": 8260 }, { "epoch": 1.3482871125611746, "grad_norm": 0.04424011707305908, "learning_rate": 0.0006740619902120718, "loss": 0.0787, "num_input_tokens_seen": 17835808, "step": 8265 }, { "epoch": 1.3491027732463294, "grad_norm": 0.06642396748065948, "learning_rate": 0.0006744698205546492, "loss": 0.1645, "num_input_tokens_seen": 17846688, "step": 8270 }, { "epoch": 1.3499184339314845, "grad_norm": 0.21267910301685333, "learning_rate": 0.0006748776508972268, "loss": 0.0707, "num_input_tokens_seen": 17856832, "step": 8275 }, { "epoch": 1.3507340946166395, "grad_norm": 0.05674583837389946, "learning_rate": 0.0006752854812398043, "loss": 0.1255, "num_input_tokens_seen": 17868192, "step": 8280 }, { "epoch": 1.3515497553017943, "grad_norm": 0.05528345704078674, "learning_rate": 0.0006756933115823817, "loss": 0.1228, "num_input_tokens_seen": 17878848, "step": 8285 }, { "epoch": 1.3523654159869494, "grad_norm": 0.24763894081115723, "learning_rate": 0.0006761011419249592, "loss": 0.1208, "num_input_tokens_seen": 17888224, "step": 8290 }, { "epoch": 1.3531810766721044, "grad_norm": 0.11547470837831497, "learning_rate": 0.0006765089722675367, "loss": 0.0228, "num_input_tokens_seen": 17900352, "step": 8295 }, { "epoch": 1.3539967373572595, "grad_norm": 0.016927413642406464, "learning_rate": 0.0006769168026101143, "loss": 0.0445, "num_input_tokens_seen": 17911488, "step": 8300 }, { "epoch": 1.3548123980424145, "grad_norm": 0.2924193739891052, "learning_rate": 0.0006773246329526917, "loss": 0.1098, "num_input_tokens_seen": 17922880, "step": 8305 }, { "epoch": 1.3556280587275693, "grad_norm": 0.15187495946884155, "learning_rate": 0.0006777324632952691, "loss": 0.0566, "num_input_tokens_seen": 17934080, "step": 8310 }, { "epoch": 1.3564437194127243, "grad_norm": 0.4529854953289032, "learning_rate": 0.0006781402936378467, "loss": 0.1625, "num_input_tokens_seen": 17944352, "step": 8315 }, { "epoch": 1.3572593800978794, "grad_norm": 0.343124121427536, "learning_rate": 0.0006785481239804242, "loss": 0.0865, "num_input_tokens_seen": 17955232, "step": 8320 }, { "epoch": 1.3580750407830342, "grad_norm": 0.21366773545742035, "learning_rate": 0.0006789559543230017, "loss": 0.0717, "num_input_tokens_seen": 17966496, "step": 8325 }, { "epoch": 1.3588907014681892, "grad_norm": 0.3879798650741577, "learning_rate": 0.0006793637846655791, "loss": 0.3778, "num_input_tokens_seen": 17976768, "step": 8330 }, { "epoch": 1.3597063621533443, "grad_norm": 0.27293097972869873, "learning_rate": 0.0006797716150081566, "loss": 0.1147, "num_input_tokens_seen": 17987936, "step": 8335 }, { "epoch": 1.360522022838499, "grad_norm": 0.279996395111084, "learning_rate": 0.0006801794453507341, "loss": 0.184, "num_input_tokens_seen": 17999712, "step": 8340 }, { "epoch": 1.3613376835236541, "grad_norm": 0.09360643476247787, "learning_rate": 0.0006805872756933116, "loss": 0.0531, "num_input_tokens_seen": 18011328, "step": 8345 }, { "epoch": 1.3621533442088092, "grad_norm": 0.4578707814216614, "learning_rate": 0.000680995106035889, "loss": 0.1682, "num_input_tokens_seen": 18022080, "step": 8350 }, { "epoch": 1.362969004893964, "grad_norm": 0.13789981603622437, "learning_rate": 0.0006814029363784666, "loss": 0.0887, "num_input_tokens_seen": 18033184, "step": 8355 }, { "epoch": 1.363784665579119, "grad_norm": 0.604446530342102, "learning_rate": 0.000681810766721044, "loss": 0.1974, "num_input_tokens_seen": 18043488, "step": 8360 }, { "epoch": 1.364600326264274, "grad_norm": 0.5044865012168884, "learning_rate": 0.0006822185970636216, "loss": 0.1191, "num_input_tokens_seen": 18054368, "step": 8365 }, { "epoch": 1.3654159869494291, "grad_norm": 0.09271303564310074, "learning_rate": 0.0006826264274061991, "loss": 0.1519, "num_input_tokens_seen": 18065376, "step": 8370 }, { "epoch": 1.366231647634584, "grad_norm": 0.06919936835765839, "learning_rate": 0.0006830342577487764, "loss": 0.1629, "num_input_tokens_seen": 18076384, "step": 8375 }, { "epoch": 1.367047308319739, "grad_norm": 0.14162668585777283, "learning_rate": 0.000683442088091354, "loss": 0.0873, "num_input_tokens_seen": 18087776, "step": 8380 }, { "epoch": 1.367862969004894, "grad_norm": 0.11541574448347092, "learning_rate": 0.0006838499184339315, "loss": 0.1372, "num_input_tokens_seen": 18098272, "step": 8385 }, { "epoch": 1.368678629690049, "grad_norm": 0.06780945509672165, "learning_rate": 0.0006842577487765091, "loss": 0.1449, "num_input_tokens_seen": 18109792, "step": 8390 }, { "epoch": 1.3694942903752039, "grad_norm": 0.29994329810142517, "learning_rate": 0.0006846655791190864, "loss": 0.2581, "num_input_tokens_seen": 18118912, "step": 8395 }, { "epoch": 1.370309951060359, "grad_norm": 0.09013523161411285, "learning_rate": 0.0006850734094616639, "loss": 0.1347, "num_input_tokens_seen": 18130432, "step": 8400 }, { "epoch": 1.371125611745514, "grad_norm": 0.33663830161094666, "learning_rate": 0.0006854812398042415, "loss": 0.3156, "num_input_tokens_seen": 18141440, "step": 8405 }, { "epoch": 1.3719412724306688, "grad_norm": 0.09541526436805725, "learning_rate": 0.0006858890701468189, "loss": 0.1582, "num_input_tokens_seen": 18150816, "step": 8410 }, { "epoch": 1.3727569331158238, "grad_norm": 0.1449032723903656, "learning_rate": 0.0006862969004893965, "loss": 0.0987, "num_input_tokens_seen": 18162272, "step": 8415 }, { "epoch": 1.3735725938009788, "grad_norm": 0.1413501799106598, "learning_rate": 0.0006867047308319739, "loss": 0.1189, "num_input_tokens_seen": 18174016, "step": 8420 }, { "epoch": 1.3743882544861337, "grad_norm": 0.10539831221103668, "learning_rate": 0.0006871125611745514, "loss": 0.1362, "num_input_tokens_seen": 18185440, "step": 8425 }, { "epoch": 1.3752039151712887, "grad_norm": 0.04691213741898537, "learning_rate": 0.0006875203915171289, "loss": 0.052, "num_input_tokens_seen": 18196960, "step": 8430 }, { "epoch": 1.3760195758564437, "grad_norm": 0.1963319480419159, "learning_rate": 0.0006879282218597064, "loss": 0.0949, "num_input_tokens_seen": 18208512, "step": 8435 }, { "epoch": 1.3768352365415986, "grad_norm": 0.46225208044052124, "learning_rate": 0.000688336052202284, "loss": 0.1175, "num_input_tokens_seen": 18219776, "step": 8440 }, { "epoch": 1.3776508972267536, "grad_norm": 0.09256060421466827, "learning_rate": 0.0006887438825448613, "loss": 0.2648, "num_input_tokens_seen": 18230528, "step": 8445 }, { "epoch": 1.3784665579119086, "grad_norm": 0.13652892410755157, "learning_rate": 0.0006891517128874388, "loss": 0.2253, "num_input_tokens_seen": 18241312, "step": 8450 }, { "epoch": 1.3792822185970637, "grad_norm": 0.5313224196434021, "learning_rate": 0.0006895595432300164, "loss": 0.2571, "num_input_tokens_seen": 18252064, "step": 8455 }, { "epoch": 1.3800978792822187, "grad_norm": 0.20080803334712982, "learning_rate": 0.0006899673735725939, "loss": 0.201, "num_input_tokens_seen": 18263488, "step": 8460 }, { "epoch": 1.3809135399673735, "grad_norm": 0.22107291221618652, "learning_rate": 0.0006903752039151713, "loss": 0.2256, "num_input_tokens_seen": 18274016, "step": 8465 }, { "epoch": 1.3817292006525286, "grad_norm": 0.22587135434150696, "learning_rate": 0.0006907830342577488, "loss": 0.14, "num_input_tokens_seen": 18285056, "step": 8470 }, { "epoch": 1.3825448613376836, "grad_norm": 0.12272298336029053, "learning_rate": 0.0006911908646003263, "loss": 0.1437, "num_input_tokens_seen": 18297088, "step": 8475 }, { "epoch": 1.3833605220228384, "grad_norm": 0.07741540670394897, "learning_rate": 0.0006915986949429038, "loss": 0.0847, "num_input_tokens_seen": 18307776, "step": 8480 }, { "epoch": 1.3841761827079935, "grad_norm": 0.0871734768152237, "learning_rate": 0.0006920065252854812, "loss": 0.0866, "num_input_tokens_seen": 18318592, "step": 8485 }, { "epoch": 1.3849918433931485, "grad_norm": 0.5682582259178162, "learning_rate": 0.0006924143556280587, "loss": 0.2097, "num_input_tokens_seen": 18330048, "step": 8490 }, { "epoch": 1.3858075040783033, "grad_norm": 0.5466875433921814, "learning_rate": 0.0006928221859706362, "loss": 0.1734, "num_input_tokens_seen": 18341248, "step": 8495 }, { "epoch": 1.3866231647634584, "grad_norm": 0.10931766778230667, "learning_rate": 0.0006932300163132137, "loss": 0.0498, "num_input_tokens_seen": 18352000, "step": 8500 }, { "epoch": 1.3874388254486134, "grad_norm": 0.14139024913311005, "learning_rate": 0.0006936378466557913, "loss": 0.062, "num_input_tokens_seen": 18362368, "step": 8505 }, { "epoch": 1.3882544861337682, "grad_norm": 0.09381916373968124, "learning_rate": 0.0006940456769983687, "loss": 0.3007, "num_input_tokens_seen": 18373536, "step": 8510 }, { "epoch": 1.3890701468189233, "grad_norm": 0.21973715722560883, "learning_rate": 0.0006944535073409461, "loss": 0.2505, "num_input_tokens_seen": 18383648, "step": 8515 }, { "epoch": 1.3898858075040783, "grad_norm": 0.40013498067855835, "learning_rate": 0.0006948613376835237, "loss": 0.2033, "num_input_tokens_seen": 18393856, "step": 8520 }, { "epoch": 1.3907014681892333, "grad_norm": 0.16729186475276947, "learning_rate": 0.0006952691680261012, "loss": 0.1213, "num_input_tokens_seen": 18404896, "step": 8525 }, { "epoch": 1.3915171288743884, "grad_norm": 0.2630608081817627, "learning_rate": 0.0006956769983686786, "loss": 0.1436, "num_input_tokens_seen": 18415808, "step": 8530 }, { "epoch": 1.3923327895595432, "grad_norm": 0.2588391900062561, "learning_rate": 0.0006960848287112561, "loss": 0.0573, "num_input_tokens_seen": 18426464, "step": 8535 }, { "epoch": 1.3931484502446982, "grad_norm": 0.35269695520401, "learning_rate": 0.0006964926590538336, "loss": 0.1665, "num_input_tokens_seen": 18436992, "step": 8540 }, { "epoch": 1.3939641109298533, "grad_norm": 0.14328861236572266, "learning_rate": 0.0006969004893964112, "loss": 0.0591, "num_input_tokens_seen": 18448160, "step": 8545 }, { "epoch": 1.394779771615008, "grad_norm": 0.2937766909599304, "learning_rate": 0.0006973083197389885, "loss": 0.1344, "num_input_tokens_seen": 18458656, "step": 8550 }, { "epoch": 1.3955954323001631, "grad_norm": 0.5940945744514465, "learning_rate": 0.0006977161500815661, "loss": 0.2424, "num_input_tokens_seen": 18468448, "step": 8555 }, { "epoch": 1.3964110929853182, "grad_norm": 0.4724957346916199, "learning_rate": 0.0006981239804241436, "loss": 0.1667, "num_input_tokens_seen": 18478624, "step": 8560 }, { "epoch": 1.397226753670473, "grad_norm": 0.32531189918518066, "learning_rate": 0.000698531810766721, "loss": 0.1569, "num_input_tokens_seen": 18489984, "step": 8565 }, { "epoch": 1.398042414355628, "grad_norm": 0.21875634789466858, "learning_rate": 0.0006989396411092986, "loss": 0.2226, "num_input_tokens_seen": 18500512, "step": 8570 }, { "epoch": 1.398858075040783, "grad_norm": 0.22302477061748505, "learning_rate": 0.000699347471451876, "loss": 0.116, "num_input_tokens_seen": 18510848, "step": 8575 }, { "epoch": 1.399673735725938, "grad_norm": 0.13238047063350677, "learning_rate": 0.0006997553017944536, "loss": 0.1321, "num_input_tokens_seen": 18521856, "step": 8580 }, { "epoch": 1.400489396411093, "grad_norm": 0.5457648038864136, "learning_rate": 0.000700163132137031, "loss": 0.2685, "num_input_tokens_seen": 18532768, "step": 8585 }, { "epoch": 1.401305057096248, "grad_norm": 0.19160877168178558, "learning_rate": 0.0007005709624796085, "loss": 0.1296, "num_input_tokens_seen": 18544288, "step": 8590 }, { "epoch": 1.402120717781403, "grad_norm": 0.1325426548719406, "learning_rate": 0.000700978792822186, "loss": 0.1317, "num_input_tokens_seen": 18554432, "step": 8595 }, { "epoch": 1.4029363784665578, "grad_norm": 0.1033167615532875, "learning_rate": 0.0007013866231647634, "loss": 0.0917, "num_input_tokens_seen": 18565696, "step": 8600 }, { "epoch": 1.4037520391517129, "grad_norm": 0.1074594110250473, "learning_rate": 0.000701794453507341, "loss": 0.0598, "num_input_tokens_seen": 18576064, "step": 8605 }, { "epoch": 1.404567699836868, "grad_norm": 0.051021743565797806, "learning_rate": 0.0007022022838499185, "loss": 0.2075, "num_input_tokens_seen": 18586336, "step": 8610 }, { "epoch": 1.405383360522023, "grad_norm": 0.29024407267570496, "learning_rate": 0.000702610114192496, "loss": 0.1067, "num_input_tokens_seen": 18597184, "step": 8615 }, { "epoch": 1.4061990212071778, "grad_norm": 0.02979191392660141, "learning_rate": 0.0007030179445350734, "loss": 0.0341, "num_input_tokens_seen": 18608480, "step": 8620 }, { "epoch": 1.4070146818923328, "grad_norm": 0.05098017305135727, "learning_rate": 0.0007034257748776509, "loss": 0.0736, "num_input_tokens_seen": 18618784, "step": 8625 }, { "epoch": 1.4078303425774878, "grad_norm": 0.09707712382078171, "learning_rate": 0.0007038336052202285, "loss": 0.1573, "num_input_tokens_seen": 18629472, "step": 8630 }, { "epoch": 1.4086460032626427, "grad_norm": 0.04260914772748947, "learning_rate": 0.0007042414355628059, "loss": 0.0942, "num_input_tokens_seen": 18640928, "step": 8635 }, { "epoch": 1.4094616639477977, "grad_norm": 0.323981910943985, "learning_rate": 0.0007046492659053833, "loss": 0.2579, "num_input_tokens_seen": 18652064, "step": 8640 }, { "epoch": 1.4102773246329527, "grad_norm": 0.05252254381775856, "learning_rate": 0.0007050570962479609, "loss": 0.1369, "num_input_tokens_seen": 18662272, "step": 8645 }, { "epoch": 1.4110929853181076, "grad_norm": 0.2244993895292282, "learning_rate": 0.0007054649265905384, "loss": 0.0692, "num_input_tokens_seen": 18672672, "step": 8650 }, { "epoch": 1.4119086460032626, "grad_norm": 0.1471857726573944, "learning_rate": 0.0007058727569331158, "loss": 0.1012, "num_input_tokens_seen": 18683776, "step": 8655 }, { "epoch": 1.4127243066884176, "grad_norm": 0.13936777412891388, "learning_rate": 0.0007062805872756933, "loss": 0.1535, "num_input_tokens_seen": 18695200, "step": 8660 }, { "epoch": 1.4135399673735725, "grad_norm": 0.23006023466587067, "learning_rate": 0.0007066884176182708, "loss": 0.1054, "num_input_tokens_seen": 18706208, "step": 8665 }, { "epoch": 1.4143556280587275, "grad_norm": 0.5761478543281555, "learning_rate": 0.0007070962479608483, "loss": 0.2049, "num_input_tokens_seen": 18717216, "step": 8670 }, { "epoch": 1.4151712887438825, "grad_norm": 0.40842342376708984, "learning_rate": 0.0007075040783034258, "loss": 0.1859, "num_input_tokens_seen": 18726912, "step": 8675 }, { "epoch": 1.4159869494290376, "grad_norm": 0.3666206896305084, "learning_rate": 0.0007079119086460033, "loss": 0.1707, "num_input_tokens_seen": 18736992, "step": 8680 }, { "epoch": 1.4168026101141926, "grad_norm": 0.22289107739925385, "learning_rate": 0.0007083197389885808, "loss": 0.0405, "num_input_tokens_seen": 18748640, "step": 8685 }, { "epoch": 1.4176182707993474, "grad_norm": 0.6535608768463135, "learning_rate": 0.0007087275693311582, "loss": 0.3728, "num_input_tokens_seen": 18759104, "step": 8690 }, { "epoch": 1.4184339314845025, "grad_norm": 0.10991555452346802, "learning_rate": 0.0007091353996737358, "loss": 0.13, "num_input_tokens_seen": 18770240, "step": 8695 }, { "epoch": 1.4192495921696575, "grad_norm": 0.24327223002910614, "learning_rate": 0.0007095432300163133, "loss": 0.1275, "num_input_tokens_seen": 18781216, "step": 8700 }, { "epoch": 1.4200652528548123, "grad_norm": 0.3419354259967804, "learning_rate": 0.0007099510603588906, "loss": 0.1935, "num_input_tokens_seen": 18792864, "step": 8705 }, { "epoch": 1.4208809135399674, "grad_norm": 0.0489824078977108, "learning_rate": 0.0007103588907014682, "loss": 0.0808, "num_input_tokens_seen": 18803552, "step": 8710 }, { "epoch": 1.4216965742251224, "grad_norm": 0.07126122713088989, "learning_rate": 0.0007107667210440457, "loss": 0.1425, "num_input_tokens_seen": 18816096, "step": 8715 }, { "epoch": 1.4225122349102772, "grad_norm": 0.0662030428647995, "learning_rate": 0.0007111745513866232, "loss": 0.0256, "num_input_tokens_seen": 18825632, "step": 8720 }, { "epoch": 1.4233278955954323, "grad_norm": 0.012720330618321896, "learning_rate": 0.0007115823817292006, "loss": 0.114, "num_input_tokens_seen": 18837504, "step": 8725 }, { "epoch": 1.4241435562805873, "grad_norm": 0.12858402729034424, "learning_rate": 0.0007119902120717781, "loss": 0.1659, "num_input_tokens_seen": 18848160, "step": 8730 }, { "epoch": 1.4249592169657421, "grad_norm": 0.09809096157550812, "learning_rate": 0.0007123980424143557, "loss": 0.0531, "num_input_tokens_seen": 18858080, "step": 8735 }, { "epoch": 1.4257748776508972, "grad_norm": 0.5575476884841919, "learning_rate": 0.0007128058727569331, "loss": 0.1769, "num_input_tokens_seen": 18868928, "step": 8740 }, { "epoch": 1.4265905383360522, "grad_norm": 0.07153128832578659, "learning_rate": 0.0007132137030995107, "loss": 0.1841, "num_input_tokens_seen": 18879200, "step": 8745 }, { "epoch": 1.4274061990212072, "grad_norm": 0.11059188097715378, "learning_rate": 0.0007136215334420881, "loss": 0.1261, "num_input_tokens_seen": 18890208, "step": 8750 }, { "epoch": 1.4282218597063623, "grad_norm": 0.14170938730239868, "learning_rate": 0.0007140293637846655, "loss": 0.0995, "num_input_tokens_seen": 18900224, "step": 8755 }, { "epoch": 1.429037520391517, "grad_norm": 0.2830321490764618, "learning_rate": 0.0007144371941272431, "loss": 0.1901, "num_input_tokens_seen": 18912224, "step": 8760 }, { "epoch": 1.4298531810766721, "grad_norm": 0.022736432030797005, "learning_rate": 0.0007148450244698206, "loss": 0.0344, "num_input_tokens_seen": 18924000, "step": 8765 }, { "epoch": 1.4306688417618272, "grad_norm": 0.018026141449809074, "learning_rate": 0.0007152528548123982, "loss": 0.1078, "num_input_tokens_seen": 18934688, "step": 8770 }, { "epoch": 1.431484502446982, "grad_norm": 0.02352656051516533, "learning_rate": 0.0007156606851549755, "loss": 0.1773, "num_input_tokens_seen": 18945440, "step": 8775 }, { "epoch": 1.432300163132137, "grad_norm": 0.05708549916744232, "learning_rate": 0.000716068515497553, "loss": 0.0575, "num_input_tokens_seen": 18956704, "step": 8780 }, { "epoch": 1.433115823817292, "grad_norm": 0.0232357457280159, "learning_rate": 0.0007164763458401306, "loss": 0.0517, "num_input_tokens_seen": 18967232, "step": 8785 }, { "epoch": 1.433931484502447, "grad_norm": 0.5626628398895264, "learning_rate": 0.000716884176182708, "loss": 0.1688, "num_input_tokens_seen": 18977568, "step": 8790 }, { "epoch": 1.434747145187602, "grad_norm": 0.12963317334651947, "learning_rate": 0.0007172920065252854, "loss": 0.072, "num_input_tokens_seen": 18988832, "step": 8795 }, { "epoch": 1.435562805872757, "grad_norm": 0.05077396333217621, "learning_rate": 0.000717699836867863, "loss": 0.138, "num_input_tokens_seen": 18999456, "step": 8800 }, { "epoch": 1.4363784665579118, "grad_norm": 0.02634805254638195, "learning_rate": 0.0007181076672104405, "loss": 0.1217, "num_input_tokens_seen": 19011424, "step": 8805 }, { "epoch": 1.4371941272430668, "grad_norm": 0.029207633808255196, "learning_rate": 0.000718515497553018, "loss": 0.0715, "num_input_tokens_seen": 19022880, "step": 8810 }, { "epoch": 1.4380097879282219, "grad_norm": 0.39853039383888245, "learning_rate": 0.0007189233278955954, "loss": 0.2949, "num_input_tokens_seen": 19033248, "step": 8815 }, { "epoch": 1.438825448613377, "grad_norm": 0.14748430252075195, "learning_rate": 0.0007193311582381729, "loss": 0.1243, "num_input_tokens_seen": 19044768, "step": 8820 }, { "epoch": 1.4396411092985317, "grad_norm": 0.15088698267936707, "learning_rate": 0.0007197389885807504, "loss": 0.0534, "num_input_tokens_seen": 19055008, "step": 8825 }, { "epoch": 1.4404567699836868, "grad_norm": 0.060153789818286896, "learning_rate": 0.0007201468189233279, "loss": 0.0946, "num_input_tokens_seen": 19064800, "step": 8830 }, { "epoch": 1.4412724306688418, "grad_norm": 0.05774616450071335, "learning_rate": 0.0007205546492659055, "loss": 0.1347, "num_input_tokens_seen": 19075776, "step": 8835 }, { "epoch": 1.4420880913539968, "grad_norm": 0.12727181613445282, "learning_rate": 0.0007209624796084829, "loss": 0.1642, "num_input_tokens_seen": 19086944, "step": 8840 }, { "epoch": 1.4429037520391517, "grad_norm": 0.017124155536293983, "learning_rate": 0.0007213703099510603, "loss": 0.0854, "num_input_tokens_seen": 19097504, "step": 8845 }, { "epoch": 1.4437194127243067, "grad_norm": 0.23721224069595337, "learning_rate": 0.0007217781402936379, "loss": 0.1945, "num_input_tokens_seen": 19108192, "step": 8850 }, { "epoch": 1.4445350734094617, "grad_norm": 0.29939067363739014, "learning_rate": 0.0007221859706362154, "loss": 0.2627, "num_input_tokens_seen": 19119072, "step": 8855 }, { "epoch": 1.4453507340946166, "grad_norm": 0.1577085554599762, "learning_rate": 0.0007225938009787928, "loss": 0.0734, "num_input_tokens_seen": 19129920, "step": 8860 }, { "epoch": 1.4461663947797716, "grad_norm": 0.04445808008313179, "learning_rate": 0.0007230016313213703, "loss": 0.054, "num_input_tokens_seen": 19142368, "step": 8865 }, { "epoch": 1.4469820554649266, "grad_norm": 0.2555520534515381, "learning_rate": 0.0007234094616639478, "loss": 0.0852, "num_input_tokens_seen": 19152544, "step": 8870 }, { "epoch": 1.4477977161500815, "grad_norm": 0.4121549725532532, "learning_rate": 0.0007238172920065254, "loss": 0.1061, "num_input_tokens_seen": 19162944, "step": 8875 }, { "epoch": 1.4486133768352365, "grad_norm": 0.10008703172206879, "learning_rate": 0.0007242251223491027, "loss": 0.1639, "num_input_tokens_seen": 19172640, "step": 8880 }, { "epoch": 1.4494290375203915, "grad_norm": 0.09088125824928284, "learning_rate": 0.0007246329526916803, "loss": 0.0752, "num_input_tokens_seen": 19184064, "step": 8885 }, { "epoch": 1.4502446982055464, "grad_norm": 0.01956341788172722, "learning_rate": 0.0007250407830342578, "loss": 0.198, "num_input_tokens_seen": 19194080, "step": 8890 }, { "epoch": 1.4510603588907014, "grad_norm": 0.3076225519180298, "learning_rate": 0.0007254486133768352, "loss": 0.0865, "num_input_tokens_seen": 19205216, "step": 8895 }, { "epoch": 1.4518760195758564, "grad_norm": 0.05666419491171837, "learning_rate": 0.0007258564437194128, "loss": 0.0999, "num_input_tokens_seen": 19215584, "step": 8900 }, { "epoch": 1.4526916802610115, "grad_norm": 0.19242066144943237, "learning_rate": 0.0007262642740619902, "loss": 0.0541, "num_input_tokens_seen": 19226656, "step": 8905 }, { "epoch": 1.4535073409461665, "grad_norm": 0.07745897769927979, "learning_rate": 0.0007266721044045678, "loss": 0.1022, "num_input_tokens_seen": 19236192, "step": 8910 }, { "epoch": 1.4543230016313213, "grad_norm": 0.4327331781387329, "learning_rate": 0.0007270799347471452, "loss": 0.09, "num_input_tokens_seen": 19246496, "step": 8915 }, { "epoch": 1.4551386623164764, "grad_norm": 0.13469044864177704, "learning_rate": 0.0007274877650897227, "loss": 0.1074, "num_input_tokens_seen": 19255872, "step": 8920 }, { "epoch": 1.4559543230016314, "grad_norm": 0.8886367678642273, "learning_rate": 0.0007278955954323002, "loss": 0.112, "num_input_tokens_seen": 19265664, "step": 8925 }, { "epoch": 1.4567699836867862, "grad_norm": 0.028607875108718872, "learning_rate": 0.0007283034257748776, "loss": 0.0505, "num_input_tokens_seen": 19277024, "step": 8930 }, { "epoch": 1.4575856443719413, "grad_norm": 0.08467423915863037, "learning_rate": 0.0007287112561174551, "loss": 0.083, "num_input_tokens_seen": 19287776, "step": 8935 }, { "epoch": 1.4584013050570963, "grad_norm": 0.03396514803171158, "learning_rate": 0.0007291190864600327, "loss": 0.0611, "num_input_tokens_seen": 19298880, "step": 8940 }, { "epoch": 1.4592169657422511, "grad_norm": 0.010857423767447472, "learning_rate": 0.00072952691680261, "loss": 0.072, "num_input_tokens_seen": 19309152, "step": 8945 }, { "epoch": 1.4600326264274062, "grad_norm": 0.38745319843292236, "learning_rate": 0.0007299347471451876, "loss": 0.1052, "num_input_tokens_seen": 19320448, "step": 8950 }, { "epoch": 1.4608482871125612, "grad_norm": 0.5516444444656372, "learning_rate": 0.0007303425774877651, "loss": 0.0779, "num_input_tokens_seen": 19331552, "step": 8955 }, { "epoch": 1.461663947797716, "grad_norm": 0.008668281137943268, "learning_rate": 0.0007307504078303426, "loss": 0.1627, "num_input_tokens_seen": 19343104, "step": 8960 }, { "epoch": 1.462479608482871, "grad_norm": 0.26222047209739685, "learning_rate": 0.0007311582381729201, "loss": 0.0466, "num_input_tokens_seen": 19354720, "step": 8965 }, { "epoch": 1.463295269168026, "grad_norm": 0.011961002834141254, "learning_rate": 0.0007315660685154975, "loss": 0.1049, "num_input_tokens_seen": 19365120, "step": 8970 }, { "epoch": 1.4641109298531811, "grad_norm": 0.6442130208015442, "learning_rate": 0.0007319738988580751, "loss": 0.1338, "num_input_tokens_seen": 19377408, "step": 8975 }, { "epoch": 1.4649265905383362, "grad_norm": 0.540549635887146, "learning_rate": 0.0007323817292006525, "loss": 0.247, "num_input_tokens_seen": 19387744, "step": 8980 }, { "epoch": 1.465742251223491, "grad_norm": 0.3950026035308838, "learning_rate": 0.00073278955954323, "loss": 0.1815, "num_input_tokens_seen": 19398656, "step": 8985 }, { "epoch": 1.466557911908646, "grad_norm": 0.17897483706474304, "learning_rate": 0.0007331973898858076, "loss": 0.0835, "num_input_tokens_seen": 19409088, "step": 8990 }, { "epoch": 1.467373572593801, "grad_norm": 0.260405570268631, "learning_rate": 0.000733605220228385, "loss": 0.1324, "num_input_tokens_seen": 19419136, "step": 8995 }, { "epoch": 1.468189233278956, "grad_norm": 0.04354459419846535, "learning_rate": 0.0007340130505709625, "loss": 0.0499, "num_input_tokens_seen": 19429312, "step": 9000 }, { "epoch": 1.469004893964111, "grad_norm": 0.10292547196149826, "learning_rate": 0.00073442088091354, "loss": 0.1799, "num_input_tokens_seen": 19439680, "step": 9005 }, { "epoch": 1.469820554649266, "grad_norm": 0.5261200070381165, "learning_rate": 0.0007348287112561175, "loss": 0.1938, "num_input_tokens_seen": 19451104, "step": 9010 }, { "epoch": 1.4706362153344208, "grad_norm": 0.07087134569883347, "learning_rate": 0.0007352365415986949, "loss": 0.131, "num_input_tokens_seen": 19461184, "step": 9015 }, { "epoch": 1.4714518760195758, "grad_norm": 0.2747659981250763, "learning_rate": 0.0007356443719412724, "loss": 0.2159, "num_input_tokens_seen": 19471840, "step": 9020 }, { "epoch": 1.4722675367047309, "grad_norm": 0.03912227973341942, "learning_rate": 0.00073605220228385, "loss": 0.0911, "num_input_tokens_seen": 19483680, "step": 9025 }, { "epoch": 1.4730831973898857, "grad_norm": 0.27806100249290466, "learning_rate": 0.0007364600326264275, "loss": 0.1829, "num_input_tokens_seen": 19495008, "step": 9030 }, { "epoch": 1.4738988580750407, "grad_norm": 0.5069888830184937, "learning_rate": 0.0007368678629690048, "loss": 0.1236, "num_input_tokens_seen": 19507296, "step": 9035 }, { "epoch": 1.4747145187601958, "grad_norm": 0.09842549264431, "learning_rate": 0.0007372756933115824, "loss": 0.0689, "num_input_tokens_seen": 19518240, "step": 9040 }, { "epoch": 1.4755301794453508, "grad_norm": 0.2105843424797058, "learning_rate": 0.0007376835236541599, "loss": 0.0806, "num_input_tokens_seen": 19528128, "step": 9045 }, { "epoch": 1.4763458401305056, "grad_norm": 0.5425255298614502, "learning_rate": 0.0007380913539967374, "loss": 0.1853, "num_input_tokens_seen": 19538624, "step": 9050 }, { "epoch": 1.4771615008156607, "grad_norm": 0.5130831003189087, "learning_rate": 0.0007384991843393149, "loss": 0.2301, "num_input_tokens_seen": 19549376, "step": 9055 }, { "epoch": 1.4779771615008157, "grad_norm": 0.42606836557388306, "learning_rate": 0.0007389070146818923, "loss": 0.1922, "num_input_tokens_seen": 19560832, "step": 9060 }, { "epoch": 1.4787928221859707, "grad_norm": 0.03763745725154877, "learning_rate": 0.0007393148450244699, "loss": 0.2425, "num_input_tokens_seen": 19572640, "step": 9065 }, { "epoch": 1.4796084828711256, "grad_norm": 0.07457976788282394, "learning_rate": 0.0007397226753670473, "loss": 0.1815, "num_input_tokens_seen": 19582656, "step": 9070 }, { "epoch": 1.4804241435562806, "grad_norm": 0.09364946186542511, "learning_rate": 0.0007401305057096248, "loss": 0.1208, "num_input_tokens_seen": 19592640, "step": 9075 }, { "epoch": 1.4812398042414356, "grad_norm": 0.48853427171707153, "learning_rate": 0.0007405383360522023, "loss": 0.1576, "num_input_tokens_seen": 19603424, "step": 9080 }, { "epoch": 1.4820554649265905, "grad_norm": 0.06722915172576904, "learning_rate": 0.0007409461663947797, "loss": 0.1761, "num_input_tokens_seen": 19614432, "step": 9085 }, { "epoch": 1.4828711256117455, "grad_norm": 0.21998947858810425, "learning_rate": 0.0007413539967373573, "loss": 0.1698, "num_input_tokens_seen": 19624928, "step": 9090 }, { "epoch": 1.4836867862969005, "grad_norm": 0.09100175648927689, "learning_rate": 0.0007417618270799348, "loss": 0.1378, "num_input_tokens_seen": 19635072, "step": 9095 }, { "epoch": 1.4845024469820554, "grad_norm": 0.07700017094612122, "learning_rate": 0.0007421696574225123, "loss": 0.1987, "num_input_tokens_seen": 19645120, "step": 9100 }, { "epoch": 1.4853181076672104, "grad_norm": 0.09189625829458237, "learning_rate": 0.0007425774877650897, "loss": 0.1201, "num_input_tokens_seen": 19655360, "step": 9105 }, { "epoch": 1.4861337683523654, "grad_norm": 0.7358422875404358, "learning_rate": 0.0007429853181076672, "loss": 0.0803, "num_input_tokens_seen": 19666336, "step": 9110 }, { "epoch": 1.4869494290375203, "grad_norm": 0.4167027175426483, "learning_rate": 0.0007433931484502448, "loss": 0.1758, "num_input_tokens_seen": 19676320, "step": 9115 }, { "epoch": 1.4877650897226753, "grad_norm": 0.13785138726234436, "learning_rate": 0.0007438009787928222, "loss": 0.1534, "num_input_tokens_seen": 19685568, "step": 9120 }, { "epoch": 1.4885807504078303, "grad_norm": 0.21748493611812592, "learning_rate": 0.0007442088091353996, "loss": 0.0863, "num_input_tokens_seen": 19696896, "step": 9125 }, { "epoch": 1.4893964110929854, "grad_norm": 0.08794530481100082, "learning_rate": 0.0007446166394779772, "loss": 0.159, "num_input_tokens_seen": 19706912, "step": 9130 }, { "epoch": 1.4902120717781404, "grad_norm": 0.09349031746387482, "learning_rate": 0.0007450244698205547, "loss": 0.112, "num_input_tokens_seen": 19716640, "step": 9135 }, { "epoch": 1.4910277324632952, "grad_norm": 0.16520120203495026, "learning_rate": 0.0007454323001631322, "loss": 0.077, "num_input_tokens_seen": 19726560, "step": 9140 }, { "epoch": 1.4918433931484503, "grad_norm": 0.26663780212402344, "learning_rate": 0.0007458401305057096, "loss": 0.1215, "num_input_tokens_seen": 19738208, "step": 9145 }, { "epoch": 1.4926590538336053, "grad_norm": 0.0554833710193634, "learning_rate": 0.0007462479608482871, "loss": 0.0411, "num_input_tokens_seen": 19748512, "step": 9150 }, { "epoch": 1.4934747145187601, "grad_norm": 0.029788140207529068, "learning_rate": 0.0007466557911908646, "loss": 0.1369, "num_input_tokens_seen": 19759968, "step": 9155 }, { "epoch": 1.4942903752039152, "grad_norm": 0.10392051935195923, "learning_rate": 0.0007470636215334421, "loss": 0.0579, "num_input_tokens_seen": 19770624, "step": 9160 }, { "epoch": 1.4951060358890702, "grad_norm": 0.17505186796188354, "learning_rate": 0.0007474714518760197, "loss": 0.1947, "num_input_tokens_seen": 19781600, "step": 9165 }, { "epoch": 1.495921696574225, "grad_norm": 0.14129331707954407, "learning_rate": 0.0007478792822185971, "loss": 0.0867, "num_input_tokens_seen": 19791520, "step": 9170 }, { "epoch": 1.49673735725938, "grad_norm": 0.12131684273481369, "learning_rate": 0.0007482871125611745, "loss": 0.0627, "num_input_tokens_seen": 19804800, "step": 9175 }, { "epoch": 1.497553017944535, "grad_norm": 0.012487274594604969, "learning_rate": 0.0007486949429037521, "loss": 0.156, "num_input_tokens_seen": 19815520, "step": 9180 }, { "epoch": 1.49836867862969, "grad_norm": 0.11800535768270493, "learning_rate": 0.0007491027732463296, "loss": 0.1424, "num_input_tokens_seen": 19826944, "step": 9185 }, { "epoch": 1.499184339314845, "grad_norm": 0.12160804867744446, "learning_rate": 0.000749510603588907, "loss": 0.0554, "num_input_tokens_seen": 19837888, "step": 9190 }, { "epoch": 1.5, "grad_norm": 0.2705550491809845, "learning_rate": 0.0007499184339314845, "loss": 0.0691, "num_input_tokens_seen": 19847936, "step": 9195 }, { "epoch": 1.5008156606851548, "grad_norm": 0.3171461224555969, "learning_rate": 0.000750326264274062, "loss": 0.1523, "num_input_tokens_seen": 19858368, "step": 9200 }, { "epoch": 1.50163132137031, "grad_norm": 0.049991387873888016, "learning_rate": 0.0007507340946166395, "loss": 0.0486, "num_input_tokens_seen": 19868768, "step": 9205 }, { "epoch": 1.502446982055465, "grad_norm": 0.42294853925704956, "learning_rate": 0.000751141924959217, "loss": 0.3334, "num_input_tokens_seen": 19878656, "step": 9210 }, { "epoch": 1.50326264274062, "grad_norm": 0.10725937783718109, "learning_rate": 0.0007515497553017944, "loss": 0.1204, "num_input_tokens_seen": 19888864, "step": 9215 }, { "epoch": 1.504078303425775, "grad_norm": 0.06396641582250595, "learning_rate": 0.000751957585644372, "loss": 0.07, "num_input_tokens_seen": 19899520, "step": 9220 }, { "epoch": 1.5048939641109298, "grad_norm": 0.1212659627199173, "learning_rate": 0.0007523654159869494, "loss": 0.0319, "num_input_tokens_seen": 19910752, "step": 9225 }, { "epoch": 1.5057096247960848, "grad_norm": 0.48352620005607605, "learning_rate": 0.000752773246329527, "loss": 0.211, "num_input_tokens_seen": 19922592, "step": 9230 }, { "epoch": 1.5065252854812399, "grad_norm": 0.03408307209610939, "learning_rate": 0.0007531810766721044, "loss": 0.0435, "num_input_tokens_seen": 19932672, "step": 9235 }, { "epoch": 1.5073409461663947, "grad_norm": 0.09828560799360275, "learning_rate": 0.0007535889070146818, "loss": 0.1639, "num_input_tokens_seen": 19942368, "step": 9240 }, { "epoch": 1.5081566068515497, "grad_norm": 0.018490830436348915, "learning_rate": 0.0007539967373572594, "loss": 0.1025, "num_input_tokens_seen": 19953056, "step": 9245 }, { "epoch": 1.5089722675367048, "grad_norm": 0.024988073855638504, "learning_rate": 0.0007544045676998369, "loss": 0.09, "num_input_tokens_seen": 19964192, "step": 9250 }, { "epoch": 1.5097879282218596, "grad_norm": 0.13558480143547058, "learning_rate": 0.0007548123980424145, "loss": 0.1149, "num_input_tokens_seen": 19974272, "step": 9255 }, { "epoch": 1.5106035889070146, "grad_norm": 0.35169100761413574, "learning_rate": 0.0007552202283849918, "loss": 0.2235, "num_input_tokens_seen": 19984160, "step": 9260 }, { "epoch": 1.5114192495921697, "grad_norm": 0.13259977102279663, "learning_rate": 0.0007556280587275693, "loss": 0.0638, "num_input_tokens_seen": 19994816, "step": 9265 }, { "epoch": 1.5122349102773245, "grad_norm": 0.02667655237019062, "learning_rate": 0.0007560358890701469, "loss": 0.0458, "num_input_tokens_seen": 20004544, "step": 9270 }, { "epoch": 1.5130505709624797, "grad_norm": 0.34472835063934326, "learning_rate": 0.0007564437194127243, "loss": 0.1027, "num_input_tokens_seen": 20015680, "step": 9275 }, { "epoch": 1.5138662316476346, "grad_norm": 0.23594188690185547, "learning_rate": 0.0007568515497553018, "loss": 0.1152, "num_input_tokens_seen": 20027680, "step": 9280 }, { "epoch": 1.5146818923327896, "grad_norm": 0.10879641026258469, "learning_rate": 0.0007572593800978793, "loss": 0.1502, "num_input_tokens_seen": 20038752, "step": 9285 }, { "epoch": 1.5154975530179446, "grad_norm": 0.02940518967807293, "learning_rate": 0.0007576672104404568, "loss": 0.2269, "num_input_tokens_seen": 20049920, "step": 9290 }, { "epoch": 1.5163132137030995, "grad_norm": 0.03213325887918472, "learning_rate": 0.0007580750407830343, "loss": 0.0707, "num_input_tokens_seen": 20060832, "step": 9295 }, { "epoch": 1.5171288743882545, "grad_norm": 0.6781498789787292, "learning_rate": 0.0007584828711256117, "loss": 0.1456, "num_input_tokens_seen": 20072192, "step": 9300 }, { "epoch": 1.5179445350734095, "grad_norm": 0.027464263141155243, "learning_rate": 0.0007588907014681893, "loss": 0.0566, "num_input_tokens_seen": 20081344, "step": 9305 }, { "epoch": 1.5187601957585644, "grad_norm": 0.05875181406736374, "learning_rate": 0.0007592985318107667, "loss": 0.0866, "num_input_tokens_seen": 20093920, "step": 9310 }, { "epoch": 1.5195758564437194, "grad_norm": 0.0171678364276886, "learning_rate": 0.0007597063621533442, "loss": 0.0929, "num_input_tokens_seen": 20104704, "step": 9315 }, { "epoch": 1.5203915171288744, "grad_norm": 0.060405634343624115, "learning_rate": 0.0007601141924959218, "loss": 0.1748, "num_input_tokens_seen": 20115584, "step": 9320 }, { "epoch": 1.5212071778140293, "grad_norm": 0.39530906081199646, "learning_rate": 0.0007605220228384992, "loss": 0.1741, "num_input_tokens_seen": 20125600, "step": 9325 }, { "epoch": 1.5220228384991843, "grad_norm": 0.390580952167511, "learning_rate": 0.0007609298531810767, "loss": 0.1388, "num_input_tokens_seen": 20135328, "step": 9330 }, { "epoch": 1.5228384991843393, "grad_norm": 0.06861675530672073, "learning_rate": 0.0007613376835236542, "loss": 0.1651, "num_input_tokens_seen": 20146176, "step": 9335 }, { "epoch": 1.5236541598694942, "grad_norm": 0.031185589730739594, "learning_rate": 0.0007617455138662317, "loss": 0.185, "num_input_tokens_seen": 20156608, "step": 9340 }, { "epoch": 1.5244698205546494, "grad_norm": 0.104819655418396, "learning_rate": 0.0007621533442088091, "loss": 0.0529, "num_input_tokens_seen": 20165888, "step": 9345 }, { "epoch": 1.5252854812398042, "grad_norm": 0.11199242621660233, "learning_rate": 0.0007625611745513866, "loss": 0.1121, "num_input_tokens_seen": 20177440, "step": 9350 }, { "epoch": 1.5261011419249593, "grad_norm": 0.14065009355545044, "learning_rate": 0.0007629690048939642, "loss": 0.0426, "num_input_tokens_seen": 20187456, "step": 9355 }, { "epoch": 1.5269168026101143, "grad_norm": 0.16130954027175903, "learning_rate": 0.0007633768352365417, "loss": 0.0441, "num_input_tokens_seen": 20197920, "step": 9360 }, { "epoch": 1.5277324632952691, "grad_norm": 0.029700618237257004, "learning_rate": 0.000763784665579119, "loss": 0.0527, "num_input_tokens_seen": 20208672, "step": 9365 }, { "epoch": 1.5285481239804242, "grad_norm": 0.019296688959002495, "learning_rate": 0.0007641924959216966, "loss": 0.0651, "num_input_tokens_seen": 20219616, "step": 9370 }, { "epoch": 1.5293637846655792, "grad_norm": 0.01708332635462284, "learning_rate": 0.0007646003262642741, "loss": 0.0644, "num_input_tokens_seen": 20231168, "step": 9375 }, { "epoch": 1.530179445350734, "grad_norm": 0.10777116566896439, "learning_rate": 0.0007650081566068515, "loss": 0.0659, "num_input_tokens_seen": 20241312, "step": 9380 }, { "epoch": 1.530995106035889, "grad_norm": 0.19223034381866455, "learning_rate": 0.0007654159869494291, "loss": 0.1807, "num_input_tokens_seen": 20252352, "step": 9385 }, { "epoch": 1.531810766721044, "grad_norm": 0.1572011262178421, "learning_rate": 0.0007658238172920065, "loss": 0.3226, "num_input_tokens_seen": 20262720, "step": 9390 }, { "epoch": 1.532626427406199, "grad_norm": 0.18543611466884613, "learning_rate": 0.0007662316476345841, "loss": 0.0979, "num_input_tokens_seen": 20273312, "step": 9395 }, { "epoch": 1.533442088091354, "grad_norm": 0.056582748889923096, "learning_rate": 0.0007666394779771615, "loss": 0.0788, "num_input_tokens_seen": 20284544, "step": 9400 }, { "epoch": 1.534257748776509, "grad_norm": 0.49562299251556396, "learning_rate": 0.000767047308319739, "loss": 0.2864, "num_input_tokens_seen": 20294400, "step": 9405 }, { "epoch": 1.5350734094616638, "grad_norm": 0.1681860387325287, "learning_rate": 0.0007674551386623165, "loss": 0.0836, "num_input_tokens_seen": 20305920, "step": 9410 }, { "epoch": 1.535889070146819, "grad_norm": 0.030374186113476753, "learning_rate": 0.0007678629690048939, "loss": 0.087, "num_input_tokens_seen": 20315232, "step": 9415 }, { "epoch": 1.536704730831974, "grad_norm": 0.27291780710220337, "learning_rate": 0.0007682707993474715, "loss": 0.2199, "num_input_tokens_seen": 20325568, "step": 9420 }, { "epoch": 1.5375203915171287, "grad_norm": 0.10427863895893097, "learning_rate": 0.000768678629690049, "loss": 0.2277, "num_input_tokens_seen": 20335552, "step": 9425 }, { "epoch": 1.538336052202284, "grad_norm": 0.06367757171392441, "learning_rate": 0.0007690864600326263, "loss": 0.0527, "num_input_tokens_seen": 20346624, "step": 9430 }, { "epoch": 1.5391517128874388, "grad_norm": 0.04572995379567146, "learning_rate": 0.0007694942903752039, "loss": 0.0832, "num_input_tokens_seen": 20357088, "step": 9435 }, { "epoch": 1.5399673735725938, "grad_norm": 0.17307651042938232, "learning_rate": 0.0007699021207177814, "loss": 0.0753, "num_input_tokens_seen": 20368672, "step": 9440 }, { "epoch": 1.5407830342577489, "grad_norm": 0.2099994719028473, "learning_rate": 0.000770309951060359, "loss": 0.0716, "num_input_tokens_seen": 20378976, "step": 9445 }, { "epoch": 1.5415986949429037, "grad_norm": 0.2195855975151062, "learning_rate": 0.0007707177814029364, "loss": 0.1396, "num_input_tokens_seen": 20388416, "step": 9450 }, { "epoch": 1.5424143556280587, "grad_norm": 0.10352769494056702, "learning_rate": 0.0007711256117455138, "loss": 0.0629, "num_input_tokens_seen": 20398496, "step": 9455 }, { "epoch": 1.5432300163132138, "grad_norm": 0.06875469535589218, "learning_rate": 0.0007715334420880914, "loss": 0.0571, "num_input_tokens_seen": 20410016, "step": 9460 }, { "epoch": 1.5440456769983686, "grad_norm": 0.17823068797588348, "learning_rate": 0.0007719412724306688, "loss": 0.0708, "num_input_tokens_seen": 20419424, "step": 9465 }, { "epoch": 1.5448613376835236, "grad_norm": 0.0178715530782938, "learning_rate": 0.0007723491027732464, "loss": 0.05, "num_input_tokens_seen": 20429760, "step": 9470 }, { "epoch": 1.5456769983686787, "grad_norm": 0.7066415548324585, "learning_rate": 0.0007727569331158239, "loss": 0.1799, "num_input_tokens_seen": 20439488, "step": 9475 }, { "epoch": 1.5464926590538335, "grad_norm": 0.08177358657121658, "learning_rate": 0.0007731647634584013, "loss": 0.2001, "num_input_tokens_seen": 20449312, "step": 9480 }, { "epoch": 1.5473083197389887, "grad_norm": 0.28617024421691895, "learning_rate": 0.0007735725938009788, "loss": 0.3216, "num_input_tokens_seen": 20460960, "step": 9485 }, { "epoch": 1.5481239804241436, "grad_norm": 0.06347467750310898, "learning_rate": 0.0007739804241435563, "loss": 0.0917, "num_input_tokens_seen": 20471968, "step": 9490 }, { "epoch": 1.5489396411092984, "grad_norm": 0.2772424817085266, "learning_rate": 0.0007743882544861339, "loss": 0.1908, "num_input_tokens_seen": 20483136, "step": 9495 }, { "epoch": 1.5497553017944536, "grad_norm": 0.04900914803147316, "learning_rate": 0.0007747960848287112, "loss": 0.0548, "num_input_tokens_seen": 20494304, "step": 9500 }, { "epoch": 1.5505709624796085, "grad_norm": 0.1593443751335144, "learning_rate": 0.0007752039151712887, "loss": 0.1128, "num_input_tokens_seen": 20505440, "step": 9505 }, { "epoch": 1.5513866231647635, "grad_norm": 0.2504079341888428, "learning_rate": 0.0007756117455138663, "loss": 0.0887, "num_input_tokens_seen": 20514560, "step": 9510 }, { "epoch": 1.5522022838499185, "grad_norm": 0.3188331723213196, "learning_rate": 0.0007760195758564438, "loss": 0.2318, "num_input_tokens_seen": 20526304, "step": 9515 }, { "epoch": 1.5530179445350734, "grad_norm": 0.21443334221839905, "learning_rate": 0.0007764274061990211, "loss": 0.1421, "num_input_tokens_seen": 20536448, "step": 9520 }, { "epoch": 1.5538336052202284, "grad_norm": 0.15462572872638702, "learning_rate": 0.0007768352365415987, "loss": 0.0968, "num_input_tokens_seen": 20548160, "step": 9525 }, { "epoch": 1.5546492659053834, "grad_norm": 0.25904881954193115, "learning_rate": 0.0007772430668841762, "loss": 0.2007, "num_input_tokens_seen": 20557952, "step": 9530 }, { "epoch": 1.5554649265905383, "grad_norm": 0.10822169482707977, "learning_rate": 0.0007776508972267537, "loss": 0.2691, "num_input_tokens_seen": 20569728, "step": 9535 }, { "epoch": 1.5562805872756933, "grad_norm": 0.11618134379386902, "learning_rate": 0.0007780587275693312, "loss": 0.0971, "num_input_tokens_seen": 20580512, "step": 9540 }, { "epoch": 1.5570962479608483, "grad_norm": 0.1241903305053711, "learning_rate": 0.0007784665579119086, "loss": 0.0491, "num_input_tokens_seen": 20591808, "step": 9545 }, { "epoch": 1.5579119086460032, "grad_norm": 0.008565060794353485, "learning_rate": 0.0007788743882544862, "loss": 0.0913, "num_input_tokens_seen": 20603264, "step": 9550 }, { "epoch": 1.5587275693311582, "grad_norm": 0.08833223581314087, "learning_rate": 0.0007792822185970636, "loss": 0.1887, "num_input_tokens_seen": 20614336, "step": 9555 }, { "epoch": 1.5595432300163132, "grad_norm": 0.1794343888759613, "learning_rate": 0.0007796900489396412, "loss": 0.1581, "num_input_tokens_seen": 20625440, "step": 9560 }, { "epoch": 1.560358890701468, "grad_norm": 0.08367512375116348, "learning_rate": 0.0007800978792822186, "loss": 0.2056, "num_input_tokens_seen": 20636960, "step": 9565 }, { "epoch": 1.5611745513866233, "grad_norm": 0.08427362143993378, "learning_rate": 0.000780505709624796, "loss": 0.0939, "num_input_tokens_seen": 20647488, "step": 9570 }, { "epoch": 1.5619902120717781, "grad_norm": 0.15433242917060852, "learning_rate": 0.0007809135399673736, "loss": 0.0805, "num_input_tokens_seen": 20658144, "step": 9575 }, { "epoch": 1.5628058727569332, "grad_norm": 0.07495057582855225, "learning_rate": 0.0007813213703099511, "loss": 0.095, "num_input_tokens_seen": 20668480, "step": 9580 }, { "epoch": 1.5636215334420882, "grad_norm": 0.1063779890537262, "learning_rate": 0.0007817292006525287, "loss": 0.0357, "num_input_tokens_seen": 20679168, "step": 9585 }, { "epoch": 1.564437194127243, "grad_norm": 0.17189694941043854, "learning_rate": 0.000782137030995106, "loss": 0.1453, "num_input_tokens_seen": 20689248, "step": 9590 }, { "epoch": 1.565252854812398, "grad_norm": 0.360895574092865, "learning_rate": 0.0007825448613376835, "loss": 0.2521, "num_input_tokens_seen": 20700704, "step": 9595 }, { "epoch": 1.566068515497553, "grad_norm": 0.12502089142799377, "learning_rate": 0.0007829526916802611, "loss": 0.1937, "num_input_tokens_seen": 20710496, "step": 9600 }, { "epoch": 1.566884176182708, "grad_norm": 0.12940464913845062, "learning_rate": 0.0007833605220228385, "loss": 0.0631, "num_input_tokens_seen": 20723104, "step": 9605 }, { "epoch": 1.567699836867863, "grad_norm": 0.07108697295188904, "learning_rate": 0.000783768352365416, "loss": 0.1448, "num_input_tokens_seen": 20733920, "step": 9610 }, { "epoch": 1.568515497553018, "grad_norm": 0.2675967216491699, "learning_rate": 0.0007841761827079935, "loss": 0.1815, "num_input_tokens_seen": 20745504, "step": 9615 }, { "epoch": 1.5693311582381728, "grad_norm": 0.04413705691695213, "learning_rate": 0.000784584013050571, "loss": 0.0498, "num_input_tokens_seen": 20756960, "step": 9620 }, { "epoch": 1.5701468189233279, "grad_norm": 0.1790713518857956, "learning_rate": 0.0007849918433931485, "loss": 0.0985, "num_input_tokens_seen": 20765568, "step": 9625 }, { "epoch": 1.570962479608483, "grad_norm": 0.03748449310660362, "learning_rate": 0.000785399673735726, "loss": 0.0273, "num_input_tokens_seen": 20775840, "step": 9630 }, { "epoch": 1.5717781402936377, "grad_norm": 0.05571736395359039, "learning_rate": 0.0007858075040783035, "loss": 0.073, "num_input_tokens_seen": 20786144, "step": 9635 }, { "epoch": 1.572593800978793, "grad_norm": 0.3538409173488617, "learning_rate": 0.0007862153344208809, "loss": 0.1499, "num_input_tokens_seen": 20797664, "step": 9640 }, { "epoch": 1.5734094616639478, "grad_norm": 0.31620872020721436, "learning_rate": 0.0007866231647634584, "loss": 0.1908, "num_input_tokens_seen": 20808768, "step": 9645 }, { "epoch": 1.5742251223491026, "grad_norm": 0.024917548522353172, "learning_rate": 0.000787030995106036, "loss": 0.1592, "num_input_tokens_seen": 20819808, "step": 9650 }, { "epoch": 1.5750407830342579, "grad_norm": 0.3046981990337372, "learning_rate": 0.0007874388254486133, "loss": 0.1763, "num_input_tokens_seen": 20830336, "step": 9655 }, { "epoch": 1.5758564437194127, "grad_norm": 0.1293632537126541, "learning_rate": 0.0007878466557911908, "loss": 0.248, "num_input_tokens_seen": 20840352, "step": 9660 }, { "epoch": 1.5766721044045677, "grad_norm": 0.25274598598480225, "learning_rate": 0.0007882544861337684, "loss": 0.1658, "num_input_tokens_seen": 20852000, "step": 9665 }, { "epoch": 1.5774877650897228, "grad_norm": 0.12369292229413986, "learning_rate": 0.0007886623164763459, "loss": 0.1023, "num_input_tokens_seen": 20864064, "step": 9670 }, { "epoch": 1.5783034257748776, "grad_norm": 0.040855523198843, "learning_rate": 0.0007890701468189233, "loss": 0.1479, "num_input_tokens_seen": 20874912, "step": 9675 }, { "epoch": 1.5791190864600326, "grad_norm": 0.020169921219348907, "learning_rate": 0.0007894779771615008, "loss": 0.0621, "num_input_tokens_seen": 20886080, "step": 9680 }, { "epoch": 1.5799347471451877, "grad_norm": 0.31696078181266785, "learning_rate": 0.0007898858075040783, "loss": 0.1032, "num_input_tokens_seen": 20896608, "step": 9685 }, { "epoch": 1.5807504078303425, "grad_norm": 0.14060260355472565, "learning_rate": 0.0007902936378466558, "loss": 0.0354, "num_input_tokens_seen": 20907296, "step": 9690 }, { "epoch": 1.5815660685154975, "grad_norm": 0.02468281425535679, "learning_rate": 0.0007907014681892332, "loss": 0.0562, "num_input_tokens_seen": 20917856, "step": 9695 }, { "epoch": 1.5823817292006526, "grad_norm": 0.0263371542096138, "learning_rate": 0.0007911092985318108, "loss": 0.016, "num_input_tokens_seen": 20929728, "step": 9700 }, { "epoch": 1.5831973898858074, "grad_norm": 0.02989843674004078, "learning_rate": 0.0007915171288743883, "loss": 0.0353, "num_input_tokens_seen": 20940448, "step": 9705 }, { "epoch": 1.5840130505709626, "grad_norm": 0.23173603415489197, "learning_rate": 0.0007919249592169657, "loss": 0.1607, "num_input_tokens_seen": 20951456, "step": 9710 }, { "epoch": 1.5848287112561175, "grad_norm": 0.46060606837272644, "learning_rate": 0.0007923327895595433, "loss": 0.2838, "num_input_tokens_seen": 20962400, "step": 9715 }, { "epoch": 1.5856443719412723, "grad_norm": 0.047276612371206284, "learning_rate": 0.0007927406199021207, "loss": 0.1177, "num_input_tokens_seen": 20972544, "step": 9720 }, { "epoch": 1.5864600326264275, "grad_norm": 0.07967732846736908, "learning_rate": 0.0007931484502446982, "loss": 0.0505, "num_input_tokens_seen": 20983104, "step": 9725 }, { "epoch": 1.5872756933115824, "grad_norm": 0.06475040316581726, "learning_rate": 0.0007935562805872757, "loss": 0.05, "num_input_tokens_seen": 20993120, "step": 9730 }, { "epoch": 1.5880913539967374, "grad_norm": 0.2018146514892578, "learning_rate": 0.0007939641109298532, "loss": 0.059, "num_input_tokens_seen": 21003392, "step": 9735 }, { "epoch": 1.5889070146818924, "grad_norm": 0.2184900939464569, "learning_rate": 0.0007943719412724308, "loss": 0.2506, "num_input_tokens_seen": 21015296, "step": 9740 }, { "epoch": 1.5897226753670473, "grad_norm": 0.06441153585910797, "learning_rate": 0.0007947797716150081, "loss": 0.1236, "num_input_tokens_seen": 21025280, "step": 9745 }, { "epoch": 1.5905383360522023, "grad_norm": 0.3672851622104645, "learning_rate": 0.0007951876019575857, "loss": 0.068, "num_input_tokens_seen": 21036448, "step": 9750 }, { "epoch": 1.5913539967373573, "grad_norm": 0.08671893924474716, "learning_rate": 0.0007955954323001632, "loss": 0.0291, "num_input_tokens_seen": 21048288, "step": 9755 }, { "epoch": 1.5921696574225122, "grad_norm": 0.11941304057836533, "learning_rate": 0.0007960032626427406, "loss": 0.1557, "num_input_tokens_seen": 21057536, "step": 9760 }, { "epoch": 1.5929853181076672, "grad_norm": 0.5349796414375305, "learning_rate": 0.0007964110929853181, "loss": 0.0903, "num_input_tokens_seen": 21068192, "step": 9765 }, { "epoch": 1.5938009787928222, "grad_norm": 0.1064114198088646, "learning_rate": 0.0007968189233278956, "loss": 0.1053, "num_input_tokens_seen": 21078592, "step": 9770 }, { "epoch": 1.594616639477977, "grad_norm": 0.6762012243270874, "learning_rate": 0.0007972267536704732, "loss": 0.1266, "num_input_tokens_seen": 21088576, "step": 9775 }, { "epoch": 1.595432300163132, "grad_norm": 0.0502680242061615, "learning_rate": 0.0007976345840130506, "loss": 0.0144, "num_input_tokens_seen": 21100576, "step": 9780 }, { "epoch": 1.5962479608482871, "grad_norm": 0.039726510643959045, "learning_rate": 0.000798042414355628, "loss": 0.1852, "num_input_tokens_seen": 21110784, "step": 9785 }, { "epoch": 1.597063621533442, "grad_norm": 0.1143261045217514, "learning_rate": 0.0007984502446982056, "loss": 0.2359, "num_input_tokens_seen": 21120448, "step": 9790 }, { "epoch": 1.5978792822185972, "grad_norm": 0.4162430465221405, "learning_rate": 0.000798858075040783, "loss": 0.1795, "num_input_tokens_seen": 21132224, "step": 9795 }, { "epoch": 1.598694942903752, "grad_norm": 0.7590615749359131, "learning_rate": 0.0007992659053833605, "loss": 0.2824, "num_input_tokens_seen": 21143744, "step": 9800 }, { "epoch": 1.599510603588907, "grad_norm": 0.20848359167575836, "learning_rate": 0.0007996737357259381, "loss": 0.0794, "num_input_tokens_seen": 21153760, "step": 9805 }, { "epoch": 1.600326264274062, "grad_norm": 0.25150153040885925, "learning_rate": 0.0008000815660685155, "loss": 0.1213, "num_input_tokens_seen": 21163552, "step": 9810 }, { "epoch": 1.601141924959217, "grad_norm": 0.22185729444026947, "learning_rate": 0.000800489396411093, "loss": 0.121, "num_input_tokens_seen": 21174912, "step": 9815 }, { "epoch": 1.601957585644372, "grad_norm": 0.43506550788879395, "learning_rate": 0.0008008972267536705, "loss": 0.1873, "num_input_tokens_seen": 21186464, "step": 9820 }, { "epoch": 1.602773246329527, "grad_norm": 0.4134158492088318, "learning_rate": 0.000801305057096248, "loss": 0.072, "num_input_tokens_seen": 21197280, "step": 9825 }, { "epoch": 1.6035889070146818, "grad_norm": 0.03559480607509613, "learning_rate": 0.0008017128874388254, "loss": 0.215, "num_input_tokens_seen": 21209248, "step": 9830 }, { "epoch": 1.6044045676998369, "grad_norm": 0.3652363419532776, "learning_rate": 0.0008021207177814029, "loss": 0.1629, "num_input_tokens_seen": 21219808, "step": 9835 }, { "epoch": 1.605220228384992, "grad_norm": 0.03462637960910797, "learning_rate": 0.0008025285481239805, "loss": 0.1418, "num_input_tokens_seen": 21230688, "step": 9840 }, { "epoch": 1.6060358890701467, "grad_norm": 0.23562374711036682, "learning_rate": 0.000802936378466558, "loss": 0.1095, "num_input_tokens_seen": 21241312, "step": 9845 }, { "epoch": 1.6068515497553018, "grad_norm": 0.3672410547733307, "learning_rate": 0.0008033442088091353, "loss": 0.164, "num_input_tokens_seen": 21252512, "step": 9850 }, { "epoch": 1.6076672104404568, "grad_norm": 0.1089744046330452, "learning_rate": 0.0008037520391517129, "loss": 0.2384, "num_input_tokens_seen": 21263296, "step": 9855 }, { "epoch": 1.6084828711256116, "grad_norm": 0.2814162075519562, "learning_rate": 0.0008041598694942904, "loss": 0.0554, "num_input_tokens_seen": 21274368, "step": 9860 }, { "epoch": 1.6092985318107669, "grad_norm": 0.23238016664981842, "learning_rate": 0.0008045676998368679, "loss": 0.0573, "num_input_tokens_seen": 21284288, "step": 9865 }, { "epoch": 1.6101141924959217, "grad_norm": 0.2972049415111542, "learning_rate": 0.0008049755301794454, "loss": 0.0735, "num_input_tokens_seen": 21295296, "step": 9870 }, { "epoch": 1.6109298531810765, "grad_norm": 0.37032145261764526, "learning_rate": 0.0008053833605220228, "loss": 0.3909, "num_input_tokens_seen": 21305632, "step": 9875 }, { "epoch": 1.6117455138662318, "grad_norm": 0.3014730215072632, "learning_rate": 0.0008057911908646003, "loss": 0.1988, "num_input_tokens_seen": 21315904, "step": 9880 }, { "epoch": 1.6125611745513866, "grad_norm": 0.17088168859481812, "learning_rate": 0.0008061990212071778, "loss": 0.0707, "num_input_tokens_seen": 21327616, "step": 9885 }, { "epoch": 1.6133768352365416, "grad_norm": 0.27868807315826416, "learning_rate": 0.0008066068515497554, "loss": 0.2604, "num_input_tokens_seen": 21338048, "step": 9890 }, { "epoch": 1.6141924959216967, "grad_norm": 0.04883329197764397, "learning_rate": 0.0008070146818923329, "loss": 0.0449, "num_input_tokens_seen": 21348704, "step": 9895 }, { "epoch": 1.6150081566068515, "grad_norm": 0.07456479966640472, "learning_rate": 0.0008074225122349102, "loss": 0.1264, "num_input_tokens_seen": 21359680, "step": 9900 }, { "epoch": 1.6158238172920065, "grad_norm": 0.029087895527482033, "learning_rate": 0.0008078303425774878, "loss": 0.0679, "num_input_tokens_seen": 21370208, "step": 9905 }, { "epoch": 1.6166394779771616, "grad_norm": 0.09739410132169724, "learning_rate": 0.0008082381729200653, "loss": 0.1088, "num_input_tokens_seen": 21380384, "step": 9910 }, { "epoch": 1.6174551386623164, "grad_norm": 0.10695986449718475, "learning_rate": 0.0008086460032626428, "loss": 0.0893, "num_input_tokens_seen": 21391648, "step": 9915 }, { "epoch": 1.6182707993474714, "grad_norm": 0.034157272428274155, "learning_rate": 0.0008090538336052202, "loss": 0.0749, "num_input_tokens_seen": 21402656, "step": 9920 }, { "epoch": 1.6190864600326265, "grad_norm": 0.12145096063613892, "learning_rate": 0.0008094616639477977, "loss": 0.0809, "num_input_tokens_seen": 21413216, "step": 9925 }, { "epoch": 1.6199021207177813, "grad_norm": 0.016647957265377045, "learning_rate": 0.0008098694942903753, "loss": 0.0766, "num_input_tokens_seen": 21423392, "step": 9930 }, { "epoch": 1.6207177814029365, "grad_norm": 0.29953184723854065, "learning_rate": 0.0008102773246329527, "loss": 0.0386, "num_input_tokens_seen": 21433600, "step": 9935 }, { "epoch": 1.6215334420880914, "grad_norm": 0.03813500329852104, "learning_rate": 0.0008106851549755301, "loss": 0.2418, "num_input_tokens_seen": 21445056, "step": 9940 }, { "epoch": 1.6223491027732462, "grad_norm": 0.1778724640607834, "learning_rate": 0.0008110929853181077, "loss": 0.1502, "num_input_tokens_seen": 21456992, "step": 9945 }, { "epoch": 1.6231647634584014, "grad_norm": 0.12489598244428635, "learning_rate": 0.0008115008156606851, "loss": 0.0928, "num_input_tokens_seen": 21467072, "step": 9950 }, { "epoch": 1.6239804241435563, "grad_norm": 0.769350528717041, "learning_rate": 0.0008119086460032627, "loss": 0.3364, "num_input_tokens_seen": 21478208, "step": 9955 }, { "epoch": 1.6247960848287113, "grad_norm": 0.42202746868133545, "learning_rate": 0.0008123164763458402, "loss": 0.1316, "num_input_tokens_seen": 21488672, "step": 9960 }, { "epoch": 1.6256117455138663, "grad_norm": 0.11347547918558121, "learning_rate": 0.0008127243066884176, "loss": 0.1814, "num_input_tokens_seen": 21499520, "step": 9965 }, { "epoch": 1.6264274061990212, "grad_norm": 0.0843597799539566, "learning_rate": 0.0008131321370309951, "loss": 0.0594, "num_input_tokens_seen": 21510144, "step": 9970 }, { "epoch": 1.6272430668841762, "grad_norm": 0.18747107684612274, "learning_rate": 0.0008135399673735726, "loss": 0.1776, "num_input_tokens_seen": 21520800, "step": 9975 }, { "epoch": 1.6280587275693312, "grad_norm": 0.43321093916893005, "learning_rate": 0.0008139477977161502, "loss": 0.2039, "num_input_tokens_seen": 21531744, "step": 9980 }, { "epoch": 1.628874388254486, "grad_norm": 0.19053810834884644, "learning_rate": 0.0008143556280587275, "loss": 0.0684, "num_input_tokens_seen": 21543040, "step": 9985 }, { "epoch": 1.629690048939641, "grad_norm": 0.18892325460910797, "learning_rate": 0.000814763458401305, "loss": 0.0794, "num_input_tokens_seen": 21553536, "step": 9990 }, { "epoch": 1.6305057096247961, "grad_norm": 0.3265056014060974, "learning_rate": 0.0008151712887438826, "loss": 0.0919, "num_input_tokens_seen": 21565120, "step": 9995 }, { "epoch": 1.631321370309951, "grad_norm": 0.03408828005194664, "learning_rate": 0.0008155791190864601, "loss": 0.03, "num_input_tokens_seen": 21575712, "step": 10000 }, { "epoch": 1.632137030995106, "grad_norm": 0.30715054273605347, "learning_rate": 0.0008159869494290375, "loss": 0.1695, "num_input_tokens_seen": 21588000, "step": 10005 }, { "epoch": 1.632952691680261, "grad_norm": 0.16547569632530212, "learning_rate": 0.000816394779771615, "loss": 0.2501, "num_input_tokens_seen": 21599552, "step": 10010 }, { "epoch": 1.6337683523654158, "grad_norm": 0.02181277982890606, "learning_rate": 0.0008168026101141925, "loss": 0.0536, "num_input_tokens_seen": 21610464, "step": 10015 }, { "epoch": 1.634584013050571, "grad_norm": 0.3717193603515625, "learning_rate": 0.00081721044045677, "loss": 0.1419, "num_input_tokens_seen": 21620960, "step": 10020 }, { "epoch": 1.635399673735726, "grad_norm": 0.06014539301395416, "learning_rate": 0.0008176182707993475, "loss": 0.0583, "num_input_tokens_seen": 21631072, "step": 10025 }, { "epoch": 1.636215334420881, "grad_norm": 0.10776666551828384, "learning_rate": 0.000818026101141925, "loss": 0.1506, "num_input_tokens_seen": 21641600, "step": 10030 }, { "epoch": 1.637030995106036, "grad_norm": 0.10929621011018753, "learning_rate": 0.0008184339314845025, "loss": 0.114, "num_input_tokens_seen": 21651936, "step": 10035 }, { "epoch": 1.6378466557911908, "grad_norm": 0.03556487709283829, "learning_rate": 0.0008188417618270799, "loss": 0.0889, "num_input_tokens_seen": 21663808, "step": 10040 }, { "epoch": 1.6386623164763459, "grad_norm": 0.14710848033428192, "learning_rate": 0.0008192495921696575, "loss": 0.074, "num_input_tokens_seen": 21675296, "step": 10045 }, { "epoch": 1.639477977161501, "grad_norm": 0.1414177119731903, "learning_rate": 0.0008196574225122349, "loss": 0.0777, "num_input_tokens_seen": 21686720, "step": 10050 }, { "epoch": 1.6402936378466557, "grad_norm": 0.5499042868614197, "learning_rate": 0.0008200652528548124, "loss": 0.2805, "num_input_tokens_seen": 21698016, "step": 10055 }, { "epoch": 1.6411092985318108, "grad_norm": 0.2095194160938263, "learning_rate": 0.0008204730831973899, "loss": 0.0487, "num_input_tokens_seen": 21708544, "step": 10060 }, { "epoch": 1.6419249592169658, "grad_norm": 0.5783413648605347, "learning_rate": 0.0008208809135399674, "loss": 0.0899, "num_input_tokens_seen": 21719968, "step": 10065 }, { "epoch": 1.6427406199021206, "grad_norm": 0.5263043642044067, "learning_rate": 0.000821288743882545, "loss": 0.2638, "num_input_tokens_seen": 21729056, "step": 10070 }, { "epoch": 1.6435562805872757, "grad_norm": 0.19671334326267242, "learning_rate": 0.0008216965742251223, "loss": 0.1522, "num_input_tokens_seen": 21739520, "step": 10075 }, { "epoch": 1.6443719412724307, "grad_norm": 0.22772231698036194, "learning_rate": 0.0008221044045676999, "loss": 0.1843, "num_input_tokens_seen": 21749632, "step": 10080 }, { "epoch": 1.6451876019575855, "grad_norm": 0.07171687483787537, "learning_rate": 0.0008225122349102774, "loss": 0.0599, "num_input_tokens_seen": 21760864, "step": 10085 }, { "epoch": 1.6460032626427408, "grad_norm": 0.2419193536043167, "learning_rate": 0.0008229200652528548, "loss": 0.1392, "num_input_tokens_seen": 21772192, "step": 10090 }, { "epoch": 1.6468189233278956, "grad_norm": 0.05721985176205635, "learning_rate": 0.0008233278955954323, "loss": 0.037, "num_input_tokens_seen": 21783744, "step": 10095 }, { "epoch": 1.6476345840130504, "grad_norm": 0.0413338840007782, "learning_rate": 0.0008237357259380098, "loss": 0.0205, "num_input_tokens_seen": 21794880, "step": 10100 }, { "epoch": 1.6484502446982057, "grad_norm": 0.16911466419696808, "learning_rate": 0.0008241435562805873, "loss": 0.2502, "num_input_tokens_seen": 21805568, "step": 10105 }, { "epoch": 1.6492659053833605, "grad_norm": 0.20941373705863953, "learning_rate": 0.0008245513866231648, "loss": 0.048, "num_input_tokens_seen": 21815392, "step": 10110 }, { "epoch": 1.6500815660685155, "grad_norm": 0.07771088927984238, "learning_rate": 0.0008249592169657422, "loss": 0.0657, "num_input_tokens_seen": 21826432, "step": 10115 }, { "epoch": 1.6508972267536706, "grad_norm": 0.01342721562832594, "learning_rate": 0.0008253670473083198, "loss": 0.1064, "num_input_tokens_seen": 21837664, "step": 10120 }, { "epoch": 1.6517128874388254, "grad_norm": 0.0390215627849102, "learning_rate": 0.0008257748776508972, "loss": 0.115, "num_input_tokens_seen": 21847872, "step": 10125 }, { "epoch": 1.6525285481239804, "grad_norm": 0.07819672673940659, "learning_rate": 0.0008261827079934747, "loss": 0.0219, "num_input_tokens_seen": 21859648, "step": 10130 }, { "epoch": 1.6533442088091355, "grad_norm": 0.12740814685821533, "learning_rate": 0.0008265905383360523, "loss": 0.0595, "num_input_tokens_seen": 21870656, "step": 10135 }, { "epoch": 1.6541598694942903, "grad_norm": 0.5126218795776367, "learning_rate": 0.0008269983686786296, "loss": 0.0843, "num_input_tokens_seen": 21881152, "step": 10140 }, { "epoch": 1.6549755301794453, "grad_norm": 0.4186317026615143, "learning_rate": 0.0008274061990212072, "loss": 0.0848, "num_input_tokens_seen": 21891168, "step": 10145 }, { "epoch": 1.6557911908646004, "grad_norm": 0.018293803557753563, "learning_rate": 0.0008278140293637847, "loss": 0.0377, "num_input_tokens_seen": 21901088, "step": 10150 }, { "epoch": 1.6566068515497552, "grad_norm": 0.11163417994976044, "learning_rate": 0.0008282218597063622, "loss": 0.0889, "num_input_tokens_seen": 21910880, "step": 10155 }, { "epoch": 1.6574225122349104, "grad_norm": 0.11267869174480438, "learning_rate": 0.0008286296900489396, "loss": 0.0686, "num_input_tokens_seen": 21922752, "step": 10160 }, { "epoch": 1.6582381729200653, "grad_norm": 0.27868160605430603, "learning_rate": 0.0008290375203915171, "loss": 0.1464, "num_input_tokens_seen": 21933120, "step": 10165 }, { "epoch": 1.65905383360522, "grad_norm": 0.20478451251983643, "learning_rate": 0.0008294453507340947, "loss": 0.0327, "num_input_tokens_seen": 21942976, "step": 10170 }, { "epoch": 1.6598694942903753, "grad_norm": 0.04750271141529083, "learning_rate": 0.0008298531810766721, "loss": 0.1543, "num_input_tokens_seen": 21952672, "step": 10175 }, { "epoch": 1.6606851549755302, "grad_norm": 0.5159620642662048, "learning_rate": 0.0008302610114192496, "loss": 0.1855, "num_input_tokens_seen": 21962944, "step": 10180 }, { "epoch": 1.6615008156606852, "grad_norm": 0.4801236391067505, "learning_rate": 0.0008306688417618271, "loss": 0.1926, "num_input_tokens_seen": 21973216, "step": 10185 }, { "epoch": 1.6623164763458402, "grad_norm": 0.46325141191482544, "learning_rate": 0.0008310766721044046, "loss": 0.2177, "num_input_tokens_seen": 21984640, "step": 10190 }, { "epoch": 1.663132137030995, "grad_norm": 0.16884745657444, "learning_rate": 0.0008314845024469821, "loss": 0.2141, "num_input_tokens_seen": 21995808, "step": 10195 }, { "epoch": 1.66394779771615, "grad_norm": 0.13277685642242432, "learning_rate": 0.0008318923327895596, "loss": 0.1601, "num_input_tokens_seen": 22005568, "step": 10200 }, { "epoch": 1.6647634584013051, "grad_norm": 0.12077904492616653, "learning_rate": 0.000832300163132137, "loss": 0.1269, "num_input_tokens_seen": 22016640, "step": 10205 }, { "epoch": 1.66557911908646, "grad_norm": 0.12961402535438538, "learning_rate": 0.0008327079934747145, "loss": 0.1934, "num_input_tokens_seen": 22027584, "step": 10210 }, { "epoch": 1.666394779771615, "grad_norm": 0.06692396104335785, "learning_rate": 0.000833115823817292, "loss": 0.1084, "num_input_tokens_seen": 22038592, "step": 10215 }, { "epoch": 1.66721044045677, "grad_norm": 0.2980732321739197, "learning_rate": 0.0008335236541598696, "loss": 0.1579, "num_input_tokens_seen": 22048352, "step": 10220 }, { "epoch": 1.6680261011419248, "grad_norm": 0.11856962740421295, "learning_rate": 0.0008339314845024471, "loss": 0.1597, "num_input_tokens_seen": 22059456, "step": 10225 }, { "epoch": 1.6688417618270799, "grad_norm": 0.14023758471012115, "learning_rate": 0.0008343393148450244, "loss": 0.1298, "num_input_tokens_seen": 22070048, "step": 10230 }, { "epoch": 1.669657422512235, "grad_norm": 0.08485733717679977, "learning_rate": 0.000834747145187602, "loss": 0.0493, "num_input_tokens_seen": 22081120, "step": 10235 }, { "epoch": 1.6704730831973897, "grad_norm": 0.0494561642408371, "learning_rate": 0.0008351549755301795, "loss": 0.1566, "num_input_tokens_seen": 22092896, "step": 10240 }, { "epoch": 1.671288743882545, "grad_norm": 0.13931237161159515, "learning_rate": 0.0008355628058727569, "loss": 0.1632, "num_input_tokens_seen": 22102400, "step": 10245 }, { "epoch": 1.6721044045676998, "grad_norm": 0.052524060010910034, "learning_rate": 0.0008359706362153344, "loss": 0.2201, "num_input_tokens_seen": 22112928, "step": 10250 }, { "epoch": 1.6729200652528549, "grad_norm": 0.04889857769012451, "learning_rate": 0.0008363784665579119, "loss": 0.0883, "num_input_tokens_seen": 22123584, "step": 10255 }, { "epoch": 1.67373572593801, "grad_norm": 0.05692030116915703, "learning_rate": 0.0008367862969004895, "loss": 0.0713, "num_input_tokens_seen": 22135392, "step": 10260 }, { "epoch": 1.6745513866231647, "grad_norm": 0.05608496069908142, "learning_rate": 0.0008371941272430669, "loss": 0.0352, "num_input_tokens_seen": 22146208, "step": 10265 }, { "epoch": 1.6753670473083198, "grad_norm": 0.392633318901062, "learning_rate": 0.0008376019575856443, "loss": 0.119, "num_input_tokens_seen": 22157792, "step": 10270 }, { "epoch": 1.6761827079934748, "grad_norm": 0.29057684540748596, "learning_rate": 0.0008380097879282219, "loss": 0.1608, "num_input_tokens_seen": 22168640, "step": 10275 }, { "epoch": 1.6769983686786296, "grad_norm": 0.16684100031852722, "learning_rate": 0.0008384176182707993, "loss": 0.0343, "num_input_tokens_seen": 22180224, "step": 10280 }, { "epoch": 1.6778140293637847, "grad_norm": 0.25748613476753235, "learning_rate": 0.0008388254486133769, "loss": 0.0765, "num_input_tokens_seen": 22192096, "step": 10285 }, { "epoch": 1.6786296900489397, "grad_norm": 0.1589033156633377, "learning_rate": 0.0008392332789559544, "loss": 0.056, "num_input_tokens_seen": 22202912, "step": 10290 }, { "epoch": 1.6794453507340945, "grad_norm": 0.2979421317577362, "learning_rate": 0.0008396411092985318, "loss": 0.1286, "num_input_tokens_seen": 22213728, "step": 10295 }, { "epoch": 1.6802610114192496, "grad_norm": 0.011314273811876774, "learning_rate": 0.0008400489396411093, "loss": 0.0273, "num_input_tokens_seen": 22224192, "step": 10300 }, { "epoch": 1.6810766721044046, "grad_norm": 0.057240743190050125, "learning_rate": 0.0008404567699836868, "loss": 0.1703, "num_input_tokens_seen": 22234912, "step": 10305 }, { "epoch": 1.6818923327895594, "grad_norm": 0.2604765295982361, "learning_rate": 0.0008408646003262644, "loss": 0.0883, "num_input_tokens_seen": 22245728, "step": 10310 }, { "epoch": 1.6827079934747147, "grad_norm": 0.33515799045562744, "learning_rate": 0.0008412724306688417, "loss": 0.0826, "num_input_tokens_seen": 22257280, "step": 10315 }, { "epoch": 1.6835236541598695, "grad_norm": 0.354188472032547, "learning_rate": 0.0008416802610114192, "loss": 0.1299, "num_input_tokens_seen": 22268352, "step": 10320 }, { "epoch": 1.6843393148450243, "grad_norm": 0.04823071509599686, "learning_rate": 0.0008420880913539968, "loss": 0.0762, "num_input_tokens_seen": 22279936, "step": 10325 }, { "epoch": 1.6851549755301796, "grad_norm": 0.19252288341522217, "learning_rate": 0.0008424959216965743, "loss": 0.1041, "num_input_tokens_seen": 22290432, "step": 10330 }, { "epoch": 1.6859706362153344, "grad_norm": 0.36038297414779663, "learning_rate": 0.0008429037520391518, "loss": 0.1282, "num_input_tokens_seen": 22302048, "step": 10335 }, { "epoch": 1.6867862969004894, "grad_norm": 0.26984208822250366, "learning_rate": 0.0008433115823817292, "loss": 0.2371, "num_input_tokens_seen": 22312064, "step": 10340 }, { "epoch": 1.6876019575856445, "grad_norm": 0.18921194970607758, "learning_rate": 0.0008437194127243067, "loss": 0.0887, "num_input_tokens_seen": 22323584, "step": 10345 }, { "epoch": 1.6884176182707993, "grad_norm": 0.02138206921517849, "learning_rate": 0.0008441272430668842, "loss": 0.1625, "num_input_tokens_seen": 22335072, "step": 10350 }, { "epoch": 1.6892332789559543, "grad_norm": 0.020113101229071617, "learning_rate": 0.0008445350734094617, "loss": 0.2191, "num_input_tokens_seen": 22346560, "step": 10355 }, { "epoch": 1.6900489396411094, "grad_norm": 0.05559573322534561, "learning_rate": 0.0008449429037520392, "loss": 0.1147, "num_input_tokens_seen": 22356320, "step": 10360 }, { "epoch": 1.6908646003262642, "grad_norm": 0.27253538370132446, "learning_rate": 0.0008453507340946166, "loss": 0.2729, "num_input_tokens_seen": 22368000, "step": 10365 }, { "epoch": 1.6916802610114192, "grad_norm": 0.06690158694982529, "learning_rate": 0.0008457585644371941, "loss": 0.1155, "num_input_tokens_seen": 22377824, "step": 10370 }, { "epoch": 1.6924959216965743, "grad_norm": 0.370029479265213, "learning_rate": 0.0008461663947797717, "loss": 0.0696, "num_input_tokens_seen": 22388960, "step": 10375 }, { "epoch": 1.693311582381729, "grad_norm": 0.08654008060693741, "learning_rate": 0.0008465742251223492, "loss": 0.056, "num_input_tokens_seen": 22399616, "step": 10380 }, { "epoch": 1.6941272430668843, "grad_norm": 0.04206390306353569, "learning_rate": 0.0008469820554649265, "loss": 0.1014, "num_input_tokens_seen": 22410400, "step": 10385 }, { "epoch": 1.6949429037520392, "grad_norm": 0.008084801957011223, "learning_rate": 0.0008473898858075041, "loss": 0.0446, "num_input_tokens_seen": 22421280, "step": 10390 }, { "epoch": 1.695758564437194, "grad_norm": 0.06108564883470535, "learning_rate": 0.0008477977161500816, "loss": 0.0342, "num_input_tokens_seen": 22432032, "step": 10395 }, { "epoch": 1.6965742251223492, "grad_norm": 0.017083844169974327, "learning_rate": 0.0008482055464926591, "loss": 0.0414, "num_input_tokens_seen": 22444320, "step": 10400 }, { "epoch": 1.697389885807504, "grad_norm": 0.009416508488357067, "learning_rate": 0.0008486133768352365, "loss": 0.1465, "num_input_tokens_seen": 22456576, "step": 10405 }, { "epoch": 1.698205546492659, "grad_norm": 0.0258281659334898, "learning_rate": 0.000849021207177814, "loss": 0.0956, "num_input_tokens_seen": 22466496, "step": 10410 }, { "epoch": 1.6990212071778141, "grad_norm": 0.44075819849967957, "learning_rate": 0.0008494290375203916, "loss": 0.1902, "num_input_tokens_seen": 22476448, "step": 10415 }, { "epoch": 1.699836867862969, "grad_norm": 0.006966481450945139, "learning_rate": 0.000849836867862969, "loss": 0.0451, "num_input_tokens_seen": 22487648, "step": 10420 }, { "epoch": 1.700652528548124, "grad_norm": 0.38139283657073975, "learning_rate": 0.0008502446982055465, "loss": 0.1871, "num_input_tokens_seen": 22497024, "step": 10425 }, { "epoch": 1.701468189233279, "grad_norm": 0.016298463568091393, "learning_rate": 0.000850652528548124, "loss": 0.0806, "num_input_tokens_seen": 22507616, "step": 10430 }, { "epoch": 1.7022838499184338, "grad_norm": 0.31898435950279236, "learning_rate": 0.0008510603588907014, "loss": 0.2007, "num_input_tokens_seen": 22517920, "step": 10435 }, { "epoch": 1.7030995106035889, "grad_norm": 0.3023619055747986, "learning_rate": 0.000851468189233279, "loss": 0.1188, "num_input_tokens_seen": 22529696, "step": 10440 }, { "epoch": 1.703915171288744, "grad_norm": 0.4784544110298157, "learning_rate": 0.0008518760195758565, "loss": 0.2416, "num_input_tokens_seen": 22539008, "step": 10445 }, { "epoch": 1.7047308319738987, "grad_norm": 0.02311321720480919, "learning_rate": 0.000852283849918434, "loss": 0.0754, "num_input_tokens_seen": 22549920, "step": 10450 }, { "epoch": 1.7055464926590538, "grad_norm": 0.25937774777412415, "learning_rate": 0.0008526916802610114, "loss": 0.1627, "num_input_tokens_seen": 22559680, "step": 10455 }, { "epoch": 1.7063621533442088, "grad_norm": 0.06441766768693924, "learning_rate": 0.0008530995106035889, "loss": 0.1562, "num_input_tokens_seen": 22569920, "step": 10460 }, { "epoch": 1.7071778140293636, "grad_norm": 0.1344326138496399, "learning_rate": 0.0008535073409461665, "loss": 0.0537, "num_input_tokens_seen": 22580864, "step": 10465 }, { "epoch": 1.707993474714519, "grad_norm": 0.15353924036026, "learning_rate": 0.0008539151712887438, "loss": 0.0988, "num_input_tokens_seen": 22593088, "step": 10470 }, { "epoch": 1.7088091353996737, "grad_norm": 0.05935300886631012, "learning_rate": 0.0008543230016313214, "loss": 0.0687, "num_input_tokens_seen": 22602944, "step": 10475 }, { "epoch": 1.7096247960848288, "grad_norm": 0.18250879645347595, "learning_rate": 0.0008547308319738989, "loss": 0.1543, "num_input_tokens_seen": 22612480, "step": 10480 }, { "epoch": 1.7104404567699838, "grad_norm": 0.24191398918628693, "learning_rate": 0.0008551386623164764, "loss": 0.191, "num_input_tokens_seen": 22623456, "step": 10485 }, { "epoch": 1.7112561174551386, "grad_norm": 0.2722257673740387, "learning_rate": 0.0008555464926590538, "loss": 0.1643, "num_input_tokens_seen": 22633952, "step": 10490 }, { "epoch": 1.7120717781402937, "grad_norm": 0.01177589874714613, "learning_rate": 0.0008559543230016313, "loss": 0.0276, "num_input_tokens_seen": 22645568, "step": 10495 }, { "epoch": 1.7128874388254487, "grad_norm": 0.014250610955059528, "learning_rate": 0.0008563621533442089, "loss": 0.0656, "num_input_tokens_seen": 22657088, "step": 10500 }, { "epoch": 1.7137030995106035, "grad_norm": 0.1232733279466629, "learning_rate": 0.0008567699836867863, "loss": 0.0781, "num_input_tokens_seen": 22668256, "step": 10505 }, { "epoch": 1.7145187601957586, "grad_norm": 0.020341793075203896, "learning_rate": 0.0008571778140293638, "loss": 0.0303, "num_input_tokens_seen": 22680192, "step": 10510 }, { "epoch": 1.7153344208809136, "grad_norm": 0.029026813805103302, "learning_rate": 0.0008575856443719413, "loss": 0.1341, "num_input_tokens_seen": 22691104, "step": 10515 }, { "epoch": 1.7161500815660684, "grad_norm": 0.3541639447212219, "learning_rate": 0.0008579934747145188, "loss": 0.1998, "num_input_tokens_seen": 22701856, "step": 10520 }, { "epoch": 1.7169657422512234, "grad_norm": 0.04162002354860306, "learning_rate": 0.0008584013050570962, "loss": 0.133, "num_input_tokens_seen": 22712320, "step": 10525 }, { "epoch": 1.7177814029363785, "grad_norm": 0.03524169698357582, "learning_rate": 0.0008588091353996738, "loss": 0.203, "num_input_tokens_seen": 22722752, "step": 10530 }, { "epoch": 1.7185970636215333, "grad_norm": 0.02017100900411606, "learning_rate": 0.0008592169657422512, "loss": 0.0393, "num_input_tokens_seen": 22733600, "step": 10535 }, { "epoch": 1.7194127243066886, "grad_norm": 0.06829187273979187, "learning_rate": 0.0008596247960848287, "loss": 0.0548, "num_input_tokens_seen": 22744480, "step": 10540 }, { "epoch": 1.7202283849918434, "grad_norm": 0.26298680901527405, "learning_rate": 0.0008600326264274062, "loss": 0.1089, "num_input_tokens_seen": 22755680, "step": 10545 }, { "epoch": 1.7210440456769984, "grad_norm": 0.12350499629974365, "learning_rate": 0.0008604404567699837, "loss": 0.1089, "num_input_tokens_seen": 22766720, "step": 10550 }, { "epoch": 1.7218597063621535, "grad_norm": 0.225529745221138, "learning_rate": 0.0008608482871125613, "loss": 0.1449, "num_input_tokens_seen": 22776576, "step": 10555 }, { "epoch": 1.7226753670473083, "grad_norm": 0.22411943972110748, "learning_rate": 0.0008612561174551386, "loss": 0.2474, "num_input_tokens_seen": 22788064, "step": 10560 }, { "epoch": 1.7234910277324633, "grad_norm": 0.024716923013329506, "learning_rate": 0.0008616639477977162, "loss": 0.1076, "num_input_tokens_seen": 22798368, "step": 10565 }, { "epoch": 1.7243066884176184, "grad_norm": 0.31456273794174194, "learning_rate": 0.0008620717781402937, "loss": 0.3193, "num_input_tokens_seen": 22810048, "step": 10570 }, { "epoch": 1.7251223491027732, "grad_norm": 0.18001605570316315, "learning_rate": 0.0008624796084828711, "loss": 0.1269, "num_input_tokens_seen": 22821376, "step": 10575 }, { "epoch": 1.7259380097879282, "grad_norm": 0.26557862758636475, "learning_rate": 0.0008628874388254486, "loss": 0.1023, "num_input_tokens_seen": 22831904, "step": 10580 }, { "epoch": 1.7267536704730833, "grad_norm": 0.34982994198799133, "learning_rate": 0.0008632952691680261, "loss": 0.1396, "num_input_tokens_seen": 22843328, "step": 10585 }, { "epoch": 1.727569331158238, "grad_norm": 0.1309492588043213, "learning_rate": 0.0008637030995106036, "loss": 0.1461, "num_input_tokens_seen": 22853952, "step": 10590 }, { "epoch": 1.7283849918433931, "grad_norm": 0.04652172327041626, "learning_rate": 0.0008641109298531811, "loss": 0.2912, "num_input_tokens_seen": 22865152, "step": 10595 }, { "epoch": 1.7292006525285482, "grad_norm": 0.06612179428339005, "learning_rate": 0.0008645187601957585, "loss": 0.0927, "num_input_tokens_seen": 22877152, "step": 10600 }, { "epoch": 1.730016313213703, "grad_norm": 0.14288435876369476, "learning_rate": 0.0008649265905383361, "loss": 0.1112, "num_input_tokens_seen": 22889344, "step": 10605 }, { "epoch": 1.7308319738988582, "grad_norm": 0.1495279222726822, "learning_rate": 0.0008653344208809135, "loss": 0.0979, "num_input_tokens_seen": 22900192, "step": 10610 }, { "epoch": 1.731647634584013, "grad_norm": 0.016567563638091087, "learning_rate": 0.0008657422512234911, "loss": 0.1129, "num_input_tokens_seen": 22910880, "step": 10615 }, { "epoch": 1.7324632952691679, "grad_norm": 0.22587095201015472, "learning_rate": 0.0008661500815660686, "loss": 0.1625, "num_input_tokens_seen": 22921888, "step": 10620 }, { "epoch": 1.7332789559543231, "grad_norm": 0.10033811628818512, "learning_rate": 0.0008665579119086459, "loss": 0.0776, "num_input_tokens_seen": 22932480, "step": 10625 }, { "epoch": 1.734094616639478, "grad_norm": 0.0885920375585556, "learning_rate": 0.0008669657422512235, "loss": 0.1037, "num_input_tokens_seen": 22942656, "step": 10630 }, { "epoch": 1.734910277324633, "grad_norm": 0.014857303351163864, "learning_rate": 0.000867373572593801, "loss": 0.0825, "num_input_tokens_seen": 22954240, "step": 10635 }, { "epoch": 1.735725938009788, "grad_norm": 0.33602920174598694, "learning_rate": 0.0008677814029363786, "loss": 0.2, "num_input_tokens_seen": 22964576, "step": 10640 }, { "epoch": 1.7365415986949428, "grad_norm": 0.426072895526886, "learning_rate": 0.0008681892332789559, "loss": 0.2581, "num_input_tokens_seen": 22976128, "step": 10645 }, { "epoch": 1.7373572593800979, "grad_norm": 0.19785428047180176, "learning_rate": 0.0008685970636215334, "loss": 0.0463, "num_input_tokens_seen": 22988352, "step": 10650 }, { "epoch": 1.738172920065253, "grad_norm": 0.3039417266845703, "learning_rate": 0.000869004893964111, "loss": 0.1939, "num_input_tokens_seen": 22998624, "step": 10655 }, { "epoch": 1.7389885807504077, "grad_norm": 0.12964244186878204, "learning_rate": 0.0008694127243066884, "loss": 0.0544, "num_input_tokens_seen": 23008480, "step": 10660 }, { "epoch": 1.7398042414355628, "grad_norm": 0.21774335205554962, "learning_rate": 0.000869820554649266, "loss": 0.0924, "num_input_tokens_seen": 23019424, "step": 10665 }, { "epoch": 1.7406199021207178, "grad_norm": 0.29323050379753113, "learning_rate": 0.0008702283849918434, "loss": 0.1329, "num_input_tokens_seen": 23029984, "step": 10670 }, { "epoch": 1.7414355628058726, "grad_norm": 0.37941205501556396, "learning_rate": 0.0008706362153344209, "loss": 0.2135, "num_input_tokens_seen": 23040544, "step": 10675 }, { "epoch": 1.7422512234910277, "grad_norm": 0.04229533672332764, "learning_rate": 0.0008710440456769984, "loss": 0.0403, "num_input_tokens_seen": 23051616, "step": 10680 }, { "epoch": 1.7430668841761827, "grad_norm": 0.2826554477214813, "learning_rate": 0.0008714518760195759, "loss": 0.1427, "num_input_tokens_seen": 23060960, "step": 10685 }, { "epoch": 1.7438825448613375, "grad_norm": 0.022358382120728493, "learning_rate": 0.0008718597063621533, "loss": 0.046, "num_input_tokens_seen": 23071040, "step": 10690 }, { "epoch": 1.7446982055464928, "grad_norm": 0.866997241973877, "learning_rate": 0.0008722675367047308, "loss": 0.2073, "num_input_tokens_seen": 23080768, "step": 10695 }, { "epoch": 1.7455138662316476, "grad_norm": 0.6679326891899109, "learning_rate": 0.0008726753670473083, "loss": 0.1909, "num_input_tokens_seen": 23091008, "step": 10700 }, { "epoch": 1.7463295269168027, "grad_norm": 0.06603264808654785, "learning_rate": 0.0008730831973898859, "loss": 0.2333, "num_input_tokens_seen": 23102080, "step": 10705 }, { "epoch": 1.7471451876019577, "grad_norm": 0.34906458854675293, "learning_rate": 0.0008734910277324634, "loss": 0.114, "num_input_tokens_seen": 23111488, "step": 10710 }, { "epoch": 1.7479608482871125, "grad_norm": 0.08307372778654099, "learning_rate": 0.0008738988580750407, "loss": 0.0745, "num_input_tokens_seen": 23122560, "step": 10715 }, { "epoch": 1.7487765089722676, "grad_norm": 0.22066713869571686, "learning_rate": 0.0008743066884176183, "loss": 0.0858, "num_input_tokens_seen": 23133120, "step": 10720 }, { "epoch": 1.7495921696574226, "grad_norm": 0.16944842040538788, "learning_rate": 0.0008747145187601958, "loss": 0.1504, "num_input_tokens_seen": 23144288, "step": 10725 }, { "epoch": 1.7504078303425774, "grad_norm": 0.18997831642627716, "learning_rate": 0.0008751223491027733, "loss": 0.108, "num_input_tokens_seen": 23156032, "step": 10730 }, { "epoch": 1.7512234910277324, "grad_norm": 0.10597430914640427, "learning_rate": 0.0008755301794453507, "loss": 0.0625, "num_input_tokens_seen": 23167776, "step": 10735 }, { "epoch": 1.7520391517128875, "grad_norm": 0.10041946917772293, "learning_rate": 0.0008759380097879282, "loss": 0.3393, "num_input_tokens_seen": 23178368, "step": 10740 }, { "epoch": 1.7528548123980423, "grad_norm": 0.29294222593307495, "learning_rate": 0.0008763458401305058, "loss": 0.1283, "num_input_tokens_seen": 23189376, "step": 10745 }, { "epoch": 1.7536704730831973, "grad_norm": 0.19184724986553192, "learning_rate": 0.0008767536704730832, "loss": 0.1511, "num_input_tokens_seen": 23199936, "step": 10750 }, { "epoch": 1.7544861337683524, "grad_norm": 0.08055436611175537, "learning_rate": 0.0008771615008156608, "loss": 0.0415, "num_input_tokens_seen": 23211360, "step": 10755 }, { "epoch": 1.7553017944535072, "grad_norm": 0.0785033255815506, "learning_rate": 0.0008775693311582382, "loss": 0.0846, "num_input_tokens_seen": 23222784, "step": 10760 }, { "epoch": 1.7561174551386625, "grad_norm": 0.08889570832252502, "learning_rate": 0.0008779771615008156, "loss": 0.0588, "num_input_tokens_seen": 23233184, "step": 10765 }, { "epoch": 1.7569331158238173, "grad_norm": 0.16072241961956024, "learning_rate": 0.0008783849918433932, "loss": 0.0467, "num_input_tokens_seen": 23244288, "step": 10770 }, { "epoch": 1.7577487765089723, "grad_norm": 0.275436669588089, "learning_rate": 0.0008787928221859707, "loss": 0.1584, "num_input_tokens_seen": 23255488, "step": 10775 }, { "epoch": 1.7585644371941274, "grad_norm": 0.4465494453907013, "learning_rate": 0.0008792006525285482, "loss": 0.1352, "num_input_tokens_seen": 23266656, "step": 10780 }, { "epoch": 1.7593800978792822, "grad_norm": 0.21242108941078186, "learning_rate": 0.0008796084828711256, "loss": 0.1329, "num_input_tokens_seen": 23277280, "step": 10785 }, { "epoch": 1.7601957585644372, "grad_norm": 0.048975709825754166, "learning_rate": 0.0008800163132137031, "loss": 0.0273, "num_input_tokens_seen": 23288416, "step": 10790 }, { "epoch": 1.7610114192495923, "grad_norm": 0.10063568502664566, "learning_rate": 0.0008804241435562807, "loss": 0.1383, "num_input_tokens_seen": 23299328, "step": 10795 }, { "epoch": 1.761827079934747, "grad_norm": 0.4981708824634552, "learning_rate": 0.000880831973898858, "loss": 0.2903, "num_input_tokens_seen": 23309632, "step": 10800 }, { "epoch": 1.7626427406199021, "grad_norm": 0.20495256781578064, "learning_rate": 0.0008812398042414356, "loss": 0.0751, "num_input_tokens_seen": 23321344, "step": 10805 }, { "epoch": 1.7634584013050572, "grad_norm": 0.07956171780824661, "learning_rate": 0.0008816476345840131, "loss": 0.1528, "num_input_tokens_seen": 23332288, "step": 10810 }, { "epoch": 1.764274061990212, "grad_norm": 0.18191587924957275, "learning_rate": 0.0008820554649265906, "loss": 0.1808, "num_input_tokens_seen": 23342560, "step": 10815 }, { "epoch": 1.765089722675367, "grad_norm": 0.36843302845954895, "learning_rate": 0.000882463295269168, "loss": 0.2674, "num_input_tokens_seen": 23353280, "step": 10820 }, { "epoch": 1.765905383360522, "grad_norm": 0.16178090870380402, "learning_rate": 0.0008828711256117455, "loss": 0.0929, "num_input_tokens_seen": 23364800, "step": 10825 }, { "epoch": 1.7667210440456769, "grad_norm": 0.07261096686124802, "learning_rate": 0.000883278955954323, "loss": 0.1032, "num_input_tokens_seen": 23376608, "step": 10830 }, { "epoch": 1.7675367047308321, "grad_norm": 0.42369386553764343, "learning_rate": 0.0008836867862969005, "loss": 0.1078, "num_input_tokens_seen": 23387680, "step": 10835 }, { "epoch": 1.768352365415987, "grad_norm": 0.1306907832622528, "learning_rate": 0.000884094616639478, "loss": 0.2057, "num_input_tokens_seen": 23397664, "step": 10840 }, { "epoch": 1.7691680261011418, "grad_norm": 0.051022302359342575, "learning_rate": 0.0008845024469820555, "loss": 0.0911, "num_input_tokens_seen": 23408928, "step": 10845 }, { "epoch": 1.769983686786297, "grad_norm": 0.10085247457027435, "learning_rate": 0.0008849102773246329, "loss": 0.0912, "num_input_tokens_seen": 23418400, "step": 10850 }, { "epoch": 1.7707993474714518, "grad_norm": 0.15397600829601288, "learning_rate": 0.0008853181076672104, "loss": 0.0775, "num_input_tokens_seen": 23428448, "step": 10855 }, { "epoch": 1.7716150081566069, "grad_norm": 0.039258819073438644, "learning_rate": 0.000885725938009788, "loss": 0.1456, "num_input_tokens_seen": 23440416, "step": 10860 }, { "epoch": 1.772430668841762, "grad_norm": 0.15988309681415558, "learning_rate": 0.0008861337683523655, "loss": 0.1032, "num_input_tokens_seen": 23451072, "step": 10865 }, { "epoch": 1.7732463295269167, "grad_norm": 0.1870967596769333, "learning_rate": 0.0008865415986949429, "loss": 0.0866, "num_input_tokens_seen": 23462880, "step": 10870 }, { "epoch": 1.7740619902120718, "grad_norm": 0.11405563354492188, "learning_rate": 0.0008869494290375204, "loss": 0.1186, "num_input_tokens_seen": 23473600, "step": 10875 }, { "epoch": 1.7748776508972268, "grad_norm": 0.11215522885322571, "learning_rate": 0.0008873572593800979, "loss": 0.107, "num_input_tokens_seen": 23484896, "step": 10880 }, { "epoch": 1.7756933115823816, "grad_norm": 0.07809615135192871, "learning_rate": 0.0008877650897226754, "loss": 0.0907, "num_input_tokens_seen": 23496288, "step": 10885 }, { "epoch": 1.7765089722675367, "grad_norm": 0.01887686364352703, "learning_rate": 0.0008881729200652528, "loss": 0.1843, "num_input_tokens_seen": 23507584, "step": 10890 }, { "epoch": 1.7773246329526917, "grad_norm": 0.10509473085403442, "learning_rate": 0.0008885807504078304, "loss": 0.1657, "num_input_tokens_seen": 23518496, "step": 10895 }, { "epoch": 1.7781402936378465, "grad_norm": 0.024599969387054443, "learning_rate": 0.0008889885807504079, "loss": 0.1605, "num_input_tokens_seen": 23529792, "step": 10900 }, { "epoch": 1.7789559543230016, "grad_norm": 0.42292332649230957, "learning_rate": 0.0008893964110929853, "loss": 0.1332, "num_input_tokens_seen": 23540224, "step": 10905 }, { "epoch": 1.7797716150081566, "grad_norm": 0.06820008903741837, "learning_rate": 0.0008898042414355628, "loss": 0.1348, "num_input_tokens_seen": 23550752, "step": 10910 }, { "epoch": 1.7805872756933114, "grad_norm": 0.10379303991794586, "learning_rate": 0.0008902120717781403, "loss": 0.1349, "num_input_tokens_seen": 23561504, "step": 10915 }, { "epoch": 1.7814029363784667, "grad_norm": 0.25459805130958557, "learning_rate": 0.0008906199021207178, "loss": 0.1642, "num_input_tokens_seen": 23572960, "step": 10920 }, { "epoch": 1.7822185970636215, "grad_norm": 0.26913684606552124, "learning_rate": 0.0008910277324632953, "loss": 0.1226, "num_input_tokens_seen": 23584800, "step": 10925 }, { "epoch": 1.7830342577487766, "grad_norm": 0.06758326292037964, "learning_rate": 0.0008914355628058728, "loss": 0.0662, "num_input_tokens_seen": 23595712, "step": 10930 }, { "epoch": 1.7838499184339316, "grad_norm": 0.12106800079345703, "learning_rate": 0.0008918433931484503, "loss": 0.1308, "num_input_tokens_seen": 23606560, "step": 10935 }, { "epoch": 1.7846655791190864, "grad_norm": 0.28355008363723755, "learning_rate": 0.0008922512234910277, "loss": 0.1769, "num_input_tokens_seen": 23618240, "step": 10940 }, { "epoch": 1.7854812398042414, "grad_norm": 0.06424345821142197, "learning_rate": 0.0008926590538336053, "loss": 0.1335, "num_input_tokens_seen": 23628608, "step": 10945 }, { "epoch": 1.7862969004893965, "grad_norm": 0.32301387190818787, "learning_rate": 0.0008930668841761828, "loss": 0.124, "num_input_tokens_seen": 23638848, "step": 10950 }, { "epoch": 1.7871125611745513, "grad_norm": 0.029248030856251717, "learning_rate": 0.0008934747145187601, "loss": 0.0765, "num_input_tokens_seen": 23650112, "step": 10955 }, { "epoch": 1.7879282218597063, "grad_norm": 0.015438450500369072, "learning_rate": 0.0008938825448613377, "loss": 0.0536, "num_input_tokens_seen": 23661184, "step": 10960 }, { "epoch": 1.7887438825448614, "grad_norm": 0.05406321957707405, "learning_rate": 0.0008942903752039152, "loss": 0.1059, "num_input_tokens_seen": 23673312, "step": 10965 }, { "epoch": 1.7895595432300162, "grad_norm": 0.21868672966957092, "learning_rate": 0.0008946982055464927, "loss": 0.0824, "num_input_tokens_seen": 23685120, "step": 10970 }, { "epoch": 1.7903752039151712, "grad_norm": 0.07514999061822891, "learning_rate": 0.0008951060358890701, "loss": 0.1488, "num_input_tokens_seen": 23696672, "step": 10975 }, { "epoch": 1.7911908646003263, "grad_norm": 0.13577716052532196, "learning_rate": 0.0008955138662316476, "loss": 0.2018, "num_input_tokens_seen": 23707200, "step": 10980 }, { "epoch": 1.792006525285481, "grad_norm": 0.14963217079639435, "learning_rate": 0.0008959216965742252, "loss": 0.1548, "num_input_tokens_seen": 23718336, "step": 10985 }, { "epoch": 1.7928221859706364, "grad_norm": 0.10415702313184738, "learning_rate": 0.0008963295269168026, "loss": 0.3021, "num_input_tokens_seen": 23729792, "step": 10990 }, { "epoch": 1.7936378466557912, "grad_norm": 0.06272199749946594, "learning_rate": 0.0008967373572593801, "loss": 0.1084, "num_input_tokens_seen": 23741856, "step": 10995 }, { "epoch": 1.7944535073409462, "grad_norm": 0.052787017077207565, "learning_rate": 0.0008971451876019576, "loss": 0.0428, "num_input_tokens_seen": 23752736, "step": 11000 }, { "epoch": 1.7952691680261013, "grad_norm": 0.07515407353639603, "learning_rate": 0.0008975530179445351, "loss": 0.046, "num_input_tokens_seen": 23762848, "step": 11005 }, { "epoch": 1.796084828711256, "grad_norm": 0.026560088619589806, "learning_rate": 0.0008979608482871126, "loss": 0.0944, "num_input_tokens_seen": 23773088, "step": 11010 }, { "epoch": 1.7969004893964111, "grad_norm": 0.19898484647274017, "learning_rate": 0.0008983686786296901, "loss": 0.1779, "num_input_tokens_seen": 23782944, "step": 11015 }, { "epoch": 1.7977161500815662, "grad_norm": 0.11401655524969101, "learning_rate": 0.0008987765089722675, "loss": 0.0414, "num_input_tokens_seen": 23794048, "step": 11020 }, { "epoch": 1.798531810766721, "grad_norm": 0.03812574967741966, "learning_rate": 0.000899184339314845, "loss": 0.0244, "num_input_tokens_seen": 23804320, "step": 11025 }, { "epoch": 1.799347471451876, "grad_norm": 0.3827955424785614, "learning_rate": 0.0008995921696574225, "loss": 0.1552, "num_input_tokens_seen": 23814880, "step": 11030 }, { "epoch": 1.800163132137031, "grad_norm": 0.00530984066426754, "learning_rate": 0.0009000000000000001, "loss": 0.0231, "num_input_tokens_seen": 23824672, "step": 11035 }, { "epoch": 1.8009787928221859, "grad_norm": 0.10105440020561218, "learning_rate": 0.0009004078303425776, "loss": 0.1479, "num_input_tokens_seen": 23835744, "step": 11040 }, { "epoch": 1.801794453507341, "grad_norm": 0.062126077711582184, "learning_rate": 0.0009008156606851549, "loss": 0.0691, "num_input_tokens_seen": 23846752, "step": 11045 }, { "epoch": 1.802610114192496, "grad_norm": 0.05113992094993591, "learning_rate": 0.0009012234910277325, "loss": 0.0424, "num_input_tokens_seen": 23857632, "step": 11050 }, { "epoch": 1.8034257748776508, "grad_norm": 0.17284096777439117, "learning_rate": 0.00090163132137031, "loss": 0.1012, "num_input_tokens_seen": 23868544, "step": 11055 }, { "epoch": 1.804241435562806, "grad_norm": 0.05556831136345863, "learning_rate": 0.0009020391517128875, "loss": 0.0557, "num_input_tokens_seen": 23879008, "step": 11060 }, { "epoch": 1.8050570962479608, "grad_norm": 0.021276628598570824, "learning_rate": 0.0009024469820554649, "loss": 0.0194, "num_input_tokens_seen": 23890048, "step": 11065 }, { "epoch": 1.8058727569331157, "grad_norm": 0.054949935525655746, "learning_rate": 0.0009028548123980424, "loss": 0.04, "num_input_tokens_seen": 23900192, "step": 11070 }, { "epoch": 1.806688417618271, "grad_norm": 0.35755670070648193, "learning_rate": 0.0009032626427406199, "loss": 0.1912, "num_input_tokens_seen": 23910464, "step": 11075 }, { "epoch": 1.8075040783034257, "grad_norm": 0.31437793374061584, "learning_rate": 0.0009036704730831974, "loss": 0.1528, "num_input_tokens_seen": 23919872, "step": 11080 }, { "epoch": 1.8083197389885808, "grad_norm": 0.0792701467871666, "learning_rate": 0.000904078303425775, "loss": 0.1873, "num_input_tokens_seen": 23931328, "step": 11085 }, { "epoch": 1.8091353996737358, "grad_norm": 0.1359127163887024, "learning_rate": 0.0009044861337683524, "loss": 0.1544, "num_input_tokens_seen": 23942656, "step": 11090 }, { "epoch": 1.8099510603588906, "grad_norm": 0.07275751978158951, "learning_rate": 0.0009048939641109298, "loss": 0.0603, "num_input_tokens_seen": 23952832, "step": 11095 }, { "epoch": 1.8107667210440457, "grad_norm": 0.03263649344444275, "learning_rate": 0.0009053017944535074, "loss": 0.1243, "num_input_tokens_seen": 23964192, "step": 11100 }, { "epoch": 1.8115823817292007, "grad_norm": 0.12664511799812317, "learning_rate": 0.0009057096247960849, "loss": 0.2061, "num_input_tokens_seen": 23974560, "step": 11105 }, { "epoch": 1.8123980424143555, "grad_norm": 0.07404330372810364, "learning_rate": 0.0009061174551386622, "loss": 0.077, "num_input_tokens_seen": 23983712, "step": 11110 }, { "epoch": 1.8132137030995106, "grad_norm": 0.07391966879367828, "learning_rate": 0.0009065252854812398, "loss": 0.183, "num_input_tokens_seen": 23994592, "step": 11115 }, { "epoch": 1.8140293637846656, "grad_norm": 0.01809052936732769, "learning_rate": 0.0009069331158238173, "loss": 0.1643, "num_input_tokens_seen": 24005312, "step": 11120 }, { "epoch": 1.8148450244698204, "grad_norm": 0.02823469415307045, "learning_rate": 0.0009073409461663949, "loss": 0.0415, "num_input_tokens_seen": 24016960, "step": 11125 }, { "epoch": 1.8156606851549757, "grad_norm": 0.29391705989837646, "learning_rate": 0.0009077487765089722, "loss": 0.127, "num_input_tokens_seen": 24028256, "step": 11130 }, { "epoch": 1.8164763458401305, "grad_norm": 0.15838123857975006, "learning_rate": 0.0009081566068515497, "loss": 0.1629, "num_input_tokens_seen": 24039520, "step": 11135 }, { "epoch": 1.8172920065252853, "grad_norm": 0.17011554539203644, "learning_rate": 0.0009085644371941273, "loss": 0.1005, "num_input_tokens_seen": 24049664, "step": 11140 }, { "epoch": 1.8181076672104406, "grad_norm": 0.1175650954246521, "learning_rate": 0.0009089722675367047, "loss": 0.057, "num_input_tokens_seen": 24060992, "step": 11145 }, { "epoch": 1.8189233278955954, "grad_norm": 0.04222079738974571, "learning_rate": 0.0009093800978792823, "loss": 0.1202, "num_input_tokens_seen": 24070944, "step": 11150 }, { "epoch": 1.8197389885807504, "grad_norm": 0.11924027651548386, "learning_rate": 0.0009097879282218597, "loss": 0.0586, "num_input_tokens_seen": 24081344, "step": 11155 }, { "epoch": 1.8205546492659055, "grad_norm": 0.023688890039920807, "learning_rate": 0.0009101957585644372, "loss": 0.088, "num_input_tokens_seen": 24091392, "step": 11160 }, { "epoch": 1.8213703099510603, "grad_norm": 0.03881914168596268, "learning_rate": 0.0009106035889070147, "loss": 0.1794, "num_input_tokens_seen": 24100768, "step": 11165 }, { "epoch": 1.8221859706362153, "grad_norm": 0.032460518181324005, "learning_rate": 0.0009110114192495922, "loss": 0.1367, "num_input_tokens_seen": 24112608, "step": 11170 }, { "epoch": 1.8230016313213704, "grad_norm": 0.352861225605011, "learning_rate": 0.0009114192495921697, "loss": 0.1475, "num_input_tokens_seen": 24123872, "step": 11175 }, { "epoch": 1.8238172920065252, "grad_norm": 0.1843346357345581, "learning_rate": 0.0009118270799347471, "loss": 0.0818, "num_input_tokens_seen": 24134752, "step": 11180 }, { "epoch": 1.8246329526916802, "grad_norm": 0.2625465393066406, "learning_rate": 0.0009122349102773246, "loss": 0.1161, "num_input_tokens_seen": 24144416, "step": 11185 }, { "epoch": 1.8254486133768353, "grad_norm": 0.053409505635499954, "learning_rate": 0.0009126427406199022, "loss": 0.1263, "num_input_tokens_seen": 24154528, "step": 11190 }, { "epoch": 1.82626427406199, "grad_norm": 0.03175493702292442, "learning_rate": 0.0009130505709624797, "loss": 0.1215, "num_input_tokens_seen": 24165248, "step": 11195 }, { "epoch": 1.8270799347471451, "grad_norm": 0.09125782549381256, "learning_rate": 0.0009134584013050571, "loss": 0.1797, "num_input_tokens_seen": 24175488, "step": 11200 }, { "epoch": 1.8278955954323002, "grad_norm": 0.21432854235172272, "learning_rate": 0.0009138662316476346, "loss": 0.0614, "num_input_tokens_seen": 24185952, "step": 11205 }, { "epoch": 1.828711256117455, "grad_norm": 0.030250659212470055, "learning_rate": 0.0009142740619902121, "loss": 0.0812, "num_input_tokens_seen": 24197280, "step": 11210 }, { "epoch": 1.8295269168026103, "grad_norm": 0.026117807254195213, "learning_rate": 0.0009146818923327896, "loss": 0.1533, "num_input_tokens_seen": 24208672, "step": 11215 }, { "epoch": 1.830342577487765, "grad_norm": 0.13739944994449615, "learning_rate": 0.000915089722675367, "loss": 0.0382, "num_input_tokens_seen": 24218784, "step": 11220 }, { "epoch": 1.8311582381729201, "grad_norm": 0.22546479105949402, "learning_rate": 0.0009154975530179446, "loss": 0.1697, "num_input_tokens_seen": 24228928, "step": 11225 }, { "epoch": 1.8319738988580752, "grad_norm": 0.3378293216228485, "learning_rate": 0.0009159053833605221, "loss": 0.1535, "num_input_tokens_seen": 24240640, "step": 11230 }, { "epoch": 1.83278955954323, "grad_norm": 0.06929034739732742, "learning_rate": 0.0009163132137030995, "loss": 0.0667, "num_input_tokens_seen": 24250496, "step": 11235 }, { "epoch": 1.833605220228385, "grad_norm": 0.1936279535293579, "learning_rate": 0.000916721044045677, "loss": 0.0761, "num_input_tokens_seen": 24261024, "step": 11240 }, { "epoch": 1.83442088091354, "grad_norm": 0.5969166159629822, "learning_rate": 0.0009171288743882545, "loss": 0.3511, "num_input_tokens_seen": 24271712, "step": 11245 }, { "epoch": 1.8352365415986949, "grad_norm": 0.08674336969852448, "learning_rate": 0.0009175367047308319, "loss": 0.1219, "num_input_tokens_seen": 24282944, "step": 11250 }, { "epoch": 1.83605220228385, "grad_norm": 0.053366925567388535, "learning_rate": 0.0009179445350734095, "loss": 0.0706, "num_input_tokens_seen": 24293760, "step": 11255 }, { "epoch": 1.836867862969005, "grad_norm": 0.19823257625102997, "learning_rate": 0.000918352365415987, "loss": 0.1377, "num_input_tokens_seen": 24304192, "step": 11260 }, { "epoch": 1.8376835236541598, "grad_norm": 0.2764589190483093, "learning_rate": 0.0009187601957585645, "loss": 0.1539, "num_input_tokens_seen": 24315840, "step": 11265 }, { "epoch": 1.8384991843393148, "grad_norm": 0.21002626419067383, "learning_rate": 0.0009191680261011419, "loss": 0.0473, "num_input_tokens_seen": 24325280, "step": 11270 }, { "epoch": 1.8393148450244698, "grad_norm": 0.028787972405552864, "learning_rate": 0.0009195758564437194, "loss": 0.1059, "num_input_tokens_seen": 24336416, "step": 11275 }, { "epoch": 1.8401305057096247, "grad_norm": 0.12305663526058197, "learning_rate": 0.000919983686786297, "loss": 0.0593, "num_input_tokens_seen": 24347712, "step": 11280 }, { "epoch": 1.84094616639478, "grad_norm": 0.3399862051010132, "learning_rate": 0.0009203915171288743, "loss": 0.2367, "num_input_tokens_seen": 24358112, "step": 11285 }, { "epoch": 1.8417618270799347, "grad_norm": 0.03217494860291481, "learning_rate": 0.0009207993474714519, "loss": 0.0532, "num_input_tokens_seen": 24368672, "step": 11290 }, { "epoch": 1.8425774877650896, "grad_norm": 0.06780222058296204, "learning_rate": 0.0009212071778140294, "loss": 0.1138, "num_input_tokens_seen": 24378688, "step": 11295 }, { "epoch": 1.8433931484502448, "grad_norm": 0.1660727858543396, "learning_rate": 0.0009216150081566068, "loss": 0.1891, "num_input_tokens_seen": 24390784, "step": 11300 }, { "epoch": 1.8442088091353996, "grad_norm": 0.2785295844078064, "learning_rate": 0.0009220228384991844, "loss": 0.3042, "num_input_tokens_seen": 24401920, "step": 11305 }, { "epoch": 1.8450244698205547, "grad_norm": 0.04958690330386162, "learning_rate": 0.0009224306688417618, "loss": 0.1294, "num_input_tokens_seen": 24412640, "step": 11310 }, { "epoch": 1.8458401305057097, "grad_norm": 0.0846824198961258, "learning_rate": 0.0009228384991843394, "loss": 0.2645, "num_input_tokens_seen": 24422144, "step": 11315 }, { "epoch": 1.8466557911908645, "grad_norm": 0.1107596606016159, "learning_rate": 0.0009232463295269168, "loss": 0.1663, "num_input_tokens_seen": 24433888, "step": 11320 }, { "epoch": 1.8474714518760196, "grad_norm": 0.07402174919843674, "learning_rate": 0.0009236541598694943, "loss": 0.1351, "num_input_tokens_seen": 24444896, "step": 11325 }, { "epoch": 1.8482871125611746, "grad_norm": 0.17059257626533508, "learning_rate": 0.0009240619902120718, "loss": 0.1688, "num_input_tokens_seen": 24457344, "step": 11330 }, { "epoch": 1.8491027732463294, "grad_norm": 0.12378732115030289, "learning_rate": 0.0009244698205546492, "loss": 0.1595, "num_input_tokens_seen": 24468256, "step": 11335 }, { "epoch": 1.8499184339314845, "grad_norm": 0.16451109945774078, "learning_rate": 0.0009248776508972268, "loss": 0.1585, "num_input_tokens_seen": 24478816, "step": 11340 }, { "epoch": 1.8507340946166395, "grad_norm": 0.037742845714092255, "learning_rate": 0.0009252854812398043, "loss": 0.1056, "num_input_tokens_seen": 24488800, "step": 11345 }, { "epoch": 1.8515497553017943, "grad_norm": 0.11471888422966003, "learning_rate": 0.0009256933115823818, "loss": 0.0758, "num_input_tokens_seen": 24498368, "step": 11350 }, { "epoch": 1.8523654159869496, "grad_norm": 0.08365796506404877, "learning_rate": 0.0009261011419249592, "loss": 0.0939, "num_input_tokens_seen": 24508800, "step": 11355 }, { "epoch": 1.8531810766721044, "grad_norm": 0.13153505325317383, "learning_rate": 0.0009265089722675367, "loss": 0.2289, "num_input_tokens_seen": 24518976, "step": 11360 }, { "epoch": 1.8539967373572592, "grad_norm": 0.16009916365146637, "learning_rate": 0.0009269168026101143, "loss": 0.0411, "num_input_tokens_seen": 24530464, "step": 11365 }, { "epoch": 1.8548123980424145, "grad_norm": 0.047121528536081314, "learning_rate": 0.0009273246329526917, "loss": 0.0605, "num_input_tokens_seen": 24542048, "step": 11370 }, { "epoch": 1.8556280587275693, "grad_norm": 0.014136620797216892, "learning_rate": 0.0009277324632952691, "loss": 0.0741, "num_input_tokens_seen": 24553728, "step": 11375 }, { "epoch": 1.8564437194127243, "grad_norm": 0.24734677374362946, "learning_rate": 0.0009281402936378467, "loss": 0.1227, "num_input_tokens_seen": 24564320, "step": 11380 }, { "epoch": 1.8572593800978794, "grad_norm": 0.19953761994838715, "learning_rate": 0.0009285481239804242, "loss": 0.2258, "num_input_tokens_seen": 24575616, "step": 11385 }, { "epoch": 1.8580750407830342, "grad_norm": 0.1580800563097, "learning_rate": 0.0009289559543230017, "loss": 0.0558, "num_input_tokens_seen": 24587200, "step": 11390 }, { "epoch": 1.8588907014681892, "grad_norm": 0.2762386202812195, "learning_rate": 0.0009293637846655791, "loss": 0.1316, "num_input_tokens_seen": 24598880, "step": 11395 }, { "epoch": 1.8597063621533443, "grad_norm": 0.04900749772787094, "learning_rate": 0.0009297716150081566, "loss": 0.0491, "num_input_tokens_seen": 24610112, "step": 11400 }, { "epoch": 1.860522022838499, "grad_norm": 0.10823460668325424, "learning_rate": 0.0009301794453507341, "loss": 0.0273, "num_input_tokens_seen": 24619936, "step": 11405 }, { "epoch": 1.8613376835236541, "grad_norm": 0.10455655306577682, "learning_rate": 0.0009305872756933116, "loss": 0.028, "num_input_tokens_seen": 24630688, "step": 11410 }, { "epoch": 1.8621533442088092, "grad_norm": 0.01597657985985279, "learning_rate": 0.000930995106035889, "loss": 0.1142, "num_input_tokens_seen": 24641152, "step": 11415 }, { "epoch": 1.862969004893964, "grad_norm": 0.3545233905315399, "learning_rate": 0.0009314029363784666, "loss": 0.1284, "num_input_tokens_seen": 24652224, "step": 11420 }, { "epoch": 1.863784665579119, "grad_norm": 0.004473595879971981, "learning_rate": 0.000931810766721044, "loss": 0.0634, "num_input_tokens_seen": 24661728, "step": 11425 }, { "epoch": 1.864600326264274, "grad_norm": 0.03330934792757034, "learning_rate": 0.0009322185970636216, "loss": 0.038, "num_input_tokens_seen": 24672768, "step": 11430 }, { "epoch": 1.865415986949429, "grad_norm": 0.12943190336227417, "learning_rate": 0.0009326264274061991, "loss": 0.1302, "num_input_tokens_seen": 24683808, "step": 11435 }, { "epoch": 1.8662316476345842, "grad_norm": 0.46725228428840637, "learning_rate": 0.0009330342577487764, "loss": 0.2927, "num_input_tokens_seen": 24694464, "step": 11440 }, { "epoch": 1.867047308319739, "grad_norm": 0.1158914566040039, "learning_rate": 0.000933442088091354, "loss": 0.215, "num_input_tokens_seen": 24703168, "step": 11445 }, { "epoch": 1.867862969004894, "grad_norm": 0.28803718090057373, "learning_rate": 0.0009338499184339315, "loss": 0.1351, "num_input_tokens_seen": 24713632, "step": 11450 }, { "epoch": 1.868678629690049, "grad_norm": 0.09609079360961914, "learning_rate": 0.0009342577487765091, "loss": 0.1688, "num_input_tokens_seen": 24724576, "step": 11455 }, { "epoch": 1.8694942903752039, "grad_norm": 0.14100462198257446, "learning_rate": 0.0009346655791190864, "loss": 0.1903, "num_input_tokens_seen": 24734240, "step": 11460 }, { "epoch": 1.870309951060359, "grad_norm": 0.05888713523745537, "learning_rate": 0.0009350734094616639, "loss": 0.251, "num_input_tokens_seen": 24746080, "step": 11465 }, { "epoch": 1.871125611745514, "grad_norm": 0.186430424451828, "learning_rate": 0.0009354812398042415, "loss": 0.2011, "num_input_tokens_seen": 24755392, "step": 11470 }, { "epoch": 1.8719412724306688, "grad_norm": 0.16929329931735992, "learning_rate": 0.0009358890701468189, "loss": 0.1629, "num_input_tokens_seen": 24766272, "step": 11475 }, { "epoch": 1.8727569331158238, "grad_norm": 0.15553727746009827, "learning_rate": 0.0009362969004893965, "loss": 0.1588, "num_input_tokens_seen": 24776320, "step": 11480 }, { "epoch": 1.8735725938009788, "grad_norm": 0.1553117036819458, "learning_rate": 0.0009367047308319739, "loss": 0.111, "num_input_tokens_seen": 24786880, "step": 11485 }, { "epoch": 1.8743882544861337, "grad_norm": 0.1936817169189453, "learning_rate": 0.0009371125611745514, "loss": 0.1489, "num_input_tokens_seen": 24798688, "step": 11490 }, { "epoch": 1.8752039151712887, "grad_norm": 0.2653498351573944, "learning_rate": 0.0009375203915171289, "loss": 0.163, "num_input_tokens_seen": 24809088, "step": 11495 }, { "epoch": 1.8760195758564437, "grad_norm": 0.273576945066452, "learning_rate": 0.0009379282218597064, "loss": 0.1792, "num_input_tokens_seen": 24819520, "step": 11500 }, { "epoch": 1.8768352365415986, "grad_norm": 0.08695437759160995, "learning_rate": 0.000938336052202284, "loss": 0.0565, "num_input_tokens_seen": 24829824, "step": 11505 }, { "epoch": 1.8776508972267538, "grad_norm": 0.051579494029283524, "learning_rate": 0.0009387438825448613, "loss": 0.2356, "num_input_tokens_seen": 24840544, "step": 11510 }, { "epoch": 1.8784665579119086, "grad_norm": 0.07981032878160477, "learning_rate": 0.0009391517128874388, "loss": 0.0826, "num_input_tokens_seen": 24851520, "step": 11515 }, { "epoch": 1.8792822185970635, "grad_norm": 0.05732285603880882, "learning_rate": 0.0009395595432300164, "loss": 0.1388, "num_input_tokens_seen": 24863616, "step": 11520 }, { "epoch": 1.8800978792822187, "grad_norm": 0.10791336745023727, "learning_rate": 0.0009399673735725939, "loss": 0.2076, "num_input_tokens_seen": 24875040, "step": 11525 }, { "epoch": 1.8809135399673735, "grad_norm": 0.05827794224023819, "learning_rate": 0.0009403752039151713, "loss": 0.1343, "num_input_tokens_seen": 24885152, "step": 11530 }, { "epoch": 1.8817292006525286, "grad_norm": 0.3328348398208618, "learning_rate": 0.0009407830342577488, "loss": 0.1854, "num_input_tokens_seen": 24896608, "step": 11535 }, { "epoch": 1.8825448613376836, "grad_norm": 0.026570772752165794, "learning_rate": 0.0009411908646003263, "loss": 0.0952, "num_input_tokens_seen": 24907616, "step": 11540 }, { "epoch": 1.8833605220228384, "grad_norm": 0.11195064336061478, "learning_rate": 0.0009415986949429038, "loss": 0.0487, "num_input_tokens_seen": 24918784, "step": 11545 }, { "epoch": 1.8841761827079935, "grad_norm": 0.07066580653190613, "learning_rate": 0.0009420065252854812, "loss": 0.0771, "num_input_tokens_seen": 24928448, "step": 11550 }, { "epoch": 1.8849918433931485, "grad_norm": 0.017555493861436844, "learning_rate": 0.0009424143556280587, "loss": 0.0694, "num_input_tokens_seen": 24938816, "step": 11555 }, { "epoch": 1.8858075040783033, "grad_norm": 0.262389212846756, "learning_rate": 0.0009428221859706362, "loss": 0.1385, "num_input_tokens_seen": 24948864, "step": 11560 }, { "epoch": 1.8866231647634584, "grad_norm": 0.11231456696987152, "learning_rate": 0.0009432300163132137, "loss": 0.1131, "num_input_tokens_seen": 24959456, "step": 11565 }, { "epoch": 1.8874388254486134, "grad_norm": 0.019032534211874008, "learning_rate": 0.0009436378466557913, "loss": 0.107, "num_input_tokens_seen": 24969472, "step": 11570 }, { "epoch": 1.8882544861337682, "grad_norm": 0.08830331265926361, "learning_rate": 0.0009440456769983687, "loss": 0.0315, "num_input_tokens_seen": 24980416, "step": 11575 }, { "epoch": 1.8890701468189235, "grad_norm": 0.019184298813343048, "learning_rate": 0.0009444535073409461, "loss": 0.0491, "num_input_tokens_seen": 24991712, "step": 11580 }, { "epoch": 1.8898858075040783, "grad_norm": 0.008962858468294144, "learning_rate": 0.0009448613376835237, "loss": 0.0126, "num_input_tokens_seen": 25003456, "step": 11585 }, { "epoch": 1.8907014681892331, "grad_norm": 0.01834687404334545, "learning_rate": 0.0009452691680261012, "loss": 0.1124, "num_input_tokens_seen": 25013728, "step": 11590 }, { "epoch": 1.8915171288743884, "grad_norm": 0.06564921140670776, "learning_rate": 0.0009456769983686786, "loss": 0.0458, "num_input_tokens_seen": 25024672, "step": 11595 }, { "epoch": 1.8923327895595432, "grad_norm": 0.03144608065485954, "learning_rate": 0.0009460848287112561, "loss": 0.0997, "num_input_tokens_seen": 25035328, "step": 11600 }, { "epoch": 1.8931484502446982, "grad_norm": 0.19243009388446808, "learning_rate": 0.0009464926590538336, "loss": 0.1292, "num_input_tokens_seen": 25045856, "step": 11605 }, { "epoch": 1.8939641109298533, "grad_norm": 0.029403777793049812, "learning_rate": 0.0009469004893964112, "loss": 0.0861, "num_input_tokens_seen": 25056960, "step": 11610 }, { "epoch": 1.894779771615008, "grad_norm": 0.13157737255096436, "learning_rate": 0.0009473083197389885, "loss": 0.3731, "num_input_tokens_seen": 25068064, "step": 11615 }, { "epoch": 1.8955954323001631, "grad_norm": 0.3912639915943146, "learning_rate": 0.0009477161500815661, "loss": 0.1025, "num_input_tokens_seen": 25079008, "step": 11620 }, { "epoch": 1.8964110929853182, "grad_norm": 0.07395552098751068, "learning_rate": 0.0009481239804241436, "loss": 0.0276, "num_input_tokens_seen": 25091136, "step": 11625 }, { "epoch": 1.897226753670473, "grad_norm": 0.28641292452812195, "learning_rate": 0.000948531810766721, "loss": 0.0695, "num_input_tokens_seen": 25102112, "step": 11630 }, { "epoch": 1.898042414355628, "grad_norm": 0.06429030001163483, "learning_rate": 0.0009489396411092986, "loss": 0.1522, "num_input_tokens_seen": 25113440, "step": 11635 }, { "epoch": 1.898858075040783, "grad_norm": 0.037417296320199966, "learning_rate": 0.000949347471451876, "loss": 0.0626, "num_input_tokens_seen": 25124512, "step": 11640 }, { "epoch": 1.899673735725938, "grad_norm": 0.07068507373332977, "learning_rate": 0.0009497553017944536, "loss": 0.1235, "num_input_tokens_seen": 25134784, "step": 11645 }, { "epoch": 1.900489396411093, "grad_norm": 0.41760483384132385, "learning_rate": 0.000950163132137031, "loss": 0.1746, "num_input_tokens_seen": 25143648, "step": 11650 }, { "epoch": 1.901305057096248, "grad_norm": 0.48049110174179077, "learning_rate": 0.0009505709624796085, "loss": 0.2486, "num_input_tokens_seen": 25154080, "step": 11655 }, { "epoch": 1.9021207177814028, "grad_norm": 0.035018663853406906, "learning_rate": 0.000950978792822186, "loss": 0.2544, "num_input_tokens_seen": 25164512, "step": 11660 }, { "epoch": 1.902936378466558, "grad_norm": 0.09550262242555618, "learning_rate": 0.0009513866231647634, "loss": 0.0813, "num_input_tokens_seen": 25173728, "step": 11665 }, { "epoch": 1.9037520391517129, "grad_norm": 0.05390708148479462, "learning_rate": 0.000951794453507341, "loss": 0.0406, "num_input_tokens_seen": 25185088, "step": 11670 }, { "epoch": 1.904567699836868, "grad_norm": 0.034918103367090225, "learning_rate": 0.0009522022838499185, "loss": 0.1359, "num_input_tokens_seen": 25198176, "step": 11675 }, { "epoch": 1.905383360522023, "grad_norm": 0.168876513838768, "learning_rate": 0.000952610114192496, "loss": 0.1207, "num_input_tokens_seen": 25208960, "step": 11680 }, { "epoch": 1.9061990212071778, "grad_norm": 0.09788458049297333, "learning_rate": 0.0009530179445350734, "loss": 0.1404, "num_input_tokens_seen": 25218304, "step": 11685 }, { "epoch": 1.9070146818923328, "grad_norm": 0.08673513680696487, "learning_rate": 0.0009534257748776509, "loss": 0.2439, "num_input_tokens_seen": 25229120, "step": 11690 }, { "epoch": 1.9078303425774878, "grad_norm": 0.4093356728553772, "learning_rate": 0.0009538336052202285, "loss": 0.1343, "num_input_tokens_seen": 25239904, "step": 11695 }, { "epoch": 1.9086460032626427, "grad_norm": 0.10013367980718613, "learning_rate": 0.0009542414355628059, "loss": 0.125, "num_input_tokens_seen": 25250304, "step": 11700 }, { "epoch": 1.9094616639477977, "grad_norm": 0.07225148379802704, "learning_rate": 0.0009546492659053833, "loss": 0.1319, "num_input_tokens_seen": 25259936, "step": 11705 }, { "epoch": 1.9102773246329527, "grad_norm": 0.11120744794607162, "learning_rate": 0.0009550570962479609, "loss": 0.1896, "num_input_tokens_seen": 25270240, "step": 11710 }, { "epoch": 1.9110929853181076, "grad_norm": 0.2841385006904602, "learning_rate": 0.0009554649265905384, "loss": 0.0955, "num_input_tokens_seen": 25280320, "step": 11715 }, { "epoch": 1.9119086460032626, "grad_norm": 0.35351407527923584, "learning_rate": 0.0009558727569331158, "loss": 0.2494, "num_input_tokens_seen": 25292480, "step": 11720 }, { "epoch": 1.9127243066884176, "grad_norm": 0.2722187042236328, "learning_rate": 0.0009562805872756934, "loss": 0.0918, "num_input_tokens_seen": 25301920, "step": 11725 }, { "epoch": 1.9135399673735725, "grad_norm": 0.047410085797309875, "learning_rate": 0.0009566884176182708, "loss": 0.2907, "num_input_tokens_seen": 25312992, "step": 11730 }, { "epoch": 1.9143556280587277, "grad_norm": 0.39301028847694397, "learning_rate": 0.0009570962479608483, "loss": 0.2398, "num_input_tokens_seen": 25322272, "step": 11735 }, { "epoch": 1.9151712887438825, "grad_norm": 0.06450144201517105, "learning_rate": 0.0009575040783034258, "loss": 0.0729, "num_input_tokens_seen": 25331904, "step": 11740 }, { "epoch": 1.9159869494290374, "grad_norm": 0.0937434658408165, "learning_rate": 0.0009579119086460033, "loss": 0.1056, "num_input_tokens_seen": 25342592, "step": 11745 }, { "epoch": 1.9168026101141926, "grad_norm": 0.08313151448965073, "learning_rate": 0.0009583197389885808, "loss": 0.0732, "num_input_tokens_seen": 25354624, "step": 11750 }, { "epoch": 1.9176182707993474, "grad_norm": 0.07151429355144501, "learning_rate": 0.0009587275693311582, "loss": 0.1666, "num_input_tokens_seen": 25364448, "step": 11755 }, { "epoch": 1.9184339314845025, "grad_norm": 0.14911410212516785, "learning_rate": 0.0009591353996737358, "loss": 0.0964, "num_input_tokens_seen": 25374592, "step": 11760 }, { "epoch": 1.9192495921696575, "grad_norm": 0.11153820902109146, "learning_rate": 0.0009595432300163133, "loss": 0.1349, "num_input_tokens_seen": 25385824, "step": 11765 }, { "epoch": 1.9200652528548123, "grad_norm": 0.3738478720188141, "learning_rate": 0.0009599510603588906, "loss": 0.1651, "num_input_tokens_seen": 25396544, "step": 11770 }, { "epoch": 1.9208809135399674, "grad_norm": 0.22172443568706512, "learning_rate": 0.0009603588907014682, "loss": 0.1261, "num_input_tokens_seen": 25406304, "step": 11775 }, { "epoch": 1.9216965742251224, "grad_norm": 0.045931387692689896, "learning_rate": 0.0009607667210440457, "loss": 0.0987, "num_input_tokens_seen": 25417056, "step": 11780 }, { "epoch": 1.9225122349102772, "grad_norm": 0.3327309489250183, "learning_rate": 0.0009611745513866232, "loss": 0.1288, "num_input_tokens_seen": 25429120, "step": 11785 }, { "epoch": 1.9233278955954323, "grad_norm": 0.03299754112958908, "learning_rate": 0.0009615823817292007, "loss": 0.0451, "num_input_tokens_seen": 25440416, "step": 11790 }, { "epoch": 1.9241435562805873, "grad_norm": 0.17451678216457367, "learning_rate": 0.0009619902120717781, "loss": 0.1219, "num_input_tokens_seen": 25450336, "step": 11795 }, { "epoch": 1.9249592169657421, "grad_norm": 0.1690349131822586, "learning_rate": 0.0009623980424143557, "loss": 0.1284, "num_input_tokens_seen": 25461344, "step": 11800 }, { "epoch": 1.9257748776508974, "grad_norm": 0.07164038717746735, "learning_rate": 0.0009628058727569331, "loss": 0.0843, "num_input_tokens_seen": 25472768, "step": 11805 }, { "epoch": 1.9265905383360522, "grad_norm": 0.02030120976269245, "learning_rate": 0.0009632137030995107, "loss": 0.0589, "num_input_tokens_seen": 25485888, "step": 11810 }, { "epoch": 1.927406199021207, "grad_norm": 0.027827885001897812, "learning_rate": 0.0009636215334420881, "loss": 0.246, "num_input_tokens_seen": 25497312, "step": 11815 }, { "epoch": 1.9282218597063623, "grad_norm": 0.036817461252212524, "learning_rate": 0.0009640293637846655, "loss": 0.1378, "num_input_tokens_seen": 25508256, "step": 11820 }, { "epoch": 1.929037520391517, "grad_norm": 0.023798756301403046, "learning_rate": 0.0009644371941272431, "loss": 0.099, "num_input_tokens_seen": 25518944, "step": 11825 }, { "epoch": 1.9298531810766721, "grad_norm": 0.04707795009016991, "learning_rate": 0.0009648450244698206, "loss": 0.086, "num_input_tokens_seen": 25529408, "step": 11830 }, { "epoch": 1.9306688417618272, "grad_norm": 0.06785164773464203, "learning_rate": 0.0009652528548123982, "loss": 0.1091, "num_input_tokens_seen": 25537984, "step": 11835 }, { "epoch": 1.931484502446982, "grad_norm": 0.41442543268203735, "learning_rate": 0.0009656606851549755, "loss": 0.2, "num_input_tokens_seen": 25547488, "step": 11840 }, { "epoch": 1.932300163132137, "grad_norm": 0.006887958385050297, "learning_rate": 0.000966068515497553, "loss": 0.1453, "num_input_tokens_seen": 25558464, "step": 11845 }, { "epoch": 1.933115823817292, "grad_norm": 0.20356802642345428, "learning_rate": 0.0009664763458401306, "loss": 0.2493, "num_input_tokens_seen": 25570304, "step": 11850 }, { "epoch": 1.933931484502447, "grad_norm": 0.03407203406095505, "learning_rate": 0.000966884176182708, "loss": 0.0485, "num_input_tokens_seen": 25582400, "step": 11855 }, { "epoch": 1.934747145187602, "grad_norm": 0.11882657557725906, "learning_rate": 0.0009672920065252854, "loss": 0.1577, "num_input_tokens_seen": 25593920, "step": 11860 }, { "epoch": 1.935562805872757, "grad_norm": 0.05393798276782036, "learning_rate": 0.000967699836867863, "loss": 0.1453, "num_input_tokens_seen": 25604192, "step": 11865 }, { "epoch": 1.9363784665579118, "grad_norm": 0.09706702083349228, "learning_rate": 0.0009681076672104405, "loss": 0.1867, "num_input_tokens_seen": 25615104, "step": 11870 }, { "epoch": 1.9371941272430668, "grad_norm": 0.25087207555770874, "learning_rate": 0.000968515497553018, "loss": 0.2569, "num_input_tokens_seen": 25624896, "step": 11875 }, { "epoch": 1.9380097879282219, "grad_norm": 0.04858435317873955, "learning_rate": 0.0009689233278955954, "loss": 0.0841, "num_input_tokens_seen": 25636160, "step": 11880 }, { "epoch": 1.9388254486133767, "grad_norm": 0.08043601363897324, "learning_rate": 0.0009693311582381729, "loss": 0.2168, "num_input_tokens_seen": 25647072, "step": 11885 }, { "epoch": 1.939641109298532, "grad_norm": 0.09139817208051682, "learning_rate": 0.0009697389885807504, "loss": 0.2848, "num_input_tokens_seen": 25657600, "step": 11890 }, { "epoch": 1.9404567699836868, "grad_norm": 0.02732432633638382, "learning_rate": 0.0009701468189233279, "loss": 0.0563, "num_input_tokens_seen": 25668576, "step": 11895 }, { "epoch": 1.9412724306688418, "grad_norm": 0.058726947754621506, "learning_rate": 0.0009705546492659055, "loss": 0.0822, "num_input_tokens_seen": 25679680, "step": 11900 }, { "epoch": 1.9420880913539968, "grad_norm": 0.03139631077647209, "learning_rate": 0.0009709624796084829, "loss": 0.0802, "num_input_tokens_seen": 25691744, "step": 11905 }, { "epoch": 1.9429037520391517, "grad_norm": 0.0890437439084053, "learning_rate": 0.0009713703099510603, "loss": 0.1051, "num_input_tokens_seen": 25702976, "step": 11910 }, { "epoch": 1.9437194127243067, "grad_norm": 0.17661869525909424, "learning_rate": 0.0009717781402936379, "loss": 0.1135, "num_input_tokens_seen": 25713984, "step": 11915 }, { "epoch": 1.9445350734094617, "grad_norm": 0.10681112110614777, "learning_rate": 0.0009721859706362154, "loss": 0.1662, "num_input_tokens_seen": 25724096, "step": 11920 }, { "epoch": 1.9453507340946166, "grad_norm": 0.04582669213414192, "learning_rate": 0.0009725938009787928, "loss": 0.1612, "num_input_tokens_seen": 25735680, "step": 11925 }, { "epoch": 1.9461663947797716, "grad_norm": 0.08026120066642761, "learning_rate": 0.0009730016313213703, "loss": 0.1707, "num_input_tokens_seen": 25746112, "step": 11930 }, { "epoch": 1.9469820554649266, "grad_norm": 0.25330090522766113, "learning_rate": 0.0009734094616639478, "loss": 0.0991, "num_input_tokens_seen": 25756544, "step": 11935 }, { "epoch": 1.9477977161500815, "grad_norm": 0.08931022137403488, "learning_rate": 0.0009738172920065254, "loss": 0.1708, "num_input_tokens_seen": 25767872, "step": 11940 }, { "epoch": 1.9486133768352365, "grad_norm": 0.18170134723186493, "learning_rate": 0.0009742251223491027, "loss": 0.1082, "num_input_tokens_seen": 25779584, "step": 11945 }, { "epoch": 1.9494290375203915, "grad_norm": 0.06718965619802475, "learning_rate": 0.0009746329526916803, "loss": 0.1246, "num_input_tokens_seen": 25789824, "step": 11950 }, { "epoch": 1.9502446982055464, "grad_norm": 0.29470276832580566, "learning_rate": 0.0009750407830342578, "loss": 0.1122, "num_input_tokens_seen": 25801184, "step": 11955 }, { "epoch": 1.9510603588907016, "grad_norm": 0.10300743579864502, "learning_rate": 0.0009754486133768352, "loss": 0.0643, "num_input_tokens_seen": 25812256, "step": 11960 }, { "epoch": 1.9518760195758564, "grad_norm": 0.38056859374046326, "learning_rate": 0.0009758564437194128, "loss": 0.146, "num_input_tokens_seen": 25823744, "step": 11965 }, { "epoch": 1.9526916802610113, "grad_norm": 0.025903569534420967, "learning_rate": 0.0009762642740619902, "loss": 0.1258, "num_input_tokens_seen": 25835584, "step": 11970 }, { "epoch": 1.9535073409461665, "grad_norm": 0.07436147332191467, "learning_rate": 0.0009766721044045677, "loss": 0.0996, "num_input_tokens_seen": 25846688, "step": 11975 }, { "epoch": 1.9543230016313213, "grad_norm": 0.11942356824874878, "learning_rate": 0.0009770799347471452, "loss": 0.0688, "num_input_tokens_seen": 25856320, "step": 11980 }, { "epoch": 1.9551386623164764, "grad_norm": 0.32324638962745667, "learning_rate": 0.0009774877650897227, "loss": 0.231, "num_input_tokens_seen": 25866336, "step": 11985 }, { "epoch": 1.9559543230016314, "grad_norm": 0.08361072838306427, "learning_rate": 0.0009778955954323001, "loss": 0.1574, "num_input_tokens_seen": 25878144, "step": 11990 }, { "epoch": 1.9567699836867862, "grad_norm": 0.28034019470214844, "learning_rate": 0.0009783034257748776, "loss": 0.089, "num_input_tokens_seen": 25889120, "step": 11995 }, { "epoch": 1.9575856443719413, "grad_norm": 0.031037453562021255, "learning_rate": 0.000978711256117455, "loss": 0.1165, "num_input_tokens_seen": 25900576, "step": 12000 }, { "epoch": 1.9584013050570963, "grad_norm": 0.32065916061401367, "learning_rate": 0.0009791190864600326, "loss": 0.1196, "num_input_tokens_seen": 25911872, "step": 12005 }, { "epoch": 1.9592169657422511, "grad_norm": 0.20853395760059357, "learning_rate": 0.00097952691680261, "loss": 0.1229, "num_input_tokens_seen": 25923232, "step": 12010 }, { "epoch": 1.9600326264274062, "grad_norm": 0.07003075629472733, "learning_rate": 0.0009799347471451875, "loss": 0.1153, "num_input_tokens_seen": 25933760, "step": 12015 }, { "epoch": 1.9608482871125612, "grad_norm": 0.08619290590286255, "learning_rate": 0.0009803425774877652, "loss": 0.0728, "num_input_tokens_seen": 25945088, "step": 12020 }, { "epoch": 1.961663947797716, "grad_norm": 0.277591735124588, "learning_rate": 0.0009807504078303427, "loss": 0.0571, "num_input_tokens_seen": 25955488, "step": 12025 }, { "epoch": 1.9624796084828713, "grad_norm": 0.304699182510376, "learning_rate": 0.00098115823817292, "loss": 0.0923, "num_input_tokens_seen": 25965952, "step": 12030 }, { "epoch": 1.963295269168026, "grad_norm": 0.38396796584129333, "learning_rate": 0.0009815660685154977, "loss": 0.1514, "num_input_tokens_seen": 25977120, "step": 12035 }, { "epoch": 1.964110929853181, "grad_norm": 0.032324135303497314, "learning_rate": 0.0009819738988580751, "loss": 0.1398, "num_input_tokens_seen": 25988928, "step": 12040 }, { "epoch": 1.9649265905383362, "grad_norm": 0.06509353220462799, "learning_rate": 0.0009823817292006526, "loss": 0.1015, "num_input_tokens_seen": 25998208, "step": 12045 }, { "epoch": 1.965742251223491, "grad_norm": 0.027890460565686226, "learning_rate": 0.00098278955954323, "loss": 0.1551, "num_input_tokens_seen": 26009152, "step": 12050 }, { "epoch": 1.966557911908646, "grad_norm": 0.03368251025676727, "learning_rate": 0.0009831973898858076, "loss": 0.0284, "num_input_tokens_seen": 26019552, "step": 12055 }, { "epoch": 1.967373572593801, "grad_norm": 0.10436016321182251, "learning_rate": 0.000983605220228385, "loss": 0.216, "num_input_tokens_seen": 26030656, "step": 12060 }, { "epoch": 1.968189233278956, "grad_norm": 0.37846919894218445, "learning_rate": 0.0009840130505709625, "loss": 0.1034, "num_input_tokens_seen": 26042880, "step": 12065 }, { "epoch": 1.969004893964111, "grad_norm": 0.22562405467033386, "learning_rate": 0.00098442088091354, "loss": 0.073, "num_input_tokens_seen": 26053152, "step": 12070 }, { "epoch": 1.969820554649266, "grad_norm": 0.11009881645441055, "learning_rate": 0.0009848287112561175, "loss": 0.3011, "num_input_tokens_seen": 26063904, "step": 12075 }, { "epoch": 1.9706362153344208, "grad_norm": 0.08640393614768982, "learning_rate": 0.000985236541598695, "loss": 0.0732, "num_input_tokens_seen": 26074272, "step": 12080 }, { "epoch": 1.9714518760195758, "grad_norm": 0.25562584400177, "learning_rate": 0.0009856443719412724, "loss": 0.135, "num_input_tokens_seen": 26084928, "step": 12085 }, { "epoch": 1.9722675367047309, "grad_norm": 0.03937864676117897, "learning_rate": 0.00098605220228385, "loss": 0.1944, "num_input_tokens_seen": 26096352, "step": 12090 }, { "epoch": 1.9730831973898857, "grad_norm": 0.030537165701389313, "learning_rate": 0.0009864600326264274, "loss": 0.1941, "num_input_tokens_seen": 26107072, "step": 12095 }, { "epoch": 1.9738988580750407, "grad_norm": 0.2124103605747223, "learning_rate": 0.0009868678629690048, "loss": 0.2041, "num_input_tokens_seen": 26117280, "step": 12100 }, { "epoch": 1.9747145187601958, "grad_norm": 0.021676253527402878, "learning_rate": 0.0009872756933115823, "loss": 0.0817, "num_input_tokens_seen": 26128032, "step": 12105 }, { "epoch": 1.9755301794453506, "grad_norm": 0.22161167860031128, "learning_rate": 0.00098768352365416, "loss": 0.241, "num_input_tokens_seen": 26138176, "step": 12110 }, { "epoch": 1.9763458401305058, "grad_norm": 0.08345066010951996, "learning_rate": 0.0009880913539967373, "loss": 0.0704, "num_input_tokens_seen": 26149504, "step": 12115 }, { "epoch": 1.9771615008156607, "grad_norm": 0.06257042288780212, "learning_rate": 0.0009884991843393148, "loss": 0.0992, "num_input_tokens_seen": 26161088, "step": 12120 }, { "epoch": 1.9779771615008157, "grad_norm": 0.18597102165222168, "learning_rate": 0.0009889070146818924, "loss": 0.1075, "num_input_tokens_seen": 26172416, "step": 12125 }, { "epoch": 1.9787928221859707, "grad_norm": 0.3705085217952728, "learning_rate": 0.00098931484502447, "loss": 0.1574, "num_input_tokens_seen": 26182560, "step": 12130 }, { "epoch": 1.9796084828711256, "grad_norm": 0.05272971838712692, "learning_rate": 0.0009897226753670474, "loss": 0.0547, "num_input_tokens_seen": 26192800, "step": 12135 }, { "epoch": 1.9804241435562806, "grad_norm": 0.020186539739370346, "learning_rate": 0.0009901305057096249, "loss": 0.1219, "num_input_tokens_seen": 26202848, "step": 12140 }, { "epoch": 1.9812398042414356, "grad_norm": 0.24151450395584106, "learning_rate": 0.0009905383360522024, "loss": 0.1623, "num_input_tokens_seen": 26214016, "step": 12145 }, { "epoch": 1.9820554649265905, "grad_norm": 0.16644741594791412, "learning_rate": 0.0009909461663947798, "loss": 0.117, "num_input_tokens_seen": 26224736, "step": 12150 }, { "epoch": 1.9828711256117455, "grad_norm": 0.07633837312459946, "learning_rate": 0.0009913539967373573, "loss": 0.096, "num_input_tokens_seen": 26234688, "step": 12155 }, { "epoch": 1.9836867862969005, "grad_norm": 0.17716972529888153, "learning_rate": 0.0009917618270799348, "loss": 0.0872, "num_input_tokens_seen": 26245504, "step": 12160 }, { "epoch": 1.9845024469820554, "grad_norm": 0.04457416385412216, "learning_rate": 0.0009921696574225123, "loss": 0.0731, "num_input_tokens_seen": 26255744, "step": 12165 }, { "epoch": 1.9853181076672104, "grad_norm": 0.16964846849441528, "learning_rate": 0.0009925774877650897, "loss": 0.114, "num_input_tokens_seen": 26267424, "step": 12170 }, { "epoch": 1.9861337683523654, "grad_norm": 0.008210341446101665, "learning_rate": 0.0009929853181076672, "loss": 0.1074, "num_input_tokens_seen": 26278016, "step": 12175 }, { "epoch": 1.9869494290375203, "grad_norm": 0.01941620372235775, "learning_rate": 0.0009933931484502447, "loss": 0.0791, "num_input_tokens_seen": 26288672, "step": 12180 }, { "epoch": 1.9877650897226755, "grad_norm": 0.019863169640302658, "learning_rate": 0.0009938009787928222, "loss": 0.0932, "num_input_tokens_seen": 26299584, "step": 12185 }, { "epoch": 1.9885807504078303, "grad_norm": 0.2860211730003357, "learning_rate": 0.0009942088091353996, "loss": 0.1549, "num_input_tokens_seen": 26308832, "step": 12190 }, { "epoch": 1.9893964110929854, "grad_norm": 0.14237594604492188, "learning_rate": 0.0009946166394779771, "loss": 0.1752, "num_input_tokens_seen": 26318592, "step": 12195 }, { "epoch": 1.9902120717781404, "grad_norm": 0.2574072778224945, "learning_rate": 0.0009950244698205548, "loss": 0.2773, "num_input_tokens_seen": 26330752, "step": 12200 }, { "epoch": 1.9910277324632952, "grad_norm": 0.3009999096393585, "learning_rate": 0.000995432300163132, "loss": 0.2324, "num_input_tokens_seen": 26341568, "step": 12205 }, { "epoch": 1.9918433931484503, "grad_norm": 0.24709255993366241, "learning_rate": 0.0009958401305057095, "loss": 0.1409, "num_input_tokens_seen": 26352576, "step": 12210 }, { "epoch": 1.9926590538336053, "grad_norm": 0.025392819195985794, "learning_rate": 0.0009962479608482872, "loss": 0.027, "num_input_tokens_seen": 26363168, "step": 12215 }, { "epoch": 1.9934747145187601, "grad_norm": 0.13007204234600067, "learning_rate": 0.0009966557911908645, "loss": 0.1651, "num_input_tokens_seen": 26373984, "step": 12220 }, { "epoch": 1.9942903752039152, "grad_norm": 0.024487672373652458, "learning_rate": 0.0009970636215334422, "loss": 0.2128, "num_input_tokens_seen": 26385568, "step": 12225 }, { "epoch": 1.9951060358890702, "grad_norm": 0.08277083188295364, "learning_rate": 0.0009974714518760197, "loss": 0.1647, "num_input_tokens_seen": 26395744, "step": 12230 }, { "epoch": 1.995921696574225, "grad_norm": 0.08057410269975662, "learning_rate": 0.0009978792822185971, "loss": 0.0532, "num_input_tokens_seen": 26406048, "step": 12235 }, { "epoch": 1.99673735725938, "grad_norm": 0.10625851154327393, "learning_rate": 0.0009982871125611746, "loss": 0.1057, "num_input_tokens_seen": 26417088, "step": 12240 }, { "epoch": 1.997553017944535, "grad_norm": 0.0426391176879406, "learning_rate": 0.000998694942903752, "loss": 0.0847, "num_input_tokens_seen": 26427232, "step": 12245 }, { "epoch": 1.99836867862969, "grad_norm": 0.1759200096130371, "learning_rate": 0.0009991027732463296, "loss": 0.1455, "num_input_tokens_seen": 26438368, "step": 12250 }, { "epoch": 1.9991843393148452, "grad_norm": 0.08528167754411697, "learning_rate": 0.000999510603588907, "loss": 0.1714, "num_input_tokens_seen": 26450240, "step": 12255 }, { "epoch": 2.0, "grad_norm": 0.5821802616119385, "learning_rate": 0.0009999184339314845, "loss": 0.3508, "num_input_tokens_seen": 26459312, "step": 12260 }, { "epoch": 2.0, "eval_loss": 0.13189947605133057, "eval_runtime": 104.3915, "eval_samples_per_second": 26.104, "eval_steps_per_second": 6.533, "num_input_tokens_seen": 26459312, "step": 12260 }, { "epoch": 2.000815660685155, "grad_norm": 0.091957688331604, "learning_rate": 0.000999999996757397, "loss": 0.1558, "num_input_tokens_seen": 26470608, "step": 12265 }, { "epoch": 2.00163132137031, "grad_norm": 0.04683307930827141, "learning_rate": 0.0009999999835843226, "loss": 0.1074, "num_input_tokens_seen": 26481040, "step": 12270 }, { "epoch": 2.002446982055465, "grad_norm": 0.15949326753616333, "learning_rate": 0.000999999960278114, "loss": 0.1274, "num_input_tokens_seen": 26492496, "step": 12275 }, { "epoch": 2.0032626427406197, "grad_norm": 0.06547245383262634, "learning_rate": 0.000999999926838772, "loss": 0.0675, "num_input_tokens_seen": 26504176, "step": 12280 }, { "epoch": 2.004078303425775, "grad_norm": 0.11735440045595169, "learning_rate": 0.0009999998832662972, "loss": 0.1477, "num_input_tokens_seen": 26515280, "step": 12285 }, { "epoch": 2.00489396411093, "grad_norm": 0.0564153678715229, "learning_rate": 0.0009999998295606907, "loss": 0.084, "num_input_tokens_seen": 26526640, "step": 12290 }, { "epoch": 2.0057096247960846, "grad_norm": 0.05549488216638565, "learning_rate": 0.000999999765721953, "loss": 0.0961, "num_input_tokens_seen": 26537872, "step": 12295 }, { "epoch": 2.00652528548124, "grad_norm": 0.062287479639053345, "learning_rate": 0.000999999691750086, "loss": 0.1745, "num_input_tokens_seen": 26549072, "step": 12300 }, { "epoch": 2.0073409461663947, "grad_norm": 0.06329841166734695, "learning_rate": 0.0009999996076450908, "loss": 0.0282, "num_input_tokens_seen": 26560880, "step": 12305 }, { "epoch": 2.00815660685155, "grad_norm": 0.024249162524938583, "learning_rate": 0.0009999995134069692, "loss": 0.0796, "num_input_tokens_seen": 26572048, "step": 12310 }, { "epoch": 2.0089722675367048, "grad_norm": 0.06986931711435318, "learning_rate": 0.0009999994090357234, "loss": 0.1879, "num_input_tokens_seen": 26581712, "step": 12315 }, { "epoch": 2.0097879282218596, "grad_norm": 0.10082012414932251, "learning_rate": 0.0009999992945313551, "loss": 0.1945, "num_input_tokens_seen": 26592784, "step": 12320 }, { "epoch": 2.010603588907015, "grad_norm": 0.2454589605331421, "learning_rate": 0.0009999991698938669, "loss": 0.0691, "num_input_tokens_seen": 26603248, "step": 12325 }, { "epoch": 2.0114192495921697, "grad_norm": 0.1426398754119873, "learning_rate": 0.000999999035123261, "loss": 0.0395, "num_input_tokens_seen": 26613104, "step": 12330 }, { "epoch": 2.0122349102773245, "grad_norm": 0.03306707739830017, "learning_rate": 0.0009999988902195407, "loss": 0.0899, "num_input_tokens_seen": 26622576, "step": 12335 }, { "epoch": 2.0130505709624797, "grad_norm": 0.14272212982177734, "learning_rate": 0.0009999987351827085, "loss": 0.1707, "num_input_tokens_seen": 26632848, "step": 12340 }, { "epoch": 2.0138662316476346, "grad_norm": 0.10915648937225342, "learning_rate": 0.0009999985700127674, "loss": 0.1019, "num_input_tokens_seen": 26644240, "step": 12345 }, { "epoch": 2.0146818923327894, "grad_norm": 0.19499437510967255, "learning_rate": 0.0009999983947097213, "loss": 0.1731, "num_input_tokens_seen": 26654832, "step": 12350 }, { "epoch": 2.0154975530179446, "grad_norm": 0.18871870636940002, "learning_rate": 0.0009999982092735733, "loss": 0.1073, "num_input_tokens_seen": 26665680, "step": 12355 }, { "epoch": 2.0163132137030995, "grad_norm": 0.0631629154086113, "learning_rate": 0.0009999980137043274, "loss": 0.1633, "num_input_tokens_seen": 26676944, "step": 12360 }, { "epoch": 2.0171288743882543, "grad_norm": 0.04746744781732559, "learning_rate": 0.0009999978080019872, "loss": 0.0681, "num_input_tokens_seen": 26686224, "step": 12365 }, { "epoch": 2.0179445350734095, "grad_norm": 0.045048635452985764, "learning_rate": 0.0009999975921665574, "loss": 0.0485, "num_input_tokens_seen": 26697840, "step": 12370 }, { "epoch": 2.0187601957585644, "grad_norm": 0.17351840436458588, "learning_rate": 0.000999997366198042, "loss": 0.1126, "num_input_tokens_seen": 26708656, "step": 12375 }, { "epoch": 2.0195758564437196, "grad_norm": 0.37556731700897217, "learning_rate": 0.0009999971300964456, "loss": 0.1802, "num_input_tokens_seen": 26719728, "step": 12380 }, { "epoch": 2.0203915171288744, "grad_norm": 0.13337551057338715, "learning_rate": 0.0009999968838617732, "loss": 0.1859, "num_input_tokens_seen": 26730576, "step": 12385 }, { "epoch": 2.0212071778140293, "grad_norm": 0.09129732847213745, "learning_rate": 0.0009999966274940296, "loss": 0.1598, "num_input_tokens_seen": 26742160, "step": 12390 }, { "epoch": 2.0220228384991845, "grad_norm": 0.0923326388001442, "learning_rate": 0.00099999636099322, "loss": 0.0431, "num_input_tokens_seen": 26753136, "step": 12395 }, { "epoch": 2.0228384991843393, "grad_norm": 0.03943445160984993, "learning_rate": 0.0009999960843593498, "loss": 0.0445, "num_input_tokens_seen": 26764208, "step": 12400 }, { "epoch": 2.023654159869494, "grad_norm": 0.04379410669207573, "learning_rate": 0.0009999957975924249, "loss": 0.2082, "num_input_tokens_seen": 26774192, "step": 12405 }, { "epoch": 2.0244698205546494, "grad_norm": 0.38741981983184814, "learning_rate": 0.0009999955006924507, "loss": 0.1183, "num_input_tokens_seen": 26784624, "step": 12410 }, { "epoch": 2.0252854812398042, "grad_norm": 0.17714372277259827, "learning_rate": 0.0009999951936594334, "loss": 0.0851, "num_input_tokens_seen": 26796336, "step": 12415 }, { "epoch": 2.026101141924959, "grad_norm": 0.04768671467900276, "learning_rate": 0.0009999948764933793, "loss": 0.1054, "num_input_tokens_seen": 26806352, "step": 12420 }, { "epoch": 2.0269168026101143, "grad_norm": 0.12000925093889236, "learning_rate": 0.0009999945491942946, "loss": 0.1096, "num_input_tokens_seen": 26817136, "step": 12425 }, { "epoch": 2.027732463295269, "grad_norm": 0.45664483308792114, "learning_rate": 0.0009999942117621863, "loss": 0.0742, "num_input_tokens_seen": 26828688, "step": 12430 }, { "epoch": 2.028548123980424, "grad_norm": 0.06032763049006462, "learning_rate": 0.0009999938641970607, "loss": 0.1716, "num_input_tokens_seen": 26838512, "step": 12435 }, { "epoch": 2.029363784665579, "grad_norm": 0.41563645005226135, "learning_rate": 0.0009999935064989255, "loss": 0.1207, "num_input_tokens_seen": 26849168, "step": 12440 }, { "epoch": 2.030179445350734, "grad_norm": 0.22545364499092102, "learning_rate": 0.0009999931386677873, "loss": 0.0737, "num_input_tokens_seen": 26860752, "step": 12445 }, { "epoch": 2.0309951060358893, "grad_norm": 0.37955546379089355, "learning_rate": 0.000999992760703654, "loss": 0.2172, "num_input_tokens_seen": 26871696, "step": 12450 }, { "epoch": 2.031810766721044, "grad_norm": 0.04539509862661362, "learning_rate": 0.000999992372606533, "loss": 0.0899, "num_input_tokens_seen": 26883440, "step": 12455 }, { "epoch": 2.032626427406199, "grad_norm": 0.06539620459079742, "learning_rate": 0.0009999919743764324, "loss": 0.1631, "num_input_tokens_seen": 26894064, "step": 12460 }, { "epoch": 2.033442088091354, "grad_norm": 0.11231246590614319, "learning_rate": 0.00099999156601336, "loss": 0.082, "num_input_tokens_seen": 26906832, "step": 12465 }, { "epoch": 2.034257748776509, "grad_norm": 0.132769376039505, "learning_rate": 0.0009999911475173245, "loss": 0.1973, "num_input_tokens_seen": 26918064, "step": 12470 }, { "epoch": 2.035073409461664, "grad_norm": 0.026820335537195206, "learning_rate": 0.000999990718888334, "loss": 0.058, "num_input_tokens_seen": 26928272, "step": 12475 }, { "epoch": 2.035889070146819, "grad_norm": 0.028243016451597214, "learning_rate": 0.0009999902801263974, "loss": 0.0784, "num_input_tokens_seen": 26939888, "step": 12480 }, { "epoch": 2.036704730831974, "grad_norm": 0.037158284336328506, "learning_rate": 0.0009999898312315232, "loss": 0.0748, "num_input_tokens_seen": 26948880, "step": 12485 }, { "epoch": 2.0375203915171287, "grad_norm": 0.18932610750198364, "learning_rate": 0.000999989372203721, "loss": 0.0766, "num_input_tokens_seen": 26960688, "step": 12490 }, { "epoch": 2.038336052202284, "grad_norm": 0.43904629349708557, "learning_rate": 0.0009999889030429998, "loss": 0.1184, "num_input_tokens_seen": 26972400, "step": 12495 }, { "epoch": 2.039151712887439, "grad_norm": 0.03631747141480446, "learning_rate": 0.0009999884237493692, "loss": 0.0979, "num_input_tokens_seen": 26982832, "step": 12500 }, { "epoch": 2.0399673735725936, "grad_norm": 0.036465875804424286, "learning_rate": 0.000999987934322839, "loss": 0.1628, "num_input_tokens_seen": 26993840, "step": 12505 }, { "epoch": 2.040783034257749, "grad_norm": 0.06492004543542862, "learning_rate": 0.000999987434763419, "loss": 0.0214, "num_input_tokens_seen": 27005360, "step": 12510 }, { "epoch": 2.0415986949429037, "grad_norm": 0.045513175427913666, "learning_rate": 0.0009999869250711193, "loss": 0.0235, "num_input_tokens_seen": 27015888, "step": 12515 }, { "epoch": 2.0424143556280585, "grad_norm": 0.26531872153282166, "learning_rate": 0.0009999864052459503, "loss": 0.1465, "num_input_tokens_seen": 27027472, "step": 12520 }, { "epoch": 2.0432300163132138, "grad_norm": 0.024290679022669792, "learning_rate": 0.0009999858752879228, "loss": 0.3314, "num_input_tokens_seen": 27039088, "step": 12525 }, { "epoch": 2.0440456769983686, "grad_norm": 0.1931566745042801, "learning_rate": 0.0009999853351970469, "loss": 0.1482, "num_input_tokens_seen": 27051184, "step": 12530 }, { "epoch": 2.044861337683524, "grad_norm": 0.13722002506256104, "learning_rate": 0.000999984784973334, "loss": 0.1658, "num_input_tokens_seen": 27061808, "step": 12535 }, { "epoch": 2.0456769983686787, "grad_norm": 0.03927312791347504, "learning_rate": 0.0009999842246167952, "loss": 0.1107, "num_input_tokens_seen": 27072208, "step": 12540 }, { "epoch": 2.0464926590538335, "grad_norm": 0.1521948277950287, "learning_rate": 0.0009999836541274417, "loss": 0.1889, "num_input_tokens_seen": 27082704, "step": 12545 }, { "epoch": 2.0473083197389887, "grad_norm": 0.04311520233750343, "learning_rate": 0.0009999830735052853, "loss": 0.1563, "num_input_tokens_seen": 27093616, "step": 12550 }, { "epoch": 2.0481239804241436, "grad_norm": 0.054621484130620956, "learning_rate": 0.0009999824827503377, "loss": 0.1022, "num_input_tokens_seen": 27103600, "step": 12555 }, { "epoch": 2.0489396411092984, "grad_norm": 0.06017155572772026, "learning_rate": 0.0009999818818626105, "loss": 0.0922, "num_input_tokens_seen": 27115376, "step": 12560 }, { "epoch": 2.0497553017944536, "grad_norm": 0.22011828422546387, "learning_rate": 0.0009999812708421166, "loss": 0.1234, "num_input_tokens_seen": 27125616, "step": 12565 }, { "epoch": 2.0505709624796085, "grad_norm": 0.19063395261764526, "learning_rate": 0.0009999806496888677, "loss": 0.1023, "num_input_tokens_seen": 27134864, "step": 12570 }, { "epoch": 2.0513866231647633, "grad_norm": 0.04882698506116867, "learning_rate": 0.0009999800184028766, "loss": 0.2276, "num_input_tokens_seen": 27146576, "step": 12575 }, { "epoch": 2.0522022838499185, "grad_norm": 0.10137622058391571, "learning_rate": 0.0009999793769841564, "loss": 0.1666, "num_input_tokens_seen": 27157840, "step": 12580 }, { "epoch": 2.0530179445350734, "grad_norm": 0.164845272898674, "learning_rate": 0.0009999787254327196, "loss": 0.0997, "num_input_tokens_seen": 27168112, "step": 12585 }, { "epoch": 2.053833605220228, "grad_norm": 0.02998611517250538, "learning_rate": 0.00099997806374858, "loss": 0.0678, "num_input_tokens_seen": 27178864, "step": 12590 }, { "epoch": 2.0546492659053834, "grad_norm": 0.06711497902870178, "learning_rate": 0.0009999773919317505, "loss": 0.0337, "num_input_tokens_seen": 27190896, "step": 12595 }, { "epoch": 2.0554649265905383, "grad_norm": 0.033714987337589264, "learning_rate": 0.000999976709982245, "loss": 0.0778, "num_input_tokens_seen": 27200720, "step": 12600 }, { "epoch": 2.0562805872756935, "grad_norm": 0.009462432935833931, "learning_rate": 0.000999976017900077, "loss": 0.1323, "num_input_tokens_seen": 27212912, "step": 12605 }, { "epoch": 2.0570962479608483, "grad_norm": 0.04082736372947693, "learning_rate": 0.0009999753156852609, "loss": 0.0768, "num_input_tokens_seen": 27223536, "step": 12610 }, { "epoch": 2.057911908646003, "grad_norm": 0.030248409137129784, "learning_rate": 0.0009999746033378105, "loss": 0.0449, "num_input_tokens_seen": 27234640, "step": 12615 }, { "epoch": 2.0587275693311584, "grad_norm": 0.28210654854774475, "learning_rate": 0.0009999738808577408, "loss": 0.0792, "num_input_tokens_seen": 27246096, "step": 12620 }, { "epoch": 2.0595432300163132, "grad_norm": 0.2939055263996124, "learning_rate": 0.000999973148245066, "loss": 0.1663, "num_input_tokens_seen": 27256304, "step": 12625 }, { "epoch": 2.060358890701468, "grad_norm": 0.27927902340888977, "learning_rate": 0.000999972405499801, "loss": 0.0937, "num_input_tokens_seen": 27266960, "step": 12630 }, { "epoch": 2.0611745513866233, "grad_norm": 0.003989933989942074, "learning_rate": 0.0009999716526219611, "loss": 0.1182, "num_input_tokens_seen": 27278288, "step": 12635 }, { "epoch": 2.061990212071778, "grad_norm": 0.03730406239628792, "learning_rate": 0.0009999708896115613, "loss": 0.1365, "num_input_tokens_seen": 27288816, "step": 12640 }, { "epoch": 2.062805872756933, "grad_norm": 0.06156677380204201, "learning_rate": 0.0009999701164686173, "loss": 0.2249, "num_input_tokens_seen": 27300400, "step": 12645 }, { "epoch": 2.063621533442088, "grad_norm": 0.21471218764781952, "learning_rate": 0.0009999693331931446, "loss": 0.0665, "num_input_tokens_seen": 27310160, "step": 12650 }, { "epoch": 2.064437194127243, "grad_norm": 0.03852491453289986, "learning_rate": 0.000999968539785159, "loss": 0.1108, "num_input_tokens_seen": 27319824, "step": 12655 }, { "epoch": 2.065252854812398, "grad_norm": 0.42548006772994995, "learning_rate": 0.0009999677362446768, "loss": 0.376, "num_input_tokens_seen": 27330736, "step": 12660 }, { "epoch": 2.066068515497553, "grad_norm": 0.0473458431661129, "learning_rate": 0.000999966922571714, "loss": 0.127, "num_input_tokens_seen": 27341264, "step": 12665 }, { "epoch": 2.066884176182708, "grad_norm": 0.3421942889690399, "learning_rate": 0.0009999660987662876, "loss": 0.1843, "num_input_tokens_seen": 27353808, "step": 12670 }, { "epoch": 2.067699836867863, "grad_norm": 0.09542140364646912, "learning_rate": 0.0009999652648284136, "loss": 0.1922, "num_input_tokens_seen": 27364368, "step": 12675 }, { "epoch": 2.068515497553018, "grad_norm": 0.20856496691703796, "learning_rate": 0.0009999644207581092, "loss": 0.1782, "num_input_tokens_seen": 27374192, "step": 12680 }, { "epoch": 2.069331158238173, "grad_norm": 0.047887347638607025, "learning_rate": 0.000999963566555392, "loss": 0.1126, "num_input_tokens_seen": 27386096, "step": 12685 }, { "epoch": 2.070146818923328, "grad_norm": 0.05187802389264107, "learning_rate": 0.0009999627022202785, "loss": 0.1785, "num_input_tokens_seen": 27397104, "step": 12690 }, { "epoch": 2.070962479608483, "grad_norm": 0.03325652703642845, "learning_rate": 0.0009999618277527868, "loss": 0.086, "num_input_tokens_seen": 27407856, "step": 12695 }, { "epoch": 2.0717781402936377, "grad_norm": 0.23733551800251007, "learning_rate": 0.0009999609431529345, "loss": 0.1192, "num_input_tokens_seen": 27419920, "step": 12700 }, { "epoch": 2.072593800978793, "grad_norm": 0.02450222708284855, "learning_rate": 0.0009999600484207392, "loss": 0.1028, "num_input_tokens_seen": 27431536, "step": 12705 }, { "epoch": 2.073409461663948, "grad_norm": 0.18467335402965546, "learning_rate": 0.0009999591435562193, "loss": 0.1271, "num_input_tokens_seen": 27443600, "step": 12710 }, { "epoch": 2.0742251223491026, "grad_norm": 0.07422156631946564, "learning_rate": 0.0009999582285593932, "loss": 0.0666, "num_input_tokens_seen": 27453648, "step": 12715 }, { "epoch": 2.075040783034258, "grad_norm": 0.05030030757188797, "learning_rate": 0.0009999573034302793, "loss": 0.1587, "num_input_tokens_seen": 27464016, "step": 12720 }, { "epoch": 2.0758564437194127, "grad_norm": 0.26597270369529724, "learning_rate": 0.0009999563681688964, "loss": 0.1515, "num_input_tokens_seen": 27475888, "step": 12725 }, { "epoch": 2.0766721044045675, "grad_norm": 0.27194589376449585, "learning_rate": 0.0009999554227752634, "loss": 0.0924, "num_input_tokens_seen": 27486960, "step": 12730 }, { "epoch": 2.0774877650897228, "grad_norm": 0.1502992808818817, "learning_rate": 0.0009999544672493997, "loss": 0.1265, "num_input_tokens_seen": 27498416, "step": 12735 }, { "epoch": 2.0783034257748776, "grad_norm": 0.02647317387163639, "learning_rate": 0.0009999535015913243, "loss": 0.1943, "num_input_tokens_seen": 27509616, "step": 12740 }, { "epoch": 2.0791190864600324, "grad_norm": 0.18644946813583374, "learning_rate": 0.0009999525258010571, "loss": 0.0811, "num_input_tokens_seen": 27520176, "step": 12745 }, { "epoch": 2.0799347471451877, "grad_norm": 0.2048216313123703, "learning_rate": 0.0009999515398786177, "loss": 0.1732, "num_input_tokens_seen": 27530608, "step": 12750 }, { "epoch": 2.0807504078303425, "grad_norm": 0.020477568730711937, "learning_rate": 0.000999950543824026, "loss": 0.1434, "num_input_tokens_seen": 27541328, "step": 12755 }, { "epoch": 2.0815660685154977, "grad_norm": 0.33716729283332825, "learning_rate": 0.0009999495376373025, "loss": 0.1933, "num_input_tokens_seen": 27552944, "step": 12760 }, { "epoch": 2.0823817292006526, "grad_norm": 0.18186968564987183, "learning_rate": 0.0009999485213184672, "loss": 0.1005, "num_input_tokens_seen": 27564112, "step": 12765 }, { "epoch": 2.0831973898858074, "grad_norm": 0.47105491161346436, "learning_rate": 0.000999947494867541, "loss": 0.1894, "num_input_tokens_seen": 27574608, "step": 12770 }, { "epoch": 2.0840130505709626, "grad_norm": 0.029688429087400436, "learning_rate": 0.0009999464582845445, "loss": 0.0796, "num_input_tokens_seen": 27586640, "step": 12775 }, { "epoch": 2.0848287112561175, "grad_norm": 0.05238907039165497, "learning_rate": 0.0009999454115694989, "loss": 0.1806, "num_input_tokens_seen": 27596720, "step": 12780 }, { "epoch": 2.0856443719412723, "grad_norm": 0.07289808988571167, "learning_rate": 0.0009999443547224253, "loss": 0.0959, "num_input_tokens_seen": 27608944, "step": 12785 }, { "epoch": 2.0864600326264275, "grad_norm": 0.17282697558403015, "learning_rate": 0.0009999432877433449, "loss": 0.0556, "num_input_tokens_seen": 27620208, "step": 12790 }, { "epoch": 2.0872756933115824, "grad_norm": 0.06098805367946625, "learning_rate": 0.0009999422106322798, "loss": 0.0621, "num_input_tokens_seen": 27631600, "step": 12795 }, { "epoch": 2.088091353996737, "grad_norm": 0.3002886176109314, "learning_rate": 0.0009999411233892516, "loss": 0.0645, "num_input_tokens_seen": 27643600, "step": 12800 }, { "epoch": 2.0889070146818924, "grad_norm": 0.011986999772489071, "learning_rate": 0.000999940026014282, "loss": 0.0551, "num_input_tokens_seen": 27654128, "step": 12805 }, { "epoch": 2.0897226753670473, "grad_norm": 0.3947140872478485, "learning_rate": 0.000999938918507394, "loss": 0.252, "num_input_tokens_seen": 27663664, "step": 12810 }, { "epoch": 2.090538336052202, "grad_norm": 0.025123989209532738, "learning_rate": 0.0009999378008686093, "loss": 0.0922, "num_input_tokens_seen": 27675824, "step": 12815 }, { "epoch": 2.0913539967373573, "grad_norm": 0.5061216950416565, "learning_rate": 0.000999936673097951, "loss": 0.1418, "num_input_tokens_seen": 27685936, "step": 12820 }, { "epoch": 2.092169657422512, "grad_norm": 0.2288796752691269, "learning_rate": 0.0009999355351954418, "loss": 0.1475, "num_input_tokens_seen": 27696272, "step": 12825 }, { "epoch": 2.0929853181076674, "grad_norm": 0.25870925188064575, "learning_rate": 0.0009999343871611045, "loss": 0.1028, "num_input_tokens_seen": 27706960, "step": 12830 }, { "epoch": 2.0938009787928222, "grad_norm": 0.14753036201000214, "learning_rate": 0.000999933228994963, "loss": 0.1601, "num_input_tokens_seen": 27717136, "step": 12835 }, { "epoch": 2.094616639477977, "grad_norm": 0.37874147295951843, "learning_rate": 0.00099993206069704, "loss": 0.2632, "num_input_tokens_seen": 27728432, "step": 12840 }, { "epoch": 2.0954323001631323, "grad_norm": 0.06045134738087654, "learning_rate": 0.0009999308822673599, "loss": 0.0897, "num_input_tokens_seen": 27739248, "step": 12845 }, { "epoch": 2.096247960848287, "grad_norm": 0.2825985550880432, "learning_rate": 0.000999929693705946, "loss": 0.1056, "num_input_tokens_seen": 27751088, "step": 12850 }, { "epoch": 2.097063621533442, "grad_norm": 0.3056178092956543, "learning_rate": 0.000999928495012823, "loss": 0.1218, "num_input_tokens_seen": 27761488, "step": 12855 }, { "epoch": 2.097879282218597, "grad_norm": 0.03444916382431984, "learning_rate": 0.0009999272861880148, "loss": 0.1034, "num_input_tokens_seen": 27771536, "step": 12860 }, { "epoch": 2.098694942903752, "grad_norm": 0.020597606897354126, "learning_rate": 0.0009999260672315456, "loss": 0.1726, "num_input_tokens_seen": 27782416, "step": 12865 }, { "epoch": 2.099510603588907, "grad_norm": 0.08517801016569138, "learning_rate": 0.0009999248381434406, "loss": 0.0881, "num_input_tokens_seen": 27793680, "step": 12870 }, { "epoch": 2.100326264274062, "grad_norm": 0.34481775760650635, "learning_rate": 0.0009999235989237249, "loss": 0.2068, "num_input_tokens_seen": 27803856, "step": 12875 }, { "epoch": 2.101141924959217, "grad_norm": 0.010505754500627518, "learning_rate": 0.0009999223495724228, "loss": 0.1864, "num_input_tokens_seen": 27814576, "step": 12880 }, { "epoch": 2.1019575856443717, "grad_norm": 0.06264909356832504, "learning_rate": 0.0009999210900895603, "loss": 0.0513, "num_input_tokens_seen": 27826320, "step": 12885 }, { "epoch": 2.102773246329527, "grad_norm": 0.060290805995464325, "learning_rate": 0.0009999198204751628, "loss": 0.1246, "num_input_tokens_seen": 27836432, "step": 12890 }, { "epoch": 2.103588907014682, "grad_norm": 0.15587708353996277, "learning_rate": 0.0009999185407292557, "loss": 0.1074, "num_input_tokens_seen": 27847792, "step": 12895 }, { "epoch": 2.104404567699837, "grad_norm": 0.2705962657928467, "learning_rate": 0.0009999172508518654, "loss": 0.1321, "num_input_tokens_seen": 27858192, "step": 12900 }, { "epoch": 2.105220228384992, "grad_norm": 0.2392251044511795, "learning_rate": 0.0009999159508430177, "loss": 0.0781, "num_input_tokens_seen": 27868624, "step": 12905 }, { "epoch": 2.1060358890701467, "grad_norm": 0.24834436178207397, "learning_rate": 0.000999914640702739, "loss": 0.1518, "num_input_tokens_seen": 27878064, "step": 12910 }, { "epoch": 2.106851549755302, "grad_norm": 0.00970530603080988, "learning_rate": 0.000999913320431056, "loss": 0.1186, "num_input_tokens_seen": 27888496, "step": 12915 }, { "epoch": 2.107667210440457, "grad_norm": 0.10250834375619888, "learning_rate": 0.0009999119900279956, "loss": 0.1892, "num_input_tokens_seen": 27900400, "step": 12920 }, { "epoch": 2.1084828711256116, "grad_norm": 0.14984969794750214, "learning_rate": 0.0009999106494935843, "loss": 0.0935, "num_input_tokens_seen": 27910896, "step": 12925 }, { "epoch": 2.109298531810767, "grad_norm": 0.30274906754493713, "learning_rate": 0.0009999092988278496, "loss": 0.2147, "num_input_tokens_seen": 27921936, "step": 12930 }, { "epoch": 2.1101141924959217, "grad_norm": 0.3028600811958313, "learning_rate": 0.0009999079380308186, "loss": 0.077, "num_input_tokens_seen": 27931600, "step": 12935 }, { "epoch": 2.1109298531810765, "grad_norm": 0.06969098746776581, "learning_rate": 0.000999906567102519, "loss": 0.1058, "num_input_tokens_seen": 27942320, "step": 12940 }, { "epoch": 2.1117455138662318, "grad_norm": 0.1617831289768219, "learning_rate": 0.0009999051860429791, "loss": 0.2045, "num_input_tokens_seen": 27953712, "step": 12945 }, { "epoch": 2.1125611745513866, "grad_norm": 0.4179234206676483, "learning_rate": 0.000999903794852226, "loss": 0.1425, "num_input_tokens_seen": 27964528, "step": 12950 }, { "epoch": 2.1133768352365414, "grad_norm": 0.35501933097839355, "learning_rate": 0.0009999023935302886, "loss": 0.1856, "num_input_tokens_seen": 27974992, "step": 12955 }, { "epoch": 2.1141924959216967, "grad_norm": 0.03591923788189888, "learning_rate": 0.000999900982077195, "loss": 0.1337, "num_input_tokens_seen": 27985584, "step": 12960 }, { "epoch": 2.1150081566068515, "grad_norm": 0.02245388552546501, "learning_rate": 0.0009998995604929735, "loss": 0.0711, "num_input_tokens_seen": 27995504, "step": 12965 }, { "epoch": 2.1158238172920063, "grad_norm": 0.10969027876853943, "learning_rate": 0.0009998981287776536, "loss": 0.0497, "num_input_tokens_seen": 28006960, "step": 12970 }, { "epoch": 2.1166394779771616, "grad_norm": 0.08348109573125839, "learning_rate": 0.0009998966869312637, "loss": 0.1379, "num_input_tokens_seen": 28018288, "step": 12975 }, { "epoch": 2.1174551386623164, "grad_norm": 0.2260345071554184, "learning_rate": 0.0009998952349538335, "loss": 0.2664, "num_input_tokens_seen": 28030032, "step": 12980 }, { "epoch": 2.1182707993474716, "grad_norm": 0.019346920773386955, "learning_rate": 0.000999893772845392, "loss": 0.1405, "num_input_tokens_seen": 28040464, "step": 12985 }, { "epoch": 2.1190864600326265, "grad_norm": 0.012573124840855598, "learning_rate": 0.0009998923006059692, "loss": 0.0266, "num_input_tokens_seen": 28049328, "step": 12990 }, { "epoch": 2.1199021207177813, "grad_norm": 0.08271928131580353, "learning_rate": 0.0009998908182355948, "loss": 0.0779, "num_input_tokens_seen": 28060336, "step": 12995 }, { "epoch": 2.1207177814029365, "grad_norm": 0.02619396336376667, "learning_rate": 0.0009998893257342986, "loss": 0.0619, "num_input_tokens_seen": 28070448, "step": 13000 }, { "epoch": 2.1215334420880914, "grad_norm": 0.04019603133201599, "learning_rate": 0.000999887823102111, "loss": 0.0366, "num_input_tokens_seen": 28081968, "step": 13005 }, { "epoch": 2.122349102773246, "grad_norm": 0.03693268075585365, "learning_rate": 0.0009998863103390628, "loss": 0.0467, "num_input_tokens_seen": 28092944, "step": 13010 }, { "epoch": 2.1231647634584014, "grad_norm": 0.4358840584754944, "learning_rate": 0.0009998847874451843, "loss": 0.2266, "num_input_tokens_seen": 28103888, "step": 13015 }, { "epoch": 2.1239804241435563, "grad_norm": 0.08920583873987198, "learning_rate": 0.0009998832544205064, "loss": 0.0905, "num_input_tokens_seen": 28115088, "step": 13020 }, { "epoch": 2.124796084828711, "grad_norm": 0.15785475075244904, "learning_rate": 0.0009998817112650603, "loss": 0.1523, "num_input_tokens_seen": 28125232, "step": 13025 }, { "epoch": 2.1256117455138663, "grad_norm": 0.12181179225444794, "learning_rate": 0.000999880157978877, "loss": 0.1596, "num_input_tokens_seen": 28136240, "step": 13030 }, { "epoch": 2.126427406199021, "grad_norm": 0.13666953146457672, "learning_rate": 0.0009998785945619882, "loss": 0.1797, "num_input_tokens_seen": 28146992, "step": 13035 }, { "epoch": 2.1272430668841764, "grad_norm": 0.040349315851926804, "learning_rate": 0.0009998770210144256, "loss": 0.0729, "num_input_tokens_seen": 28157104, "step": 13040 }, { "epoch": 2.1280587275693312, "grad_norm": 0.10633757710456848, "learning_rate": 0.000999875437336221, "loss": 0.0771, "num_input_tokens_seen": 28169040, "step": 13045 }, { "epoch": 2.128874388254486, "grad_norm": 0.059961553663015366, "learning_rate": 0.0009998738435274064, "loss": 0.0822, "num_input_tokens_seen": 28180304, "step": 13050 }, { "epoch": 2.1296900489396413, "grad_norm": 0.6498997807502747, "learning_rate": 0.0009998722395880145, "loss": 0.1658, "num_input_tokens_seen": 28191024, "step": 13055 }, { "epoch": 2.130505709624796, "grad_norm": 0.5805407166481018, "learning_rate": 0.0009998706255180774, "loss": 0.2298, "num_input_tokens_seen": 28202128, "step": 13060 }, { "epoch": 2.131321370309951, "grad_norm": 0.18699181079864502, "learning_rate": 0.0009998690013176279, "loss": 0.1455, "num_input_tokens_seen": 28211952, "step": 13065 }, { "epoch": 2.132137030995106, "grad_norm": 0.08630961179733276, "learning_rate": 0.0009998673669866988, "loss": 0.0756, "num_input_tokens_seen": 28222576, "step": 13070 }, { "epoch": 2.132952691680261, "grad_norm": 0.09728206694126129, "learning_rate": 0.0009998657225253236, "loss": 0.1018, "num_input_tokens_seen": 28233200, "step": 13075 }, { "epoch": 2.133768352365416, "grad_norm": 0.2379074990749359, "learning_rate": 0.0009998640679335354, "loss": 0.1848, "num_input_tokens_seen": 28246096, "step": 13080 }, { "epoch": 2.134584013050571, "grad_norm": 0.15434019267559052, "learning_rate": 0.0009998624032113677, "loss": 0.0562, "num_input_tokens_seen": 28256816, "step": 13085 }, { "epoch": 2.135399673735726, "grad_norm": 0.14020530879497528, "learning_rate": 0.0009998607283588543, "loss": 0.2382, "num_input_tokens_seen": 28267696, "step": 13090 }, { "epoch": 2.1362153344208807, "grad_norm": 0.07778248190879822, "learning_rate": 0.000999859043376029, "loss": 0.1103, "num_input_tokens_seen": 28278384, "step": 13095 }, { "epoch": 2.137030995106036, "grad_norm": 0.025499653071165085, "learning_rate": 0.0009998573482629264, "loss": 0.0663, "num_input_tokens_seen": 28289552, "step": 13100 }, { "epoch": 2.137846655791191, "grad_norm": 0.05297816917300224, "learning_rate": 0.0009998556430195803, "loss": 0.0436, "num_input_tokens_seen": 28298544, "step": 13105 }, { "epoch": 2.1386623164763456, "grad_norm": 0.20765253901481628, "learning_rate": 0.0009998539276460255, "loss": 0.1936, "num_input_tokens_seen": 28309904, "step": 13110 }, { "epoch": 2.139477977161501, "grad_norm": 0.23648615181446075, "learning_rate": 0.0009998522021422967, "loss": 0.1252, "num_input_tokens_seen": 28319728, "step": 13115 }, { "epoch": 2.1402936378466557, "grad_norm": 0.047302961349487305, "learning_rate": 0.000999850466508429, "loss": 0.0927, "num_input_tokens_seen": 28330640, "step": 13120 }, { "epoch": 2.141109298531811, "grad_norm": 0.059626929461956024, "learning_rate": 0.0009998487207444574, "loss": 0.0705, "num_input_tokens_seen": 28340848, "step": 13125 }, { "epoch": 2.141924959216966, "grad_norm": 0.035486433655023575, "learning_rate": 0.0009998469648504174, "loss": 0.0779, "num_input_tokens_seen": 28350608, "step": 13130 }, { "epoch": 2.1427406199021206, "grad_norm": 0.042947277426719666, "learning_rate": 0.0009998451988263444, "loss": 0.0845, "num_input_tokens_seen": 28360720, "step": 13135 }, { "epoch": 2.143556280587276, "grad_norm": 0.01824638620018959, "learning_rate": 0.0009998434226722746, "loss": 0.0234, "num_input_tokens_seen": 28371664, "step": 13140 }, { "epoch": 2.1443719412724307, "grad_norm": 0.004745565354824066, "learning_rate": 0.0009998416363882438, "loss": 0.0426, "num_input_tokens_seen": 28383408, "step": 13145 }, { "epoch": 2.1451876019575855, "grad_norm": 0.191055029630661, "learning_rate": 0.0009998398399742878, "loss": 0.0733, "num_input_tokens_seen": 28394384, "step": 13150 }, { "epoch": 2.1460032626427408, "grad_norm": 0.61994868516922, "learning_rate": 0.0009998380334304436, "loss": 0.1995, "num_input_tokens_seen": 28405232, "step": 13155 }, { "epoch": 2.1468189233278956, "grad_norm": 0.0677850991487503, "learning_rate": 0.0009998362167567476, "loss": 0.1064, "num_input_tokens_seen": 28415664, "step": 13160 }, { "epoch": 2.1476345840130504, "grad_norm": 0.6611852645874023, "learning_rate": 0.0009998343899532364, "loss": 0.1821, "num_input_tokens_seen": 28426416, "step": 13165 }, { "epoch": 2.1484502446982057, "grad_norm": 0.20804016292095184, "learning_rate": 0.0009998325530199473, "loss": 0.0515, "num_input_tokens_seen": 28437808, "step": 13170 }, { "epoch": 2.1492659053833605, "grad_norm": 0.13397958874702454, "learning_rate": 0.0009998307059569174, "loss": 0.1466, "num_input_tokens_seen": 28448304, "step": 13175 }, { "epoch": 2.1500815660685153, "grad_norm": 0.2655119001865387, "learning_rate": 0.0009998288487641843, "loss": 0.0951, "num_input_tokens_seen": 28458192, "step": 13180 }, { "epoch": 2.1508972267536706, "grad_norm": 0.12699666619300842, "learning_rate": 0.0009998269814417854, "loss": 0.0809, "num_input_tokens_seen": 28470288, "step": 13185 }, { "epoch": 2.1517128874388254, "grad_norm": 0.014473107643425465, "learning_rate": 0.0009998251039897586, "loss": 0.0221, "num_input_tokens_seen": 28481200, "step": 13190 }, { "epoch": 2.15252854812398, "grad_norm": 0.15988406538963318, "learning_rate": 0.000999823216408142, "loss": 0.1111, "num_input_tokens_seen": 28492080, "step": 13195 }, { "epoch": 2.1533442088091355, "grad_norm": 5.278985023498535, "learning_rate": 0.0009998213186969739, "loss": 0.2434, "num_input_tokens_seen": 28502800, "step": 13200 }, { "epoch": 2.1541598694942903, "grad_norm": 0.2296447604894638, "learning_rate": 0.0009998194108562927, "loss": 0.0647, "num_input_tokens_seen": 28514064, "step": 13205 }, { "epoch": 2.1549755301794455, "grad_norm": 0.07437156140804291, "learning_rate": 0.000999817492886137, "loss": 0.1646, "num_input_tokens_seen": 28523280, "step": 13210 }, { "epoch": 2.1557911908646004, "grad_norm": 0.042331233620643616, "learning_rate": 0.000999815564786546, "loss": 0.0216, "num_input_tokens_seen": 28531696, "step": 13215 }, { "epoch": 2.156606851549755, "grad_norm": 0.08266185224056244, "learning_rate": 0.0009998136265575582, "loss": 0.0703, "num_input_tokens_seen": 28543440, "step": 13220 }, { "epoch": 2.1574225122349104, "grad_norm": 0.041378866881132126, "learning_rate": 0.0009998116781992133, "loss": 0.1856, "num_input_tokens_seen": 28553808, "step": 13225 }, { "epoch": 2.1582381729200653, "grad_norm": 0.0452524833381176, "learning_rate": 0.0009998097197115507, "loss": 0.0905, "num_input_tokens_seen": 28565232, "step": 13230 }, { "epoch": 2.15905383360522, "grad_norm": 0.004929019138216972, "learning_rate": 0.00099980775109461, "loss": 0.0784, "num_input_tokens_seen": 28576304, "step": 13235 }, { "epoch": 2.1598694942903753, "grad_norm": 0.461338073015213, "learning_rate": 0.0009998057723484312, "loss": 0.2368, "num_input_tokens_seen": 28587952, "step": 13240 }, { "epoch": 2.16068515497553, "grad_norm": 0.2778373956680298, "learning_rate": 0.0009998037834730545, "loss": 0.1908, "num_input_tokens_seen": 28599472, "step": 13245 }, { "epoch": 2.161500815660685, "grad_norm": 0.13883040845394135, "learning_rate": 0.0009998017844685201, "loss": 0.0483, "num_input_tokens_seen": 28610800, "step": 13250 }, { "epoch": 2.1623164763458402, "grad_norm": 0.20979353785514832, "learning_rate": 0.0009997997753348684, "loss": 0.0419, "num_input_tokens_seen": 28621008, "step": 13255 }, { "epoch": 2.163132137030995, "grad_norm": 0.37442532181739807, "learning_rate": 0.0009997977560721402, "loss": 0.0882, "num_input_tokens_seen": 28632976, "step": 13260 }, { "epoch": 2.1639477977161503, "grad_norm": 0.08467903733253479, "learning_rate": 0.0009997957266803766, "loss": 0.1338, "num_input_tokens_seen": 28643280, "step": 13265 }, { "epoch": 2.164763458401305, "grad_norm": 0.13911455869674683, "learning_rate": 0.0009997936871596182, "loss": 0.1285, "num_input_tokens_seen": 28654192, "step": 13270 }, { "epoch": 2.16557911908646, "grad_norm": 0.11989153921604156, "learning_rate": 0.000999791637509907, "loss": 0.0864, "num_input_tokens_seen": 28665360, "step": 13275 }, { "epoch": 2.166394779771615, "grad_norm": 0.04685463383793831, "learning_rate": 0.0009997895777312843, "loss": 0.0439, "num_input_tokens_seen": 28676080, "step": 13280 }, { "epoch": 2.16721044045677, "grad_norm": 0.08106393367052078, "learning_rate": 0.0009997875078237915, "loss": 0.0558, "num_input_tokens_seen": 28688080, "step": 13285 }, { "epoch": 2.168026101141925, "grad_norm": 0.02252880111336708, "learning_rate": 0.000999785427787471, "loss": 0.0614, "num_input_tokens_seen": 28698640, "step": 13290 }, { "epoch": 2.16884176182708, "grad_norm": 0.23990119993686676, "learning_rate": 0.0009997833376223647, "loss": 0.1598, "num_input_tokens_seen": 28709488, "step": 13295 }, { "epoch": 2.169657422512235, "grad_norm": 0.08940400183200836, "learning_rate": 0.000999781237328515, "loss": 0.1469, "num_input_tokens_seen": 28719792, "step": 13300 }, { "epoch": 2.1704730831973897, "grad_norm": 0.02187115140259266, "learning_rate": 0.0009997791269059646, "loss": 0.064, "num_input_tokens_seen": 28730288, "step": 13305 }, { "epoch": 2.171288743882545, "grad_norm": 0.0489010363817215, "learning_rate": 0.0009997770063547562, "loss": 0.2072, "num_input_tokens_seen": 28741776, "step": 13310 }, { "epoch": 2.1721044045677, "grad_norm": 0.17894916236400604, "learning_rate": 0.0009997748756749327, "loss": 0.0515, "num_input_tokens_seen": 28753008, "step": 13315 }, { "epoch": 2.1729200652528546, "grad_norm": 0.21419379115104675, "learning_rate": 0.0009997727348665373, "loss": 0.1734, "num_input_tokens_seen": 28764592, "step": 13320 }, { "epoch": 2.17373572593801, "grad_norm": 0.04430823028087616, "learning_rate": 0.0009997705839296135, "loss": 0.1292, "num_input_tokens_seen": 28776048, "step": 13325 }, { "epoch": 2.1745513866231647, "grad_norm": 0.059867799282073975, "learning_rate": 0.0009997684228642049, "loss": 0.1594, "num_input_tokens_seen": 28786832, "step": 13330 }, { "epoch": 2.1753670473083195, "grad_norm": 0.04507075250148773, "learning_rate": 0.0009997662516703552, "loss": 0.111, "num_input_tokens_seen": 28797008, "step": 13335 }, { "epoch": 2.176182707993475, "grad_norm": 0.2374548316001892, "learning_rate": 0.0009997640703481082, "loss": 0.1947, "num_input_tokens_seen": 28806960, "step": 13340 }, { "epoch": 2.1769983686786296, "grad_norm": 0.07885897159576416, "learning_rate": 0.0009997618788975084, "loss": 0.1318, "num_input_tokens_seen": 28816880, "step": 13345 }, { "epoch": 2.177814029363785, "grad_norm": 0.18935911357402802, "learning_rate": 0.0009997596773186, "loss": 0.1331, "num_input_tokens_seen": 28828368, "step": 13350 }, { "epoch": 2.1786296900489397, "grad_norm": 0.04397635906934738, "learning_rate": 0.000999757465611428, "loss": 0.0748, "num_input_tokens_seen": 28839408, "step": 13355 }, { "epoch": 2.1794453507340945, "grad_norm": 0.06654397398233414, "learning_rate": 0.000999755243776037, "loss": 0.0572, "num_input_tokens_seen": 28849840, "step": 13360 }, { "epoch": 2.1802610114192498, "grad_norm": 0.33853477239608765, "learning_rate": 0.000999753011812472, "loss": 0.1696, "num_input_tokens_seen": 28860080, "step": 13365 }, { "epoch": 2.1810766721044046, "grad_norm": 0.28541481494903564, "learning_rate": 0.000999750769720778, "loss": 0.1979, "num_input_tokens_seen": 28870608, "step": 13370 }, { "epoch": 2.1818923327895594, "grad_norm": 0.21279276907444, "learning_rate": 0.0009997485175010008, "loss": 0.2584, "num_input_tokens_seen": 28881968, "step": 13375 }, { "epoch": 2.1827079934747147, "grad_norm": 0.04558209329843521, "learning_rate": 0.000999746255153186, "loss": 0.0656, "num_input_tokens_seen": 28891568, "step": 13380 }, { "epoch": 2.1835236541598695, "grad_norm": 0.027362853288650513, "learning_rate": 0.0009997439826773791, "loss": 0.046, "num_input_tokens_seen": 28901968, "step": 13385 }, { "epoch": 2.1843393148450243, "grad_norm": 0.14583677053451538, "learning_rate": 0.0009997417000736266, "loss": 0.0615, "num_input_tokens_seen": 28913360, "step": 13390 }, { "epoch": 2.1851549755301796, "grad_norm": 0.1796678900718689, "learning_rate": 0.0009997394073419747, "loss": 0.1109, "num_input_tokens_seen": 28924432, "step": 13395 }, { "epoch": 2.1859706362153344, "grad_norm": 0.21079757809638977, "learning_rate": 0.0009997371044824697, "loss": 0.0571, "num_input_tokens_seen": 28934576, "step": 13400 }, { "epoch": 2.186786296900489, "grad_norm": 0.02810826525092125, "learning_rate": 0.0009997347914951582, "loss": 0.1758, "num_input_tokens_seen": 28946032, "step": 13405 }, { "epoch": 2.1876019575856445, "grad_norm": 0.23098593950271606, "learning_rate": 0.0009997324683800872, "loss": 0.1043, "num_input_tokens_seen": 28957392, "step": 13410 }, { "epoch": 2.1884176182707993, "grad_norm": 0.0532384030520916, "learning_rate": 0.0009997301351373038, "loss": 0.0507, "num_input_tokens_seen": 28968560, "step": 13415 }, { "epoch": 2.189233278955954, "grad_norm": 0.03382715582847595, "learning_rate": 0.0009997277917668552, "loss": 0.0695, "num_input_tokens_seen": 28978448, "step": 13420 }, { "epoch": 2.1900489396411094, "grad_norm": 0.2283327728509903, "learning_rate": 0.000999725438268789, "loss": 0.1414, "num_input_tokens_seen": 28989488, "step": 13425 }, { "epoch": 2.190864600326264, "grad_norm": 0.01445755921304226, "learning_rate": 0.0009997230746431529, "loss": 0.1164, "num_input_tokens_seen": 29000112, "step": 13430 }, { "epoch": 2.1916802610114194, "grad_norm": 0.31329217553138733, "learning_rate": 0.0009997207008899946, "loss": 0.2386, "num_input_tokens_seen": 29012112, "step": 13435 }, { "epoch": 2.1924959216965743, "grad_norm": 0.11839587986469269, "learning_rate": 0.0009997183170093625, "loss": 0.0984, "num_input_tokens_seen": 29023472, "step": 13440 }, { "epoch": 2.193311582381729, "grad_norm": 0.09422369301319122, "learning_rate": 0.000999715923001305, "loss": 0.0914, "num_input_tokens_seen": 29035312, "step": 13445 }, { "epoch": 2.1941272430668843, "grad_norm": 0.0902036502957344, "learning_rate": 0.00099971351886587, "loss": 0.175, "num_input_tokens_seen": 29046736, "step": 13450 }, { "epoch": 2.194942903752039, "grad_norm": 0.1534033864736557, "learning_rate": 0.0009997111046031067, "loss": 0.0677, "num_input_tokens_seen": 29057616, "step": 13455 }, { "epoch": 2.195758564437194, "grad_norm": 0.11715641617774963, "learning_rate": 0.000999708680213064, "loss": 0.045, "num_input_tokens_seen": 29068240, "step": 13460 }, { "epoch": 2.1965742251223492, "grad_norm": 0.23075728118419647, "learning_rate": 0.000999706245695791, "loss": 0.2332, "num_input_tokens_seen": 29078320, "step": 13465 }, { "epoch": 2.197389885807504, "grad_norm": 0.08694472908973694, "learning_rate": 0.0009997038010513368, "loss": 0.0805, "num_input_tokens_seen": 29088912, "step": 13470 }, { "epoch": 2.198205546492659, "grad_norm": 0.06833475828170776, "learning_rate": 0.0009997013462797514, "loss": 0.0372, "num_input_tokens_seen": 29101264, "step": 13475 }, { "epoch": 2.199021207177814, "grad_norm": 0.17618751525878906, "learning_rate": 0.000999698881381084, "loss": 0.2906, "num_input_tokens_seen": 29111408, "step": 13480 }, { "epoch": 2.199836867862969, "grad_norm": 0.3815631866455078, "learning_rate": 0.0009996964063553851, "loss": 0.184, "num_input_tokens_seen": 29122320, "step": 13485 }, { "epoch": 2.200652528548124, "grad_norm": 0.09135878831148148, "learning_rate": 0.0009996939212027045, "loss": 0.0769, "num_input_tokens_seen": 29132816, "step": 13490 }, { "epoch": 2.201468189233279, "grad_norm": 0.10177627205848694, "learning_rate": 0.0009996914259230928, "loss": 0.1377, "num_input_tokens_seen": 29143344, "step": 13495 }, { "epoch": 2.202283849918434, "grad_norm": 0.022767700254917145, "learning_rate": 0.0009996889205166003, "loss": 0.1302, "num_input_tokens_seen": 29154096, "step": 13500 }, { "epoch": 2.203099510603589, "grad_norm": 0.08421926945447922, "learning_rate": 0.000999686404983278, "loss": 0.1896, "num_input_tokens_seen": 29163952, "step": 13505 }, { "epoch": 2.203915171288744, "grad_norm": 0.30935874581336975, "learning_rate": 0.0009996838793231771, "loss": 0.2237, "num_input_tokens_seen": 29174896, "step": 13510 }, { "epoch": 2.2047308319738987, "grad_norm": 0.10706719011068344, "learning_rate": 0.0009996813435363481, "loss": 0.0663, "num_input_tokens_seen": 29186512, "step": 13515 }, { "epoch": 2.205546492659054, "grad_norm": 0.1454295516014099, "learning_rate": 0.000999678797622843, "loss": 0.1517, "num_input_tokens_seen": 29196784, "step": 13520 }, { "epoch": 2.206362153344209, "grad_norm": 0.07734346389770508, "learning_rate": 0.000999676241582713, "loss": 0.0991, "num_input_tokens_seen": 29207760, "step": 13525 }, { "epoch": 2.2071778140293636, "grad_norm": 0.2650821805000305, "learning_rate": 0.0009996736754160102, "loss": 0.139, "num_input_tokens_seen": 29219984, "step": 13530 }, { "epoch": 2.207993474714519, "grad_norm": 0.1210259273648262, "learning_rate": 0.0009996710991227865, "loss": 0.0991, "num_input_tokens_seen": 29231728, "step": 13535 }, { "epoch": 2.2088091353996737, "grad_norm": 0.10515279322862625, "learning_rate": 0.000999668512703094, "loss": 0.0981, "num_input_tokens_seen": 29242288, "step": 13540 }, { "epoch": 2.2096247960848285, "grad_norm": 0.03792216256260872, "learning_rate": 0.0009996659161569852, "loss": 0.0719, "num_input_tokens_seen": 29253552, "step": 13545 }, { "epoch": 2.210440456769984, "grad_norm": 0.05606524273753166, "learning_rate": 0.0009996633094845127, "loss": 0.0676, "num_input_tokens_seen": 29263824, "step": 13550 }, { "epoch": 2.2112561174551386, "grad_norm": 0.05154557153582573, "learning_rate": 0.0009996606926857296, "loss": 0.0414, "num_input_tokens_seen": 29274896, "step": 13555 }, { "epoch": 2.2120717781402934, "grad_norm": 0.007821563631296158, "learning_rate": 0.0009996580657606886, "loss": 0.062, "num_input_tokens_seen": 29285680, "step": 13560 }, { "epoch": 2.2128874388254487, "grad_norm": 0.006772212218493223, "learning_rate": 0.0009996554287094428, "loss": 0.0231, "num_input_tokens_seen": 29296784, "step": 13565 }, { "epoch": 2.2137030995106035, "grad_norm": 0.22531737387180328, "learning_rate": 0.0009996527815320463, "loss": 0.0728, "num_input_tokens_seen": 29307824, "step": 13570 }, { "epoch": 2.2145187601957588, "grad_norm": 0.19521501660346985, "learning_rate": 0.000999650124228552, "loss": 0.0953, "num_input_tokens_seen": 29319472, "step": 13575 }, { "epoch": 2.2153344208809136, "grad_norm": 0.006301723886281252, "learning_rate": 0.0009996474567990142, "loss": 0.039, "num_input_tokens_seen": 29329552, "step": 13580 }, { "epoch": 2.2161500815660684, "grad_norm": 0.3316558003425598, "learning_rate": 0.0009996447792434868, "loss": 0.1384, "num_input_tokens_seen": 29340848, "step": 13585 }, { "epoch": 2.2169657422512237, "grad_norm": 0.3337273895740509, "learning_rate": 0.000999642091562024, "loss": 0.2655, "num_input_tokens_seen": 29352656, "step": 13590 }, { "epoch": 2.2177814029363785, "grad_norm": 0.025159798562526703, "learning_rate": 0.0009996393937546806, "loss": 0.0596, "num_input_tokens_seen": 29361968, "step": 13595 }, { "epoch": 2.2185970636215333, "grad_norm": 0.00534881092607975, "learning_rate": 0.000999636685821511, "loss": 0.2419, "num_input_tokens_seen": 29372368, "step": 13600 }, { "epoch": 2.2194127243066886, "grad_norm": 0.032114360481500626, "learning_rate": 0.0009996339677625702, "loss": 0.0769, "num_input_tokens_seen": 29383216, "step": 13605 }, { "epoch": 2.2202283849918434, "grad_norm": 0.14368586242198944, "learning_rate": 0.000999631239577913, "loss": 0.0505, "num_input_tokens_seen": 29394000, "step": 13610 }, { "epoch": 2.221044045676998, "grad_norm": 0.016463657841086388, "learning_rate": 0.000999628501267595, "loss": 0.0422, "num_input_tokens_seen": 29405648, "step": 13615 }, { "epoch": 2.2218597063621535, "grad_norm": 0.3516209125518799, "learning_rate": 0.0009996257528316716, "loss": 0.1869, "num_input_tokens_seen": 29416240, "step": 13620 }, { "epoch": 2.2226753670473083, "grad_norm": 0.0863688662648201, "learning_rate": 0.0009996229942701984, "loss": 0.0687, "num_input_tokens_seen": 29426864, "step": 13625 }, { "epoch": 2.223491027732463, "grad_norm": 0.03974682465195656, "learning_rate": 0.0009996202255832317, "loss": 0.0388, "num_input_tokens_seen": 29437328, "step": 13630 }, { "epoch": 2.2243066884176184, "grad_norm": 0.14925527572631836, "learning_rate": 0.000999617446770827, "loss": 0.315, "num_input_tokens_seen": 29448816, "step": 13635 }, { "epoch": 2.225122349102773, "grad_norm": 0.06979167461395264, "learning_rate": 0.0009996146578330409, "loss": 0.1381, "num_input_tokens_seen": 29460144, "step": 13640 }, { "epoch": 2.225938009787928, "grad_norm": 0.09353665262460709, "learning_rate": 0.0009996118587699302, "loss": 0.0561, "num_input_tokens_seen": 29471792, "step": 13645 }, { "epoch": 2.2267536704730833, "grad_norm": 0.4549131989479065, "learning_rate": 0.0009996090495815514, "loss": 0.1712, "num_input_tokens_seen": 29481616, "step": 13650 }, { "epoch": 2.227569331158238, "grad_norm": 0.018267210572957993, "learning_rate": 0.000999606230267961, "loss": 0.0954, "num_input_tokens_seen": 29491824, "step": 13655 }, { "epoch": 2.2283849918433933, "grad_norm": 0.035463087260723114, "learning_rate": 0.000999603400829217, "loss": 0.1225, "num_input_tokens_seen": 29501872, "step": 13660 }, { "epoch": 2.229200652528548, "grad_norm": 0.10892460495233536, "learning_rate": 0.0009996005612653762, "loss": 0.0604, "num_input_tokens_seen": 29513328, "step": 13665 }, { "epoch": 2.230016313213703, "grad_norm": 0.11330805718898773, "learning_rate": 0.000999597711576496, "loss": 0.1041, "num_input_tokens_seen": 29524944, "step": 13670 }, { "epoch": 2.2308319738988582, "grad_norm": 0.12106196582317352, "learning_rate": 0.0009995948517626347, "loss": 0.0594, "num_input_tokens_seen": 29535984, "step": 13675 }, { "epoch": 2.231647634584013, "grad_norm": 0.011277848854660988, "learning_rate": 0.0009995919818238496, "loss": 0.0779, "num_input_tokens_seen": 29547856, "step": 13680 }, { "epoch": 2.232463295269168, "grad_norm": 0.05636114999651909, "learning_rate": 0.0009995891017601996, "loss": 0.019, "num_input_tokens_seen": 29558416, "step": 13685 }, { "epoch": 2.233278955954323, "grad_norm": 0.10387800633907318, "learning_rate": 0.0009995862115717426, "loss": 0.0286, "num_input_tokens_seen": 29569328, "step": 13690 }, { "epoch": 2.234094616639478, "grad_norm": 0.10687462240457535, "learning_rate": 0.000999583311258537, "loss": 0.1339, "num_input_tokens_seen": 29580176, "step": 13695 }, { "epoch": 2.2349102773246328, "grad_norm": 0.2139943540096283, "learning_rate": 0.000999580400820642, "loss": 0.3077, "num_input_tokens_seen": 29590160, "step": 13700 }, { "epoch": 2.235725938009788, "grad_norm": 0.04784239083528519, "learning_rate": 0.0009995774802581165, "loss": 0.1522, "num_input_tokens_seen": 29600464, "step": 13705 }, { "epoch": 2.236541598694943, "grad_norm": 0.10705157369375229, "learning_rate": 0.0009995745495710194, "loss": 0.0659, "num_input_tokens_seen": 29611344, "step": 13710 }, { "epoch": 2.237357259380098, "grad_norm": 0.022201182320713997, "learning_rate": 0.0009995716087594104, "loss": 0.1477, "num_input_tokens_seen": 29621264, "step": 13715 }, { "epoch": 2.238172920065253, "grad_norm": 0.051118869334459305, "learning_rate": 0.000999568657823349, "loss": 0.3295, "num_input_tokens_seen": 29633424, "step": 13720 }, { "epoch": 2.2389885807504077, "grad_norm": 0.23491492867469788, "learning_rate": 0.000999565696762895, "loss": 0.0735, "num_input_tokens_seen": 29644656, "step": 13725 }, { "epoch": 2.239804241435563, "grad_norm": 0.07109715789556503, "learning_rate": 0.0009995627255781083, "loss": 0.1141, "num_input_tokens_seen": 29655792, "step": 13730 }, { "epoch": 2.240619902120718, "grad_norm": 0.025133494287729263, "learning_rate": 0.0009995597442690493, "loss": 0.1867, "num_input_tokens_seen": 29666704, "step": 13735 }, { "epoch": 2.2414355628058726, "grad_norm": 0.24362553656101227, "learning_rate": 0.0009995567528357785, "loss": 0.2094, "num_input_tokens_seen": 29677584, "step": 13740 }, { "epoch": 2.242251223491028, "grad_norm": 0.020591119304299355, "learning_rate": 0.0009995537512783562, "loss": 0.0953, "num_input_tokens_seen": 29687696, "step": 13745 }, { "epoch": 2.2430668841761827, "grad_norm": 0.07044732570648193, "learning_rate": 0.0009995507395968435, "loss": 0.2034, "num_input_tokens_seen": 29698416, "step": 13750 }, { "epoch": 2.2438825448613375, "grad_norm": 0.19857797026634216, "learning_rate": 0.0009995477177913014, "loss": 0.1736, "num_input_tokens_seen": 29708624, "step": 13755 }, { "epoch": 2.244698205546493, "grad_norm": 0.11759096384048462, "learning_rate": 0.0009995446858617908, "loss": 0.1793, "num_input_tokens_seen": 29720080, "step": 13760 }, { "epoch": 2.2455138662316476, "grad_norm": 0.023545170202851295, "learning_rate": 0.0009995416438083736, "loss": 0.1334, "num_input_tokens_seen": 29730288, "step": 13765 }, { "epoch": 2.2463295269168024, "grad_norm": 0.04217078909277916, "learning_rate": 0.0009995385916311112, "loss": 0.0602, "num_input_tokens_seen": 29740816, "step": 13770 }, { "epoch": 2.2471451876019577, "grad_norm": 0.02506978251039982, "learning_rate": 0.0009995355293300656, "loss": 0.1199, "num_input_tokens_seen": 29751760, "step": 13775 }, { "epoch": 2.2479608482871125, "grad_norm": 0.03564402461051941, "learning_rate": 0.0009995324569052988, "loss": 0.0892, "num_input_tokens_seen": 29761808, "step": 13780 }, { "epoch": 2.2487765089722673, "grad_norm": 0.22305072844028473, "learning_rate": 0.000999529374356873, "loss": 0.1259, "num_input_tokens_seen": 29772624, "step": 13785 }, { "epoch": 2.2495921696574226, "grad_norm": 0.008721324615180492, "learning_rate": 0.0009995262816848507, "loss": 0.1157, "num_input_tokens_seen": 29782512, "step": 13790 }, { "epoch": 2.2504078303425774, "grad_norm": 0.21320238709449768, "learning_rate": 0.0009995231788892949, "loss": 0.278, "num_input_tokens_seen": 29792720, "step": 13795 }, { "epoch": 2.2512234910277327, "grad_norm": 0.016878070309758186, "learning_rate": 0.000999520065970268, "loss": 0.0515, "num_input_tokens_seen": 29803408, "step": 13800 }, { "epoch": 2.2520391517128875, "grad_norm": 0.08246765285730362, "learning_rate": 0.000999516942927833, "loss": 0.1181, "num_input_tokens_seen": 29813264, "step": 13805 }, { "epoch": 2.2528548123980423, "grad_norm": 0.07091167569160461, "learning_rate": 0.0009995138097620537, "loss": 0.2478, "num_input_tokens_seen": 29823856, "step": 13810 }, { "epoch": 2.2536704730831976, "grad_norm": 0.031819622963666916, "learning_rate": 0.0009995106664729934, "loss": 0.1118, "num_input_tokens_seen": 29835536, "step": 13815 }, { "epoch": 2.2544861337683524, "grad_norm": 0.09289128333330154, "learning_rate": 0.0009995075130607158, "loss": 0.1376, "num_input_tokens_seen": 29845520, "step": 13820 }, { "epoch": 2.255301794453507, "grad_norm": 0.1674821972846985, "learning_rate": 0.0009995043495252848, "loss": 0.1035, "num_input_tokens_seen": 29856720, "step": 13825 }, { "epoch": 2.2561174551386625, "grad_norm": 0.06331602483987808, "learning_rate": 0.0009995011758667644, "loss": 0.1134, "num_input_tokens_seen": 29867312, "step": 13830 }, { "epoch": 2.2569331158238173, "grad_norm": 0.2317790687084198, "learning_rate": 0.000999497992085219, "loss": 0.1001, "num_input_tokens_seen": 29877104, "step": 13835 }, { "epoch": 2.257748776508972, "grad_norm": 0.009618605487048626, "learning_rate": 0.0009994947981807132, "loss": 0.1615, "num_input_tokens_seen": 29886896, "step": 13840 }, { "epoch": 2.2585644371941274, "grad_norm": 0.12687398493289948, "learning_rate": 0.0009994915941533115, "loss": 0.0873, "num_input_tokens_seen": 29897872, "step": 13845 }, { "epoch": 2.259380097879282, "grad_norm": 0.017680225893855095, "learning_rate": 0.0009994883800030791, "loss": 0.1049, "num_input_tokens_seen": 29908368, "step": 13850 }, { "epoch": 2.2601957585644374, "grad_norm": 0.03202289342880249, "learning_rate": 0.0009994851557300812, "loss": 0.1868, "num_input_tokens_seen": 29919920, "step": 13855 }, { "epoch": 2.2610114192495923, "grad_norm": 0.08714950829744339, "learning_rate": 0.000999481921334383, "loss": 0.1246, "num_input_tokens_seen": 29929392, "step": 13860 }, { "epoch": 2.261827079934747, "grad_norm": 0.47049298882484436, "learning_rate": 0.0009994786768160496, "loss": 0.11, "num_input_tokens_seen": 29939376, "step": 13865 }, { "epoch": 2.262642740619902, "grad_norm": 0.20328953862190247, "learning_rate": 0.0009994754221751474, "loss": 0.0907, "num_input_tokens_seen": 29950192, "step": 13870 }, { "epoch": 2.263458401305057, "grad_norm": 0.03649228438735008, "learning_rate": 0.0009994721574117422, "loss": 0.0716, "num_input_tokens_seen": 29960848, "step": 13875 }, { "epoch": 2.264274061990212, "grad_norm": 0.30661365389823914, "learning_rate": 0.0009994688825259001, "loss": 0.1113, "num_input_tokens_seen": 29972240, "step": 13880 }, { "epoch": 2.2650897226753672, "grad_norm": 0.20941175520420074, "learning_rate": 0.0009994655975176874, "loss": 0.2321, "num_input_tokens_seen": 29983088, "step": 13885 }, { "epoch": 2.265905383360522, "grad_norm": 0.031798407435417175, "learning_rate": 0.0009994623023871709, "loss": 0.1626, "num_input_tokens_seen": 29993744, "step": 13890 }, { "epoch": 2.266721044045677, "grad_norm": 0.07280954718589783, "learning_rate": 0.000999458997134417, "loss": 0.0874, "num_input_tokens_seen": 30003600, "step": 13895 }, { "epoch": 2.267536704730832, "grad_norm": 0.10561642050743103, "learning_rate": 0.000999455681759493, "loss": 0.0888, "num_input_tokens_seen": 30014864, "step": 13900 }, { "epoch": 2.268352365415987, "grad_norm": 0.38468343019485474, "learning_rate": 0.0009994523562624662, "loss": 0.2435, "num_input_tokens_seen": 30025392, "step": 13905 }, { "epoch": 2.2691680261011418, "grad_norm": 0.09473878890275955, "learning_rate": 0.0009994490206434038, "loss": 0.1023, "num_input_tokens_seen": 30036592, "step": 13910 }, { "epoch": 2.269983686786297, "grad_norm": 0.15248925983905792, "learning_rate": 0.000999445674902373, "loss": 0.1328, "num_input_tokens_seen": 30047504, "step": 13915 }, { "epoch": 2.270799347471452, "grad_norm": 0.22597423195838928, "learning_rate": 0.0009994423190394423, "loss": 0.2217, "num_input_tokens_seen": 30058992, "step": 13920 }, { "epoch": 2.2716150081566067, "grad_norm": 0.05791716277599335, "learning_rate": 0.0009994389530546795, "loss": 0.0545, "num_input_tokens_seen": 30070448, "step": 13925 }, { "epoch": 2.272430668841762, "grad_norm": 0.2251387983560562, "learning_rate": 0.0009994355769481524, "loss": 0.1305, "num_input_tokens_seen": 30082064, "step": 13930 }, { "epoch": 2.2732463295269167, "grad_norm": 0.1726098656654358, "learning_rate": 0.00099943219071993, "loss": 0.2394, "num_input_tokens_seen": 30091888, "step": 13935 }, { "epoch": 2.274061990212072, "grad_norm": 0.13501060009002686, "learning_rate": 0.0009994287943700807, "loss": 0.1341, "num_input_tokens_seen": 30102480, "step": 13940 }, { "epoch": 2.274877650897227, "grad_norm": 0.13204216957092285, "learning_rate": 0.0009994253878986732, "loss": 0.1126, "num_input_tokens_seen": 30113328, "step": 13945 }, { "epoch": 2.2756933115823816, "grad_norm": 0.060685381293296814, "learning_rate": 0.0009994219713057768, "loss": 0.2305, "num_input_tokens_seen": 30123696, "step": 13950 }, { "epoch": 2.2765089722675365, "grad_norm": 0.07404947280883789, "learning_rate": 0.0009994185445914604, "loss": 0.0405, "num_input_tokens_seen": 30134256, "step": 13955 }, { "epoch": 2.2773246329526917, "grad_norm": 0.1257973313331604, "learning_rate": 0.000999415107755794, "loss": 0.1392, "num_input_tokens_seen": 30145840, "step": 13960 }, { "epoch": 2.2781402936378465, "grad_norm": 0.0612579882144928, "learning_rate": 0.0009994116607988464, "loss": 0.1617, "num_input_tokens_seen": 30155792, "step": 13965 }, { "epoch": 2.278955954323002, "grad_norm": 0.17419999837875366, "learning_rate": 0.0009994082037206881, "loss": 0.1197, "num_input_tokens_seen": 30168848, "step": 13970 }, { "epoch": 2.2797716150081566, "grad_norm": 0.02619752287864685, "learning_rate": 0.0009994047365213892, "loss": 0.0719, "num_input_tokens_seen": 30180016, "step": 13975 }, { "epoch": 2.2805872756933114, "grad_norm": 0.373192697763443, "learning_rate": 0.0009994012592010196, "loss": 0.2148, "num_input_tokens_seen": 30191728, "step": 13980 }, { "epoch": 2.2814029363784667, "grad_norm": 0.027051806449890137, "learning_rate": 0.00099939777175965, "loss": 0.0659, "num_input_tokens_seen": 30202096, "step": 13985 }, { "epoch": 2.2822185970636215, "grad_norm": 0.03005523979663849, "learning_rate": 0.000999394274197351, "loss": 0.0383, "num_input_tokens_seen": 30212848, "step": 13990 }, { "epoch": 2.2830342577487763, "grad_norm": 0.050267696380615234, "learning_rate": 0.0009993907665141934, "loss": 0.0567, "num_input_tokens_seen": 30222640, "step": 13995 }, { "epoch": 2.2838499184339316, "grad_norm": 0.09234417974948883, "learning_rate": 0.0009993872487102486, "loss": 0.155, "num_input_tokens_seen": 30233552, "step": 14000 }, { "epoch": 2.2846655791190864, "grad_norm": 0.2862367331981659, "learning_rate": 0.0009993837207855876, "loss": 0.2572, "num_input_tokens_seen": 30243888, "step": 14005 }, { "epoch": 2.2854812398042412, "grad_norm": 0.01519771758466959, "learning_rate": 0.000999380182740282, "loss": 0.0294, "num_input_tokens_seen": 30254064, "step": 14010 }, { "epoch": 2.2862969004893965, "grad_norm": 0.08670762926340103, "learning_rate": 0.0009993766345744036, "loss": 0.0996, "num_input_tokens_seen": 30265616, "step": 14015 }, { "epoch": 2.2871125611745513, "grad_norm": 0.2927394509315491, "learning_rate": 0.000999373076288024, "loss": 0.155, "num_input_tokens_seen": 30277360, "step": 14020 }, { "epoch": 2.2879282218597066, "grad_norm": 0.02695581130683422, "learning_rate": 0.0009993695078812156, "loss": 0.0549, "num_input_tokens_seen": 30289040, "step": 14025 }, { "epoch": 2.2887438825448614, "grad_norm": 0.04684106260538101, "learning_rate": 0.0009993659293540506, "loss": 0.1991, "num_input_tokens_seen": 30299792, "step": 14030 }, { "epoch": 2.289559543230016, "grad_norm": 0.22159956395626068, "learning_rate": 0.0009993623407066016, "loss": 0.1252, "num_input_tokens_seen": 30311024, "step": 14035 }, { "epoch": 2.2903752039151715, "grad_norm": 0.029767746105790138, "learning_rate": 0.0009993587419389412, "loss": 0.048, "num_input_tokens_seen": 30322448, "step": 14040 }, { "epoch": 2.2911908646003263, "grad_norm": 0.2297033816576004, "learning_rate": 0.0009993551330511423, "loss": 0.1266, "num_input_tokens_seen": 30334768, "step": 14045 }, { "epoch": 2.292006525285481, "grad_norm": 0.21749313175678253, "learning_rate": 0.0009993515140432783, "loss": 0.2368, "num_input_tokens_seen": 30346096, "step": 14050 }, { "epoch": 2.2928221859706364, "grad_norm": 0.07819768041372299, "learning_rate": 0.0009993478849154224, "loss": 0.0757, "num_input_tokens_seen": 30356048, "step": 14055 }, { "epoch": 2.293637846655791, "grad_norm": 0.17430980503559113, "learning_rate": 0.0009993442456676482, "loss": 0.0919, "num_input_tokens_seen": 30366384, "step": 14060 }, { "epoch": 2.294453507340946, "grad_norm": 0.2119426727294922, "learning_rate": 0.0009993405963000294, "loss": 0.19, "num_input_tokens_seen": 30377520, "step": 14065 }, { "epoch": 2.2952691680261013, "grad_norm": 0.1759057492017746, "learning_rate": 0.00099933693681264, "loss": 0.0983, "num_input_tokens_seen": 30389072, "step": 14070 }, { "epoch": 2.296084828711256, "grad_norm": 0.02191172167658806, "learning_rate": 0.000999333267205554, "loss": 0.0574, "num_input_tokens_seen": 30399312, "step": 14075 }, { "epoch": 2.2969004893964113, "grad_norm": 0.2488049864768982, "learning_rate": 0.000999329587478846, "loss": 0.1299, "num_input_tokens_seen": 30410096, "step": 14080 }, { "epoch": 2.297716150081566, "grad_norm": 0.045439351350069046, "learning_rate": 0.0009993258976325903, "loss": 0.095, "num_input_tokens_seen": 30420432, "step": 14085 }, { "epoch": 2.298531810766721, "grad_norm": 0.09539053589105606, "learning_rate": 0.0009993221976668618, "loss": 0.0812, "num_input_tokens_seen": 30431728, "step": 14090 }, { "epoch": 2.299347471451876, "grad_norm": 0.041822321712970734, "learning_rate": 0.0009993184875817357, "loss": 0.143, "num_input_tokens_seen": 30441840, "step": 14095 }, { "epoch": 2.300163132137031, "grad_norm": 0.04555369168519974, "learning_rate": 0.0009993147673772868, "loss": 0.0384, "num_input_tokens_seen": 30452880, "step": 14100 }, { "epoch": 2.300978792822186, "grad_norm": 0.015077256597578526, "learning_rate": 0.000999311037053591, "loss": 0.0492, "num_input_tokens_seen": 30463408, "step": 14105 }, { "epoch": 2.301794453507341, "grad_norm": 0.03826287388801575, "learning_rate": 0.0009993072966107235, "loss": 0.1807, "num_input_tokens_seen": 30473264, "step": 14110 }, { "epoch": 2.302610114192496, "grad_norm": 0.12069046497344971, "learning_rate": 0.0009993035460487602, "loss": 0.0914, "num_input_tokens_seen": 30484304, "step": 14115 }, { "epoch": 2.3034257748776508, "grad_norm": 0.3424343168735504, "learning_rate": 0.0009992997853677773, "loss": 0.2685, "num_input_tokens_seen": 30494832, "step": 14120 }, { "epoch": 2.304241435562806, "grad_norm": 0.08361130207777023, "learning_rate": 0.0009992960145678506, "loss": 0.2662, "num_input_tokens_seen": 30505360, "step": 14125 }, { "epoch": 2.305057096247961, "grad_norm": 0.043682657182216644, "learning_rate": 0.0009992922336490568, "loss": 0.1662, "num_input_tokens_seen": 30516944, "step": 14130 }, { "epoch": 2.3058727569331157, "grad_norm": 0.041114646941423416, "learning_rate": 0.0009992884426114725, "loss": 0.1079, "num_input_tokens_seen": 30528048, "step": 14135 }, { "epoch": 2.306688417618271, "grad_norm": 0.1137770414352417, "learning_rate": 0.0009992846414551746, "loss": 0.1043, "num_input_tokens_seen": 30538416, "step": 14140 }, { "epoch": 2.3075040783034257, "grad_norm": 0.01884693093597889, "learning_rate": 0.00099928083018024, "loss": 0.0412, "num_input_tokens_seen": 30548272, "step": 14145 }, { "epoch": 2.3083197389885806, "grad_norm": 0.041567154228687286, "learning_rate": 0.000999277008786746, "loss": 0.1254, "num_input_tokens_seen": 30559952, "step": 14150 }, { "epoch": 2.309135399673736, "grad_norm": 0.11152085661888123, "learning_rate": 0.0009992731772747701, "loss": 0.0495, "num_input_tokens_seen": 30570448, "step": 14155 }, { "epoch": 2.3099510603588906, "grad_norm": 0.026471486315131187, "learning_rate": 0.0009992693356443898, "loss": 0.0481, "num_input_tokens_seen": 30580016, "step": 14160 }, { "epoch": 2.310766721044046, "grad_norm": 0.23874253034591675, "learning_rate": 0.0009992654838956831, "loss": 0.073, "num_input_tokens_seen": 30590704, "step": 14165 }, { "epoch": 2.3115823817292007, "grad_norm": 0.03927146643400192, "learning_rate": 0.000999261622028728, "loss": 0.0911, "num_input_tokens_seen": 30599248, "step": 14170 }, { "epoch": 2.3123980424143555, "grad_norm": 0.003558736527338624, "learning_rate": 0.0009992577500436027, "loss": 0.1807, "num_input_tokens_seen": 30610448, "step": 14175 }, { "epoch": 2.3132137030995104, "grad_norm": 0.134797140955925, "learning_rate": 0.0009992538679403857, "loss": 0.0568, "num_input_tokens_seen": 30622608, "step": 14180 }, { "epoch": 2.3140293637846656, "grad_norm": 0.03999514505267143, "learning_rate": 0.0009992499757191559, "loss": 0.0334, "num_input_tokens_seen": 30633136, "step": 14185 }, { "epoch": 2.3148450244698204, "grad_norm": 0.05780665948987007, "learning_rate": 0.000999246073379992, "loss": 0.1528, "num_input_tokens_seen": 30645232, "step": 14190 }, { "epoch": 2.3156606851549757, "grad_norm": 0.13899096846580505, "learning_rate": 0.0009992421609229729, "loss": 0.0325, "num_input_tokens_seen": 30656688, "step": 14195 }, { "epoch": 2.3164763458401305, "grad_norm": 0.048909179866313934, "learning_rate": 0.0009992382383481782, "loss": 0.0918, "num_input_tokens_seen": 30668112, "step": 14200 }, { "epoch": 2.3172920065252853, "grad_norm": 0.09822700917720795, "learning_rate": 0.0009992343056556873, "loss": 0.0506, "num_input_tokens_seen": 30678384, "step": 14205 }, { "epoch": 2.3181076672104406, "grad_norm": 0.07393937557935715, "learning_rate": 0.0009992303628455796, "loss": 0.0419, "num_input_tokens_seen": 30688624, "step": 14210 }, { "epoch": 2.3189233278955954, "grad_norm": 0.015075696632266045, "learning_rate": 0.0009992264099179355, "loss": 0.2, "num_input_tokens_seen": 30699440, "step": 14215 }, { "epoch": 2.3197389885807502, "grad_norm": 0.009084481745958328, "learning_rate": 0.000999222446872835, "loss": 0.1148, "num_input_tokens_seen": 30710608, "step": 14220 }, { "epoch": 2.3205546492659055, "grad_norm": 0.10130821913480759, "learning_rate": 0.0009992184737103583, "loss": 0.2459, "num_input_tokens_seen": 30721008, "step": 14225 }, { "epoch": 2.3213703099510603, "grad_norm": 0.007852212525904179, "learning_rate": 0.0009992144904305857, "loss": 0.1789, "num_input_tokens_seen": 30732560, "step": 14230 }, { "epoch": 2.322185970636215, "grad_norm": 0.01886787824332714, "learning_rate": 0.0009992104970335982, "loss": 0.1086, "num_input_tokens_seen": 30743056, "step": 14235 }, { "epoch": 2.3230016313213704, "grad_norm": 0.07244084775447845, "learning_rate": 0.0009992064935194767, "loss": 0.2001, "num_input_tokens_seen": 30753040, "step": 14240 }, { "epoch": 2.323817292006525, "grad_norm": 0.05422874540090561, "learning_rate": 0.0009992024798883025, "loss": 0.023, "num_input_tokens_seen": 30764912, "step": 14245 }, { "epoch": 2.3246329526916805, "grad_norm": 0.04944729804992676, "learning_rate": 0.0009991984561401566, "loss": 0.0801, "num_input_tokens_seen": 30776112, "step": 14250 }, { "epoch": 2.3254486133768353, "grad_norm": 0.10771771520376205, "learning_rate": 0.0009991944222751208, "loss": 0.1712, "num_input_tokens_seen": 30785968, "step": 14255 }, { "epoch": 2.32626427406199, "grad_norm": 0.28227928280830383, "learning_rate": 0.0009991903782932765, "loss": 0.2192, "num_input_tokens_seen": 30796048, "step": 14260 }, { "epoch": 2.3270799347471454, "grad_norm": 0.05053332820534706, "learning_rate": 0.0009991863241947062, "loss": 0.0618, "num_input_tokens_seen": 30806672, "step": 14265 }, { "epoch": 2.3278955954323, "grad_norm": 0.26253464818000793, "learning_rate": 0.0009991822599794916, "loss": 0.1352, "num_input_tokens_seen": 30815856, "step": 14270 }, { "epoch": 2.328711256117455, "grad_norm": 0.23097515106201172, "learning_rate": 0.0009991781856477156, "loss": 0.1136, "num_input_tokens_seen": 30826896, "step": 14275 }, { "epoch": 2.3295269168026103, "grad_norm": 0.10402628779411316, "learning_rate": 0.00099917410119946, "loss": 0.2063, "num_input_tokens_seen": 30837232, "step": 14280 }, { "epoch": 2.330342577487765, "grad_norm": 0.09892097115516663, "learning_rate": 0.0009991700066348081, "loss": 0.0867, "num_input_tokens_seen": 30847632, "step": 14285 }, { "epoch": 2.33115823817292, "grad_norm": 0.2751719057559967, "learning_rate": 0.000999165901953843, "loss": 0.3484, "num_input_tokens_seen": 30858576, "step": 14290 }, { "epoch": 2.331973898858075, "grad_norm": 0.08577212691307068, "learning_rate": 0.0009991617871566473, "loss": 0.1163, "num_input_tokens_seen": 30869040, "step": 14295 }, { "epoch": 2.33278955954323, "grad_norm": 0.10814081877470016, "learning_rate": 0.000999157662243305, "loss": 0.18, "num_input_tokens_seen": 30879440, "step": 14300 }, { "epoch": 2.3336052202283852, "grad_norm": 0.03923700749874115, "learning_rate": 0.0009991535272138995, "loss": 0.2457, "num_input_tokens_seen": 30888464, "step": 14305 }, { "epoch": 2.33442088091354, "grad_norm": 0.05613480135798454, "learning_rate": 0.0009991493820685142, "loss": 0.0754, "num_input_tokens_seen": 30899280, "step": 14310 }, { "epoch": 2.335236541598695, "grad_norm": 0.01724756322801113, "learning_rate": 0.000999145226807234, "loss": 0.0956, "num_input_tokens_seen": 30909936, "step": 14315 }, { "epoch": 2.3360522022838497, "grad_norm": 0.01817495748400688, "learning_rate": 0.000999141061430142, "loss": 0.0973, "num_input_tokens_seen": 30919216, "step": 14320 }, { "epoch": 2.336867862969005, "grad_norm": 0.03439074754714966, "learning_rate": 0.0009991368859373236, "loss": 0.0886, "num_input_tokens_seen": 30930192, "step": 14325 }, { "epoch": 2.3376835236541598, "grad_norm": 0.036122024059295654, "learning_rate": 0.0009991327003288626, "loss": 0.1912, "num_input_tokens_seen": 30941392, "step": 14330 }, { "epoch": 2.338499184339315, "grad_norm": 0.018994096666574478, "learning_rate": 0.0009991285046048446, "loss": 0.1269, "num_input_tokens_seen": 30951472, "step": 14335 }, { "epoch": 2.33931484502447, "grad_norm": 0.11140989512205124, "learning_rate": 0.0009991242987653541, "loss": 0.0932, "num_input_tokens_seen": 30961744, "step": 14340 }, { "epoch": 2.3401305057096247, "grad_norm": 0.1298808455467224, "learning_rate": 0.0009991200828104766, "loss": 0.1101, "num_input_tokens_seen": 30972752, "step": 14345 }, { "epoch": 2.34094616639478, "grad_norm": 0.42777135968208313, "learning_rate": 0.0009991158567402973, "loss": 0.2285, "num_input_tokens_seen": 30983184, "step": 14350 }, { "epoch": 2.3417618270799347, "grad_norm": 0.12142334878444672, "learning_rate": 0.0009991116205549022, "loss": 0.1063, "num_input_tokens_seen": 30993936, "step": 14355 }, { "epoch": 2.3425774877650896, "grad_norm": 0.2485359013080597, "learning_rate": 0.0009991073742543768, "loss": 0.1631, "num_input_tokens_seen": 31004432, "step": 14360 }, { "epoch": 2.343393148450245, "grad_norm": 0.09898041188716888, "learning_rate": 0.0009991031178388072, "loss": 0.1205, "num_input_tokens_seen": 31016144, "step": 14365 }, { "epoch": 2.3442088091353996, "grad_norm": 0.32259541749954224, "learning_rate": 0.0009990988513082799, "loss": 0.2331, "num_input_tokens_seen": 31026480, "step": 14370 }, { "epoch": 2.3450244698205545, "grad_norm": 0.03784336522221565, "learning_rate": 0.0009990945746628812, "loss": 0.1051, "num_input_tokens_seen": 31038416, "step": 14375 }, { "epoch": 2.3458401305057097, "grad_norm": 0.03259739652276039, "learning_rate": 0.0009990902879026978, "loss": 0.113, "num_input_tokens_seen": 31050128, "step": 14380 }, { "epoch": 2.3466557911908645, "grad_norm": 0.06821563094854355, "learning_rate": 0.0009990859910278167, "loss": 0.0726, "num_input_tokens_seen": 31062288, "step": 14385 }, { "epoch": 2.34747145187602, "grad_norm": 0.03981294855475426, "learning_rate": 0.0009990816840383247, "loss": 0.0996, "num_input_tokens_seen": 31072688, "step": 14390 }, { "epoch": 2.3482871125611746, "grad_norm": 0.12179841101169586, "learning_rate": 0.0009990773669343092, "loss": 0.257, "num_input_tokens_seen": 31082640, "step": 14395 }, { "epoch": 2.3491027732463294, "grad_norm": 0.3487095236778259, "learning_rate": 0.0009990730397158578, "loss": 0.1542, "num_input_tokens_seen": 31093936, "step": 14400 }, { "epoch": 2.3499184339314847, "grad_norm": 0.1869436651468277, "learning_rate": 0.0009990687023830583, "loss": 0.2426, "num_input_tokens_seen": 31105424, "step": 14405 }, { "epoch": 2.3507340946166395, "grad_norm": 0.12799769639968872, "learning_rate": 0.0009990643549359982, "loss": 0.2636, "num_input_tokens_seen": 31117552, "step": 14410 }, { "epoch": 2.3515497553017943, "grad_norm": 0.1660579890012741, "learning_rate": 0.0009990599973747657, "loss": 0.2163, "num_input_tokens_seen": 31127920, "step": 14415 }, { "epoch": 2.3523654159869496, "grad_norm": 0.03344021737575531, "learning_rate": 0.0009990556296994497, "loss": 0.1268, "num_input_tokens_seen": 31138352, "step": 14420 }, { "epoch": 2.3531810766721044, "grad_norm": 0.1717986762523651, "learning_rate": 0.000999051251910138, "loss": 0.1091, "num_input_tokens_seen": 31148144, "step": 14425 }, { "epoch": 2.3539967373572592, "grad_norm": 0.05800437182188034, "learning_rate": 0.0009990468640069196, "loss": 0.2435, "num_input_tokens_seen": 31160112, "step": 14430 }, { "epoch": 2.3548123980424145, "grad_norm": 0.06847093254327774, "learning_rate": 0.0009990424659898833, "loss": 0.0659, "num_input_tokens_seen": 31169808, "step": 14435 }, { "epoch": 2.3556280587275693, "grad_norm": 0.2985936403274536, "learning_rate": 0.0009990380578591186, "loss": 0.2181, "num_input_tokens_seen": 31179888, "step": 14440 }, { "epoch": 2.356443719412724, "grad_norm": 0.08155405521392822, "learning_rate": 0.0009990336396147144, "loss": 0.0948, "num_input_tokens_seen": 31190128, "step": 14445 }, { "epoch": 2.3572593800978794, "grad_norm": 0.029539166018366814, "learning_rate": 0.0009990292112567606, "loss": 0.0508, "num_input_tokens_seen": 31201744, "step": 14450 }, { "epoch": 2.358075040783034, "grad_norm": 0.048253558576107025, "learning_rate": 0.0009990247727853466, "loss": 0.31, "num_input_tokens_seen": 31211952, "step": 14455 }, { "epoch": 2.358890701468189, "grad_norm": 0.12014076858758926, "learning_rate": 0.0009990203242005626, "loss": 0.0809, "num_input_tokens_seen": 31223344, "step": 14460 }, { "epoch": 2.3597063621533443, "grad_norm": 0.022501321509480476, "learning_rate": 0.0009990158655024985, "loss": 0.1639, "num_input_tokens_seen": 31233584, "step": 14465 }, { "epoch": 2.360522022838499, "grad_norm": 0.045569464564323425, "learning_rate": 0.0009990113966912451, "loss": 0.1294, "num_input_tokens_seen": 31244816, "step": 14470 }, { "epoch": 2.3613376835236544, "grad_norm": 0.1737251579761505, "learning_rate": 0.0009990069177668926, "loss": 0.174, "num_input_tokens_seen": 31254736, "step": 14475 }, { "epoch": 2.362153344208809, "grad_norm": 0.09950960427522659, "learning_rate": 0.0009990024287295318, "loss": 0.1206, "num_input_tokens_seen": 31265904, "step": 14480 }, { "epoch": 2.362969004893964, "grad_norm": 0.07508999854326248, "learning_rate": 0.000998997929579254, "loss": 0.1946, "num_input_tokens_seen": 31276624, "step": 14485 }, { "epoch": 2.3637846655791193, "grad_norm": 0.05371393263339996, "learning_rate": 0.0009989934203161498, "loss": 0.0525, "num_input_tokens_seen": 31287728, "step": 14490 }, { "epoch": 2.364600326264274, "grad_norm": 0.10726862400770187, "learning_rate": 0.0009989889009403112, "loss": 0.1955, "num_input_tokens_seen": 31298704, "step": 14495 }, { "epoch": 2.365415986949429, "grad_norm": 0.02141406200826168, "learning_rate": 0.0009989843714518294, "loss": 0.0563, "num_input_tokens_seen": 31308816, "step": 14500 }, { "epoch": 2.366231647634584, "grad_norm": 0.12453465163707733, "learning_rate": 0.0009989798318507962, "loss": 0.1305, "num_input_tokens_seen": 31319536, "step": 14505 }, { "epoch": 2.367047308319739, "grad_norm": 0.10753603279590607, "learning_rate": 0.0009989752821373038, "loss": 0.0588, "num_input_tokens_seen": 31331888, "step": 14510 }, { "epoch": 2.367862969004894, "grad_norm": 0.24612200260162354, "learning_rate": 0.0009989707223114444, "loss": 0.096, "num_input_tokens_seen": 31342672, "step": 14515 }, { "epoch": 2.368678629690049, "grad_norm": 0.013971432112157345, "learning_rate": 0.0009989661523733102, "loss": 0.165, "num_input_tokens_seen": 31354544, "step": 14520 }, { "epoch": 2.369494290375204, "grad_norm": 0.1124473512172699, "learning_rate": 0.000998961572322994, "loss": 0.0956, "num_input_tokens_seen": 31366704, "step": 14525 }, { "epoch": 2.370309951060359, "grad_norm": 0.018222004175186157, "learning_rate": 0.0009989569821605886, "loss": 0.1712, "num_input_tokens_seen": 31377648, "step": 14530 }, { "epoch": 2.371125611745514, "grad_norm": 0.10997959226369858, "learning_rate": 0.0009989523818861867, "loss": 0.1332, "num_input_tokens_seen": 31386512, "step": 14535 }, { "epoch": 2.3719412724306688, "grad_norm": 0.1293511986732483, "learning_rate": 0.0009989477714998822, "loss": 0.2394, "num_input_tokens_seen": 31398736, "step": 14540 }, { "epoch": 2.3727569331158236, "grad_norm": 0.011137726716697216, "learning_rate": 0.000998943151001768, "loss": 0.0485, "num_input_tokens_seen": 31409808, "step": 14545 }, { "epoch": 2.373572593800979, "grad_norm": 0.060650866478681564, "learning_rate": 0.0009989385203919379, "loss": 0.09, "num_input_tokens_seen": 31419856, "step": 14550 }, { "epoch": 2.3743882544861337, "grad_norm": 0.038716722279787064, "learning_rate": 0.0009989338796704856, "loss": 0.1566, "num_input_tokens_seen": 31430064, "step": 14555 }, { "epoch": 2.375203915171289, "grad_norm": 0.26219359040260315, "learning_rate": 0.0009989292288375053, "loss": 0.1327, "num_input_tokens_seen": 31440496, "step": 14560 }, { "epoch": 2.3760195758564437, "grad_norm": 0.06770147383213043, "learning_rate": 0.0009989245678930915, "loss": 0.236, "num_input_tokens_seen": 31449520, "step": 14565 }, { "epoch": 2.3768352365415986, "grad_norm": 0.03615204617381096, "learning_rate": 0.0009989198968373381, "loss": 0.1816, "num_input_tokens_seen": 31460080, "step": 14570 }, { "epoch": 2.377650897226754, "grad_norm": 0.31093910336494446, "learning_rate": 0.0009989152156703403, "loss": 0.2471, "num_input_tokens_seen": 31470480, "step": 14575 }, { "epoch": 2.3784665579119086, "grad_norm": 0.19514016807079315, "learning_rate": 0.0009989105243921926, "loss": 0.1415, "num_input_tokens_seen": 31481296, "step": 14580 }, { "epoch": 2.3792822185970635, "grad_norm": 0.07342322915792465, "learning_rate": 0.0009989058230029904, "loss": 0.0911, "num_input_tokens_seen": 31491824, "step": 14585 }, { "epoch": 2.3800978792822187, "grad_norm": 0.13617360591888428, "learning_rate": 0.0009989011115028286, "loss": 0.1114, "num_input_tokens_seen": 31503216, "step": 14590 }, { "epoch": 2.3809135399673735, "grad_norm": 0.039218802005052567, "learning_rate": 0.0009988963898918029, "loss": 0.1165, "num_input_tokens_seen": 31513520, "step": 14595 }, { "epoch": 2.3817292006525284, "grad_norm": 0.03899117186665535, "learning_rate": 0.000998891658170009, "loss": 0.0966, "num_input_tokens_seen": 31524720, "step": 14600 }, { "epoch": 2.3825448613376836, "grad_norm": 0.3107498586177826, "learning_rate": 0.0009988869163375428, "loss": 0.1651, "num_input_tokens_seen": 31535536, "step": 14605 }, { "epoch": 2.3833605220228384, "grad_norm": 0.04064207524061203, "learning_rate": 0.0009988821643945002, "loss": 0.1369, "num_input_tokens_seen": 31545040, "step": 14610 }, { "epoch": 2.3841761827079937, "grad_norm": 0.013105900026857853, "learning_rate": 0.0009988774023409776, "loss": 0.0735, "num_input_tokens_seen": 31554672, "step": 14615 }, { "epoch": 2.3849918433931485, "grad_norm": 0.2231854945421219, "learning_rate": 0.0009988726301770718, "loss": 0.1178, "num_input_tokens_seen": 31564464, "step": 14620 }, { "epoch": 2.3858075040783033, "grad_norm": 0.0390477329492569, "learning_rate": 0.0009988678479028793, "loss": 0.1123, "num_input_tokens_seen": 31575280, "step": 14625 }, { "epoch": 2.3866231647634586, "grad_norm": 0.11371506750583649, "learning_rate": 0.000998863055518497, "loss": 0.0938, "num_input_tokens_seen": 31585136, "step": 14630 }, { "epoch": 2.3874388254486134, "grad_norm": 0.35495874285697937, "learning_rate": 0.0009988582530240217, "loss": 0.1405, "num_input_tokens_seen": 31595120, "step": 14635 }, { "epoch": 2.3882544861337682, "grad_norm": 0.041537411510944366, "learning_rate": 0.0009988534404195516, "loss": 0.0971, "num_input_tokens_seen": 31606192, "step": 14640 }, { "epoch": 2.3890701468189235, "grad_norm": 0.05421500280499458, "learning_rate": 0.000998848617705183, "loss": 0.1193, "num_input_tokens_seen": 31617424, "step": 14645 }, { "epoch": 2.3898858075040783, "grad_norm": 0.13569660484790802, "learning_rate": 0.000998843784881015, "loss": 0.1259, "num_input_tokens_seen": 31627408, "step": 14650 }, { "epoch": 2.390701468189233, "grad_norm": 0.27538958191871643, "learning_rate": 0.0009988389419471446, "loss": 0.0717, "num_input_tokens_seen": 31638064, "step": 14655 }, { "epoch": 2.3915171288743884, "grad_norm": 0.4632192850112915, "learning_rate": 0.0009988340889036701, "loss": 0.199, "num_input_tokens_seen": 31649168, "step": 14660 }, { "epoch": 2.392332789559543, "grad_norm": 0.023037873208522797, "learning_rate": 0.0009988292257506902, "loss": 0.0474, "num_input_tokens_seen": 31659152, "step": 14665 }, { "epoch": 2.393148450244698, "grad_norm": 0.11104388535022736, "learning_rate": 0.000998824352488303, "loss": 0.0703, "num_input_tokens_seen": 31669872, "step": 14670 }, { "epoch": 2.3939641109298533, "grad_norm": 0.5323375463485718, "learning_rate": 0.0009988194691166077, "loss": 0.1809, "num_input_tokens_seen": 31680304, "step": 14675 }, { "epoch": 2.394779771615008, "grad_norm": 0.29768455028533936, "learning_rate": 0.000998814575635703, "loss": 0.1211, "num_input_tokens_seen": 31690736, "step": 14680 }, { "epoch": 2.395595432300163, "grad_norm": 0.17780493199825287, "learning_rate": 0.000998809672045688, "loss": 0.0488, "num_input_tokens_seen": 31700784, "step": 14685 }, { "epoch": 2.396411092985318, "grad_norm": 0.07951948046684265, "learning_rate": 0.0009988047583466622, "loss": 0.1518, "num_input_tokens_seen": 31710704, "step": 14690 }, { "epoch": 2.397226753670473, "grad_norm": 0.13767142593860626, "learning_rate": 0.0009987998345387255, "loss": 0.0561, "num_input_tokens_seen": 31721936, "step": 14695 }, { "epoch": 2.3980424143556283, "grad_norm": 0.01749943196773529, "learning_rate": 0.000998794900621977, "loss": 0.1278, "num_input_tokens_seen": 31733264, "step": 14700 }, { "epoch": 2.398858075040783, "grad_norm": 0.062574103474617, "learning_rate": 0.0009987899565965172, "loss": 0.1102, "num_input_tokens_seen": 31743376, "step": 14705 }, { "epoch": 2.399673735725938, "grad_norm": 0.01560261007398367, "learning_rate": 0.0009987850024624463, "loss": 0.2166, "num_input_tokens_seen": 31754448, "step": 14710 }, { "epoch": 2.400489396411093, "grad_norm": 0.19096983969211578, "learning_rate": 0.0009987800382198647, "loss": 0.0956, "num_input_tokens_seen": 31765072, "step": 14715 }, { "epoch": 2.401305057096248, "grad_norm": 0.23281601071357727, "learning_rate": 0.0009987750638688726, "loss": 0.1029, "num_input_tokens_seen": 31775504, "step": 14720 }, { "epoch": 2.402120717781403, "grad_norm": 0.4502270519733429, "learning_rate": 0.000998770079409571, "loss": 0.0919, "num_input_tokens_seen": 31786096, "step": 14725 }, { "epoch": 2.402936378466558, "grad_norm": 0.25946739315986633, "learning_rate": 0.0009987650848420613, "loss": 0.0536, "num_input_tokens_seen": 31796848, "step": 14730 }, { "epoch": 2.403752039151713, "grad_norm": 0.09264673292636871, "learning_rate": 0.0009987600801664442, "loss": 0.0912, "num_input_tokens_seen": 31806800, "step": 14735 }, { "epoch": 2.4045676998368677, "grad_norm": 0.04489947482943535, "learning_rate": 0.0009987550653828214, "loss": 0.1519, "num_input_tokens_seen": 31815920, "step": 14740 }, { "epoch": 2.405383360522023, "grad_norm": 0.0844508484005928, "learning_rate": 0.0009987500404912946, "loss": 0.0243, "num_input_tokens_seen": 31827280, "step": 14745 }, { "epoch": 2.4061990212071778, "grad_norm": 0.01267957966774702, "learning_rate": 0.0009987450054919655, "loss": 0.0505, "num_input_tokens_seen": 31836336, "step": 14750 }, { "epoch": 2.407014681892333, "grad_norm": 0.1275075078010559, "learning_rate": 0.000998739960384936, "loss": 0.1081, "num_input_tokens_seen": 31847248, "step": 14755 }, { "epoch": 2.407830342577488, "grad_norm": 0.10418306291103363, "learning_rate": 0.0009987349051703088, "loss": 0.218, "num_input_tokens_seen": 31858608, "step": 14760 }, { "epoch": 2.4086460032626427, "grad_norm": 0.28625568747520447, "learning_rate": 0.0009987298398481859, "loss": 0.171, "num_input_tokens_seen": 31869968, "step": 14765 }, { "epoch": 2.4094616639477975, "grad_norm": 0.054068855941295624, "learning_rate": 0.00099872476441867, "loss": 0.0765, "num_input_tokens_seen": 31880752, "step": 14770 }, { "epoch": 2.4102773246329527, "grad_norm": 0.3038162589073181, "learning_rate": 0.0009987196788818643, "loss": 0.2465, "num_input_tokens_seen": 31890928, "step": 14775 }, { "epoch": 2.4110929853181076, "grad_norm": 0.5938822627067566, "learning_rate": 0.0009987145832378713, "loss": 0.1911, "num_input_tokens_seen": 31902544, "step": 14780 }, { "epoch": 2.411908646003263, "grad_norm": 0.021015219390392303, "learning_rate": 0.0009987094774867949, "loss": 0.1046, "num_input_tokens_seen": 31913072, "step": 14785 }, { "epoch": 2.4127243066884176, "grad_norm": 0.011226077564060688, "learning_rate": 0.000998704361628738, "loss": 0.0233, "num_input_tokens_seen": 31924176, "step": 14790 }, { "epoch": 2.4135399673735725, "grad_norm": 0.20805269479751587, "learning_rate": 0.000998699235663805, "loss": 0.1756, "num_input_tokens_seen": 31933936, "step": 14795 }, { "epoch": 2.4143556280587277, "grad_norm": 0.4688297510147095, "learning_rate": 0.000998694099592099, "loss": 0.1047, "num_input_tokens_seen": 31945072, "step": 14800 }, { "epoch": 2.4151712887438825, "grad_norm": 0.40544554591178894, "learning_rate": 0.0009986889534137245, "loss": 0.1608, "num_input_tokens_seen": 31956080, "step": 14805 }, { "epoch": 2.4159869494290374, "grad_norm": 0.10153290629386902, "learning_rate": 0.0009986837971287857, "loss": 0.1009, "num_input_tokens_seen": 31967088, "step": 14810 }, { "epoch": 2.4168026101141926, "grad_norm": 0.07018234580755234, "learning_rate": 0.0009986786307373873, "loss": 0.0496, "num_input_tokens_seen": 31978416, "step": 14815 }, { "epoch": 2.4176182707993474, "grad_norm": 0.019476963207125664, "learning_rate": 0.0009986734542396336, "loss": 0.1745, "num_input_tokens_seen": 31989744, "step": 14820 }, { "epoch": 2.4184339314845023, "grad_norm": 0.027445156127214432, "learning_rate": 0.0009986682676356299, "loss": 0.0799, "num_input_tokens_seen": 32001264, "step": 14825 }, { "epoch": 2.4192495921696575, "grad_norm": 0.09250480681657791, "learning_rate": 0.000998663070925481, "loss": 0.1225, "num_input_tokens_seen": 32012496, "step": 14830 }, { "epoch": 2.4200652528548123, "grad_norm": 0.10799006372690201, "learning_rate": 0.0009986578641092924, "loss": 0.0996, "num_input_tokens_seen": 32023440, "step": 14835 }, { "epoch": 2.4208809135399676, "grad_norm": 0.013307536952197552, "learning_rate": 0.0009986526471871698, "loss": 0.0987, "num_input_tokens_seen": 32035248, "step": 14840 }, { "epoch": 2.4216965742251224, "grad_norm": 0.0383186861872673, "learning_rate": 0.0009986474201592187, "loss": 0.0736, "num_input_tokens_seen": 32046032, "step": 14845 }, { "epoch": 2.4225122349102772, "grad_norm": 0.30800560116767883, "learning_rate": 0.0009986421830255447, "loss": 0.1486, "num_input_tokens_seen": 32056176, "step": 14850 }, { "epoch": 2.4233278955954325, "grad_norm": 0.1141648143529892, "learning_rate": 0.0009986369357862545, "loss": 0.1025, "num_input_tokens_seen": 32068592, "step": 14855 }, { "epoch": 2.4241435562805873, "grad_norm": 0.11519763618707657, "learning_rate": 0.0009986316784414543, "loss": 0.1525, "num_input_tokens_seen": 32079312, "step": 14860 }, { "epoch": 2.424959216965742, "grad_norm": 0.035754598677158356, "learning_rate": 0.0009986264109912507, "loss": 0.0645, "num_input_tokens_seen": 32090960, "step": 14865 }, { "epoch": 2.4257748776508974, "grad_norm": 0.10153181850910187, "learning_rate": 0.00099862113343575, "loss": 0.0376, "num_input_tokens_seen": 32102512, "step": 14870 }, { "epoch": 2.426590538336052, "grad_norm": 0.09912705421447754, "learning_rate": 0.0009986158457750596, "loss": 0.1254, "num_input_tokens_seen": 32113552, "step": 14875 }, { "epoch": 2.427406199021207, "grad_norm": 0.017049968242645264, "learning_rate": 0.0009986105480092866, "loss": 0.0551, "num_input_tokens_seen": 32124272, "step": 14880 }, { "epoch": 2.4282218597063623, "grad_norm": 0.07231627404689789, "learning_rate": 0.0009986052401385385, "loss": 0.1383, "num_input_tokens_seen": 32135920, "step": 14885 }, { "epoch": 2.429037520391517, "grad_norm": 0.37863439321517944, "learning_rate": 0.0009985999221629224, "loss": 0.0767, "num_input_tokens_seen": 32146512, "step": 14890 }, { "epoch": 2.429853181076672, "grad_norm": 0.04500739276409149, "learning_rate": 0.0009985945940825464, "loss": 0.0287, "num_input_tokens_seen": 32157488, "step": 14895 }, { "epoch": 2.430668841761827, "grad_norm": 0.013122901320457458, "learning_rate": 0.0009985892558975185, "loss": 0.2752, "num_input_tokens_seen": 32166288, "step": 14900 }, { "epoch": 2.431484502446982, "grad_norm": 0.47515347599983215, "learning_rate": 0.0009985839076079469, "loss": 0.2498, "num_input_tokens_seen": 32176400, "step": 14905 }, { "epoch": 2.432300163132137, "grad_norm": 0.23221901059150696, "learning_rate": 0.0009985785492139397, "loss": 0.0909, "num_input_tokens_seen": 32186064, "step": 14910 }, { "epoch": 2.433115823817292, "grad_norm": 0.3188129663467407, "learning_rate": 0.0009985731807156057, "loss": 0.2259, "num_input_tokens_seen": 32198512, "step": 14915 }, { "epoch": 2.433931484502447, "grad_norm": 0.053314946591854095, "learning_rate": 0.0009985678021130538, "loss": 0.0395, "num_input_tokens_seen": 32209520, "step": 14920 }, { "epoch": 2.434747145187602, "grad_norm": 0.07802852988243103, "learning_rate": 0.000998562413406393, "loss": 0.2186, "num_input_tokens_seen": 32220848, "step": 14925 }, { "epoch": 2.435562805872757, "grad_norm": 0.05485253408551216, "learning_rate": 0.0009985570145957324, "loss": 0.0985, "num_input_tokens_seen": 32232752, "step": 14930 }, { "epoch": 2.436378466557912, "grad_norm": 0.0828256905078888, "learning_rate": 0.0009985516056811815, "loss": 0.1047, "num_input_tokens_seen": 32244112, "step": 14935 }, { "epoch": 2.437194127243067, "grad_norm": 0.10004516690969467, "learning_rate": 0.0009985461866628496, "loss": 0.0813, "num_input_tokens_seen": 32254800, "step": 14940 }, { "epoch": 2.438009787928222, "grad_norm": 0.3240264654159546, "learning_rate": 0.000998540757540847, "loss": 0.0897, "num_input_tokens_seen": 32265200, "step": 14945 }, { "epoch": 2.4388254486133767, "grad_norm": 0.24419625103473663, "learning_rate": 0.0009985353183152835, "loss": 0.0691, "num_input_tokens_seen": 32275408, "step": 14950 }, { "epoch": 2.439641109298532, "grad_norm": 0.10432838648557663, "learning_rate": 0.0009985298689862692, "loss": 0.1298, "num_input_tokens_seen": 32285936, "step": 14955 }, { "epoch": 2.4404567699836868, "grad_norm": 0.06832723319530487, "learning_rate": 0.0009985244095539149, "loss": 0.1545, "num_input_tokens_seen": 32297360, "step": 14960 }, { "epoch": 2.4412724306688416, "grad_norm": 0.08811657875776291, "learning_rate": 0.0009985189400183306, "loss": 0.0478, "num_input_tokens_seen": 32307312, "step": 14965 }, { "epoch": 2.442088091353997, "grad_norm": 0.07262061536312103, "learning_rate": 0.0009985134603796278, "loss": 0.0747, "num_input_tokens_seen": 32317904, "step": 14970 }, { "epoch": 2.4429037520391517, "grad_norm": 0.056681033223867416, "learning_rate": 0.0009985079706379175, "loss": 0.0942, "num_input_tokens_seen": 32327408, "step": 14975 }, { "epoch": 2.443719412724307, "grad_norm": 0.010113386437296867, "learning_rate": 0.0009985024707933107, "loss": 0.0792, "num_input_tokens_seen": 32337840, "step": 14980 }, { "epoch": 2.4445350734094617, "grad_norm": 0.03568893298506737, "learning_rate": 0.0009984969608459186, "loss": 0.1265, "num_input_tokens_seen": 32347696, "step": 14985 }, { "epoch": 2.4453507340946166, "grad_norm": 0.28316572308540344, "learning_rate": 0.0009984914407958536, "loss": 0.096, "num_input_tokens_seen": 32358288, "step": 14990 }, { "epoch": 2.4461663947797714, "grad_norm": 0.0889546275138855, "learning_rate": 0.000998485910643227, "loss": 0.0586, "num_input_tokens_seen": 32369360, "step": 14995 }, { "epoch": 2.4469820554649266, "grad_norm": 0.005907861515879631, "learning_rate": 0.000998480370388151, "loss": 0.1575, "num_input_tokens_seen": 32379152, "step": 15000 }, { "epoch": 2.4477977161500815, "grad_norm": 0.15813182294368744, "learning_rate": 0.000998474820030738, "loss": 0.0784, "num_input_tokens_seen": 32389808, "step": 15005 }, { "epoch": 2.4486133768352367, "grad_norm": 0.03261389955878258, "learning_rate": 0.0009984692595711004, "loss": 0.0831, "num_input_tokens_seen": 32400272, "step": 15010 }, { "epoch": 2.4494290375203915, "grad_norm": 0.2515513002872467, "learning_rate": 0.0009984636890093509, "loss": 0.1369, "num_input_tokens_seen": 32411440, "step": 15015 }, { "epoch": 2.4502446982055464, "grad_norm": 0.007182952482253313, "learning_rate": 0.0009984581083456023, "loss": 0.0061, "num_input_tokens_seen": 32423408, "step": 15020 }, { "epoch": 2.4510603588907016, "grad_norm": 0.007546174805611372, "learning_rate": 0.000998452517579968, "loss": 0.1757, "num_input_tokens_seen": 32434544, "step": 15025 }, { "epoch": 2.4518760195758564, "grad_norm": 0.03878934308886528, "learning_rate": 0.000998446916712561, "loss": 0.2189, "num_input_tokens_seen": 32445712, "step": 15030 }, { "epoch": 2.4526916802610113, "grad_norm": 0.0754777267575264, "learning_rate": 0.0009984413057434948, "loss": 0.1342, "num_input_tokens_seen": 32456528, "step": 15035 }, { "epoch": 2.4535073409461665, "grad_norm": 0.14407561719417572, "learning_rate": 0.0009984356846728835, "loss": 0.1059, "num_input_tokens_seen": 32466672, "step": 15040 }, { "epoch": 2.4543230016313213, "grad_norm": 0.03257298842072487, "learning_rate": 0.0009984300535008405, "loss": 0.049, "num_input_tokens_seen": 32478736, "step": 15045 }, { "epoch": 2.455138662316476, "grad_norm": 0.02118925377726555, "learning_rate": 0.0009984244122274802, "loss": 0.1217, "num_input_tokens_seen": 32489072, "step": 15050 }, { "epoch": 2.4559543230016314, "grad_norm": 0.06860590726137161, "learning_rate": 0.000998418760852917, "loss": 0.0588, "num_input_tokens_seen": 32499184, "step": 15055 }, { "epoch": 2.4567699836867862, "grad_norm": 0.02009238302707672, "learning_rate": 0.0009984130993772652, "loss": 0.04, "num_input_tokens_seen": 32510288, "step": 15060 }, { "epoch": 2.4575856443719415, "grad_norm": 0.07246176898479462, "learning_rate": 0.0009984074278006397, "loss": 0.0661, "num_input_tokens_seen": 32519824, "step": 15065 }, { "epoch": 2.4584013050570963, "grad_norm": 0.05304267257452011, "learning_rate": 0.0009984017461231553, "loss": 0.1055, "num_input_tokens_seen": 32531216, "step": 15070 }, { "epoch": 2.459216965742251, "grad_norm": 0.549059271812439, "learning_rate": 0.0009983960543449276, "loss": 0.1106, "num_input_tokens_seen": 32541552, "step": 15075 }, { "epoch": 2.4600326264274064, "grad_norm": 0.04333930462598801, "learning_rate": 0.0009983903524660711, "loss": 0.0819, "num_input_tokens_seen": 32551664, "step": 15080 }, { "epoch": 2.460848287112561, "grad_norm": 0.3955191671848297, "learning_rate": 0.0009983846404867022, "loss": 0.1919, "num_input_tokens_seen": 32563312, "step": 15085 }, { "epoch": 2.461663947797716, "grad_norm": 0.05256238579750061, "learning_rate": 0.0009983789184069363, "loss": 0.0976, "num_input_tokens_seen": 32573488, "step": 15090 }, { "epoch": 2.4624796084828713, "grad_norm": 0.10220126062631607, "learning_rate": 0.0009983731862268893, "loss": 0.1646, "num_input_tokens_seen": 32584976, "step": 15095 }, { "epoch": 2.463295269168026, "grad_norm": 0.014233068563044071, "learning_rate": 0.0009983674439466774, "loss": 0.0303, "num_input_tokens_seen": 32595248, "step": 15100 }, { "epoch": 2.464110929853181, "grad_norm": 0.13005489110946655, "learning_rate": 0.000998361691566417, "loss": 0.0635, "num_input_tokens_seen": 32606032, "step": 15105 }, { "epoch": 2.464926590538336, "grad_norm": 0.038460299372673035, "learning_rate": 0.0009983559290862247, "loss": 0.1138, "num_input_tokens_seen": 32616080, "step": 15110 }, { "epoch": 2.465742251223491, "grad_norm": 0.20161810517311096, "learning_rate": 0.0009983501565062173, "loss": 0.097, "num_input_tokens_seen": 32626192, "step": 15115 }, { "epoch": 2.466557911908646, "grad_norm": 0.34109312295913696, "learning_rate": 0.000998344373826512, "loss": 0.1873, "num_input_tokens_seen": 32637168, "step": 15120 }, { "epoch": 2.467373572593801, "grad_norm": 0.257336288690567, "learning_rate": 0.0009983385810472256, "loss": 0.1409, "num_input_tokens_seen": 32647664, "step": 15125 }, { "epoch": 2.468189233278956, "grad_norm": 0.24098995327949524, "learning_rate": 0.0009983327781684756, "loss": 0.2147, "num_input_tokens_seen": 32658160, "step": 15130 }, { "epoch": 2.4690048939641107, "grad_norm": 0.25480785965919495, "learning_rate": 0.0009983269651903798, "loss": 0.219, "num_input_tokens_seen": 32669776, "step": 15135 }, { "epoch": 2.469820554649266, "grad_norm": 0.13052603602409363, "learning_rate": 0.0009983211421130558, "loss": 0.1297, "num_input_tokens_seen": 32680752, "step": 15140 }, { "epoch": 2.470636215334421, "grad_norm": 0.08122385293245316, "learning_rate": 0.0009983153089366218, "loss": 0.1616, "num_input_tokens_seen": 32690640, "step": 15145 }, { "epoch": 2.471451876019576, "grad_norm": 0.09041303396224976, "learning_rate": 0.0009983094656611958, "loss": 0.0726, "num_input_tokens_seen": 32702352, "step": 15150 }, { "epoch": 2.472267536704731, "grad_norm": 0.3505943715572357, "learning_rate": 0.0009983036122868962, "loss": 0.1825, "num_input_tokens_seen": 32714320, "step": 15155 }, { "epoch": 2.4730831973898857, "grad_norm": 0.08425252884626389, "learning_rate": 0.000998297748813842, "loss": 0.1295, "num_input_tokens_seen": 32724816, "step": 15160 }, { "epoch": 2.473898858075041, "grad_norm": 0.12242546677589417, "learning_rate": 0.0009982918752421516, "loss": 0.0623, "num_input_tokens_seen": 32735632, "step": 15165 }, { "epoch": 2.4747145187601958, "grad_norm": 0.18483208119869232, "learning_rate": 0.0009982859915719444, "loss": 0.1119, "num_input_tokens_seen": 32746512, "step": 15170 }, { "epoch": 2.4755301794453506, "grad_norm": 0.027985773980617523, "learning_rate": 0.0009982800978033395, "loss": 0.0535, "num_input_tokens_seen": 32758192, "step": 15175 }, { "epoch": 2.476345840130506, "grad_norm": 0.03907552361488342, "learning_rate": 0.000998274193936456, "loss": 0.0224, "num_input_tokens_seen": 32768944, "step": 15180 }, { "epoch": 2.4771615008156607, "grad_norm": 0.16137553751468658, "learning_rate": 0.000998268279971414, "loss": 0.1395, "num_input_tokens_seen": 32779120, "step": 15185 }, { "epoch": 2.4779771615008155, "grad_norm": 0.020007723942399025, "learning_rate": 0.0009982623559083332, "loss": 0.1527, "num_input_tokens_seen": 32790288, "step": 15190 }, { "epoch": 2.4787928221859707, "grad_norm": 0.047924455255270004, "learning_rate": 0.0009982564217473338, "loss": 0.0977, "num_input_tokens_seen": 32801232, "step": 15195 }, { "epoch": 2.4796084828711256, "grad_norm": 0.04608950391411781, "learning_rate": 0.000998250477488536, "loss": 0.1455, "num_input_tokens_seen": 32811344, "step": 15200 }, { "epoch": 2.480424143556281, "grad_norm": 0.08891186118125916, "learning_rate": 0.0009982445231320597, "loss": 0.0488, "num_input_tokens_seen": 32822288, "step": 15205 }, { "epoch": 2.4812398042414356, "grad_norm": 0.3643900752067566, "learning_rate": 0.0009982385586780264, "loss": 0.2882, "num_input_tokens_seen": 32833200, "step": 15210 }, { "epoch": 2.4820554649265905, "grad_norm": 0.2666700780391693, "learning_rate": 0.0009982325841265567, "loss": 0.0712, "num_input_tokens_seen": 32843440, "step": 15215 }, { "epoch": 2.4828711256117453, "grad_norm": 0.033405546098947525, "learning_rate": 0.0009982265994777717, "loss": 0.2396, "num_input_tokens_seen": 32854256, "step": 15220 }, { "epoch": 2.4836867862969005, "grad_norm": 0.08789926022291183, "learning_rate": 0.0009982206047317926, "loss": 0.0932, "num_input_tokens_seen": 32865264, "step": 15225 }, { "epoch": 2.4845024469820554, "grad_norm": 0.10413229465484619, "learning_rate": 0.0009982145998887406, "loss": 0.1065, "num_input_tokens_seen": 32874960, "step": 15230 }, { "epoch": 2.4853181076672106, "grad_norm": 0.17145420610904694, "learning_rate": 0.000998208584948738, "loss": 0.0974, "num_input_tokens_seen": 32884688, "step": 15235 }, { "epoch": 2.4861337683523654, "grad_norm": 0.1282130479812622, "learning_rate": 0.0009982025599119062, "loss": 0.1194, "num_input_tokens_seen": 32895024, "step": 15240 }, { "epoch": 2.4869494290375203, "grad_norm": 0.08388534188270569, "learning_rate": 0.0009981965247783677, "loss": 0.091, "num_input_tokens_seen": 32906096, "step": 15245 }, { "epoch": 2.4877650897226755, "grad_norm": 0.10697083920240402, "learning_rate": 0.0009981904795482446, "loss": 0.0773, "num_input_tokens_seen": 32916944, "step": 15250 }, { "epoch": 2.4885807504078303, "grad_norm": 0.36721426248550415, "learning_rate": 0.0009981844242216594, "loss": 0.0752, "num_input_tokens_seen": 32928624, "step": 15255 }, { "epoch": 2.489396411092985, "grad_norm": 0.27754151821136475, "learning_rate": 0.0009981783587987348, "loss": 0.1618, "num_input_tokens_seen": 32939376, "step": 15260 }, { "epoch": 2.4902120717781404, "grad_norm": 0.3185323476791382, "learning_rate": 0.0009981722832795937, "loss": 0.2467, "num_input_tokens_seen": 32949808, "step": 15265 }, { "epoch": 2.4910277324632952, "grad_norm": 0.29478660225868225, "learning_rate": 0.0009981661976643595, "loss": 0.0783, "num_input_tokens_seen": 32960848, "step": 15270 }, { "epoch": 2.49184339314845, "grad_norm": 0.055219899863004684, "learning_rate": 0.0009981601019531552, "loss": 0.0422, "num_input_tokens_seen": 32970512, "step": 15275 }, { "epoch": 2.4926590538336053, "grad_norm": 0.054092150181531906, "learning_rate": 0.0009981539961461045, "loss": 0.0402, "num_input_tokens_seen": 32980944, "step": 15280 }, { "epoch": 2.49347471451876, "grad_norm": 0.11143659800291061, "learning_rate": 0.000998147880243331, "loss": 0.1713, "num_input_tokens_seen": 32992304, "step": 15285 }, { "epoch": 2.4942903752039154, "grad_norm": 0.13122211396694183, "learning_rate": 0.000998141754244959, "loss": 0.0361, "num_input_tokens_seen": 33002992, "step": 15290 }, { "epoch": 2.49510603588907, "grad_norm": 0.045194584876298904, "learning_rate": 0.0009981356181511124, "loss": 0.0942, "num_input_tokens_seen": 33014256, "step": 15295 }, { "epoch": 2.495921696574225, "grad_norm": 0.36026835441589355, "learning_rate": 0.0009981294719619152, "loss": 0.1733, "num_input_tokens_seen": 33025392, "step": 15300 }, { "epoch": 2.4967373572593803, "grad_norm": 0.18099842965602875, "learning_rate": 0.0009981233156774927, "loss": 0.0631, "num_input_tokens_seen": 33035280, "step": 15305 }, { "epoch": 2.497553017944535, "grad_norm": 0.2886909544467926, "learning_rate": 0.0009981171492979691, "loss": 0.1414, "num_input_tokens_seen": 33045872, "step": 15310 }, { "epoch": 2.49836867862969, "grad_norm": 0.015260874293744564, "learning_rate": 0.0009981109728234698, "loss": 0.0889, "num_input_tokens_seen": 33056944, "step": 15315 }, { "epoch": 2.499184339314845, "grad_norm": 0.01943644881248474, "learning_rate": 0.0009981047862541194, "loss": 0.1233, "num_input_tokens_seen": 33066032, "step": 15320 }, { "epoch": 2.5, "grad_norm": 0.2573438584804535, "learning_rate": 0.0009980985895900439, "loss": 0.0624, "num_input_tokens_seen": 33077104, "step": 15325 }, { "epoch": 2.500815660685155, "grad_norm": 0.4044923484325409, "learning_rate": 0.0009980923828313685, "loss": 0.0992, "num_input_tokens_seen": 33086256, "step": 15330 }, { "epoch": 2.50163132137031, "grad_norm": 0.027015361934900284, "learning_rate": 0.000998086165978219, "loss": 0.0853, "num_input_tokens_seen": 33095888, "step": 15335 }, { "epoch": 2.502446982055465, "grad_norm": 0.1990964114665985, "learning_rate": 0.0009980799390307215, "loss": 0.0756, "num_input_tokens_seen": 33105392, "step": 15340 }, { "epoch": 2.50326264274062, "grad_norm": 0.017310230061411858, "learning_rate": 0.0009980737019890024, "loss": 0.0246, "num_input_tokens_seen": 33116048, "step": 15345 }, { "epoch": 2.504078303425775, "grad_norm": 0.28901877999305725, "learning_rate": 0.0009980674548531877, "loss": 0.0751, "num_input_tokens_seen": 33126640, "step": 15350 }, { "epoch": 2.50489396411093, "grad_norm": 0.1627601683139801, "learning_rate": 0.0009980611976234041, "loss": 0.1439, "num_input_tokens_seen": 33137488, "step": 15355 }, { "epoch": 2.5057096247960846, "grad_norm": 0.013003559783101082, "learning_rate": 0.0009980549302997788, "loss": 0.0168, "num_input_tokens_seen": 33147216, "step": 15360 }, { "epoch": 2.50652528548124, "grad_norm": 0.06594779342412949, "learning_rate": 0.000998048652882438, "loss": 0.0657, "num_input_tokens_seen": 33158096, "step": 15365 }, { "epoch": 2.5073409461663947, "grad_norm": 0.11107653379440308, "learning_rate": 0.00099804236537151, "loss": 0.1649, "num_input_tokens_seen": 33169520, "step": 15370 }, { "epoch": 2.50815660685155, "grad_norm": 0.024280471727252007, "learning_rate": 0.0009980360677671214, "loss": 0.0554, "num_input_tokens_seen": 33180080, "step": 15375 }, { "epoch": 2.5089722675367048, "grad_norm": 0.027685200795531273, "learning_rate": 0.0009980297600694, "loss": 0.0338, "num_input_tokens_seen": 33191248, "step": 15380 }, { "epoch": 2.5097879282218596, "grad_norm": 0.006733428221195936, "learning_rate": 0.0009980234422784738, "loss": 0.1162, "num_input_tokens_seen": 33201648, "step": 15385 }, { "epoch": 2.5106035889070144, "grad_norm": 0.03485396131873131, "learning_rate": 0.0009980171143944708, "loss": 0.116, "num_input_tokens_seen": 33212720, "step": 15390 }, { "epoch": 2.5114192495921697, "grad_norm": 0.13103364408016205, "learning_rate": 0.000998010776417519, "loss": 0.1022, "num_input_tokens_seen": 33223792, "step": 15395 }, { "epoch": 2.5122349102773245, "grad_norm": 0.2758151590824127, "learning_rate": 0.0009980044283477473, "loss": 0.1054, "num_input_tokens_seen": 33233520, "step": 15400 }, { "epoch": 2.5130505709624797, "grad_norm": 0.11168879270553589, "learning_rate": 0.000997998070185284, "loss": 0.0481, "num_input_tokens_seen": 33244592, "step": 15405 }, { "epoch": 2.5138662316476346, "grad_norm": 0.23607277870178223, "learning_rate": 0.000997991701930258, "loss": 0.1625, "num_input_tokens_seen": 33255664, "step": 15410 }, { "epoch": 2.5146818923327894, "grad_norm": 0.15977512300014496, "learning_rate": 0.0009979853235827984, "loss": 0.0506, "num_input_tokens_seen": 33267088, "step": 15415 }, { "epoch": 2.5154975530179446, "grad_norm": 0.2873072028160095, "learning_rate": 0.0009979789351430347, "loss": 0.0755, "num_input_tokens_seen": 33278256, "step": 15420 }, { "epoch": 2.5163132137030995, "grad_norm": 0.15967972576618195, "learning_rate": 0.0009979725366110958, "loss": 0.1753, "num_input_tokens_seen": 33290960, "step": 15425 }, { "epoch": 2.5171288743882547, "grad_norm": 0.10125792771577835, "learning_rate": 0.0009979661279871119, "loss": 0.0295, "num_input_tokens_seen": 33301936, "step": 15430 }, { "epoch": 2.5179445350734095, "grad_norm": 0.5510227680206299, "learning_rate": 0.0009979597092712128, "loss": 0.2007, "num_input_tokens_seen": 33312304, "step": 15435 }, { "epoch": 2.5187601957585644, "grad_norm": 0.3547648787498474, "learning_rate": 0.0009979532804635283, "loss": 0.1067, "num_input_tokens_seen": 33324208, "step": 15440 }, { "epoch": 2.519575856443719, "grad_norm": 0.07222874462604523, "learning_rate": 0.000997946841564189, "loss": 0.0186, "num_input_tokens_seen": 33334864, "step": 15445 }, { "epoch": 2.5203915171288744, "grad_norm": 0.2404254823923111, "learning_rate": 0.0009979403925733253, "loss": 0.0645, "num_input_tokens_seen": 33347056, "step": 15450 }, { "epoch": 2.5212071778140293, "grad_norm": 0.2713823616504669, "learning_rate": 0.0009979339334910678, "loss": 0.1587, "num_input_tokens_seen": 33357616, "step": 15455 }, { "epoch": 2.5220228384991845, "grad_norm": 0.014020344242453575, "learning_rate": 0.0009979274643175473, "loss": 0.1204, "num_input_tokens_seen": 33369072, "step": 15460 }, { "epoch": 2.5228384991843393, "grad_norm": 0.15319646894931793, "learning_rate": 0.0009979209850528954, "loss": 0.2739, "num_input_tokens_seen": 33379664, "step": 15465 }, { "epoch": 2.523654159869494, "grad_norm": 0.12626083195209503, "learning_rate": 0.0009979144956972427, "loss": 0.0382, "num_input_tokens_seen": 33389584, "step": 15470 }, { "epoch": 2.5244698205546494, "grad_norm": 0.06765622645616531, "learning_rate": 0.0009979079962507214, "loss": 0.0958, "num_input_tokens_seen": 33400688, "step": 15475 }, { "epoch": 2.5252854812398042, "grad_norm": 0.005371478386223316, "learning_rate": 0.0009979014867134628, "loss": 0.0356, "num_input_tokens_seen": 33410512, "step": 15480 }, { "epoch": 2.5261011419249595, "grad_norm": 0.03417260944843292, "learning_rate": 0.000997894967085599, "loss": 0.0316, "num_input_tokens_seen": 33419664, "step": 15485 }, { "epoch": 2.5269168026101143, "grad_norm": 0.07143821567296982, "learning_rate": 0.000997888437367262, "loss": 0.0285, "num_input_tokens_seen": 33430800, "step": 15490 }, { "epoch": 2.527732463295269, "grad_norm": 0.05124485120177269, "learning_rate": 0.0009978818975585843, "loss": 0.0667, "num_input_tokens_seen": 33441520, "step": 15495 }, { "epoch": 2.528548123980424, "grad_norm": 0.10650315880775452, "learning_rate": 0.0009978753476596982, "loss": 0.0871, "num_input_tokens_seen": 33452304, "step": 15500 }, { "epoch": 2.529363784665579, "grad_norm": 0.07975072413682938, "learning_rate": 0.0009978687876707366, "loss": 0.0821, "num_input_tokens_seen": 33461360, "step": 15505 }, { "epoch": 2.530179445350734, "grad_norm": 0.014130255207419395, "learning_rate": 0.0009978622175918323, "loss": 0.0873, "num_input_tokens_seen": 33472400, "step": 15510 }, { "epoch": 2.5309951060358893, "grad_norm": 0.05798503756523132, "learning_rate": 0.0009978556374231188, "loss": 0.0277, "num_input_tokens_seen": 33482800, "step": 15515 }, { "epoch": 2.531810766721044, "grad_norm": 0.04003949463367462, "learning_rate": 0.0009978490471647292, "loss": 0.1379, "num_input_tokens_seen": 33493392, "step": 15520 }, { "epoch": 2.532626427406199, "grad_norm": 0.03161904215812683, "learning_rate": 0.000997842446816797, "loss": 0.1923, "num_input_tokens_seen": 33505072, "step": 15525 }, { "epoch": 2.5334420880913537, "grad_norm": 0.14025592803955078, "learning_rate": 0.0009978358363794562, "loss": 0.2185, "num_input_tokens_seen": 33515280, "step": 15530 }, { "epoch": 2.534257748776509, "grad_norm": 0.16097170114517212, "learning_rate": 0.0009978292158528406, "loss": 0.0835, "num_input_tokens_seen": 33525168, "step": 15535 }, { "epoch": 2.535073409461664, "grad_norm": 0.23669864237308502, "learning_rate": 0.0009978225852370843, "loss": 0.0464, "num_input_tokens_seen": 33535344, "step": 15540 }, { "epoch": 2.535889070146819, "grad_norm": 0.05082008242607117, "learning_rate": 0.000997815944532322, "loss": 0.0481, "num_input_tokens_seen": 33547920, "step": 15545 }, { "epoch": 2.536704730831974, "grad_norm": 0.2230103611946106, "learning_rate": 0.0009978092937386878, "loss": 0.0802, "num_input_tokens_seen": 33558704, "step": 15550 }, { "epoch": 2.5375203915171287, "grad_norm": 0.11554722487926483, "learning_rate": 0.0009978026328563167, "loss": 0.1204, "num_input_tokens_seen": 33569424, "step": 15555 }, { "epoch": 2.538336052202284, "grad_norm": 0.22983507812023163, "learning_rate": 0.0009977959618853438, "loss": 0.0635, "num_input_tokens_seen": 33578992, "step": 15560 }, { "epoch": 2.539151712887439, "grad_norm": 0.04890982806682587, "learning_rate": 0.0009977892808259044, "loss": 0.1001, "num_input_tokens_seen": 33590384, "step": 15565 }, { "epoch": 2.539967373572594, "grad_norm": 0.10651346296072006, "learning_rate": 0.0009977825896781336, "loss": 0.0423, "num_input_tokens_seen": 33602672, "step": 15570 }, { "epoch": 2.540783034257749, "grad_norm": 0.0035797490272670984, "learning_rate": 0.0009977758884421673, "loss": 0.0463, "num_input_tokens_seen": 33614704, "step": 15575 }, { "epoch": 2.5415986949429037, "grad_norm": 0.010593925602734089, "learning_rate": 0.000997769177118141, "loss": 0.1474, "num_input_tokens_seen": 33625136, "step": 15580 }, { "epoch": 2.5424143556280585, "grad_norm": 0.020829571411013603, "learning_rate": 0.0009977624557061908, "loss": 0.1345, "num_input_tokens_seen": 33636304, "step": 15585 }, { "epoch": 2.5432300163132138, "grad_norm": 0.3319317400455475, "learning_rate": 0.000997755724206453, "loss": 0.2844, "num_input_tokens_seen": 33648304, "step": 15590 }, { "epoch": 2.5440456769983686, "grad_norm": 0.2581354081630707, "learning_rate": 0.0009977489826190641, "loss": 0.1156, "num_input_tokens_seen": 33659664, "step": 15595 }, { "epoch": 2.544861337683524, "grad_norm": 0.4305964410305023, "learning_rate": 0.0009977422309441605, "loss": 0.2813, "num_input_tokens_seen": 33670256, "step": 15600 }, { "epoch": 2.5456769983686787, "grad_norm": 0.03411751613020897, "learning_rate": 0.0009977354691818794, "loss": 0.0532, "num_input_tokens_seen": 33681616, "step": 15605 }, { "epoch": 2.5464926590538335, "grad_norm": 0.02051916904747486, "learning_rate": 0.0009977286973323575, "loss": 0.1839, "num_input_tokens_seen": 33691120, "step": 15610 }, { "epoch": 2.5473083197389887, "grad_norm": 0.06576742976903915, "learning_rate": 0.000997721915395732, "loss": 0.1627, "num_input_tokens_seen": 33702992, "step": 15615 }, { "epoch": 2.5481239804241436, "grad_norm": 0.017474330961704254, "learning_rate": 0.0009977151233721406, "loss": 0.0706, "num_input_tokens_seen": 33713136, "step": 15620 }, { "epoch": 2.5489396411092984, "grad_norm": 0.21475361287593842, "learning_rate": 0.0009977083212617207, "loss": 0.1152, "num_input_tokens_seen": 33723152, "step": 15625 }, { "epoch": 2.5497553017944536, "grad_norm": 0.02140083722770214, "learning_rate": 0.0009977015090646105, "loss": 0.0492, "num_input_tokens_seen": 33735408, "step": 15630 }, { "epoch": 2.5505709624796085, "grad_norm": 0.025084657594561577, "learning_rate": 0.0009976946867809476, "loss": 0.0521, "num_input_tokens_seen": 33745680, "step": 15635 }, { "epoch": 2.5513866231647633, "grad_norm": 0.12967127561569214, "learning_rate": 0.0009976878544108705, "loss": 0.0622, "num_input_tokens_seen": 33756048, "step": 15640 }, { "epoch": 2.5522022838499185, "grad_norm": 0.012976898811757565, "learning_rate": 0.000997681011954518, "loss": 0.2748, "num_input_tokens_seen": 33767536, "step": 15645 }, { "epoch": 2.5530179445350734, "grad_norm": 0.007183768320828676, "learning_rate": 0.0009976741594120281, "loss": 0.1875, "num_input_tokens_seen": 33779472, "step": 15650 }, { "epoch": 2.5538336052202286, "grad_norm": 0.013597512617707253, "learning_rate": 0.00099766729678354, "loss": 0.0981, "num_input_tokens_seen": 33791248, "step": 15655 }, { "epoch": 2.5546492659053834, "grad_norm": 0.04499229043722153, "learning_rate": 0.0009976604240691932, "loss": 0.0604, "num_input_tokens_seen": 33801360, "step": 15660 }, { "epoch": 2.5554649265905383, "grad_norm": 0.06628694385290146, "learning_rate": 0.0009976535412691261, "loss": 0.176, "num_input_tokens_seen": 33812784, "step": 15665 }, { "epoch": 2.556280587275693, "grad_norm": 0.024672731757164, "learning_rate": 0.0009976466483834789, "loss": 0.0218, "num_input_tokens_seen": 33822960, "step": 15670 }, { "epoch": 2.5570962479608483, "grad_norm": 0.010223050601780415, "learning_rate": 0.0009976397454123911, "loss": 0.0281, "num_input_tokens_seen": 33834704, "step": 15675 }, { "epoch": 2.557911908646003, "grad_norm": 0.2595158517360687, "learning_rate": 0.0009976328323560025, "loss": 0.2362, "num_input_tokens_seen": 33846064, "step": 15680 }, { "epoch": 2.5587275693311584, "grad_norm": 0.030189234763383865, "learning_rate": 0.0009976259092144533, "loss": 0.1507, "num_input_tokens_seen": 33857648, "step": 15685 }, { "epoch": 2.5595432300163132, "grad_norm": 0.1471072882413864, "learning_rate": 0.0009976189759878836, "loss": 0.1127, "num_input_tokens_seen": 33867984, "step": 15690 }, { "epoch": 2.560358890701468, "grad_norm": 0.1314489245414734, "learning_rate": 0.0009976120326764342, "loss": 0.1343, "num_input_tokens_seen": 33878192, "step": 15695 }, { "epoch": 2.5611745513866233, "grad_norm": 0.014674081467092037, "learning_rate": 0.0009976050792802457, "loss": 0.2699, "num_input_tokens_seen": 33888816, "step": 15700 }, { "epoch": 2.561990212071778, "grad_norm": 0.09239017963409424, "learning_rate": 0.000997598115799459, "loss": 0.1055, "num_input_tokens_seen": 33900304, "step": 15705 }, { "epoch": 2.5628058727569334, "grad_norm": 0.03055289201438427, "learning_rate": 0.0009975911422342152, "loss": 0.2172, "num_input_tokens_seen": 33910736, "step": 15710 }, { "epoch": 2.563621533442088, "grad_norm": 0.30208563804626465, "learning_rate": 0.0009975841585846558, "loss": 0.0785, "num_input_tokens_seen": 33921360, "step": 15715 }, { "epoch": 2.564437194127243, "grad_norm": 0.07520193606615067, "learning_rate": 0.000997577164850922, "loss": 0.207, "num_input_tokens_seen": 33932368, "step": 15720 }, { "epoch": 2.565252854812398, "grad_norm": 0.09612149000167847, "learning_rate": 0.000997570161033156, "loss": 0.0911, "num_input_tokens_seen": 33943408, "step": 15725 }, { "epoch": 2.566068515497553, "grad_norm": 0.2481773942708969, "learning_rate": 0.0009975631471314992, "loss": 0.181, "num_input_tokens_seen": 33953872, "step": 15730 }, { "epoch": 2.566884176182708, "grad_norm": 0.04081231728196144, "learning_rate": 0.0009975561231460942, "loss": 0.0844, "num_input_tokens_seen": 33964112, "step": 15735 }, { "epoch": 2.567699836867863, "grad_norm": 0.02521529607474804, "learning_rate": 0.000997549089077083, "loss": 0.0721, "num_input_tokens_seen": 33973872, "step": 15740 }, { "epoch": 2.568515497553018, "grad_norm": 0.02110426500439644, "learning_rate": 0.0009975420449246084, "loss": 0.133, "num_input_tokens_seen": 33985168, "step": 15745 }, { "epoch": 2.569331158238173, "grad_norm": 0.029750151559710503, "learning_rate": 0.0009975349906888131, "loss": 0.0967, "num_input_tokens_seen": 33997712, "step": 15750 }, { "epoch": 2.5701468189233276, "grad_norm": 0.24482709169387817, "learning_rate": 0.00099752792636984, "loss": 0.1754, "num_input_tokens_seen": 34006544, "step": 15755 }, { "epoch": 2.570962479608483, "grad_norm": 0.31642553210258484, "learning_rate": 0.0009975208519678324, "loss": 0.1222, "num_input_tokens_seen": 34018320, "step": 15760 }, { "epoch": 2.5717781402936377, "grad_norm": 0.07368852198123932, "learning_rate": 0.0009975137674829335, "loss": 0.0852, "num_input_tokens_seen": 34028752, "step": 15765 }, { "epoch": 2.572593800978793, "grad_norm": 0.16020609438419342, "learning_rate": 0.000997506672915287, "loss": 0.0865, "num_input_tokens_seen": 34039824, "step": 15770 }, { "epoch": 2.573409461663948, "grad_norm": 0.1626286655664444, "learning_rate": 0.0009974995682650368, "loss": 0.1492, "num_input_tokens_seen": 34050832, "step": 15775 }, { "epoch": 2.5742251223491026, "grad_norm": 0.07220303267240524, "learning_rate": 0.0009974924535323265, "loss": 0.207, "num_input_tokens_seen": 34062352, "step": 15780 }, { "epoch": 2.575040783034258, "grad_norm": 0.05702436715364456, "learning_rate": 0.0009974853287173006, "loss": 0.1266, "num_input_tokens_seen": 34073200, "step": 15785 }, { "epoch": 2.5758564437194127, "grad_norm": 0.08150399476289749, "learning_rate": 0.0009974781938201034, "loss": 0.1625, "num_input_tokens_seen": 34084592, "step": 15790 }, { "epoch": 2.576672104404568, "grad_norm": 0.08891163021326065, "learning_rate": 0.0009974710488408795, "loss": 0.1383, "num_input_tokens_seen": 34095728, "step": 15795 }, { "epoch": 2.5774877650897228, "grad_norm": 0.03785131499171257, "learning_rate": 0.0009974638937797736, "loss": 0.1174, "num_input_tokens_seen": 34106864, "step": 15800 }, { "epoch": 2.5783034257748776, "grad_norm": 0.22656035423278809, "learning_rate": 0.000997456728636931, "loss": 0.1269, "num_input_tokens_seen": 34117168, "step": 15805 }, { "epoch": 2.5791190864600324, "grad_norm": 0.1340230405330658, "learning_rate": 0.0009974495534124967, "loss": 0.1749, "num_input_tokens_seen": 34127536, "step": 15810 }, { "epoch": 2.5799347471451877, "grad_norm": 0.030737364664673805, "learning_rate": 0.000997442368106616, "loss": 0.1282, "num_input_tokens_seen": 34138192, "step": 15815 }, { "epoch": 2.5807504078303425, "grad_norm": 0.1610642373561859, "learning_rate": 0.0009974351727194347, "loss": 0.1309, "num_input_tokens_seen": 34148240, "step": 15820 }, { "epoch": 2.5815660685154977, "grad_norm": 0.24954892694950104, "learning_rate": 0.0009974279672510986, "loss": 0.2381, "num_input_tokens_seen": 34158288, "step": 15825 }, { "epoch": 2.5823817292006526, "grad_norm": 0.2072475701570511, "learning_rate": 0.0009974207517017537, "loss": 0.1263, "num_input_tokens_seen": 34169168, "step": 15830 }, { "epoch": 2.5831973898858074, "grad_norm": 0.17524996399879456, "learning_rate": 0.0009974135260715465, "loss": 0.055, "num_input_tokens_seen": 34180240, "step": 15835 }, { "epoch": 2.5840130505709626, "grad_norm": 0.03093145787715912, "learning_rate": 0.0009974062903606229, "loss": 0.0875, "num_input_tokens_seen": 34191056, "step": 15840 }, { "epoch": 2.5848287112561175, "grad_norm": 0.07865778356790543, "learning_rate": 0.0009973990445691298, "loss": 0.063, "num_input_tokens_seen": 34201520, "step": 15845 }, { "epoch": 2.5856443719412723, "grad_norm": 0.02840258739888668, "learning_rate": 0.0009973917886972143, "loss": 0.1226, "num_input_tokens_seen": 34212432, "step": 15850 }, { "epoch": 2.5864600326264275, "grad_norm": 0.0962333083152771, "learning_rate": 0.000997384522745023, "loss": 0.0299, "num_input_tokens_seen": 34223088, "step": 15855 }, { "epoch": 2.5872756933115824, "grad_norm": 0.04188697040081024, "learning_rate": 0.0009973772467127035, "loss": 0.0683, "num_input_tokens_seen": 34233680, "step": 15860 }, { "epoch": 2.588091353996737, "grad_norm": 0.02506262995302677, "learning_rate": 0.000997369960600403, "loss": 0.0247, "num_input_tokens_seen": 34243536, "step": 15865 }, { "epoch": 2.5889070146818924, "grad_norm": 0.00804267916828394, "learning_rate": 0.0009973626644082694, "loss": 0.0869, "num_input_tokens_seen": 34254128, "step": 15870 }, { "epoch": 2.5897226753670473, "grad_norm": 0.2663293778896332, "learning_rate": 0.0009973553581364503, "loss": 0.1112, "num_input_tokens_seen": 34265392, "step": 15875 }, { "epoch": 2.5905383360522025, "grad_norm": 0.05442769080400467, "learning_rate": 0.0009973480417850942, "loss": 0.0507, "num_input_tokens_seen": 34276560, "step": 15880 }, { "epoch": 2.5913539967373573, "grad_norm": 0.18911486864089966, "learning_rate": 0.0009973407153543489, "loss": 0.0684, "num_input_tokens_seen": 34287184, "step": 15885 }, { "epoch": 2.592169657422512, "grad_norm": 0.2101517915725708, "learning_rate": 0.0009973333788443632, "loss": 0.3001, "num_input_tokens_seen": 34298384, "step": 15890 }, { "epoch": 2.592985318107667, "grad_norm": 0.0532357320189476, "learning_rate": 0.0009973260322552855, "loss": 0.047, "num_input_tokens_seen": 34309200, "step": 15895 }, { "epoch": 2.5938009787928222, "grad_norm": 0.026510654017329216, "learning_rate": 0.000997318675587265, "loss": 0.1191, "num_input_tokens_seen": 34320624, "step": 15900 }, { "epoch": 2.594616639477977, "grad_norm": 0.31104010343551636, "learning_rate": 0.0009973113088404507, "loss": 0.0992, "num_input_tokens_seen": 34330416, "step": 15905 }, { "epoch": 2.5954323001631323, "grad_norm": 0.03591397777199745, "learning_rate": 0.0009973039320149916, "loss": 0.0401, "num_input_tokens_seen": 34340880, "step": 15910 }, { "epoch": 2.596247960848287, "grad_norm": 0.4878038465976715, "learning_rate": 0.0009972965451110376, "loss": 0.2358, "num_input_tokens_seen": 34352656, "step": 15915 }, { "epoch": 2.597063621533442, "grad_norm": 0.013632343150675297, "learning_rate": 0.0009972891481287382, "loss": 0.1368, "num_input_tokens_seen": 34362864, "step": 15920 }, { "epoch": 2.597879282218597, "grad_norm": 0.03750962018966675, "learning_rate": 0.0009972817410682433, "loss": 0.0649, "num_input_tokens_seen": 34373808, "step": 15925 }, { "epoch": 2.598694942903752, "grad_norm": 0.0388517789542675, "learning_rate": 0.0009972743239297032, "loss": 0.1859, "num_input_tokens_seen": 34385168, "step": 15930 }, { "epoch": 2.5995106035889073, "grad_norm": 0.1468440592288971, "learning_rate": 0.000997266896713268, "loss": 0.1934, "num_input_tokens_seen": 34395984, "step": 15935 }, { "epoch": 2.600326264274062, "grad_norm": 0.1793297827243805, "learning_rate": 0.0009972594594190884, "loss": 0.0814, "num_input_tokens_seen": 34407120, "step": 15940 }, { "epoch": 2.601141924959217, "grad_norm": 0.02474283240735531, "learning_rate": 0.0009972520120473149, "loss": 0.2123, "num_input_tokens_seen": 34418416, "step": 15945 }, { "epoch": 2.6019575856443717, "grad_norm": 0.17851468920707703, "learning_rate": 0.0009972445545980988, "loss": 0.0652, "num_input_tokens_seen": 34428240, "step": 15950 }, { "epoch": 2.602773246329527, "grad_norm": 0.10923799872398376, "learning_rate": 0.0009972370870715908, "loss": 0.0974, "num_input_tokens_seen": 34439760, "step": 15955 }, { "epoch": 2.603588907014682, "grad_norm": 0.04684066027402878, "learning_rate": 0.0009972296094679426, "loss": 0.1162, "num_input_tokens_seen": 34450320, "step": 15960 }, { "epoch": 2.604404567699837, "grad_norm": 0.06591646373271942, "learning_rate": 0.0009972221217873054, "loss": 0.0553, "num_input_tokens_seen": 34459376, "step": 15965 }, { "epoch": 2.605220228384992, "grad_norm": 0.02446860261261463, "learning_rate": 0.0009972146240298312, "loss": 0.1061, "num_input_tokens_seen": 34469936, "step": 15970 }, { "epoch": 2.6060358890701467, "grad_norm": 0.0367775484919548, "learning_rate": 0.000997207116195672, "loss": 0.077, "num_input_tokens_seen": 34480784, "step": 15975 }, { "epoch": 2.6068515497553015, "grad_norm": 0.1273317039012909, "learning_rate": 0.0009971995982849795, "loss": 0.0964, "num_input_tokens_seen": 34491632, "step": 15980 }, { "epoch": 2.607667210440457, "grad_norm": 0.044068437069654465, "learning_rate": 0.0009971920702979066, "loss": 0.2179, "num_input_tokens_seen": 34503056, "step": 15985 }, { "epoch": 2.6084828711256116, "grad_norm": 0.28478342294692993, "learning_rate": 0.000997184532234606, "loss": 0.2028, "num_input_tokens_seen": 34513680, "step": 15990 }, { "epoch": 2.609298531810767, "grad_norm": 0.21568089723587036, "learning_rate": 0.0009971769840952296, "loss": 0.1333, "num_input_tokens_seen": 34523952, "step": 15995 }, { "epoch": 2.6101141924959217, "grad_norm": 0.15971040725708008, "learning_rate": 0.0009971694258799312, "loss": 0.1346, "num_input_tokens_seen": 34535504, "step": 16000 }, { "epoch": 2.6109298531810765, "grad_norm": 0.3939029276371002, "learning_rate": 0.0009971618575888637, "loss": 0.0941, "num_input_tokens_seen": 34545968, "step": 16005 }, { "epoch": 2.6117455138662318, "grad_norm": 0.028145480901002884, "learning_rate": 0.0009971542792221802, "loss": 0.0911, "num_input_tokens_seen": 34556816, "step": 16010 }, { "epoch": 2.6125611745513866, "grad_norm": 0.06394167989492416, "learning_rate": 0.000997146690780035, "loss": 0.0377, "num_input_tokens_seen": 34567536, "step": 16015 }, { "epoch": 2.613376835236542, "grad_norm": 0.2707046866416931, "learning_rate": 0.000997139092262581, "loss": 0.2199, "num_input_tokens_seen": 34578864, "step": 16020 }, { "epoch": 2.6141924959216967, "grad_norm": 0.018489275127649307, "learning_rate": 0.0009971314836699728, "loss": 0.0756, "num_input_tokens_seen": 34589616, "step": 16025 }, { "epoch": 2.6150081566068515, "grad_norm": 0.02678763121366501, "learning_rate": 0.0009971238650023644, "loss": 0.1265, "num_input_tokens_seen": 34599696, "step": 16030 }, { "epoch": 2.6158238172920063, "grad_norm": 0.13371257483959198, "learning_rate": 0.0009971162362599102, "loss": 0.1744, "num_input_tokens_seen": 34609968, "step": 16035 }, { "epoch": 2.6166394779771616, "grad_norm": 0.011454968713223934, "learning_rate": 0.000997108597442765, "loss": 0.1268, "num_input_tokens_seen": 34621232, "step": 16040 }, { "epoch": 2.6174551386623164, "grad_norm": 0.07720526307821274, "learning_rate": 0.000997100948551083, "loss": 0.164, "num_input_tokens_seen": 34632176, "step": 16045 }, { "epoch": 2.6182707993474716, "grad_norm": 0.23443147540092468, "learning_rate": 0.0009970932895850201, "loss": 0.0923, "num_input_tokens_seen": 34642192, "step": 16050 }, { "epoch": 2.6190864600326265, "grad_norm": 0.14973659813404083, "learning_rate": 0.000997085620544731, "loss": 0.0656, "num_input_tokens_seen": 34652656, "step": 16055 }, { "epoch": 2.6199021207177813, "grad_norm": 0.09505864232778549, "learning_rate": 0.0009970779414303712, "loss": 0.1245, "num_input_tokens_seen": 34662256, "step": 16060 }, { "epoch": 2.6207177814029365, "grad_norm": 0.09561175107955933, "learning_rate": 0.0009970702522420962, "loss": 0.0608, "num_input_tokens_seen": 34672048, "step": 16065 }, { "epoch": 2.6215334420880914, "grad_norm": 0.02433728240430355, "learning_rate": 0.000997062552980062, "loss": 0.147, "num_input_tokens_seen": 34684144, "step": 16070 }, { "epoch": 2.622349102773246, "grad_norm": 0.03134793043136597, "learning_rate": 0.0009970548436444248, "loss": 0.0921, "num_input_tokens_seen": 34695760, "step": 16075 }, { "epoch": 2.6231647634584014, "grad_norm": 0.08532543480396271, "learning_rate": 0.0009970471242353406, "loss": 0.0488, "num_input_tokens_seen": 34707440, "step": 16080 }, { "epoch": 2.6239804241435563, "grad_norm": 0.05656068027019501, "learning_rate": 0.0009970393947529657, "loss": 0.2027, "num_input_tokens_seen": 34717360, "step": 16085 }, { "epoch": 2.624796084828711, "grad_norm": 0.012644010595977306, "learning_rate": 0.0009970316551974568, "loss": 0.0946, "num_input_tokens_seen": 34728592, "step": 16090 }, { "epoch": 2.6256117455138663, "grad_norm": 0.009032066911458969, "learning_rate": 0.0009970239055689712, "loss": 0.1343, "num_input_tokens_seen": 34738448, "step": 16095 }, { "epoch": 2.626427406199021, "grad_norm": 0.2959740161895752, "learning_rate": 0.0009970161458676655, "loss": 0.1637, "num_input_tokens_seen": 34748560, "step": 16100 }, { "epoch": 2.6272430668841764, "grad_norm": 0.370420902967453, "learning_rate": 0.000997008376093697, "loss": 0.1048, "num_input_tokens_seen": 34760080, "step": 16105 }, { "epoch": 2.6280587275693312, "grad_norm": 0.10738759487867355, "learning_rate": 0.0009970005962472233, "loss": 0.2756, "num_input_tokens_seen": 34770768, "step": 16110 }, { "epoch": 2.628874388254486, "grad_norm": 0.2667399048805237, "learning_rate": 0.0009969928063284022, "loss": 0.1851, "num_input_tokens_seen": 34781904, "step": 16115 }, { "epoch": 2.629690048939641, "grad_norm": 0.08870065957307816, "learning_rate": 0.0009969850063373913, "loss": 0.1142, "num_input_tokens_seen": 34793456, "step": 16120 }, { "epoch": 2.630505709624796, "grad_norm": 0.044643230736255646, "learning_rate": 0.0009969771962743488, "loss": 0.0862, "num_input_tokens_seen": 34802832, "step": 16125 }, { "epoch": 2.631321370309951, "grad_norm": 0.07348885387182236, "learning_rate": 0.0009969693761394326, "loss": 0.0519, "num_input_tokens_seen": 34814128, "step": 16130 }, { "epoch": 2.632137030995106, "grad_norm": 0.02179831452667713, "learning_rate": 0.000996961545932802, "loss": 0.2024, "num_input_tokens_seen": 34824912, "step": 16135 }, { "epoch": 2.632952691680261, "grad_norm": 0.09899701923131943, "learning_rate": 0.0009969537056546151, "loss": 0.1013, "num_input_tokens_seen": 34836528, "step": 16140 }, { "epoch": 2.633768352365416, "grad_norm": 0.015514988452196121, "learning_rate": 0.000996945855305031, "loss": 0.2274, "num_input_tokens_seen": 34847856, "step": 16145 }, { "epoch": 2.634584013050571, "grad_norm": 0.0492723248898983, "learning_rate": 0.0009969379948842085, "loss": 0.0539, "num_input_tokens_seen": 34858864, "step": 16150 }, { "epoch": 2.635399673735726, "grad_norm": 0.014452402479946613, "learning_rate": 0.0009969301243923073, "loss": 0.1282, "num_input_tokens_seen": 34870864, "step": 16155 }, { "epoch": 2.636215334420881, "grad_norm": 0.13472549617290497, "learning_rate": 0.0009969222438294867, "loss": 0.1863, "num_input_tokens_seen": 34882128, "step": 16160 }, { "epoch": 2.637030995106036, "grad_norm": 0.06442057341337204, "learning_rate": 0.0009969143531959063, "loss": 0.0506, "num_input_tokens_seen": 34892048, "step": 16165 }, { "epoch": 2.637846655791191, "grad_norm": 0.22055679559707642, "learning_rate": 0.0009969064524917265, "loss": 0.1662, "num_input_tokens_seen": 34902512, "step": 16170 }, { "epoch": 2.6386623164763456, "grad_norm": 0.14423388242721558, "learning_rate": 0.000996898541717107, "loss": 0.0797, "num_input_tokens_seen": 34913264, "step": 16175 }, { "epoch": 2.639477977161501, "grad_norm": 0.28471753001213074, "learning_rate": 0.0009968906208722077, "loss": 0.2281, "num_input_tokens_seen": 34923568, "step": 16180 }, { "epoch": 2.6402936378466557, "grad_norm": 0.11436915397644043, "learning_rate": 0.00099688268995719, "loss": 0.145, "num_input_tokens_seen": 34935728, "step": 16185 }, { "epoch": 2.641109298531811, "grad_norm": 0.032494015991687775, "learning_rate": 0.0009968747489722141, "loss": 0.0885, "num_input_tokens_seen": 34945424, "step": 16190 }, { "epoch": 2.641924959216966, "grad_norm": 0.19248348474502563, "learning_rate": 0.0009968667979174412, "loss": 0.0811, "num_input_tokens_seen": 34955088, "step": 16195 }, { "epoch": 2.6427406199021206, "grad_norm": 0.05164299160242081, "learning_rate": 0.0009968588367930324, "loss": 0.1271, "num_input_tokens_seen": 34964912, "step": 16200 }, { "epoch": 2.6435562805872754, "grad_norm": 0.08268439024686813, "learning_rate": 0.0009968508655991489, "loss": 0.1375, "num_input_tokens_seen": 34974512, "step": 16205 }, { "epoch": 2.6443719412724307, "grad_norm": 0.016492389142513275, "learning_rate": 0.0009968428843359523, "loss": 0.1201, "num_input_tokens_seen": 34986480, "step": 16210 }, { "epoch": 2.6451876019575855, "grad_norm": 0.07583379000425339, "learning_rate": 0.0009968348930036043, "loss": 0.166, "num_input_tokens_seen": 34997776, "step": 16215 }, { "epoch": 2.6460032626427408, "grad_norm": 0.18386182188987732, "learning_rate": 0.000996826891602267, "loss": 0.125, "num_input_tokens_seen": 35008464, "step": 16220 }, { "epoch": 2.6468189233278956, "grad_norm": 0.060339607298374176, "learning_rate": 0.0009968188801321024, "loss": 0.1063, "num_input_tokens_seen": 35019856, "step": 16225 }, { "epoch": 2.6476345840130504, "grad_norm": 0.049145959317684174, "learning_rate": 0.000996810858593273, "loss": 0.0659, "num_input_tokens_seen": 35030448, "step": 16230 }, { "epoch": 2.6484502446982057, "grad_norm": 0.0780707523226738, "learning_rate": 0.000996802826985941, "loss": 0.0325, "num_input_tokens_seen": 35040624, "step": 16235 }, { "epoch": 2.6492659053833605, "grad_norm": 0.18097113072872162, "learning_rate": 0.0009967947853102698, "loss": 0.2693, "num_input_tokens_seen": 35051504, "step": 16240 }, { "epoch": 2.6500815660685157, "grad_norm": 0.30805906653404236, "learning_rate": 0.000996786733566422, "loss": 0.0694, "num_input_tokens_seen": 35062064, "step": 16245 }, { "epoch": 2.6508972267536706, "grad_norm": 0.09705768525600433, "learning_rate": 0.0009967786717545609, "loss": 0.1476, "num_input_tokens_seen": 35073392, "step": 16250 }, { "epoch": 2.6517128874388254, "grad_norm": 0.06425599753856659, "learning_rate": 0.0009967705998748496, "loss": 0.156, "num_input_tokens_seen": 35083888, "step": 16255 }, { "epoch": 2.65252854812398, "grad_norm": 0.10672083497047424, "learning_rate": 0.000996762517927452, "loss": 0.0603, "num_input_tokens_seen": 35094832, "step": 16260 }, { "epoch": 2.6533442088091355, "grad_norm": 0.057495590299367905, "learning_rate": 0.0009967544259125317, "loss": 0.0459, "num_input_tokens_seen": 35104688, "step": 16265 }, { "epoch": 2.6541598694942903, "grad_norm": 0.0719924345612526, "learning_rate": 0.000996746323830253, "loss": 0.1254, "num_input_tokens_seen": 35116624, "step": 16270 }, { "epoch": 2.6549755301794455, "grad_norm": 0.007228717673569918, "learning_rate": 0.0009967382116807797, "loss": 0.0657, "num_input_tokens_seen": 35128304, "step": 16275 }, { "epoch": 2.6557911908646004, "grad_norm": 0.36026492714881897, "learning_rate": 0.0009967300894642764, "loss": 0.2737, "num_input_tokens_seen": 35138704, "step": 16280 }, { "epoch": 2.656606851549755, "grad_norm": 0.10282549262046814, "learning_rate": 0.0009967219571809076, "loss": 0.0491, "num_input_tokens_seen": 35149360, "step": 16285 }, { "epoch": 2.6574225122349104, "grad_norm": 0.021380217745900154, "learning_rate": 0.0009967138148308384, "loss": 0.086, "num_input_tokens_seen": 35160880, "step": 16290 }, { "epoch": 2.6582381729200653, "grad_norm": 0.10661449283361435, "learning_rate": 0.0009967056624142336, "loss": 0.1181, "num_input_tokens_seen": 35172336, "step": 16295 }, { "epoch": 2.65905383360522, "grad_norm": 0.4914528727531433, "learning_rate": 0.0009966974999312584, "loss": 0.2245, "num_input_tokens_seen": 35181456, "step": 16300 }, { "epoch": 2.6598694942903753, "grad_norm": 0.06355796009302139, "learning_rate": 0.000996689327382078, "loss": 0.1373, "num_input_tokens_seen": 35192368, "step": 16305 }, { "epoch": 2.66068515497553, "grad_norm": 0.3796338737010956, "learning_rate": 0.0009966811447668586, "loss": 0.1299, "num_input_tokens_seen": 35202928, "step": 16310 }, { "epoch": 2.661500815660685, "grad_norm": 0.1853310614824295, "learning_rate": 0.0009966729520857658, "loss": 0.0788, "num_input_tokens_seen": 35213648, "step": 16315 }, { "epoch": 2.6623164763458402, "grad_norm": 0.2697104513645172, "learning_rate": 0.0009966647493389654, "loss": 0.2907, "num_input_tokens_seen": 35223696, "step": 16320 }, { "epoch": 2.663132137030995, "grad_norm": 0.29940056800842285, "learning_rate": 0.0009966565365266238, "loss": 0.093, "num_input_tokens_seen": 35234192, "step": 16325 }, { "epoch": 2.6639477977161503, "grad_norm": 0.21155039966106415, "learning_rate": 0.0009966483136489073, "loss": 0.1198, "num_input_tokens_seen": 35244304, "step": 16330 }, { "epoch": 2.664763458401305, "grad_norm": 0.17306700348854065, "learning_rate": 0.0009966400807059827, "loss": 0.1618, "num_input_tokens_seen": 35253680, "step": 16335 }, { "epoch": 2.66557911908646, "grad_norm": 0.37232574820518494, "learning_rate": 0.000996631837698017, "loss": 0.152, "num_input_tokens_seen": 35265232, "step": 16340 }, { "epoch": 2.6663947797716148, "grad_norm": 0.04756417125463486, "learning_rate": 0.000996623584625177, "loss": 0.1079, "num_input_tokens_seen": 35275344, "step": 16345 }, { "epoch": 2.66721044045677, "grad_norm": 0.06157650798559189, "learning_rate": 0.00099661532148763, "loss": 0.0883, "num_input_tokens_seen": 35286352, "step": 16350 }, { "epoch": 2.668026101141925, "grad_norm": 0.12347109615802765, "learning_rate": 0.0009966070482855436, "loss": 0.049, "num_input_tokens_seen": 35297168, "step": 16355 }, { "epoch": 2.66884176182708, "grad_norm": 0.12684176862239838, "learning_rate": 0.0009965987650190852, "loss": 0.1489, "num_input_tokens_seen": 35308880, "step": 16360 }, { "epoch": 2.669657422512235, "grad_norm": 0.16086122393608093, "learning_rate": 0.000996590471688423, "loss": 0.0432, "num_input_tokens_seen": 35319664, "step": 16365 }, { "epoch": 2.6704730831973897, "grad_norm": 0.013957779854536057, "learning_rate": 0.000996582168293725, "loss": 0.0145, "num_input_tokens_seen": 35330448, "step": 16370 }, { "epoch": 2.671288743882545, "grad_norm": 0.08329591900110245, "learning_rate": 0.0009965738548351592, "loss": 0.1156, "num_input_tokens_seen": 35342320, "step": 16375 }, { "epoch": 2.6721044045677, "grad_norm": 0.14150507748126984, "learning_rate": 0.0009965655313128945, "loss": 0.1712, "num_input_tokens_seen": 35352880, "step": 16380 }, { "epoch": 2.672920065252855, "grad_norm": 0.08169712871313095, "learning_rate": 0.0009965571977270994, "loss": 0.0797, "num_input_tokens_seen": 35364464, "step": 16385 }, { "epoch": 2.67373572593801, "grad_norm": 0.03572501242160797, "learning_rate": 0.0009965488540779426, "loss": 0.1506, "num_input_tokens_seen": 35375312, "step": 16390 }, { "epoch": 2.6745513866231647, "grad_norm": 0.026975184679031372, "learning_rate": 0.0009965405003655933, "loss": 0.1544, "num_input_tokens_seen": 35385808, "step": 16395 }, { "epoch": 2.6753670473083195, "grad_norm": 0.17339801788330078, "learning_rate": 0.000996532136590221, "loss": 0.2404, "num_input_tokens_seen": 35396720, "step": 16400 }, { "epoch": 2.676182707993475, "grad_norm": 0.09230559319257736, "learning_rate": 0.000996523762751995, "loss": 0.1064, "num_input_tokens_seen": 35407152, "step": 16405 }, { "epoch": 2.6769983686786296, "grad_norm": 0.20066949725151062, "learning_rate": 0.000996515378851085, "loss": 0.0794, "num_input_tokens_seen": 35417040, "step": 16410 }, { "epoch": 2.677814029363785, "grad_norm": 0.32734841108322144, "learning_rate": 0.0009965069848876609, "loss": 0.2026, "num_input_tokens_seen": 35427312, "step": 16415 }, { "epoch": 2.6786296900489397, "grad_norm": 0.40488773584365845, "learning_rate": 0.000996498580861893, "loss": 0.1542, "num_input_tokens_seen": 35439056, "step": 16420 }, { "epoch": 2.6794453507340945, "grad_norm": 0.018780147656798363, "learning_rate": 0.0009964901667739517, "loss": 0.1095, "num_input_tokens_seen": 35449136, "step": 16425 }, { "epoch": 2.6802610114192493, "grad_norm": 0.06358105689287186, "learning_rate": 0.000996481742624007, "loss": 0.109, "num_input_tokens_seen": 35459824, "step": 16430 }, { "epoch": 2.6810766721044046, "grad_norm": 0.04820743575692177, "learning_rate": 0.00099647330841223, "loss": 0.0394, "num_input_tokens_seen": 35470672, "step": 16435 }, { "epoch": 2.6818923327895594, "grad_norm": 0.2936408221721649, "learning_rate": 0.0009964648641387918, "loss": 0.1765, "num_input_tokens_seen": 35482064, "step": 16440 }, { "epoch": 2.6827079934747147, "grad_norm": 0.3161642551422119, "learning_rate": 0.000996456409803863, "loss": 0.2511, "num_input_tokens_seen": 35492112, "step": 16445 }, { "epoch": 2.6835236541598695, "grad_norm": 0.02953287400305271, "learning_rate": 0.0009964479454076156, "loss": 0.0898, "num_input_tokens_seen": 35502832, "step": 16450 }, { "epoch": 2.6843393148450243, "grad_norm": 0.2762671411037445, "learning_rate": 0.0009964394709502207, "loss": 0.114, "num_input_tokens_seen": 35513968, "step": 16455 }, { "epoch": 2.6851549755301796, "grad_norm": 0.10804454237222672, "learning_rate": 0.0009964309864318502, "loss": 0.0902, "num_input_tokens_seen": 35526512, "step": 16460 }, { "epoch": 2.6859706362153344, "grad_norm": 0.0768001452088356, "learning_rate": 0.0009964224918526758, "loss": 0.1479, "num_input_tokens_seen": 35536336, "step": 16465 }, { "epoch": 2.6867862969004896, "grad_norm": 0.213592991232872, "learning_rate": 0.0009964139872128699, "loss": 0.1994, "num_input_tokens_seen": 35546896, "step": 16470 }, { "epoch": 2.6876019575856445, "grad_norm": 0.31113916635513306, "learning_rate": 0.000996405472512605, "loss": 0.1289, "num_input_tokens_seen": 35556912, "step": 16475 }, { "epoch": 2.6884176182707993, "grad_norm": 0.1809568703174591, "learning_rate": 0.0009963969477520531, "loss": 0.1696, "num_input_tokens_seen": 35567120, "step": 16480 }, { "epoch": 2.689233278955954, "grad_norm": 0.05125579610466957, "learning_rate": 0.0009963884129313876, "loss": 0.0797, "num_input_tokens_seen": 35578160, "step": 16485 }, { "epoch": 2.6900489396411094, "grad_norm": 0.007092699408531189, "learning_rate": 0.0009963798680507811, "loss": 0.1376, "num_input_tokens_seen": 35588048, "step": 16490 }, { "epoch": 2.690864600326264, "grad_norm": 0.024526186287403107, "learning_rate": 0.0009963713131104068, "loss": 0.0663, "num_input_tokens_seen": 35598320, "step": 16495 }, { "epoch": 2.6916802610114194, "grad_norm": 0.020964832976460457, "learning_rate": 0.0009963627481104384, "loss": 0.2503, "num_input_tokens_seen": 35609904, "step": 16500 }, { "epoch": 2.6924959216965743, "grad_norm": 0.0777529925107956, "learning_rate": 0.000996354173051049, "loss": 0.1028, "num_input_tokens_seen": 35620464, "step": 16505 }, { "epoch": 2.693311582381729, "grad_norm": 0.1677270382642746, "learning_rate": 0.0009963455879324129, "loss": 0.2008, "num_input_tokens_seen": 35631952, "step": 16510 }, { "epoch": 2.6941272430668843, "grad_norm": 0.047426752746105194, "learning_rate": 0.0009963369927547035, "loss": 0.0498, "num_input_tokens_seen": 35643344, "step": 16515 }, { "epoch": 2.694942903752039, "grad_norm": 0.0982656478881836, "learning_rate": 0.0009963283875180952, "loss": 0.0568, "num_input_tokens_seen": 35654096, "step": 16520 }, { "epoch": 2.695758564437194, "grad_norm": 0.09715648740530014, "learning_rate": 0.0009963197722227628, "loss": 0.1423, "num_input_tokens_seen": 35665744, "step": 16525 }, { "epoch": 2.6965742251223492, "grad_norm": 0.01877671293914318, "learning_rate": 0.0009963111468688805, "loss": 0.0993, "num_input_tokens_seen": 35676784, "step": 16530 }, { "epoch": 2.697389885807504, "grad_norm": 0.06698782742023468, "learning_rate": 0.000996302511456623, "loss": 0.1455, "num_input_tokens_seen": 35686640, "step": 16535 }, { "epoch": 2.698205546492659, "grad_norm": 0.2722280025482178, "learning_rate": 0.0009962938659861657, "loss": 0.152, "num_input_tokens_seen": 35697680, "step": 16540 }, { "epoch": 2.699021207177814, "grad_norm": 0.10414028912782669, "learning_rate": 0.0009962852104576836, "loss": 0.1142, "num_input_tokens_seen": 35709136, "step": 16545 }, { "epoch": 2.699836867862969, "grad_norm": 0.21903131902217865, "learning_rate": 0.0009962765448713522, "loss": 0.1413, "num_input_tokens_seen": 35719984, "step": 16550 }, { "epoch": 2.700652528548124, "grad_norm": 0.10758703202009201, "learning_rate": 0.000996267869227347, "loss": 0.0781, "num_input_tokens_seen": 35730416, "step": 16555 }, { "epoch": 2.701468189233279, "grad_norm": 0.25017839670181274, "learning_rate": 0.0009962591835258436, "loss": 0.1088, "num_input_tokens_seen": 35740208, "step": 16560 }, { "epoch": 2.702283849918434, "grad_norm": 0.06840622425079346, "learning_rate": 0.0009962504877670186, "loss": 0.0691, "num_input_tokens_seen": 35750640, "step": 16565 }, { "epoch": 2.7030995106035887, "grad_norm": 0.03250577673316002, "learning_rate": 0.0009962417819510479, "loss": 0.1304, "num_input_tokens_seen": 35760336, "step": 16570 }, { "epoch": 2.703915171288744, "grad_norm": 0.00827009417116642, "learning_rate": 0.0009962330660781078, "loss": 0.0723, "num_input_tokens_seen": 35770672, "step": 16575 }, { "epoch": 2.7047308319738987, "grad_norm": 0.04564407095313072, "learning_rate": 0.0009962243401483752, "loss": 0.1187, "num_input_tokens_seen": 35780656, "step": 16580 }, { "epoch": 2.705546492659054, "grad_norm": 0.43104279041290283, "learning_rate": 0.000996215604162027, "loss": 0.1705, "num_input_tokens_seen": 35790320, "step": 16585 }, { "epoch": 2.706362153344209, "grad_norm": 0.027523692697286606, "learning_rate": 0.0009962068581192399, "loss": 0.0789, "num_input_tokens_seen": 35800016, "step": 16590 }, { "epoch": 2.7071778140293636, "grad_norm": 0.1364722102880478, "learning_rate": 0.0009961981020201913, "loss": 0.143, "num_input_tokens_seen": 35811952, "step": 16595 }, { "epoch": 2.707993474714519, "grad_norm": 0.023210443556308746, "learning_rate": 0.0009961893358650586, "loss": 0.1007, "num_input_tokens_seen": 35822384, "step": 16600 }, { "epoch": 2.7088091353996737, "grad_norm": 0.026472602039575577, "learning_rate": 0.00099618055965402, "loss": 0.0928, "num_input_tokens_seen": 35833808, "step": 16605 }, { "epoch": 2.709624796084829, "grad_norm": 0.14552193880081177, "learning_rate": 0.0009961717733872524, "loss": 0.2665, "num_input_tokens_seen": 35844976, "step": 16610 }, { "epoch": 2.710440456769984, "grad_norm": 0.08589240908622742, "learning_rate": 0.0009961629770649347, "loss": 0.1368, "num_input_tokens_seen": 35855312, "step": 16615 }, { "epoch": 2.7112561174551386, "grad_norm": 0.03177138417959213, "learning_rate": 0.0009961541706872447, "loss": 0.0893, "num_input_tokens_seen": 35866256, "step": 16620 }, { "epoch": 2.7120717781402934, "grad_norm": 0.08662046492099762, "learning_rate": 0.000996145354254361, "loss": 0.1226, "num_input_tokens_seen": 35875920, "step": 16625 }, { "epoch": 2.7128874388254487, "grad_norm": 0.0505714938044548, "learning_rate": 0.0009961365277664624, "loss": 0.0828, "num_input_tokens_seen": 35885264, "step": 16630 }, { "epoch": 2.7137030995106035, "grad_norm": 0.17067772150039673, "learning_rate": 0.0009961276912237276, "loss": 0.2038, "num_input_tokens_seen": 35896976, "step": 16635 }, { "epoch": 2.7145187601957588, "grad_norm": 0.04251964017748833, "learning_rate": 0.0009961188446263357, "loss": 0.1066, "num_input_tokens_seen": 35907824, "step": 16640 }, { "epoch": 2.7153344208809136, "grad_norm": 0.07396790385246277, "learning_rate": 0.0009961099879744661, "loss": 0.1028, "num_input_tokens_seen": 35917296, "step": 16645 }, { "epoch": 2.7161500815660684, "grad_norm": 0.15099787712097168, "learning_rate": 0.0009961011212682982, "loss": 0.1817, "num_input_tokens_seen": 35927344, "step": 16650 }, { "epoch": 2.7169657422512232, "grad_norm": 0.1596049666404724, "learning_rate": 0.0009960922445080118, "loss": 0.1012, "num_input_tokens_seen": 35938064, "step": 16655 }, { "epoch": 2.7177814029363785, "grad_norm": 0.09530681371688843, "learning_rate": 0.0009960833576937867, "loss": 0.1337, "num_input_tokens_seen": 35948976, "step": 16660 }, { "epoch": 2.7185970636215333, "grad_norm": 0.043561115860939026, "learning_rate": 0.000996074460825803, "loss": 0.0775, "num_input_tokens_seen": 35959632, "step": 16665 }, { "epoch": 2.7194127243066886, "grad_norm": 0.29739460349082947, "learning_rate": 0.0009960655539042412, "loss": 0.2536, "num_input_tokens_seen": 35970032, "step": 16670 }, { "epoch": 2.7202283849918434, "grad_norm": 0.05227457359433174, "learning_rate": 0.0009960566369292814, "loss": 0.0817, "num_input_tokens_seen": 35980016, "step": 16675 }, { "epoch": 2.721044045676998, "grad_norm": 0.1555776298046112, "learning_rate": 0.0009960477099011048, "loss": 0.1175, "num_input_tokens_seen": 35990160, "step": 16680 }, { "epoch": 2.7218597063621535, "grad_norm": 0.11949800699949265, "learning_rate": 0.000996038772819892, "loss": 0.1514, "num_input_tokens_seen": 36000080, "step": 16685 }, { "epoch": 2.7226753670473083, "grad_norm": 0.2714529037475586, "learning_rate": 0.0009960298256858238, "loss": 0.2106, "num_input_tokens_seen": 36010448, "step": 16690 }, { "epoch": 2.7234910277324635, "grad_norm": 0.08755488693714142, "learning_rate": 0.0009960208684990824, "loss": 0.2886, "num_input_tokens_seen": 36022032, "step": 16695 }, { "epoch": 2.7243066884176184, "grad_norm": 0.09630600363016129, "learning_rate": 0.0009960119012598489, "loss": 0.1332, "num_input_tokens_seen": 36033936, "step": 16700 }, { "epoch": 2.725122349102773, "grad_norm": 0.051263317465782166, "learning_rate": 0.0009960029239683046, "loss": 0.0733, "num_input_tokens_seen": 36044944, "step": 16705 }, { "epoch": 2.725938009787928, "grad_norm": 0.015054866671562195, "learning_rate": 0.000995993936624632, "loss": 0.0849, "num_input_tokens_seen": 36056048, "step": 16710 }, { "epoch": 2.7267536704730833, "grad_norm": 0.14939671754837036, "learning_rate": 0.000995984939229013, "loss": 0.1281, "num_input_tokens_seen": 36065648, "step": 16715 }, { "epoch": 2.727569331158238, "grad_norm": 0.01939992420375347, "learning_rate": 0.0009959759317816302, "loss": 0.133, "num_input_tokens_seen": 36076912, "step": 16720 }, { "epoch": 2.7283849918433933, "grad_norm": 0.11972417682409286, "learning_rate": 0.0009959669142826659, "loss": 0.0687, "num_input_tokens_seen": 36088432, "step": 16725 }, { "epoch": 2.729200652528548, "grad_norm": 0.10571133345365524, "learning_rate": 0.0009959578867323028, "loss": 0.1034, "num_input_tokens_seen": 36098864, "step": 16730 }, { "epoch": 2.730016313213703, "grad_norm": 0.14954891800880432, "learning_rate": 0.000995948849130724, "loss": 0.0928, "num_input_tokens_seen": 36109904, "step": 16735 }, { "epoch": 2.7308319738988582, "grad_norm": 0.26034170389175415, "learning_rate": 0.0009959398014781128, "loss": 0.14, "num_input_tokens_seen": 36120144, "step": 16740 }, { "epoch": 2.731647634584013, "grad_norm": 0.013762902468442917, "learning_rate": 0.000995930743774652, "loss": 0.1305, "num_input_tokens_seen": 36130000, "step": 16745 }, { "epoch": 2.732463295269168, "grad_norm": 0.18873825669288635, "learning_rate": 0.0009959216760205257, "loss": 0.076, "num_input_tokens_seen": 36140784, "step": 16750 }, { "epoch": 2.733278955954323, "grad_norm": 0.18349812924861908, "learning_rate": 0.0009959125982159176, "loss": 0.0801, "num_input_tokens_seen": 36151696, "step": 16755 }, { "epoch": 2.734094616639478, "grad_norm": 0.09744887053966522, "learning_rate": 0.0009959035103610115, "loss": 0.1002, "num_input_tokens_seen": 36163216, "step": 16760 }, { "epoch": 2.7349102773246328, "grad_norm": 0.02300012856721878, "learning_rate": 0.0009958944124559919, "loss": 0.1048, "num_input_tokens_seen": 36173872, "step": 16765 }, { "epoch": 2.735725938009788, "grad_norm": 0.12899620831012726, "learning_rate": 0.0009958853045010426, "loss": 0.0708, "num_input_tokens_seen": 36184880, "step": 16770 }, { "epoch": 2.736541598694943, "grad_norm": 0.0463111475110054, "learning_rate": 0.0009958761864963487, "loss": 0.0846, "num_input_tokens_seen": 36194992, "step": 16775 }, { "epoch": 2.737357259380098, "grad_norm": 0.004665287211537361, "learning_rate": 0.0009958670584420948, "loss": 0.1254, "num_input_tokens_seen": 36205904, "step": 16780 }, { "epoch": 2.738172920065253, "grad_norm": 0.09385996311903, "learning_rate": 0.000995857920338466, "loss": 0.2034, "num_input_tokens_seen": 36216368, "step": 16785 }, { "epoch": 2.7389885807504077, "grad_norm": 0.011358195915818214, "learning_rate": 0.0009958487721856474, "loss": 0.1393, "num_input_tokens_seen": 36227120, "step": 16790 }, { "epoch": 2.7398042414355626, "grad_norm": 0.038222286850214005, "learning_rate": 0.0009958396139838242, "loss": 0.0539, "num_input_tokens_seen": 36239376, "step": 16795 }, { "epoch": 2.740619902120718, "grad_norm": 0.015411783941090107, "learning_rate": 0.0009958304457331822, "loss": 0.0836, "num_input_tokens_seen": 36248752, "step": 16800 }, { "epoch": 2.7414355628058726, "grad_norm": 0.33093613386154175, "learning_rate": 0.0009958212674339075, "loss": 0.1454, "num_input_tokens_seen": 36260400, "step": 16805 }, { "epoch": 2.742251223491028, "grad_norm": 0.32975584268569946, "learning_rate": 0.0009958120790861855, "loss": 0.1453, "num_input_tokens_seen": 36271824, "step": 16810 }, { "epoch": 2.7430668841761827, "grad_norm": 0.2892857491970062, "learning_rate": 0.000995802880690203, "loss": 0.261, "num_input_tokens_seen": 36282480, "step": 16815 }, { "epoch": 2.7438825448613375, "grad_norm": 0.14340080320835114, "learning_rate": 0.000995793672246146, "loss": 0.1293, "num_input_tokens_seen": 36294224, "step": 16820 }, { "epoch": 2.744698205546493, "grad_norm": 0.01589050143957138, "learning_rate": 0.0009957844537542013, "loss": 0.1475, "num_input_tokens_seen": 36304816, "step": 16825 }, { "epoch": 2.7455138662316476, "grad_norm": 0.23456436395645142, "learning_rate": 0.0009957752252145557, "loss": 0.1676, "num_input_tokens_seen": 36317232, "step": 16830 }, { "epoch": 2.746329526916803, "grad_norm": 0.08127406984567642, "learning_rate": 0.0009957659866273963, "loss": 0.1416, "num_input_tokens_seen": 36327472, "step": 16835 }, { "epoch": 2.7471451876019577, "grad_norm": 0.04334314540028572, "learning_rate": 0.0009957567379929103, "loss": 0.1127, "num_input_tokens_seen": 36339184, "step": 16840 }, { "epoch": 2.7479608482871125, "grad_norm": 0.052842382341623306, "learning_rate": 0.0009957474793112848, "loss": 0.0712, "num_input_tokens_seen": 36349456, "step": 16845 }, { "epoch": 2.7487765089722673, "grad_norm": 0.21459154784679413, "learning_rate": 0.0009957382105827079, "loss": 0.2259, "num_input_tokens_seen": 36360080, "step": 16850 }, { "epoch": 2.7495921696574226, "grad_norm": 0.02801487408578396, "learning_rate": 0.0009957289318073674, "loss": 0.0601, "num_input_tokens_seen": 36370928, "step": 16855 }, { "epoch": 2.7504078303425774, "grad_norm": 0.036222945898771286, "learning_rate": 0.000995719642985451, "loss": 0.1911, "num_input_tokens_seen": 36382192, "step": 16860 }, { "epoch": 2.7512234910277327, "grad_norm": 0.07335011661052704, "learning_rate": 0.0009957103441171472, "loss": 0.0771, "num_input_tokens_seen": 36393072, "step": 16865 }, { "epoch": 2.7520391517128875, "grad_norm": 0.06008516624569893, "learning_rate": 0.0009957010352026447, "loss": 0.1471, "num_input_tokens_seen": 36403248, "step": 16870 }, { "epoch": 2.7528548123980423, "grad_norm": 0.04127403348684311, "learning_rate": 0.0009956917162421317, "loss": 0.163, "num_input_tokens_seen": 36414224, "step": 16875 }, { "epoch": 2.753670473083197, "grad_norm": 0.10111255198717117, "learning_rate": 0.0009956823872357972, "loss": 0.074, "num_input_tokens_seen": 36425776, "step": 16880 }, { "epoch": 2.7544861337683524, "grad_norm": 0.28562769293785095, "learning_rate": 0.0009956730481838303, "loss": 0.1342, "num_input_tokens_seen": 36436304, "step": 16885 }, { "epoch": 2.755301794453507, "grad_norm": 0.4154522120952606, "learning_rate": 0.0009956636990864202, "loss": 0.2255, "num_input_tokens_seen": 36448144, "step": 16890 }, { "epoch": 2.7561174551386625, "grad_norm": 0.12270428985357285, "learning_rate": 0.0009956543399437569, "loss": 0.0579, "num_input_tokens_seen": 36459984, "step": 16895 }, { "epoch": 2.7569331158238173, "grad_norm": 0.3817971646785736, "learning_rate": 0.0009956449707560291, "loss": 0.1085, "num_input_tokens_seen": 36469296, "step": 16900 }, { "epoch": 2.757748776508972, "grad_norm": 0.03330438211560249, "learning_rate": 0.0009956355915234274, "loss": 0.1708, "num_input_tokens_seen": 36480912, "step": 16905 }, { "epoch": 2.7585644371941274, "grad_norm": 0.006395846139639616, "learning_rate": 0.0009956262022461416, "loss": 0.0985, "num_input_tokens_seen": 36492304, "step": 16910 }, { "epoch": 2.759380097879282, "grad_norm": 0.04634559899568558, "learning_rate": 0.0009956168029243621, "loss": 0.2179, "num_input_tokens_seen": 36501008, "step": 16915 }, { "epoch": 2.7601957585644374, "grad_norm": 0.29531002044677734, "learning_rate": 0.0009956073935582794, "loss": 0.1281, "num_input_tokens_seen": 36511472, "step": 16920 }, { "epoch": 2.7610114192495923, "grad_norm": 0.05817113444209099, "learning_rate": 0.000995597974148084, "loss": 0.23, "num_input_tokens_seen": 36522160, "step": 16925 }, { "epoch": 2.761827079934747, "grad_norm": 0.13715581595897675, "learning_rate": 0.0009955885446939672, "loss": 0.1054, "num_input_tokens_seen": 36533584, "step": 16930 }, { "epoch": 2.762642740619902, "grad_norm": 0.07386986166238785, "learning_rate": 0.0009955791051961195, "loss": 0.0662, "num_input_tokens_seen": 36544112, "step": 16935 }, { "epoch": 2.763458401305057, "grad_norm": 0.35734301805496216, "learning_rate": 0.000995569655654733, "loss": 0.1563, "num_input_tokens_seen": 36555216, "step": 16940 }, { "epoch": 2.764274061990212, "grad_norm": 0.15354810655117035, "learning_rate": 0.0009955601960699983, "loss": 0.1154, "num_input_tokens_seen": 36566128, "step": 16945 }, { "epoch": 2.7650897226753672, "grad_norm": 0.13912299275398254, "learning_rate": 0.0009955507264421079, "loss": 0.1023, "num_input_tokens_seen": 36577840, "step": 16950 }, { "epoch": 2.765905383360522, "grad_norm": 0.1463913768529892, "learning_rate": 0.0009955412467712531, "loss": 0.1029, "num_input_tokens_seen": 36587984, "step": 16955 }, { "epoch": 2.766721044045677, "grad_norm": 0.07335563749074936, "learning_rate": 0.0009955317570576265, "loss": 0.0562, "num_input_tokens_seen": 36599184, "step": 16960 }, { "epoch": 2.767536704730832, "grad_norm": 0.0504898726940155, "learning_rate": 0.0009955222573014202, "loss": 0.183, "num_input_tokens_seen": 36608656, "step": 16965 }, { "epoch": 2.768352365415987, "grad_norm": 0.07223479449748993, "learning_rate": 0.0009955127475028266, "loss": 0.0593, "num_input_tokens_seen": 36619376, "step": 16970 }, { "epoch": 2.7691680261011418, "grad_norm": 0.033831678330898285, "learning_rate": 0.0009955032276620388, "loss": 0.0547, "num_input_tokens_seen": 36630608, "step": 16975 }, { "epoch": 2.769983686786297, "grad_norm": 0.05858471244573593, "learning_rate": 0.0009954936977792492, "loss": 0.1003, "num_input_tokens_seen": 36643248, "step": 16980 }, { "epoch": 2.770799347471452, "grad_norm": 0.060462482273578644, "learning_rate": 0.0009954841578546515, "loss": 0.0845, "num_input_tokens_seen": 36653616, "step": 16985 }, { "epoch": 2.7716150081566067, "grad_norm": 0.14699064195156097, "learning_rate": 0.0009954746078884387, "loss": 0.1507, "num_input_tokens_seen": 36664208, "step": 16990 }, { "epoch": 2.772430668841762, "grad_norm": 0.08873192220926285, "learning_rate": 0.0009954650478808042, "loss": 0.1303, "num_input_tokens_seen": 36674864, "step": 16995 }, { "epoch": 2.7732463295269167, "grad_norm": 0.02031448297202587, "learning_rate": 0.0009954554778319423, "loss": 0.018, "num_input_tokens_seen": 36686960, "step": 17000 }, { "epoch": 2.774061990212072, "grad_norm": 0.24523167312145233, "learning_rate": 0.0009954458977420465, "loss": 0.2362, "num_input_tokens_seen": 36698736, "step": 17005 }, { "epoch": 2.774877650897227, "grad_norm": 0.007347160950303078, "learning_rate": 0.000995436307611311, "loss": 0.062, "num_input_tokens_seen": 36708624, "step": 17010 }, { "epoch": 2.7756933115823816, "grad_norm": 0.08425823599100113, "learning_rate": 0.0009954267074399302, "loss": 0.064, "num_input_tokens_seen": 36719248, "step": 17015 }, { "epoch": 2.7765089722675365, "grad_norm": 0.03476683050394058, "learning_rate": 0.0009954170972280988, "loss": 0.1202, "num_input_tokens_seen": 36729968, "step": 17020 }, { "epoch": 2.7773246329526917, "grad_norm": 0.06355013698339462, "learning_rate": 0.0009954074769760112, "loss": 0.0787, "num_input_tokens_seen": 36740912, "step": 17025 }, { "epoch": 2.7781402936378465, "grad_norm": 0.09173543006181717, "learning_rate": 0.0009953978466838629, "loss": 0.0528, "num_input_tokens_seen": 36752688, "step": 17030 }, { "epoch": 2.778955954323002, "grad_norm": 0.03648596629500389, "learning_rate": 0.0009953882063518486, "loss": 0.1048, "num_input_tokens_seen": 36763184, "step": 17035 }, { "epoch": 2.7797716150081566, "grad_norm": 2.9722390174865723, "learning_rate": 0.000995378555980164, "loss": 0.1309, "num_input_tokens_seen": 36774256, "step": 17040 }, { "epoch": 2.7805872756933114, "grad_norm": 0.25080835819244385, "learning_rate": 0.0009953688955690045, "loss": 0.1298, "num_input_tokens_seen": 36785296, "step": 17045 }, { "epoch": 2.7814029363784667, "grad_norm": 0.04424146190285683, "learning_rate": 0.0009953592251185658, "loss": 0.0725, "num_input_tokens_seen": 36795664, "step": 17050 }, { "epoch": 2.7822185970636215, "grad_norm": 0.051554128527641296, "learning_rate": 0.000995349544629044, "loss": 0.0598, "num_input_tokens_seen": 36806448, "step": 17055 }, { "epoch": 2.7830342577487768, "grad_norm": 0.13502323627471924, "learning_rate": 0.0009953398541006353, "loss": 0.1835, "num_input_tokens_seen": 36817584, "step": 17060 }, { "epoch": 2.7838499184339316, "grad_norm": 0.008884107694029808, "learning_rate": 0.0009953301535335361, "loss": 0.0902, "num_input_tokens_seen": 36828176, "step": 17065 }, { "epoch": 2.7846655791190864, "grad_norm": 0.0390922985970974, "learning_rate": 0.000995320442927943, "loss": 0.0223, "num_input_tokens_seen": 36839440, "step": 17070 }, { "epoch": 2.7854812398042412, "grad_norm": 0.05051243305206299, "learning_rate": 0.0009953107222840528, "loss": 0.1935, "num_input_tokens_seen": 36849424, "step": 17075 }, { "epoch": 2.7862969004893965, "grad_norm": 0.10211008042097092, "learning_rate": 0.0009953009916020624, "loss": 0.0385, "num_input_tokens_seen": 36859600, "step": 17080 }, { "epoch": 2.7871125611745513, "grad_norm": 0.047614820301532745, "learning_rate": 0.0009952912508821691, "loss": 0.2805, "num_input_tokens_seen": 36869328, "step": 17085 }, { "epoch": 2.7879282218597066, "grad_norm": 0.23581935465335846, "learning_rate": 0.0009952815001245702, "loss": 0.2678, "num_input_tokens_seen": 36881616, "step": 17090 }, { "epoch": 2.7887438825448614, "grad_norm": 0.12701764702796936, "learning_rate": 0.0009952717393294636, "loss": 0.1181, "num_input_tokens_seen": 36891984, "step": 17095 }, { "epoch": 2.789559543230016, "grad_norm": 0.08800496906042099, "learning_rate": 0.0009952619684970468, "loss": 0.1528, "num_input_tokens_seen": 36904496, "step": 17100 }, { "epoch": 2.790375203915171, "grad_norm": 0.03913078457117081, "learning_rate": 0.0009952521876275178, "loss": 0.0548, "num_input_tokens_seen": 36916112, "step": 17105 }, { "epoch": 2.7911908646003263, "grad_norm": 0.07983973622322083, "learning_rate": 0.0009952423967210752, "loss": 0.3558, "num_input_tokens_seen": 36927024, "step": 17110 }, { "epoch": 2.792006525285481, "grad_norm": 0.23866671323776245, "learning_rate": 0.0009952325957779168, "loss": 0.2566, "num_input_tokens_seen": 36937904, "step": 17115 }, { "epoch": 2.7928221859706364, "grad_norm": 0.06691578775644302, "learning_rate": 0.0009952227847982418, "loss": 0.1988, "num_input_tokens_seen": 36948784, "step": 17120 }, { "epoch": 2.793637846655791, "grad_norm": 0.035940021276474, "learning_rate": 0.000995212963782249, "loss": 0.1817, "num_input_tokens_seen": 36959888, "step": 17125 }, { "epoch": 2.794453507340946, "grad_norm": 0.10588597506284714, "learning_rate": 0.000995203132730137, "loss": 0.1137, "num_input_tokens_seen": 36970160, "step": 17130 }, { "epoch": 2.7952691680261013, "grad_norm": 0.11466117203235626, "learning_rate": 0.0009951932916421053, "loss": 0.21, "num_input_tokens_seen": 36980176, "step": 17135 }, { "epoch": 2.796084828711256, "grad_norm": 0.055492889136075974, "learning_rate": 0.0009951834405183535, "loss": 0.0823, "num_input_tokens_seen": 36991120, "step": 17140 }, { "epoch": 2.7969004893964113, "grad_norm": 0.08512550592422485, "learning_rate": 0.0009951735793590811, "loss": 0.0914, "num_input_tokens_seen": 37001936, "step": 17145 }, { "epoch": 2.797716150081566, "grad_norm": 0.161712646484375, "learning_rate": 0.0009951637081644879, "loss": 0.1336, "num_input_tokens_seen": 37012304, "step": 17150 }, { "epoch": 2.798531810766721, "grad_norm": 0.05392231047153473, "learning_rate": 0.000995153826934774, "loss": 0.1269, "num_input_tokens_seen": 37023184, "step": 17155 }, { "epoch": 2.799347471451876, "grad_norm": 0.19957001507282257, "learning_rate": 0.0009951439356701394, "loss": 0.2885, "num_input_tokens_seen": 37032464, "step": 17160 }, { "epoch": 2.800163132137031, "grad_norm": 0.06437575072050095, "learning_rate": 0.0009951340343707852, "loss": 0.1218, "num_input_tokens_seen": 37043600, "step": 17165 }, { "epoch": 2.800978792822186, "grad_norm": 0.05523599311709404, "learning_rate": 0.0009951241230369114, "loss": 0.0558, "num_input_tokens_seen": 37054736, "step": 17170 }, { "epoch": 2.801794453507341, "grad_norm": 0.06531516462564468, "learning_rate": 0.0009951142016687193, "loss": 0.1309, "num_input_tokens_seen": 37064784, "step": 17175 }, { "epoch": 2.802610114192496, "grad_norm": 0.22749574482440948, "learning_rate": 0.0009951042702664099, "loss": 0.1156, "num_input_tokens_seen": 37076048, "step": 17180 }, { "epoch": 2.8034257748776508, "grad_norm": 0.016217265278100967, "learning_rate": 0.0009950943288301842, "loss": 0.0544, "num_input_tokens_seen": 37086320, "step": 17185 }, { "epoch": 2.804241435562806, "grad_norm": 0.05535699054598808, "learning_rate": 0.0009950843773602438, "loss": 0.1382, "num_input_tokens_seen": 37096624, "step": 17190 }, { "epoch": 2.805057096247961, "grad_norm": 0.04713506996631622, "learning_rate": 0.0009950744158567905, "loss": 0.1113, "num_input_tokens_seen": 37107920, "step": 17195 }, { "epoch": 2.8058727569331157, "grad_norm": 0.2325257956981659, "learning_rate": 0.0009950644443200262, "loss": 0.0938, "num_input_tokens_seen": 37119376, "step": 17200 }, { "epoch": 2.806688417618271, "grad_norm": 0.1204783171415329, "learning_rate": 0.0009950544627501529, "loss": 0.0429, "num_input_tokens_seen": 37129296, "step": 17205 }, { "epoch": 2.8075040783034257, "grad_norm": 0.1460723578929901, "learning_rate": 0.0009950444711473727, "loss": 0.0967, "num_input_tokens_seen": 37140464, "step": 17210 }, { "epoch": 2.8083197389885806, "grad_norm": 0.19406886398792267, "learning_rate": 0.0009950344695118885, "loss": 0.081, "num_input_tokens_seen": 37152304, "step": 17215 }, { "epoch": 2.809135399673736, "grad_norm": 0.1977512240409851, "learning_rate": 0.0009950244578439027, "loss": 0.0667, "num_input_tokens_seen": 37162064, "step": 17220 }, { "epoch": 2.8099510603588906, "grad_norm": 0.006031016819179058, "learning_rate": 0.0009950144361436182, "loss": 0.09, "num_input_tokens_seen": 37172208, "step": 17225 }, { "epoch": 2.810766721044046, "grad_norm": 0.26271548867225647, "learning_rate": 0.0009950044044112383, "loss": 0.1832, "num_input_tokens_seen": 37182832, "step": 17230 }, { "epoch": 2.8115823817292007, "grad_norm": 0.015055930241942406, "learning_rate": 0.000994994362646966, "loss": 0.1124, "num_input_tokens_seen": 37193744, "step": 17235 }, { "epoch": 2.8123980424143555, "grad_norm": 0.3475509285926819, "learning_rate": 0.0009949843108510053, "loss": 0.1237, "num_input_tokens_seen": 37205680, "step": 17240 }, { "epoch": 2.8132137030995104, "grad_norm": 0.05111459642648697, "learning_rate": 0.0009949742490235594, "loss": 0.0895, "num_input_tokens_seen": 37215792, "step": 17245 }, { "epoch": 2.8140293637846656, "grad_norm": 0.008972534909844398, "learning_rate": 0.0009949641771648324, "loss": 0.0839, "num_input_tokens_seen": 37226672, "step": 17250 }, { "epoch": 2.8148450244698204, "grad_norm": 0.17732147872447968, "learning_rate": 0.0009949540952750285, "loss": 0.1308, "num_input_tokens_seen": 37237840, "step": 17255 }, { "epoch": 2.8156606851549757, "grad_norm": 0.15822334587574005, "learning_rate": 0.000994944003354352, "loss": 0.1167, "num_input_tokens_seen": 37246512, "step": 17260 }, { "epoch": 2.8164763458401305, "grad_norm": 0.22857484221458435, "learning_rate": 0.0009949339014030075, "loss": 0.0983, "num_input_tokens_seen": 37256752, "step": 17265 }, { "epoch": 2.8172920065252853, "grad_norm": 0.3095235526561737, "learning_rate": 0.0009949237894211994, "loss": 0.0725, "num_input_tokens_seen": 37267408, "step": 17270 }, { "epoch": 2.8181076672104406, "grad_norm": 0.04649650678038597, "learning_rate": 0.000994913667409133, "loss": 0.1148, "num_input_tokens_seen": 37278128, "step": 17275 }, { "epoch": 2.8189233278955954, "grad_norm": 0.2554285526275635, "learning_rate": 0.0009949035353670132, "loss": 0.1668, "num_input_tokens_seen": 37289232, "step": 17280 }, { "epoch": 2.8197389885807507, "grad_norm": 0.08338715881109238, "learning_rate": 0.0009948933932950456, "loss": 0.1178, "num_input_tokens_seen": 37299184, "step": 17285 }, { "epoch": 2.8205546492659055, "grad_norm": 0.03342604637145996, "learning_rate": 0.0009948832411934352, "loss": 0.0879, "num_input_tokens_seen": 37309168, "step": 17290 }, { "epoch": 2.8213703099510603, "grad_norm": 0.4044336974620819, "learning_rate": 0.0009948730790623884, "loss": 0.3918, "num_input_tokens_seen": 37319312, "step": 17295 }, { "epoch": 2.822185970636215, "grad_norm": 0.19606174528598785, "learning_rate": 0.0009948629069021107, "loss": 0.1901, "num_input_tokens_seen": 37330960, "step": 17300 }, { "epoch": 2.8230016313213704, "grad_norm": 0.16445383429527283, "learning_rate": 0.0009948527247128085, "loss": 0.0684, "num_input_tokens_seen": 37341488, "step": 17305 }, { "epoch": 2.823817292006525, "grad_norm": 0.06228538975119591, "learning_rate": 0.0009948425324946882, "loss": 0.1346, "num_input_tokens_seen": 37352816, "step": 17310 }, { "epoch": 2.8246329526916805, "grad_norm": 0.47119632363319397, "learning_rate": 0.0009948323302479561, "loss": 0.171, "num_input_tokens_seen": 37363024, "step": 17315 }, { "epoch": 2.8254486133768353, "grad_norm": 0.06357542425394058, "learning_rate": 0.000994822117972819, "loss": 0.1433, "num_input_tokens_seen": 37373040, "step": 17320 }, { "epoch": 2.82626427406199, "grad_norm": 0.10284095257520676, "learning_rate": 0.000994811895669484, "loss": 0.1057, "num_input_tokens_seen": 37385264, "step": 17325 }, { "epoch": 2.827079934747145, "grad_norm": 0.07552319765090942, "learning_rate": 0.0009948016633381583, "loss": 0.0702, "num_input_tokens_seen": 37396304, "step": 17330 }, { "epoch": 2.8278955954323, "grad_norm": 0.08341233432292938, "learning_rate": 0.0009947914209790492, "loss": 0.1312, "num_input_tokens_seen": 37405936, "step": 17335 }, { "epoch": 2.828711256117455, "grad_norm": 0.10752265155315399, "learning_rate": 0.0009947811685923642, "loss": 0.1083, "num_input_tokens_seen": 37416368, "step": 17340 }, { "epoch": 2.8295269168026103, "grad_norm": 0.14965581893920898, "learning_rate": 0.0009947709061783113, "loss": 0.0953, "num_input_tokens_seen": 37427248, "step": 17345 }, { "epoch": 2.830342577487765, "grad_norm": 0.01618516817688942, "learning_rate": 0.000994760633737098, "loss": 0.0684, "num_input_tokens_seen": 37440016, "step": 17350 }, { "epoch": 2.83115823817292, "grad_norm": 0.057069338858127594, "learning_rate": 0.0009947503512689332, "loss": 0.1047, "num_input_tokens_seen": 37450544, "step": 17355 }, { "epoch": 2.831973898858075, "grad_norm": 0.3303675949573517, "learning_rate": 0.0009947400587740245, "loss": 0.1249, "num_input_tokens_seen": 37461680, "step": 17360 }, { "epoch": 2.83278955954323, "grad_norm": 0.008456055074930191, "learning_rate": 0.0009947297562525811, "loss": 0.022, "num_input_tokens_seen": 37473776, "step": 17365 }, { "epoch": 2.8336052202283852, "grad_norm": 0.0926298052072525, "learning_rate": 0.0009947194437048116, "loss": 0.0857, "num_input_tokens_seen": 37485264, "step": 17370 }, { "epoch": 2.83442088091354, "grad_norm": 0.011585327796638012, "learning_rate": 0.000994709121130925, "loss": 0.0812, "num_input_tokens_seen": 37494832, "step": 17375 }, { "epoch": 2.835236541598695, "grad_norm": 0.10047841817140579, "learning_rate": 0.0009946987885311304, "loss": 0.1362, "num_input_tokens_seen": 37506192, "step": 17380 }, { "epoch": 2.8360522022838497, "grad_norm": 0.22406654059886932, "learning_rate": 0.0009946884459056374, "loss": 0.1045, "num_input_tokens_seen": 37516720, "step": 17385 }, { "epoch": 2.836867862969005, "grad_norm": 0.3409539461135864, "learning_rate": 0.0009946780932546552, "loss": 0.0727, "num_input_tokens_seen": 37526512, "step": 17390 }, { "epoch": 2.8376835236541598, "grad_norm": 0.3181345462799072, "learning_rate": 0.0009946677305783943, "loss": 0.0693, "num_input_tokens_seen": 37537584, "step": 17395 }, { "epoch": 2.838499184339315, "grad_norm": 0.07474850863218307, "learning_rate": 0.000994657357877064, "loss": 0.0754, "num_input_tokens_seen": 37548080, "step": 17400 }, { "epoch": 2.83931484502447, "grad_norm": 0.44936174154281616, "learning_rate": 0.0009946469751508748, "loss": 0.2652, "num_input_tokens_seen": 37559408, "step": 17405 }, { "epoch": 2.8401305057096247, "grad_norm": 0.10020944476127625, "learning_rate": 0.0009946365824000374, "loss": 0.1235, "num_input_tokens_seen": 37571984, "step": 17410 }, { "epoch": 2.84094616639478, "grad_norm": 0.36994320154190063, "learning_rate": 0.000994626179624762, "loss": 0.2553, "num_input_tokens_seen": 37583120, "step": 17415 }, { "epoch": 2.8417618270799347, "grad_norm": 0.1831275373697281, "learning_rate": 0.0009946157668252597, "loss": 0.1014, "num_input_tokens_seen": 37594448, "step": 17420 }, { "epoch": 2.8425774877650896, "grad_norm": 0.1548808068037033, "learning_rate": 0.0009946053440017413, "loss": 0.1633, "num_input_tokens_seen": 37605104, "step": 17425 }, { "epoch": 2.843393148450245, "grad_norm": 0.09173159301280975, "learning_rate": 0.000994594911154418, "loss": 0.1092, "num_input_tokens_seen": 37616144, "step": 17430 }, { "epoch": 2.8442088091353996, "grad_norm": 0.04721226543188095, "learning_rate": 0.0009945844682835018, "loss": 0.0661, "num_input_tokens_seen": 37627472, "step": 17435 }, { "epoch": 2.8450244698205545, "grad_norm": 0.3377913236618042, "learning_rate": 0.0009945740153892036, "loss": 0.1691, "num_input_tokens_seen": 37636976, "step": 17440 }, { "epoch": 2.8458401305057097, "grad_norm": 0.15685850381851196, "learning_rate": 0.0009945635524717359, "loss": 0.0818, "num_input_tokens_seen": 37646864, "step": 17445 }, { "epoch": 2.8466557911908645, "grad_norm": 0.2095039188861847, "learning_rate": 0.00099455307953131, "loss": 0.0751, "num_input_tokens_seen": 37659120, "step": 17450 }, { "epoch": 2.84747145187602, "grad_norm": 0.01756330393254757, "learning_rate": 0.0009945425965681388, "loss": 0.0764, "num_input_tokens_seen": 37670320, "step": 17455 }, { "epoch": 2.8482871125611746, "grad_norm": 0.5726795792579651, "learning_rate": 0.0009945321035824343, "loss": 0.168, "num_input_tokens_seen": 37679792, "step": 17460 }, { "epoch": 2.8491027732463294, "grad_norm": 0.06907206028699875, "learning_rate": 0.0009945216005744096, "loss": 0.0441, "num_input_tokens_seen": 37690320, "step": 17465 }, { "epoch": 2.8499184339314843, "grad_norm": 0.010861308313906193, "learning_rate": 0.0009945110875442774, "loss": 0.0363, "num_input_tokens_seen": 37700336, "step": 17470 }, { "epoch": 2.8507340946166395, "grad_norm": 0.18746575713157654, "learning_rate": 0.0009945005644922504, "loss": 0.0768, "num_input_tokens_seen": 37711472, "step": 17475 }, { "epoch": 2.8515497553017943, "grad_norm": 0.0585092194378376, "learning_rate": 0.0009944900314185422, "loss": 0.023, "num_input_tokens_seen": 37723408, "step": 17480 }, { "epoch": 2.8523654159869496, "grad_norm": 0.043622761964797974, "learning_rate": 0.0009944794883233663, "loss": 0.0497, "num_input_tokens_seen": 37732592, "step": 17485 }, { "epoch": 2.8531810766721044, "grad_norm": 0.023899629712104797, "learning_rate": 0.0009944689352069363, "loss": 0.0937, "num_input_tokens_seen": 37743952, "step": 17490 }, { "epoch": 2.8539967373572592, "grad_norm": 0.015929071232676506, "learning_rate": 0.000994458372069466, "loss": 0.0389, "num_input_tokens_seen": 37754224, "step": 17495 }, { "epoch": 2.8548123980424145, "grad_norm": 0.13006608188152313, "learning_rate": 0.0009944477989111695, "loss": 0.1159, "num_input_tokens_seen": 37766064, "step": 17500 }, { "epoch": 2.8556280587275693, "grad_norm": 0.31213119626045227, "learning_rate": 0.0009944372157322612, "loss": 0.4099, "num_input_tokens_seen": 37777808, "step": 17505 }, { "epoch": 2.8564437194127246, "grad_norm": 0.11234349012374878, "learning_rate": 0.0009944266225329552, "loss": 0.143, "num_input_tokens_seen": 37788112, "step": 17510 }, { "epoch": 2.8572593800978794, "grad_norm": 0.02435676008462906, "learning_rate": 0.0009944160193134668, "loss": 0.1176, "num_input_tokens_seen": 37797968, "step": 17515 }, { "epoch": 2.858075040783034, "grad_norm": 0.16125547885894775, "learning_rate": 0.0009944054060740104, "loss": 0.1754, "num_input_tokens_seen": 37809264, "step": 17520 }, { "epoch": 2.858890701468189, "grad_norm": 0.11402436345815659, "learning_rate": 0.0009943947828148013, "loss": 0.1628, "num_input_tokens_seen": 37820560, "step": 17525 }, { "epoch": 2.8597063621533443, "grad_norm": 0.21214991807937622, "learning_rate": 0.0009943841495360546, "loss": 0.1773, "num_input_tokens_seen": 37832560, "step": 17530 }, { "epoch": 2.860522022838499, "grad_norm": 0.07254047691822052, "learning_rate": 0.0009943735062379862, "loss": 0.0997, "num_input_tokens_seen": 37843344, "step": 17535 }, { "epoch": 2.8613376835236544, "grad_norm": 0.09635142982006073, "learning_rate": 0.0009943628529208114, "loss": 0.1049, "num_input_tokens_seen": 37852016, "step": 17540 }, { "epoch": 2.862153344208809, "grad_norm": 0.2803623378276825, "learning_rate": 0.0009943521895847461, "loss": 0.2001, "num_input_tokens_seen": 37863184, "step": 17545 }, { "epoch": 2.862969004893964, "grad_norm": 0.15080243349075317, "learning_rate": 0.0009943415162300066, "loss": 0.1092, "num_input_tokens_seen": 37874320, "step": 17550 }, { "epoch": 2.863784665579119, "grad_norm": 0.27501583099365234, "learning_rate": 0.0009943308328568094, "loss": 0.1965, "num_input_tokens_seen": 37885488, "step": 17555 }, { "epoch": 2.864600326264274, "grad_norm": 0.04714236781001091, "learning_rate": 0.0009943201394653706, "loss": 0.0518, "num_input_tokens_seen": 37895472, "step": 17560 }, { "epoch": 2.865415986949429, "grad_norm": 0.036594655364751816, "learning_rate": 0.0009943094360559072, "loss": 0.1097, "num_input_tokens_seen": 37907472, "step": 17565 }, { "epoch": 2.866231647634584, "grad_norm": 0.15192469954490662, "learning_rate": 0.0009942987226286358, "loss": 0.1907, "num_input_tokens_seen": 37918800, "step": 17570 }, { "epoch": 2.867047308319739, "grad_norm": 0.3447050154209137, "learning_rate": 0.0009942879991837739, "loss": 0.1275, "num_input_tokens_seen": 37930000, "step": 17575 }, { "epoch": 2.867862969004894, "grad_norm": 0.16697214543819427, "learning_rate": 0.0009942772657215385, "loss": 0.0906, "num_input_tokens_seen": 37940368, "step": 17580 }, { "epoch": 2.868678629690049, "grad_norm": 0.07197245210409164, "learning_rate": 0.0009942665222421475, "loss": 0.1743, "num_input_tokens_seen": 37951280, "step": 17585 }, { "epoch": 2.869494290375204, "grad_norm": 0.11030949652194977, "learning_rate": 0.0009942557687458182, "loss": 0.1734, "num_input_tokens_seen": 37963568, "step": 17590 }, { "epoch": 2.870309951060359, "grad_norm": 0.21147139370441437, "learning_rate": 0.0009942450052327688, "loss": 0.0945, "num_input_tokens_seen": 37975632, "step": 17595 }, { "epoch": 2.871125611745514, "grad_norm": 0.15809282660484314, "learning_rate": 0.0009942342317032172, "loss": 0.179, "num_input_tokens_seen": 37986160, "step": 17600 }, { "epoch": 2.8719412724306688, "grad_norm": 0.06237461790442467, "learning_rate": 0.000994223448157382, "loss": 0.0685, "num_input_tokens_seen": 37996432, "step": 17605 }, { "epoch": 2.8727569331158236, "grad_norm": 0.027538523077964783, "learning_rate": 0.000994212654595482, "loss": 0.2181, "num_input_tokens_seen": 38007152, "step": 17610 }, { "epoch": 2.873572593800979, "grad_norm": 0.06343001872301102, "learning_rate": 0.0009942018510177351, "loss": 0.1239, "num_input_tokens_seen": 38017424, "step": 17615 }, { "epoch": 2.8743882544861337, "grad_norm": 0.01702725514769554, "learning_rate": 0.000994191037424361, "loss": 0.1288, "num_input_tokens_seen": 38028144, "step": 17620 }, { "epoch": 2.875203915171289, "grad_norm": 0.056369274854660034, "learning_rate": 0.0009941802138155786, "loss": 0.0425, "num_input_tokens_seen": 38039536, "step": 17625 }, { "epoch": 2.8760195758564437, "grad_norm": 0.018767191097140312, "learning_rate": 0.0009941693801916074, "loss": 0.0816, "num_input_tokens_seen": 38050480, "step": 17630 }, { "epoch": 2.8768352365415986, "grad_norm": 0.13800957798957825, "learning_rate": 0.0009941585365526666, "loss": 0.0917, "num_input_tokens_seen": 38061456, "step": 17635 }, { "epoch": 2.877650897226754, "grad_norm": 0.2098018378019333, "learning_rate": 0.0009941476828989762, "loss": 0.0934, "num_input_tokens_seen": 38072208, "step": 17640 }, { "epoch": 2.8784665579119086, "grad_norm": 0.086976557970047, "learning_rate": 0.0009941368192307562, "loss": 0.0349, "num_input_tokens_seen": 38084272, "step": 17645 }, { "epoch": 2.8792822185970635, "grad_norm": 0.22944562137126923, "learning_rate": 0.0009941259455482267, "loss": 0.17, "num_input_tokens_seen": 38095472, "step": 17650 }, { "epoch": 2.8800978792822187, "grad_norm": 0.20812676846981049, "learning_rate": 0.0009941150618516079, "loss": 0.0841, "num_input_tokens_seen": 38106352, "step": 17655 }, { "epoch": 2.8809135399673735, "grad_norm": 0.0660615935921669, "learning_rate": 0.0009941041681411206, "loss": 0.1725, "num_input_tokens_seen": 38116816, "step": 17660 }, { "epoch": 2.8817292006525284, "grad_norm": 0.004768581595271826, "learning_rate": 0.0009940932644169858, "loss": 0.1003, "num_input_tokens_seen": 38127216, "step": 17665 }, { "epoch": 2.8825448613376836, "grad_norm": 0.3718203902244568, "learning_rate": 0.000994082350679424, "loss": 0.2346, "num_input_tokens_seen": 38138480, "step": 17670 }, { "epoch": 2.8833605220228384, "grad_norm": 0.12242249399423599, "learning_rate": 0.0009940714269286565, "loss": 0.0261, "num_input_tokens_seen": 38149360, "step": 17675 }, { "epoch": 2.8841761827079937, "grad_norm": 0.3140617609024048, "learning_rate": 0.000994060493164905, "loss": 0.1163, "num_input_tokens_seen": 38160528, "step": 17680 }, { "epoch": 2.8849918433931485, "grad_norm": 0.21743452548980713, "learning_rate": 0.0009940495493883906, "loss": 0.1016, "num_input_tokens_seen": 38171664, "step": 17685 }, { "epoch": 2.8858075040783033, "grad_norm": 0.31543511152267456, "learning_rate": 0.0009940385955993353, "loss": 0.1391, "num_input_tokens_seen": 38182256, "step": 17690 }, { "epoch": 2.886623164763458, "grad_norm": 0.009964260272681713, "learning_rate": 0.0009940276317979611, "loss": 0.1684, "num_input_tokens_seen": 38192720, "step": 17695 }, { "epoch": 2.8874388254486134, "grad_norm": 0.04191471263766289, "learning_rate": 0.0009940166579844906, "loss": 0.0816, "num_input_tokens_seen": 38203952, "step": 17700 }, { "epoch": 2.8882544861337682, "grad_norm": 0.020595213398337364, "learning_rate": 0.0009940056741591455, "loss": 0.1563, "num_input_tokens_seen": 38214544, "step": 17705 }, { "epoch": 2.8890701468189235, "grad_norm": 0.04182575270533562, "learning_rate": 0.0009939946803221487, "loss": 0.02, "num_input_tokens_seen": 38225488, "step": 17710 }, { "epoch": 2.8898858075040783, "grad_norm": 0.248220294713974, "learning_rate": 0.000993983676473723, "loss": 0.1399, "num_input_tokens_seen": 38235536, "step": 17715 }, { "epoch": 2.890701468189233, "grad_norm": 0.26066142320632935, "learning_rate": 0.0009939726626140917, "loss": 0.173, "num_input_tokens_seen": 38246800, "step": 17720 }, { "epoch": 2.8915171288743884, "grad_norm": 0.18540221452713013, "learning_rate": 0.0009939616387434776, "loss": 0.0998, "num_input_tokens_seen": 38258064, "step": 17725 }, { "epoch": 2.892332789559543, "grad_norm": 0.03552176430821419, "learning_rate": 0.0009939506048621044, "loss": 0.1742, "num_input_tokens_seen": 38268240, "step": 17730 }, { "epoch": 2.8931484502446985, "grad_norm": 0.013385550118982792, "learning_rate": 0.0009939395609701953, "loss": 0.051, "num_input_tokens_seen": 38279152, "step": 17735 }, { "epoch": 2.8939641109298533, "grad_norm": 0.035256728529930115, "learning_rate": 0.0009939285070679745, "loss": 0.1166, "num_input_tokens_seen": 38288848, "step": 17740 }, { "epoch": 2.894779771615008, "grad_norm": 0.03183083236217499, "learning_rate": 0.000993917443155666, "loss": 0.0932, "num_input_tokens_seen": 38298928, "step": 17745 }, { "epoch": 2.895595432300163, "grad_norm": 0.016891175881028175, "learning_rate": 0.0009939063692334937, "loss": 0.0699, "num_input_tokens_seen": 38310384, "step": 17750 }, { "epoch": 2.896411092985318, "grad_norm": 0.04170704260468483, "learning_rate": 0.0009938952853016825, "loss": 0.0274, "num_input_tokens_seen": 38321680, "step": 17755 }, { "epoch": 2.897226753670473, "grad_norm": 0.1320401430130005, "learning_rate": 0.0009938841913604568, "loss": 0.0693, "num_input_tokens_seen": 38332720, "step": 17760 }, { "epoch": 2.8980424143556283, "grad_norm": 0.09784867614507675, "learning_rate": 0.0009938730874100412, "loss": 0.0918, "num_input_tokens_seen": 38342288, "step": 17765 }, { "epoch": 2.898858075040783, "grad_norm": 0.006880197674036026, "learning_rate": 0.0009938619734506612, "loss": 0.1196, "num_input_tokens_seen": 38353616, "step": 17770 }, { "epoch": 2.899673735725938, "grad_norm": 0.0650179386138916, "learning_rate": 0.0009938508494825417, "loss": 0.088, "num_input_tokens_seen": 38363440, "step": 17775 }, { "epoch": 2.9004893964110927, "grad_norm": 0.026967985555529594, "learning_rate": 0.0009938397155059083, "loss": 0.2644, "num_input_tokens_seen": 38373104, "step": 17780 }, { "epoch": 2.901305057096248, "grad_norm": 0.1562485694885254, "learning_rate": 0.0009938285715209866, "loss": 0.0487, "num_input_tokens_seen": 38384432, "step": 17785 }, { "epoch": 2.902120717781403, "grad_norm": 0.1810639500617981, "learning_rate": 0.0009938174175280023, "loss": 0.1217, "num_input_tokens_seen": 38393968, "step": 17790 }, { "epoch": 2.902936378466558, "grad_norm": 0.07126948237419128, "learning_rate": 0.0009938062535271817, "loss": 0.0786, "num_input_tokens_seen": 38405552, "step": 17795 }, { "epoch": 2.903752039151713, "grad_norm": 0.05120917782187462, "learning_rate": 0.0009937950795187508, "loss": 0.0357, "num_input_tokens_seen": 38417680, "step": 17800 }, { "epoch": 2.9045676998368677, "grad_norm": 0.4032564163208008, "learning_rate": 0.0009937838955029362, "loss": 0.2696, "num_input_tokens_seen": 38428880, "step": 17805 }, { "epoch": 2.905383360522023, "grad_norm": 0.010374731384217739, "learning_rate": 0.0009937727014799646, "loss": 0.0381, "num_input_tokens_seen": 38438384, "step": 17810 }, { "epoch": 2.9061990212071778, "grad_norm": 0.23950040340423584, "learning_rate": 0.0009937614974500628, "loss": 0.1114, "num_input_tokens_seen": 38447728, "step": 17815 }, { "epoch": 2.907014681892333, "grad_norm": 0.20208469033241272, "learning_rate": 0.000993750283413458, "loss": 0.1503, "num_input_tokens_seen": 38457040, "step": 17820 }, { "epoch": 2.907830342577488, "grad_norm": 0.1307826191186905, "learning_rate": 0.0009937390593703773, "loss": 0.1604, "num_input_tokens_seen": 38468560, "step": 17825 }, { "epoch": 2.9086460032626427, "grad_norm": 0.19126513600349426, "learning_rate": 0.000993727825321048, "loss": 0.282, "num_input_tokens_seen": 38479824, "step": 17830 }, { "epoch": 2.9094616639477975, "grad_norm": 0.2289649248123169, "learning_rate": 0.0009937165812656983, "loss": 0.2535, "num_input_tokens_seen": 38491088, "step": 17835 }, { "epoch": 2.9102773246329527, "grad_norm": 0.06718108057975769, "learning_rate": 0.0009937053272045554, "loss": 0.1297, "num_input_tokens_seen": 38503408, "step": 17840 }, { "epoch": 2.9110929853181076, "grad_norm": 0.11619468033313751, "learning_rate": 0.000993694063137848, "loss": 0.0744, "num_input_tokens_seen": 38513424, "step": 17845 }, { "epoch": 2.911908646003263, "grad_norm": 0.18589140474796295, "learning_rate": 0.000993682789065804, "loss": 0.2195, "num_input_tokens_seen": 38522064, "step": 17850 }, { "epoch": 2.9127243066884176, "grad_norm": 0.06229662150144577, "learning_rate": 0.0009936715049886522, "loss": 0.0928, "num_input_tokens_seen": 38533840, "step": 17855 }, { "epoch": 2.9135399673735725, "grad_norm": 0.028761588037014008, "learning_rate": 0.0009936602109066209, "loss": 0.0714, "num_input_tokens_seen": 38544880, "step": 17860 }, { "epoch": 2.9143556280587277, "grad_norm": 0.07432349771261215, "learning_rate": 0.0009936489068199392, "loss": 0.1079, "num_input_tokens_seen": 38556496, "step": 17865 }, { "epoch": 2.9151712887438825, "grad_norm": 0.0758923813700676, "learning_rate": 0.0009936375927288362, "loss": 0.1479, "num_input_tokens_seen": 38566544, "step": 17870 }, { "epoch": 2.9159869494290374, "grad_norm": 0.19653162360191345, "learning_rate": 0.000993626268633541, "loss": 0.1244, "num_input_tokens_seen": 38576784, "step": 17875 }, { "epoch": 2.9168026101141926, "grad_norm": 0.021339325234293938, "learning_rate": 0.0009936149345342834, "loss": 0.1649, "num_input_tokens_seen": 38587248, "step": 17880 }, { "epoch": 2.9176182707993474, "grad_norm": 0.03423245996236801, "learning_rate": 0.000993603590431293, "loss": 0.0303, "num_input_tokens_seen": 38597520, "step": 17885 }, { "epoch": 2.9184339314845023, "grad_norm": 0.019704636186361313, "learning_rate": 0.0009935922363247995, "loss": 0.0463, "num_input_tokens_seen": 38606096, "step": 17890 }, { "epoch": 2.9192495921696575, "grad_norm": 0.3421989381313324, "learning_rate": 0.0009935808722150333, "loss": 0.0981, "num_input_tokens_seen": 38616304, "step": 17895 }, { "epoch": 2.9200652528548123, "grad_norm": 0.023574629798531532, "learning_rate": 0.0009935694981022245, "loss": 0.1268, "num_input_tokens_seen": 38627056, "step": 17900 }, { "epoch": 2.9208809135399676, "grad_norm": 0.030020825564861298, "learning_rate": 0.0009935581139866039, "loss": 0.0767, "num_input_tokens_seen": 38637712, "step": 17905 }, { "epoch": 2.9216965742251224, "grad_norm": 0.009417254477739334, "learning_rate": 0.0009935467198684015, "loss": 0.172, "num_input_tokens_seen": 38648048, "step": 17910 }, { "epoch": 2.9225122349102772, "grad_norm": 0.09373844414949417, "learning_rate": 0.0009935353157478493, "loss": 0.1257, "num_input_tokens_seen": 38658864, "step": 17915 }, { "epoch": 2.923327895595432, "grad_norm": 0.13210704922676086, "learning_rate": 0.0009935239016251776, "loss": 0.1099, "num_input_tokens_seen": 38670768, "step": 17920 }, { "epoch": 2.9241435562805873, "grad_norm": 0.19173915684223175, "learning_rate": 0.0009935124775006178, "loss": 0.0659, "num_input_tokens_seen": 38680688, "step": 17925 }, { "epoch": 2.924959216965742, "grad_norm": 0.17141948640346527, "learning_rate": 0.0009935010433744017, "loss": 0.2717, "num_input_tokens_seen": 38691760, "step": 17930 }, { "epoch": 2.9257748776508974, "grad_norm": 0.1959507316350937, "learning_rate": 0.000993489599246761, "loss": 0.1797, "num_input_tokens_seen": 38702288, "step": 17935 }, { "epoch": 2.926590538336052, "grad_norm": 0.06817397475242615, "learning_rate": 0.0009934781451179273, "loss": 0.0991, "num_input_tokens_seen": 38714320, "step": 17940 }, { "epoch": 2.927406199021207, "grad_norm": 0.03896523639559746, "learning_rate": 0.000993466680988133, "loss": 0.103, "num_input_tokens_seen": 38725360, "step": 17945 }, { "epoch": 2.9282218597063623, "grad_norm": 0.27005574107170105, "learning_rate": 0.0009934552068576105, "loss": 0.2493, "num_input_tokens_seen": 38736848, "step": 17950 }, { "epoch": 2.929037520391517, "grad_norm": 0.06814210116863251, "learning_rate": 0.0009934437227265924, "loss": 0.1069, "num_input_tokens_seen": 38746704, "step": 17955 }, { "epoch": 2.9298531810766724, "grad_norm": 0.02641262486577034, "learning_rate": 0.0009934322285953111, "loss": 0.0667, "num_input_tokens_seen": 38758064, "step": 17960 }, { "epoch": 2.930668841761827, "grad_norm": 0.24329756200313568, "learning_rate": 0.0009934207244639997, "loss": 0.1445, "num_input_tokens_seen": 38769328, "step": 17965 }, { "epoch": 2.931484502446982, "grad_norm": 0.12470272183418274, "learning_rate": 0.0009934092103328915, "loss": 0.1498, "num_input_tokens_seen": 38779056, "step": 17970 }, { "epoch": 2.932300163132137, "grad_norm": 0.21885892748832703, "learning_rate": 0.0009933976862022196, "loss": 0.1307, "num_input_tokens_seen": 38790256, "step": 17975 }, { "epoch": 2.933115823817292, "grad_norm": 0.23397760093212128, "learning_rate": 0.0009933861520722176, "loss": 0.2127, "num_input_tokens_seen": 38800912, "step": 17980 }, { "epoch": 2.933931484502447, "grad_norm": 0.09371541440486908, "learning_rate": 0.0009933746079431195, "loss": 0.1108, "num_input_tokens_seen": 38813584, "step": 17985 }, { "epoch": 2.934747145187602, "grad_norm": 0.20272843539714813, "learning_rate": 0.000993363053815159, "loss": 0.1326, "num_input_tokens_seen": 38824688, "step": 17990 }, { "epoch": 2.935562805872757, "grad_norm": 0.04231204092502594, "learning_rate": 0.0009933514896885705, "loss": 0.1311, "num_input_tokens_seen": 38835344, "step": 17995 }, { "epoch": 2.936378466557912, "grad_norm": 0.020343264564871788, "learning_rate": 0.000993339915563588, "loss": 0.0627, "num_input_tokens_seen": 38846064, "step": 18000 }, { "epoch": 2.9371941272430666, "grad_norm": 0.17910413444042206, "learning_rate": 0.0009933283314404462, "loss": 0.2066, "num_input_tokens_seen": 38856144, "step": 18005 }, { "epoch": 2.938009787928222, "grad_norm": 0.12234373390674591, "learning_rate": 0.0009933167373193802, "loss": 0.0693, "num_input_tokens_seen": 38866928, "step": 18010 }, { "epoch": 2.9388254486133767, "grad_norm": 0.022121267393231392, "learning_rate": 0.0009933051332006245, "loss": 0.0891, "num_input_tokens_seen": 38877808, "step": 18015 }, { "epoch": 2.939641109298532, "grad_norm": 0.06730733811855316, "learning_rate": 0.0009932935190844145, "loss": 0.1021, "num_input_tokens_seen": 38888400, "step": 18020 }, { "epoch": 2.9404567699836868, "grad_norm": 0.018460089340806007, "learning_rate": 0.0009932818949709855, "loss": 0.0782, "num_input_tokens_seen": 38900816, "step": 18025 }, { "epoch": 2.9412724306688416, "grad_norm": 0.015820801258087158, "learning_rate": 0.0009932702608605733, "loss": 0.0337, "num_input_tokens_seen": 38911760, "step": 18030 }, { "epoch": 2.942088091353997, "grad_norm": 0.07911639660596848, "learning_rate": 0.0009932586167534134, "loss": 0.0605, "num_input_tokens_seen": 38923280, "step": 18035 }, { "epoch": 2.9429037520391517, "grad_norm": 0.015101470053195953, "learning_rate": 0.0009932469626497418, "loss": 0.0977, "num_input_tokens_seen": 38934736, "step": 18040 }, { "epoch": 2.943719412724307, "grad_norm": 0.20669004321098328, "learning_rate": 0.000993235298549795, "loss": 0.1945, "num_input_tokens_seen": 38946320, "step": 18045 }, { "epoch": 2.9445350734094617, "grad_norm": 0.05207635089755058, "learning_rate": 0.0009932236244538089, "loss": 0.0687, "num_input_tokens_seen": 38957904, "step": 18050 }, { "epoch": 2.9453507340946166, "grad_norm": 0.03562372922897339, "learning_rate": 0.0009932119403620206, "loss": 0.1958, "num_input_tokens_seen": 38969680, "step": 18055 }, { "epoch": 2.9461663947797714, "grad_norm": 0.06473542749881744, "learning_rate": 0.0009932002462746665, "loss": 0.0558, "num_input_tokens_seen": 38980240, "step": 18060 }, { "epoch": 2.9469820554649266, "grad_norm": 0.018230898305773735, "learning_rate": 0.0009931885421919837, "loss": 0.0406, "num_input_tokens_seen": 38990416, "step": 18065 }, { "epoch": 2.9477977161500815, "grad_norm": 0.3707036077976227, "learning_rate": 0.0009931768281142095, "loss": 0.4286, "num_input_tokens_seen": 39001392, "step": 18070 }, { "epoch": 2.9486133768352367, "grad_norm": 0.2989596426486969, "learning_rate": 0.0009931651040415812, "loss": 0.1393, "num_input_tokens_seen": 39010896, "step": 18075 }, { "epoch": 2.9494290375203915, "grad_norm": 0.07519622147083282, "learning_rate": 0.0009931533699743364, "loss": 0.0497, "num_input_tokens_seen": 39021968, "step": 18080 }, { "epoch": 2.9502446982055464, "grad_norm": 0.026655271649360657, "learning_rate": 0.000993141625912713, "loss": 0.0428, "num_input_tokens_seen": 39033136, "step": 18085 }, { "epoch": 2.9510603588907016, "grad_norm": 0.18112024664878845, "learning_rate": 0.0009931298718569492, "loss": 0.1749, "num_input_tokens_seen": 39043152, "step": 18090 }, { "epoch": 2.9518760195758564, "grad_norm": 0.32215139269828796, "learning_rate": 0.0009931181078072827, "loss": 0.3785, "num_input_tokens_seen": 39053712, "step": 18095 }, { "epoch": 2.9526916802610113, "grad_norm": 0.047990377992391586, "learning_rate": 0.0009931063337639521, "loss": 0.091, "num_input_tokens_seen": 39065104, "step": 18100 }, { "epoch": 2.9535073409461665, "grad_norm": 0.07378200441598892, "learning_rate": 0.0009930945497271964, "loss": 0.1194, "num_input_tokens_seen": 39076688, "step": 18105 }, { "epoch": 2.9543230016313213, "grad_norm": 0.06102699786424637, "learning_rate": 0.0009930827556972539, "loss": 0.1353, "num_input_tokens_seen": 39086896, "step": 18110 }, { "epoch": 2.955138662316476, "grad_norm": 0.07082720100879669, "learning_rate": 0.0009930709516743639, "loss": 0.0557, "num_input_tokens_seen": 39097968, "step": 18115 }, { "epoch": 2.9559543230016314, "grad_norm": 0.013466399163007736, "learning_rate": 0.0009930591376587654, "loss": 0.1757, "num_input_tokens_seen": 39108400, "step": 18120 }, { "epoch": 2.9567699836867862, "grad_norm": 0.053425006568431854, "learning_rate": 0.0009930473136506982, "loss": 0.0697, "num_input_tokens_seen": 39118832, "step": 18125 }, { "epoch": 2.9575856443719415, "grad_norm": 0.03286390006542206, "learning_rate": 0.0009930354796504018, "loss": 0.0684, "num_input_tokens_seen": 39128848, "step": 18130 }, { "epoch": 2.9584013050570963, "grad_norm": 0.0802207738161087, "learning_rate": 0.0009930236356581158, "loss": 0.1224, "num_input_tokens_seen": 39140304, "step": 18135 }, { "epoch": 2.959216965742251, "grad_norm": 0.2165968418121338, "learning_rate": 0.0009930117816740803, "loss": 0.1446, "num_input_tokens_seen": 39149744, "step": 18140 }, { "epoch": 2.960032626427406, "grad_norm": 0.08548370003700256, "learning_rate": 0.0009929999176985355, "loss": 0.1027, "num_input_tokens_seen": 39160624, "step": 18145 }, { "epoch": 2.960848287112561, "grad_norm": 0.12427590787410736, "learning_rate": 0.0009929880437317222, "loss": 0.1193, "num_input_tokens_seen": 39171728, "step": 18150 }, { "epoch": 2.961663947797716, "grad_norm": 0.039144739508628845, "learning_rate": 0.0009929761597738808, "loss": 0.0448, "num_input_tokens_seen": 39183056, "step": 18155 }, { "epoch": 2.9624796084828713, "grad_norm": 0.1752675175666809, "learning_rate": 0.000992964265825252, "loss": 0.0705, "num_input_tokens_seen": 39194064, "step": 18160 }, { "epoch": 2.963295269168026, "grad_norm": 0.2643725574016571, "learning_rate": 0.0009929523618860772, "loss": 0.211, "num_input_tokens_seen": 39203056, "step": 18165 }, { "epoch": 2.964110929853181, "grad_norm": 0.0823717936873436, "learning_rate": 0.000992940447956597, "loss": 0.161, "num_input_tokens_seen": 39213072, "step": 18170 }, { "epoch": 2.964926590538336, "grad_norm": 0.02457917481660843, "learning_rate": 0.000992928524037054, "loss": 0.1581, "num_input_tokens_seen": 39223760, "step": 18175 }, { "epoch": 2.965742251223491, "grad_norm": 0.2596750259399414, "learning_rate": 0.0009929165901276884, "loss": 0.1656, "num_input_tokens_seen": 39233712, "step": 18180 }, { "epoch": 2.9665579119086463, "grad_norm": 0.04739905148744583, "learning_rate": 0.000992904646228743, "loss": 0.202, "num_input_tokens_seen": 39245008, "step": 18185 }, { "epoch": 2.967373572593801, "grad_norm": 0.015841137617826462, "learning_rate": 0.00099289269234046, "loss": 0.0698, "num_input_tokens_seen": 39256144, "step": 18190 }, { "epoch": 2.968189233278956, "grad_norm": 0.0571049340069294, "learning_rate": 0.000992880728463081, "loss": 0.204, "num_input_tokens_seen": 39266928, "step": 18195 }, { "epoch": 2.9690048939641107, "grad_norm": 0.04103940725326538, "learning_rate": 0.0009928687545968486, "loss": 0.202, "num_input_tokens_seen": 39277520, "step": 18200 }, { "epoch": 2.969820554649266, "grad_norm": 0.07702548801898956, "learning_rate": 0.0009928567707420059, "loss": 0.1576, "num_input_tokens_seen": 39288016, "step": 18205 }, { "epoch": 2.970636215334421, "grad_norm": 0.046397615224123, "learning_rate": 0.0009928447768987956, "loss": 0.0769, "num_input_tokens_seen": 39298800, "step": 18210 }, { "epoch": 2.971451876019576, "grad_norm": 0.02386937104165554, "learning_rate": 0.0009928327730674604, "loss": 0.1068, "num_input_tokens_seen": 39309584, "step": 18215 }, { "epoch": 2.972267536704731, "grad_norm": 0.008320878259837627, "learning_rate": 0.000992820759248244, "loss": 0.039, "num_input_tokens_seen": 39319856, "step": 18220 }, { "epoch": 2.9730831973898857, "grad_norm": 0.045859336853027344, "learning_rate": 0.00099280873544139, "loss": 0.1627, "num_input_tokens_seen": 39331216, "step": 18225 }, { "epoch": 2.9738988580750405, "grad_norm": 0.2066645473241806, "learning_rate": 0.0009927967016471414, "loss": 0.2928, "num_input_tokens_seen": 39342128, "step": 18230 }, { "epoch": 2.9747145187601958, "grad_norm": 0.14198949933052063, "learning_rate": 0.0009927846578657426, "loss": 0.0825, "num_input_tokens_seen": 39353712, "step": 18235 }, { "epoch": 2.9755301794453506, "grad_norm": 0.035590436309576035, "learning_rate": 0.0009927726040974377, "loss": 0.2045, "num_input_tokens_seen": 39364880, "step": 18240 }, { "epoch": 2.976345840130506, "grad_norm": 0.23182573914527893, "learning_rate": 0.0009927605403424707, "loss": 0.1562, "num_input_tokens_seen": 39376112, "step": 18245 }, { "epoch": 2.9771615008156607, "grad_norm": 0.11598934233188629, "learning_rate": 0.0009927484666010862, "loss": 0.1156, "num_input_tokens_seen": 39386608, "step": 18250 }, { "epoch": 2.9779771615008155, "grad_norm": 0.060181815177202225, "learning_rate": 0.000992736382873529, "loss": 0.1514, "num_input_tokens_seen": 39397808, "step": 18255 }, { "epoch": 2.9787928221859707, "grad_norm": 0.017984725534915924, "learning_rate": 0.000992724289160044, "loss": 0.0866, "num_input_tokens_seen": 39410096, "step": 18260 }, { "epoch": 2.9796084828711256, "grad_norm": 0.11590778827667236, "learning_rate": 0.000992712185460876, "loss": 0.1216, "num_input_tokens_seen": 39419600, "step": 18265 }, { "epoch": 2.980424143556281, "grad_norm": 0.011195077560842037, "learning_rate": 0.0009927000717762707, "loss": 0.1402, "num_input_tokens_seen": 39430288, "step": 18270 }, { "epoch": 2.9812398042414356, "grad_norm": 0.034643013030290604, "learning_rate": 0.0009926879481064734, "loss": 0.021, "num_input_tokens_seen": 39440592, "step": 18275 }, { "epoch": 2.9820554649265905, "grad_norm": 0.06665744632482529, "learning_rate": 0.0009926758144517297, "loss": 0.0673, "num_input_tokens_seen": 39450896, "step": 18280 }, { "epoch": 2.9828711256117453, "grad_norm": 0.06756220012903214, "learning_rate": 0.000992663670812286, "loss": 0.1451, "num_input_tokens_seen": 39461008, "step": 18285 }, { "epoch": 2.9836867862969005, "grad_norm": 0.18365518748760223, "learning_rate": 0.0009926515171883874, "loss": 0.2056, "num_input_tokens_seen": 39473008, "step": 18290 }, { "epoch": 2.9845024469820554, "grad_norm": 0.18645812571048737, "learning_rate": 0.0009926393535802812, "loss": 0.0928, "num_input_tokens_seen": 39483152, "step": 18295 }, { "epoch": 2.9853181076672106, "grad_norm": 0.01627344638109207, "learning_rate": 0.0009926271799882134, "loss": 0.0875, "num_input_tokens_seen": 39493680, "step": 18300 }, { "epoch": 2.9861337683523654, "grad_norm": 0.05606166273355484, "learning_rate": 0.000992614996412431, "loss": 0.2688, "num_input_tokens_seen": 39504976, "step": 18305 }, { "epoch": 2.9869494290375203, "grad_norm": 0.04933241009712219, "learning_rate": 0.0009926028028531808, "loss": 0.0697, "num_input_tokens_seen": 39515984, "step": 18310 }, { "epoch": 2.9877650897226755, "grad_norm": 0.14203248918056488, "learning_rate": 0.0009925905993107098, "loss": 0.1788, "num_input_tokens_seen": 39527280, "step": 18315 }, { "epoch": 2.9885807504078303, "grad_norm": 0.07063695788383484, "learning_rate": 0.0009925783857852653, "loss": 0.137, "num_input_tokens_seen": 39537392, "step": 18320 }, { "epoch": 2.9893964110929856, "grad_norm": 0.015914978459477425, "learning_rate": 0.0009925661622770953, "loss": 0.0494, "num_input_tokens_seen": 39548912, "step": 18325 }, { "epoch": 2.9902120717781404, "grad_norm": 0.17021217942237854, "learning_rate": 0.0009925539287864466, "loss": 0.0836, "num_input_tokens_seen": 39559632, "step": 18330 }, { "epoch": 2.9910277324632952, "grad_norm": 0.05064714327454567, "learning_rate": 0.000992541685313568, "loss": 0.1144, "num_input_tokens_seen": 39569136, "step": 18335 }, { "epoch": 2.99184339314845, "grad_norm": 0.25260981917381287, "learning_rate": 0.0009925294318587075, "loss": 0.2159, "num_input_tokens_seen": 39579696, "step": 18340 }, { "epoch": 2.9926590538336053, "grad_norm": 0.06170574203133583, "learning_rate": 0.000992517168422113, "loss": 0.0464, "num_input_tokens_seen": 39590832, "step": 18345 }, { "epoch": 2.99347471451876, "grad_norm": 0.013995518907904625, "learning_rate": 0.0009925048950040333, "loss": 0.0424, "num_input_tokens_seen": 39600592, "step": 18350 }, { "epoch": 2.9942903752039154, "grad_norm": 0.060316264629364014, "learning_rate": 0.000992492611604717, "loss": 0.0682, "num_input_tokens_seen": 39611632, "step": 18355 }, { "epoch": 2.99510603588907, "grad_norm": 0.06257718056440353, "learning_rate": 0.0009924803182244134, "loss": 0.0929, "num_input_tokens_seen": 39621936, "step": 18360 }, { "epoch": 2.995921696574225, "grad_norm": 0.0066615138202905655, "learning_rate": 0.0009924680148633714, "loss": 0.0734, "num_input_tokens_seen": 39632656, "step": 18365 }, { "epoch": 2.99673735725938, "grad_norm": 0.05021839216351509, "learning_rate": 0.0009924557015218401, "loss": 0.0598, "num_input_tokens_seen": 39643632, "step": 18370 }, { "epoch": 2.997553017944535, "grad_norm": 0.02938193641602993, "learning_rate": 0.0009924433782000695, "loss": 0.0367, "num_input_tokens_seen": 39655024, "step": 18375 }, { "epoch": 2.99836867862969, "grad_norm": 0.018920963630080223, "learning_rate": 0.000992431044898309, "loss": 0.1484, "num_input_tokens_seen": 39666608, "step": 18380 }, { "epoch": 2.999184339314845, "grad_norm": 0.1706968992948532, "learning_rate": 0.0009924187016168086, "loss": 0.142, "num_input_tokens_seen": 39676464, "step": 18385 }, { "epoch": 3.0, "grad_norm": 0.018366577103734016, "learning_rate": 0.0009924063483558187, "loss": 0.0471, "num_input_tokens_seen": 39686560, "step": 18390 }, { "epoch": 3.0, "eval_loss": 0.13300453126430511, "eval_runtime": 104.3252, "eval_samples_per_second": 26.12, "eval_steps_per_second": 6.537, "num_input_tokens_seen": 39686560, "step": 18390 }, { "epoch": 3.000815660685155, "grad_norm": 0.4041382968425751, "learning_rate": 0.0009923939851155896, "loss": 0.2678, "num_input_tokens_seen": 39697952, "step": 18395 }, { "epoch": 3.00163132137031, "grad_norm": 0.10898050665855408, "learning_rate": 0.0009923816118963715, "loss": 0.0907, "num_input_tokens_seen": 39709376, "step": 18400 }, { "epoch": 3.002446982055465, "grad_norm": 0.02952382154762745, "learning_rate": 0.0009923692286984156, "loss": 0.0607, "num_input_tokens_seen": 39719520, "step": 18405 }, { "epoch": 3.0032626427406197, "grad_norm": 0.21859124302864075, "learning_rate": 0.0009923568355219726, "loss": 0.0674, "num_input_tokens_seen": 39730336, "step": 18410 }, { "epoch": 3.004078303425775, "grad_norm": 0.20705083012580872, "learning_rate": 0.0009923444323672937, "loss": 0.1633, "num_input_tokens_seen": 39741312, "step": 18415 }, { "epoch": 3.00489396411093, "grad_norm": 0.41306212544441223, "learning_rate": 0.0009923320192346302, "loss": 0.2865, "num_input_tokens_seen": 39751904, "step": 18420 }, { "epoch": 3.0057096247960846, "grad_norm": 0.2739427387714386, "learning_rate": 0.000992319596124234, "loss": 0.2192, "num_input_tokens_seen": 39759712, "step": 18425 }, { "epoch": 3.00652528548124, "grad_norm": 0.08089111745357513, "learning_rate": 0.0009923071630363563, "loss": 0.2559, "num_input_tokens_seen": 39770304, "step": 18430 }, { "epoch": 3.0073409461663947, "grad_norm": 0.06087656319141388, "learning_rate": 0.0009922947199712496, "loss": 0.0577, "num_input_tokens_seen": 39781728, "step": 18435 }, { "epoch": 3.00815660685155, "grad_norm": 0.2174050360918045, "learning_rate": 0.0009922822669291658, "loss": 0.0988, "num_input_tokens_seen": 39792576, "step": 18440 }, { "epoch": 3.0089722675367048, "grad_norm": 0.10168199986219406, "learning_rate": 0.0009922698039103574, "loss": 0.0998, "num_input_tokens_seen": 39804640, "step": 18445 }, { "epoch": 3.0097879282218596, "grad_norm": 0.0561521053314209, "learning_rate": 0.000992257330915077, "loss": 0.0723, "num_input_tokens_seen": 39815488, "step": 18450 }, { "epoch": 3.010603588907015, "grad_norm": 0.027122139930725098, "learning_rate": 0.0009922448479435773, "loss": 0.102, "num_input_tokens_seen": 39826592, "step": 18455 }, { "epoch": 3.0114192495921697, "grad_norm": 0.035200659185647964, "learning_rate": 0.0009922323549961113, "loss": 0.072, "num_input_tokens_seen": 39836992, "step": 18460 }, { "epoch": 3.0122349102773245, "grad_norm": 0.02427753619849682, "learning_rate": 0.000992219852072932, "loss": 0.0597, "num_input_tokens_seen": 39847872, "step": 18465 }, { "epoch": 3.0130505709624797, "grad_norm": 0.09208391606807709, "learning_rate": 0.0009922073391742932, "loss": 0.0916, "num_input_tokens_seen": 39858464, "step": 18470 }, { "epoch": 3.0138662316476346, "grad_norm": 0.05289800465106964, "learning_rate": 0.0009921948163004483, "loss": 0.0644, "num_input_tokens_seen": 39868384, "step": 18475 }, { "epoch": 3.0146818923327894, "grad_norm": 0.03495422378182411, "learning_rate": 0.000992182283451651, "loss": 0.0712, "num_input_tokens_seen": 39879232, "step": 18480 }, { "epoch": 3.0154975530179446, "grad_norm": 0.05791272968053818, "learning_rate": 0.0009921697406281554, "loss": 0.1534, "num_input_tokens_seen": 39889952, "step": 18485 }, { "epoch": 3.0163132137030995, "grad_norm": 0.3341110050678253, "learning_rate": 0.0009921571878302154, "loss": 0.0751, "num_input_tokens_seen": 39899584, "step": 18490 }, { "epoch": 3.0171288743882543, "grad_norm": 0.30827581882476807, "learning_rate": 0.0009921446250580857, "loss": 0.1462, "num_input_tokens_seen": 39910208, "step": 18495 }, { "epoch": 3.0179445350734095, "grad_norm": 0.011666425503790379, "learning_rate": 0.000992132052312021, "loss": 0.1156, "num_input_tokens_seen": 39921568, "step": 18500 }, { "epoch": 3.0187601957585644, "grad_norm": 0.10604102164506912, "learning_rate": 0.000992119469592276, "loss": 0.1729, "num_input_tokens_seen": 39933472, "step": 18505 }, { "epoch": 3.0195758564437196, "grad_norm": 0.08499285578727722, "learning_rate": 0.0009921068768991056, "loss": 0.1846, "num_input_tokens_seen": 39942784, "step": 18510 }, { "epoch": 3.0203915171288744, "grad_norm": 0.03187287226319313, "learning_rate": 0.0009920942742327648, "loss": 0.1292, "num_input_tokens_seen": 39953024, "step": 18515 }, { "epoch": 3.0212071778140293, "grad_norm": 0.09806955605745316, "learning_rate": 0.0009920816615935095, "loss": 0.2594, "num_input_tokens_seen": 39963360, "step": 18520 }, { "epoch": 3.0220228384991845, "grad_norm": 0.0858381912112236, "learning_rate": 0.000992069038981595, "loss": 0.1536, "num_input_tokens_seen": 39973312, "step": 18525 }, { "epoch": 3.0228384991843393, "grad_norm": 0.1275833398103714, "learning_rate": 0.0009920564063972772, "loss": 0.144, "num_input_tokens_seen": 39982432, "step": 18530 }, { "epoch": 3.023654159869494, "grad_norm": 0.0897335633635521, "learning_rate": 0.0009920437638408122, "loss": 0.1375, "num_input_tokens_seen": 39994080, "step": 18535 }, { "epoch": 3.0244698205546494, "grad_norm": 0.15980899333953857, "learning_rate": 0.000992031111312456, "loss": 0.2011, "num_input_tokens_seen": 40005216, "step": 18540 }, { "epoch": 3.0252854812398042, "grad_norm": 0.018551170825958252, "learning_rate": 0.0009920184488124654, "loss": 0.0634, "num_input_tokens_seen": 40016512, "step": 18545 }, { "epoch": 3.026101141924959, "grad_norm": 0.11224852502346039, "learning_rate": 0.0009920057763410962, "loss": 0.106, "num_input_tokens_seen": 40027648, "step": 18550 }, { "epoch": 3.0269168026101143, "grad_norm": 0.08699093014001846, "learning_rate": 0.0009919930938986064, "loss": 0.0881, "num_input_tokens_seen": 40039936, "step": 18555 }, { "epoch": 3.027732463295269, "grad_norm": 0.1539272964000702, "learning_rate": 0.000991980401485252, "loss": 0.0572, "num_input_tokens_seen": 40050144, "step": 18560 }, { "epoch": 3.028548123980424, "grad_norm": 0.13647182285785675, "learning_rate": 0.000991967699101291, "loss": 0.054, "num_input_tokens_seen": 40060960, "step": 18565 }, { "epoch": 3.029363784665579, "grad_norm": 0.028170354664325714, "learning_rate": 0.00099195498674698, "loss": 0.0461, "num_input_tokens_seen": 40071904, "step": 18570 }, { "epoch": 3.030179445350734, "grad_norm": 0.026690561324357986, "learning_rate": 0.0009919422644225776, "loss": 0.0853, "num_input_tokens_seen": 40082304, "step": 18575 }, { "epoch": 3.0309951060358893, "grad_norm": 0.06538096070289612, "learning_rate": 0.0009919295321283409, "loss": 0.1397, "num_input_tokens_seen": 40093568, "step": 18580 }, { "epoch": 3.031810766721044, "grad_norm": 0.007915168069303036, "learning_rate": 0.0009919167898645282, "loss": 0.0953, "num_input_tokens_seen": 40104800, "step": 18585 }, { "epoch": 3.032626427406199, "grad_norm": 0.007932478561997414, "learning_rate": 0.0009919040376313976, "loss": 0.1102, "num_input_tokens_seen": 40115744, "step": 18590 }, { "epoch": 3.033442088091354, "grad_norm": 0.10504759848117828, "learning_rate": 0.0009918912754292078, "loss": 0.1152, "num_input_tokens_seen": 40126912, "step": 18595 }, { "epoch": 3.034257748776509, "grad_norm": 0.1981155127286911, "learning_rate": 0.0009918785032582173, "loss": 0.0418, "num_input_tokens_seen": 40138048, "step": 18600 }, { "epoch": 3.035073409461664, "grad_norm": 0.06274720281362534, "learning_rate": 0.000991865721118685, "loss": 0.0965, "num_input_tokens_seen": 40148320, "step": 18605 }, { "epoch": 3.035889070146819, "grad_norm": 0.08528237044811249, "learning_rate": 0.0009918529290108696, "loss": 0.0434, "num_input_tokens_seen": 40159008, "step": 18610 }, { "epoch": 3.036704730831974, "grad_norm": 0.03208599612116814, "learning_rate": 0.000991840126935031, "loss": 0.0841, "num_input_tokens_seen": 40170048, "step": 18615 }, { "epoch": 3.0375203915171287, "grad_norm": 0.3258243501186371, "learning_rate": 0.000991827314891428, "loss": 0.1148, "num_input_tokens_seen": 40181120, "step": 18620 }, { "epoch": 3.038336052202284, "grad_norm": 0.4030015170574188, "learning_rate": 0.0009918144928803205, "loss": 0.0824, "num_input_tokens_seen": 40192128, "step": 18625 }, { "epoch": 3.039151712887439, "grad_norm": 0.023172525689005852, "learning_rate": 0.0009918016609019686, "loss": 0.0986, "num_input_tokens_seen": 40202912, "step": 18630 }, { "epoch": 3.0399673735725936, "grad_norm": 0.06557204574346542, "learning_rate": 0.0009917888189566323, "loss": 0.0344, "num_input_tokens_seen": 40213312, "step": 18635 }, { "epoch": 3.040783034257749, "grad_norm": 0.26883557438850403, "learning_rate": 0.0009917759670445712, "loss": 0.0922, "num_input_tokens_seen": 40225216, "step": 18640 }, { "epoch": 3.0415986949429037, "grad_norm": 0.09337891638278961, "learning_rate": 0.0009917631051660468, "loss": 0.0849, "num_input_tokens_seen": 40236704, "step": 18645 }, { "epoch": 3.0424143556280585, "grad_norm": 0.04281153157353401, "learning_rate": 0.0009917502333213189, "loss": 0.1265, "num_input_tokens_seen": 40247008, "step": 18650 }, { "epoch": 3.0432300163132138, "grad_norm": 0.10101702064275742, "learning_rate": 0.0009917373515106486, "loss": 0.1686, "num_input_tokens_seen": 40258688, "step": 18655 }, { "epoch": 3.0440456769983686, "grad_norm": 0.04322616755962372, "learning_rate": 0.0009917244597342973, "loss": 0.0643, "num_input_tokens_seen": 40270048, "step": 18660 }, { "epoch": 3.044861337683524, "grad_norm": 0.03076149895787239, "learning_rate": 0.000991711557992526, "loss": 0.1509, "num_input_tokens_seen": 40280960, "step": 18665 }, { "epoch": 3.0456769983686787, "grad_norm": 0.13857974112033844, "learning_rate": 0.000991698646285596, "loss": 0.1128, "num_input_tokens_seen": 40291040, "step": 18670 }, { "epoch": 3.0464926590538335, "grad_norm": 0.1310303956270218, "learning_rate": 0.0009916857246137693, "loss": 0.3588, "num_input_tokens_seen": 40301248, "step": 18675 }, { "epoch": 3.0473083197389887, "grad_norm": 0.016415778547525406, "learning_rate": 0.0009916727929773078, "loss": 0.0454, "num_input_tokens_seen": 40313344, "step": 18680 }, { "epoch": 3.0481239804241436, "grad_norm": 0.039434533566236496, "learning_rate": 0.0009916598513764732, "loss": 0.0921, "num_input_tokens_seen": 40324224, "step": 18685 }, { "epoch": 3.0489396411092984, "grad_norm": 0.04546388238668442, "learning_rate": 0.0009916468998115281, "loss": 0.1622, "num_input_tokens_seen": 40334880, "step": 18690 }, { "epoch": 3.0497553017944536, "grad_norm": 0.2834668457508087, "learning_rate": 0.000991633938282735, "loss": 0.2138, "num_input_tokens_seen": 40345792, "step": 18695 }, { "epoch": 3.0505709624796085, "grad_norm": 0.15581931173801422, "learning_rate": 0.0009916209667903562, "loss": 0.0791, "num_input_tokens_seen": 40357568, "step": 18700 }, { "epoch": 3.0513866231647633, "grad_norm": 0.05432992801070213, "learning_rate": 0.0009916079853346548, "loss": 0.1303, "num_input_tokens_seen": 40368032, "step": 18705 }, { "epoch": 3.0522022838499185, "grad_norm": 0.10288121551275253, "learning_rate": 0.0009915949939158942, "loss": 0.1321, "num_input_tokens_seen": 40379328, "step": 18710 }, { "epoch": 3.0530179445350734, "grad_norm": 0.20555542409420013, "learning_rate": 0.0009915819925343373, "loss": 0.1389, "num_input_tokens_seen": 40389984, "step": 18715 }, { "epoch": 3.053833605220228, "grad_norm": 0.10626711696386337, "learning_rate": 0.0009915689811902477, "loss": 0.1218, "num_input_tokens_seen": 40400768, "step": 18720 }, { "epoch": 3.0546492659053834, "grad_norm": 0.04966336488723755, "learning_rate": 0.000991555959883889, "loss": 0.0396, "num_input_tokens_seen": 40411264, "step": 18725 }, { "epoch": 3.0554649265905383, "grad_norm": 0.015639428049325943, "learning_rate": 0.0009915429286155254, "loss": 0.0856, "num_input_tokens_seen": 40421504, "step": 18730 }, { "epoch": 3.0562805872756935, "grad_norm": 0.22529128193855286, "learning_rate": 0.0009915298873854207, "loss": 0.0504, "num_input_tokens_seen": 40432416, "step": 18735 }, { "epoch": 3.0570962479608483, "grad_norm": 0.1471482813358307, "learning_rate": 0.0009915168361938392, "loss": 0.1181, "num_input_tokens_seen": 40443680, "step": 18740 }, { "epoch": 3.057911908646003, "grad_norm": 0.09654200822114944, "learning_rate": 0.0009915037750410456, "loss": 0.0371, "num_input_tokens_seen": 40454144, "step": 18745 }, { "epoch": 3.0587275693311584, "grad_norm": 0.0210373867303133, "learning_rate": 0.0009914907039273045, "loss": 0.0711, "num_input_tokens_seen": 40464832, "step": 18750 }, { "epoch": 3.0595432300163132, "grad_norm": 0.011072233319282532, "learning_rate": 0.0009914776228528805, "loss": 0.2025, "num_input_tokens_seen": 40473984, "step": 18755 }, { "epoch": 3.060358890701468, "grad_norm": 0.012772710993885994, "learning_rate": 0.0009914645318180393, "loss": 0.0205, "num_input_tokens_seen": 40483008, "step": 18760 }, { "epoch": 3.0611745513866233, "grad_norm": 0.12213150411844254, "learning_rate": 0.0009914514308230458, "loss": 0.1444, "num_input_tokens_seen": 40493792, "step": 18765 }, { "epoch": 3.061990212071778, "grad_norm": 0.22693930566310883, "learning_rate": 0.0009914383198681657, "loss": 0.064, "num_input_tokens_seen": 40503616, "step": 18770 }, { "epoch": 3.062805872756933, "grad_norm": 0.01713327318429947, "learning_rate": 0.0009914251989536645, "loss": 0.062, "num_input_tokens_seen": 40515744, "step": 18775 }, { "epoch": 3.063621533442088, "grad_norm": 0.036710288375616074, "learning_rate": 0.0009914120680798082, "loss": 0.276, "num_input_tokens_seen": 40525696, "step": 18780 }, { "epoch": 3.064437194127243, "grad_norm": 0.0682438313961029, "learning_rate": 0.000991398927246863, "loss": 0.0949, "num_input_tokens_seen": 40535808, "step": 18785 }, { "epoch": 3.065252854812398, "grad_norm": 0.029467858374118805, "learning_rate": 0.000991385776455095, "loss": 0.0521, "num_input_tokens_seen": 40545952, "step": 18790 }, { "epoch": 3.066068515497553, "grad_norm": 0.0759044960141182, "learning_rate": 0.0009913726157047712, "loss": 0.1035, "num_input_tokens_seen": 40556672, "step": 18795 }, { "epoch": 3.066884176182708, "grad_norm": 0.07253002375364304, "learning_rate": 0.0009913594449961576, "loss": 0.0954, "num_input_tokens_seen": 40567776, "step": 18800 }, { "epoch": 3.067699836867863, "grad_norm": 0.029207590967416763, "learning_rate": 0.0009913462643295217, "loss": 0.0709, "num_input_tokens_seen": 40580128, "step": 18805 }, { "epoch": 3.068515497553018, "grad_norm": 0.1622144877910614, "learning_rate": 0.0009913330737051304, "loss": 0.0561, "num_input_tokens_seen": 40590720, "step": 18810 }, { "epoch": 3.069331158238173, "grad_norm": 0.057377591729164124, "learning_rate": 0.0009913198731232513, "loss": 0.0517, "num_input_tokens_seen": 40601120, "step": 18815 }, { "epoch": 3.070146818923328, "grad_norm": 0.028364259749650955, "learning_rate": 0.0009913066625841513, "loss": 0.0719, "num_input_tokens_seen": 40612160, "step": 18820 }, { "epoch": 3.070962479608483, "grad_norm": 0.02483329363167286, "learning_rate": 0.0009912934420880988, "loss": 0.0243, "num_input_tokens_seen": 40623520, "step": 18825 }, { "epoch": 3.0717781402936377, "grad_norm": 0.04901111498475075, "learning_rate": 0.0009912802116353613, "loss": 0.1453, "num_input_tokens_seen": 40634368, "step": 18830 }, { "epoch": 3.072593800978793, "grad_norm": 0.3878200054168701, "learning_rate": 0.0009912669712262073, "loss": 0.1408, "num_input_tokens_seen": 40644352, "step": 18835 }, { "epoch": 3.073409461663948, "grad_norm": 0.19973596930503845, "learning_rate": 0.0009912537208609047, "loss": 0.1298, "num_input_tokens_seen": 40655872, "step": 18840 }, { "epoch": 3.0742251223491026, "grad_norm": 0.06443160772323608, "learning_rate": 0.0009912404605397222, "loss": 0.193, "num_input_tokens_seen": 40667136, "step": 18845 }, { "epoch": 3.075040783034258, "grad_norm": 0.061161428689956665, "learning_rate": 0.0009912271902629288, "loss": 0.0462, "num_input_tokens_seen": 40677024, "step": 18850 }, { "epoch": 3.0758564437194127, "grad_norm": 0.05967619642615318, "learning_rate": 0.000991213910030793, "loss": 0.1259, "num_input_tokens_seen": 40688896, "step": 18855 }, { "epoch": 3.0766721044045675, "grad_norm": 0.03804397210478783, "learning_rate": 0.0009912006198435843, "loss": 0.1563, "num_input_tokens_seen": 40699424, "step": 18860 }, { "epoch": 3.0774877650897228, "grad_norm": 0.02365795336663723, "learning_rate": 0.000991187319701572, "loss": 0.0638, "num_input_tokens_seen": 40709568, "step": 18865 }, { "epoch": 3.0783034257748776, "grad_norm": 0.1885252445936203, "learning_rate": 0.0009911740096050252, "loss": 0.1204, "num_input_tokens_seen": 40719776, "step": 18870 }, { "epoch": 3.0791190864600324, "grad_norm": 0.034633222967386246, "learning_rate": 0.0009911606895542143, "loss": 0.0329, "num_input_tokens_seen": 40731680, "step": 18875 }, { "epoch": 3.0799347471451877, "grad_norm": 0.4475903809070587, "learning_rate": 0.0009911473595494089, "loss": 0.2263, "num_input_tokens_seen": 40742432, "step": 18880 }, { "epoch": 3.0807504078303425, "grad_norm": 0.09852276742458344, "learning_rate": 0.0009911340195908791, "loss": 0.0573, "num_input_tokens_seen": 40752864, "step": 18885 }, { "epoch": 3.0815660685154977, "grad_norm": 0.20587606728076935, "learning_rate": 0.0009911206696788955, "loss": 0.1347, "num_input_tokens_seen": 40764096, "step": 18890 }, { "epoch": 3.0823817292006526, "grad_norm": 0.03990110754966736, "learning_rate": 0.0009911073098137285, "loss": 0.0614, "num_input_tokens_seen": 40774496, "step": 18895 }, { "epoch": 3.0831973898858074, "grad_norm": 0.1772138625383377, "learning_rate": 0.0009910939399956488, "loss": 0.2116, "num_input_tokens_seen": 40785376, "step": 18900 }, { "epoch": 3.0840130505709626, "grad_norm": 0.03769468143582344, "learning_rate": 0.0009910805602249273, "loss": 0.1012, "num_input_tokens_seen": 40796224, "step": 18905 }, { "epoch": 3.0848287112561175, "grad_norm": 0.1547006070613861, "learning_rate": 0.0009910671705018353, "loss": 0.0998, "num_input_tokens_seen": 40806752, "step": 18910 }, { "epoch": 3.0856443719412723, "grad_norm": 0.07292618602514267, "learning_rate": 0.000991053770826644, "loss": 0.0579, "num_input_tokens_seen": 40817600, "step": 18915 }, { "epoch": 3.0864600326264275, "grad_norm": 0.026757830753922462, "learning_rate": 0.0009910403611996252, "loss": 0.0725, "num_input_tokens_seen": 40828672, "step": 18920 }, { "epoch": 3.0872756933115824, "grad_norm": 0.2143961787223816, "learning_rate": 0.0009910269416210508, "loss": 0.0654, "num_input_tokens_seen": 40839264, "step": 18925 }, { "epoch": 3.088091353996737, "grad_norm": 0.011441903188824654, "learning_rate": 0.0009910135120911924, "loss": 0.0948, "num_input_tokens_seen": 40849824, "step": 18930 }, { "epoch": 3.0889070146818924, "grad_norm": 0.14295248687267303, "learning_rate": 0.0009910000726103222, "loss": 0.1664, "num_input_tokens_seen": 40860480, "step": 18935 }, { "epoch": 3.0897226753670473, "grad_norm": 0.30127739906311035, "learning_rate": 0.0009909866231787125, "loss": 0.1988, "num_input_tokens_seen": 40871328, "step": 18940 }, { "epoch": 3.090538336052202, "grad_norm": 0.0621350072324276, "learning_rate": 0.0009909731637966362, "loss": 0.0268, "num_input_tokens_seen": 40881600, "step": 18945 }, { "epoch": 3.0913539967373573, "grad_norm": 0.021904218941926956, "learning_rate": 0.0009909596944643658, "loss": 0.0298, "num_input_tokens_seen": 40892384, "step": 18950 }, { "epoch": 3.092169657422512, "grad_norm": 0.23718012869358063, "learning_rate": 0.0009909462151821745, "loss": 0.0333, "num_input_tokens_seen": 40903360, "step": 18955 }, { "epoch": 3.0929853181076674, "grad_norm": 0.12597306072711945, "learning_rate": 0.0009909327259503351, "loss": 0.1615, "num_input_tokens_seen": 40914656, "step": 18960 }, { "epoch": 3.0938009787928222, "grad_norm": 0.06073950603604317, "learning_rate": 0.0009909192267691215, "loss": 0.1295, "num_input_tokens_seen": 40925440, "step": 18965 }, { "epoch": 3.094616639477977, "grad_norm": 0.01612691581249237, "learning_rate": 0.000990905717638807, "loss": 0.0318, "num_input_tokens_seen": 40936128, "step": 18970 }, { "epoch": 3.0954323001631323, "grad_norm": 0.19054622948169708, "learning_rate": 0.000990892198559665, "loss": 0.0606, "num_input_tokens_seen": 40946880, "step": 18975 }, { "epoch": 3.096247960848287, "grad_norm": 0.13514332473278046, "learning_rate": 0.0009908786695319702, "loss": 0.0746, "num_input_tokens_seen": 40957824, "step": 18980 }, { "epoch": 3.097063621533442, "grad_norm": 0.14186139404773712, "learning_rate": 0.0009908651305559964, "loss": 0.0397, "num_input_tokens_seen": 40968768, "step": 18985 }, { "epoch": 3.097879282218597, "grad_norm": 0.0024534219410270452, "learning_rate": 0.000990851581632018, "loss": 0.306, "num_input_tokens_seen": 40979744, "step": 18990 }, { "epoch": 3.098694942903752, "grad_norm": 0.016497788950800896, "learning_rate": 0.0009908380227603094, "loss": 0.0313, "num_input_tokens_seen": 40988896, "step": 18995 }, { "epoch": 3.099510603588907, "grad_norm": 0.05126534029841423, "learning_rate": 0.000990824453941146, "loss": 0.1094, "num_input_tokens_seen": 40998496, "step": 19000 }, { "epoch": 3.100326264274062, "grad_norm": 0.05188438668847084, "learning_rate": 0.000990810875174802, "loss": 0.1703, "num_input_tokens_seen": 41010528, "step": 19005 }, { "epoch": 3.101141924959217, "grad_norm": 0.029037483036518097, "learning_rate": 0.0009907972864615531, "loss": 0.1512, "num_input_tokens_seen": 41019776, "step": 19010 }, { "epoch": 3.1019575856443717, "grad_norm": 0.014521844685077667, "learning_rate": 0.0009907836878016746, "loss": 0.0499, "num_input_tokens_seen": 41031968, "step": 19015 }, { "epoch": 3.102773246329527, "grad_norm": 0.20838069915771484, "learning_rate": 0.000990770079195442, "loss": 0.1695, "num_input_tokens_seen": 41043680, "step": 19020 }, { "epoch": 3.103588907014682, "grad_norm": 0.09915346652269363, "learning_rate": 0.0009907564606431315, "loss": 0.0435, "num_input_tokens_seen": 41054016, "step": 19025 }, { "epoch": 3.104404567699837, "grad_norm": 0.06864742934703827, "learning_rate": 0.0009907428321450182, "loss": 0.0797, "num_input_tokens_seen": 41063616, "step": 19030 }, { "epoch": 3.105220228384992, "grad_norm": 0.004306207876652479, "learning_rate": 0.0009907291937013792, "loss": 0.1082, "num_input_tokens_seen": 41074080, "step": 19035 }, { "epoch": 3.1060358890701467, "grad_norm": 0.11784060299396515, "learning_rate": 0.0009907155453124906, "loss": 0.0606, "num_input_tokens_seen": 41084576, "step": 19040 }, { "epoch": 3.106851549755302, "grad_norm": 0.2601620852947235, "learning_rate": 0.0009907018869786289, "loss": 0.0562, "num_input_tokens_seen": 41095392, "step": 19045 }, { "epoch": 3.107667210440457, "grad_norm": 0.10783284902572632, "learning_rate": 0.0009906882187000708, "loss": 0.0224, "num_input_tokens_seen": 41105600, "step": 19050 }, { "epoch": 3.1084828711256116, "grad_norm": 0.015093565918505192, "learning_rate": 0.0009906745404770936, "loss": 0.0188, "num_input_tokens_seen": 41115360, "step": 19055 }, { "epoch": 3.109298531810767, "grad_norm": 0.05067162215709686, "learning_rate": 0.0009906608523099743, "loss": 0.1462, "num_input_tokens_seen": 41127360, "step": 19060 }, { "epoch": 3.1101141924959217, "grad_norm": 0.026781724765896797, "learning_rate": 0.0009906471541989905, "loss": 0.0885, "num_input_tokens_seen": 41136128, "step": 19065 }, { "epoch": 3.1109298531810765, "grad_norm": 0.06520544737577438, "learning_rate": 0.0009906334461444195, "loss": 0.3441, "num_input_tokens_seen": 41146016, "step": 19070 }, { "epoch": 3.1117455138662318, "grad_norm": 0.023566193878650665, "learning_rate": 0.0009906197281465395, "loss": 0.0979, "num_input_tokens_seen": 41156352, "step": 19075 }, { "epoch": 3.1125611745513866, "grad_norm": 0.02930636517703533, "learning_rate": 0.0009906060002056283, "loss": 0.0986, "num_input_tokens_seen": 41167488, "step": 19080 }, { "epoch": 3.1133768352365414, "grad_norm": 0.031548816710710526, "learning_rate": 0.000990592262321964, "loss": 0.1582, "num_input_tokens_seen": 41177056, "step": 19085 }, { "epoch": 3.1141924959216967, "grad_norm": 0.3108008801937103, "learning_rate": 0.0009905785144958253, "loss": 0.1406, "num_input_tokens_seen": 41187936, "step": 19090 }, { "epoch": 3.1150081566068515, "grad_norm": 0.008990020491182804, "learning_rate": 0.0009905647567274905, "loss": 0.058, "num_input_tokens_seen": 41199008, "step": 19095 }, { "epoch": 3.1158238172920063, "grad_norm": 0.09523888677358627, "learning_rate": 0.0009905509890172385, "loss": 0.1301, "num_input_tokens_seen": 41208928, "step": 19100 }, { "epoch": 3.1166394779771616, "grad_norm": 0.13288551568984985, "learning_rate": 0.0009905372113653487, "loss": 0.1122, "num_input_tokens_seen": 41220160, "step": 19105 }, { "epoch": 3.1174551386623164, "grad_norm": 0.17126767337322235, "learning_rate": 0.0009905234237721, "loss": 0.0707, "num_input_tokens_seen": 41230496, "step": 19110 }, { "epoch": 3.1182707993474716, "grad_norm": 0.07555588334798813, "learning_rate": 0.0009905096262377716, "loss": 0.1934, "num_input_tokens_seen": 41242112, "step": 19115 }, { "epoch": 3.1190864600326265, "grad_norm": 0.09480588883161545, "learning_rate": 0.0009904958187626433, "loss": 0.1784, "num_input_tokens_seen": 41253152, "step": 19120 }, { "epoch": 3.1199021207177813, "grad_norm": 0.3653087913990021, "learning_rate": 0.0009904820013469952, "loss": 0.1563, "num_input_tokens_seen": 41265504, "step": 19125 }, { "epoch": 3.1207177814029365, "grad_norm": 0.07240969687700272, "learning_rate": 0.0009904681739911073, "loss": 0.0867, "num_input_tokens_seen": 41276768, "step": 19130 }, { "epoch": 3.1215334420880914, "grad_norm": 0.14926016330718994, "learning_rate": 0.0009904543366952593, "loss": 0.1133, "num_input_tokens_seen": 41288192, "step": 19135 }, { "epoch": 3.122349102773246, "grad_norm": 0.1793287992477417, "learning_rate": 0.0009904404894597323, "loss": 0.1796, "num_input_tokens_seen": 41298848, "step": 19140 }, { "epoch": 3.1231647634584014, "grad_norm": 0.01959438994526863, "learning_rate": 0.0009904266322848063, "loss": 0.2499, "num_input_tokens_seen": 41310368, "step": 19145 }, { "epoch": 3.1239804241435563, "grad_norm": 0.25041159987449646, "learning_rate": 0.0009904127651707627, "loss": 0.1038, "num_input_tokens_seen": 41321472, "step": 19150 }, { "epoch": 3.124796084828711, "grad_norm": 0.046120330691337585, "learning_rate": 0.000990398888117882, "loss": 0.0986, "num_input_tokens_seen": 41333184, "step": 19155 }, { "epoch": 3.1256117455138663, "grad_norm": 0.05421703681349754, "learning_rate": 0.0009903850011264458, "loss": 0.1848, "num_input_tokens_seen": 41344448, "step": 19160 }, { "epoch": 3.126427406199021, "grad_norm": 0.07466793060302734, "learning_rate": 0.0009903711041967357, "loss": 0.0462, "num_input_tokens_seen": 41354528, "step": 19165 }, { "epoch": 3.1272430668841764, "grad_norm": 0.03715520352125168, "learning_rate": 0.000990357197329033, "loss": 0.062, "num_input_tokens_seen": 41364672, "step": 19170 }, { "epoch": 3.1280587275693312, "grad_norm": 0.0559394396841526, "learning_rate": 0.0009903432805236194, "loss": 0.0941, "num_input_tokens_seen": 41376544, "step": 19175 }, { "epoch": 3.128874388254486, "grad_norm": 0.12095335870981216, "learning_rate": 0.0009903293537807773, "loss": 0.1149, "num_input_tokens_seen": 41387136, "step": 19180 }, { "epoch": 3.1296900489396413, "grad_norm": 0.09060528874397278, "learning_rate": 0.0009903154171007889, "loss": 0.1218, "num_input_tokens_seen": 41398944, "step": 19185 }, { "epoch": 3.130505709624796, "grad_norm": 0.1547139286994934, "learning_rate": 0.0009903014704839366, "loss": 0.1264, "num_input_tokens_seen": 41410336, "step": 19190 }, { "epoch": 3.131321370309951, "grad_norm": 0.31044110655784607, "learning_rate": 0.000990287513930503, "loss": 0.1305, "num_input_tokens_seen": 41422144, "step": 19195 }, { "epoch": 3.132137030995106, "grad_norm": 0.25305864214897156, "learning_rate": 0.000990273547440771, "loss": 0.0413, "num_input_tokens_seen": 41432256, "step": 19200 }, { "epoch": 3.132952691680261, "grad_norm": 0.2586519718170166, "learning_rate": 0.0009902595710150233, "loss": 0.2537, "num_input_tokens_seen": 41443872, "step": 19205 }, { "epoch": 3.133768352365416, "grad_norm": 0.2524658143520355, "learning_rate": 0.0009902455846535437, "loss": 0.163, "num_input_tokens_seen": 41455264, "step": 19210 }, { "epoch": 3.134584013050571, "grad_norm": 0.06486053764820099, "learning_rate": 0.0009902315883566152, "loss": 0.2082, "num_input_tokens_seen": 41464064, "step": 19215 }, { "epoch": 3.135399673735726, "grad_norm": 0.033876340836286545, "learning_rate": 0.000990217582124522, "loss": 0.114, "num_input_tokens_seen": 41475584, "step": 19220 }, { "epoch": 3.1362153344208807, "grad_norm": 0.10405676066875458, "learning_rate": 0.0009902035659575474, "loss": 0.1028, "num_input_tokens_seen": 41485920, "step": 19225 }, { "epoch": 3.137030995106036, "grad_norm": 0.059751030057668686, "learning_rate": 0.0009901895398559757, "loss": 0.048, "num_input_tokens_seen": 41494848, "step": 19230 }, { "epoch": 3.137846655791191, "grad_norm": 0.135848730802536, "learning_rate": 0.0009901755038200912, "loss": 0.1098, "num_input_tokens_seen": 41505728, "step": 19235 }, { "epoch": 3.1386623164763456, "grad_norm": 0.17315439879894257, "learning_rate": 0.0009901614578501782, "loss": 0.0475, "num_input_tokens_seen": 41516704, "step": 19240 }, { "epoch": 3.139477977161501, "grad_norm": 0.03595849871635437, "learning_rate": 0.0009901474019465215, "loss": 0.0959, "num_input_tokens_seen": 41527168, "step": 19245 }, { "epoch": 3.1402936378466557, "grad_norm": 0.10270584374666214, "learning_rate": 0.0009901333361094057, "loss": 0.0469, "num_input_tokens_seen": 41537216, "step": 19250 }, { "epoch": 3.141109298531811, "grad_norm": 0.2124761939048767, "learning_rate": 0.0009901192603391162, "loss": 0.0743, "num_input_tokens_seen": 41547552, "step": 19255 }, { "epoch": 3.141924959216966, "grad_norm": 0.21061433851718903, "learning_rate": 0.0009901051746359381, "loss": 0.1932, "num_input_tokens_seen": 41558848, "step": 19260 }, { "epoch": 3.1427406199021206, "grad_norm": 0.46988174319267273, "learning_rate": 0.0009900910790001571, "loss": 0.123, "num_input_tokens_seen": 41568800, "step": 19265 }, { "epoch": 3.143556280587276, "grad_norm": 0.013828648254275322, "learning_rate": 0.0009900769734320586, "loss": 0.0672, "num_input_tokens_seen": 41578944, "step": 19270 }, { "epoch": 3.1443719412724307, "grad_norm": 0.03389706090092659, "learning_rate": 0.0009900628579319283, "loss": 0.1807, "num_input_tokens_seen": 41589632, "step": 19275 }, { "epoch": 3.1451876019575855, "grad_norm": 0.011747496202588081, "learning_rate": 0.0009900487325000527, "loss": 0.1267, "num_input_tokens_seen": 41599840, "step": 19280 }, { "epoch": 3.1460032626427408, "grad_norm": 0.09051178395748138, "learning_rate": 0.0009900345971367178, "loss": 0.1556, "num_input_tokens_seen": 41610624, "step": 19285 }, { "epoch": 3.1468189233278956, "grad_norm": 0.05729413032531738, "learning_rate": 0.00099002045184221, "loss": 0.2308, "num_input_tokens_seen": 41621760, "step": 19290 }, { "epoch": 3.1476345840130504, "grad_norm": 0.3062225878238678, "learning_rate": 0.0009900062966168163, "loss": 0.0804, "num_input_tokens_seen": 41632960, "step": 19295 }, { "epoch": 3.1484502446982057, "grad_norm": 0.22729410231113434, "learning_rate": 0.0009899921314608232, "loss": 0.1071, "num_input_tokens_seen": 41643904, "step": 19300 }, { "epoch": 3.1492659053833605, "grad_norm": 0.20046371221542358, "learning_rate": 0.0009899779563745182, "loss": 0.172, "num_input_tokens_seen": 41653888, "step": 19305 }, { "epoch": 3.1500815660685153, "grad_norm": 0.25563520193099976, "learning_rate": 0.0009899637713581882, "loss": 0.1245, "num_input_tokens_seen": 41664576, "step": 19310 }, { "epoch": 3.1508972267536706, "grad_norm": 0.1551029235124588, "learning_rate": 0.0009899495764121207, "loss": 0.0612, "num_input_tokens_seen": 41675264, "step": 19315 }, { "epoch": 3.1517128874388254, "grad_norm": 0.12454026192426682, "learning_rate": 0.0009899353715366037, "loss": 0.0797, "num_input_tokens_seen": 41685472, "step": 19320 }, { "epoch": 3.15252854812398, "grad_norm": 0.06250635534524918, "learning_rate": 0.0009899211567319247, "loss": 0.0749, "num_input_tokens_seen": 41696544, "step": 19325 }, { "epoch": 3.1533442088091355, "grad_norm": 0.05608004331588745, "learning_rate": 0.000989906931998372, "loss": 0.1578, "num_input_tokens_seen": 41707936, "step": 19330 }, { "epoch": 3.1541598694942903, "grad_norm": 0.49152302742004395, "learning_rate": 0.000989892697336234, "loss": 0.1704, "num_input_tokens_seen": 41719264, "step": 19335 }, { "epoch": 3.1549755301794455, "grad_norm": 0.037784576416015625, "learning_rate": 0.0009898784527457988, "loss": 0.1415, "num_input_tokens_seen": 41729600, "step": 19340 }, { "epoch": 3.1557911908646004, "grad_norm": 0.056737542152404785, "learning_rate": 0.0009898641982273553, "loss": 0.0836, "num_input_tokens_seen": 41740864, "step": 19345 }, { "epoch": 3.156606851549755, "grad_norm": 0.04015759751200676, "learning_rate": 0.0009898499337811925, "loss": 0.0763, "num_input_tokens_seen": 41751008, "step": 19350 }, { "epoch": 3.1574225122349104, "grad_norm": 0.24220706522464752, "learning_rate": 0.0009898356594075992, "loss": 0.0337, "num_input_tokens_seen": 41762880, "step": 19355 }, { "epoch": 3.1582381729200653, "grad_norm": 0.06953652203083038, "learning_rate": 0.0009898213751068652, "loss": 0.0426, "num_input_tokens_seen": 41774656, "step": 19360 }, { "epoch": 3.15905383360522, "grad_norm": 0.12147502601146698, "learning_rate": 0.0009898070808792795, "loss": 0.2132, "num_input_tokens_seen": 41784448, "step": 19365 }, { "epoch": 3.1598694942903753, "grad_norm": 0.10891088098287582, "learning_rate": 0.0009897927767251319, "loss": 0.0581, "num_input_tokens_seen": 41795424, "step": 19370 }, { "epoch": 3.16068515497553, "grad_norm": 0.04449936002492905, "learning_rate": 0.0009897784626447122, "loss": 0.1133, "num_input_tokens_seen": 41806784, "step": 19375 }, { "epoch": 3.161500815660685, "grad_norm": 0.04268823191523552, "learning_rate": 0.0009897641386383106, "loss": 0.0575, "num_input_tokens_seen": 41816416, "step": 19380 }, { "epoch": 3.1623164763458402, "grad_norm": 0.0271256472915411, "learning_rate": 0.0009897498047062177, "loss": 0.0241, "num_input_tokens_seen": 41826304, "step": 19385 }, { "epoch": 3.163132137030995, "grad_norm": 0.1386682540178299, "learning_rate": 0.0009897354608487234, "loss": 0.0799, "num_input_tokens_seen": 41836672, "step": 19390 }, { "epoch": 3.1639477977161503, "grad_norm": 0.05629609525203705, "learning_rate": 0.000989721107066119, "loss": 0.04, "num_input_tokens_seen": 41849056, "step": 19395 }, { "epoch": 3.164763458401305, "grad_norm": 0.3085429072380066, "learning_rate": 0.000989706743358695, "loss": 0.1577, "num_input_tokens_seen": 41860096, "step": 19400 }, { "epoch": 3.16557911908646, "grad_norm": 0.08157593756914139, "learning_rate": 0.0009896923697267426, "loss": 0.0989, "num_input_tokens_seen": 41870944, "step": 19405 }, { "epoch": 3.166394779771615, "grad_norm": 0.011861706152558327, "learning_rate": 0.0009896779861705532, "loss": 0.0213, "num_input_tokens_seen": 41882816, "step": 19410 }, { "epoch": 3.16721044045677, "grad_norm": 0.530161440372467, "learning_rate": 0.000989663592690418, "loss": 0.0885, "num_input_tokens_seen": 41895136, "step": 19415 }, { "epoch": 3.168026101141925, "grad_norm": 0.29451650381088257, "learning_rate": 0.0009896491892866291, "loss": 0.3026, "num_input_tokens_seen": 41905600, "step": 19420 }, { "epoch": 3.16884176182708, "grad_norm": 0.25288835167884827, "learning_rate": 0.0009896347759594782, "loss": 0.1333, "num_input_tokens_seen": 41915616, "step": 19425 }, { "epoch": 3.169657422512235, "grad_norm": 0.24792082607746124, "learning_rate": 0.0009896203527092573, "loss": 0.103, "num_input_tokens_seen": 41926464, "step": 19430 }, { "epoch": 3.1704730831973897, "grad_norm": 0.02476027049124241, "learning_rate": 0.000989605919536259, "loss": 0.09, "num_input_tokens_seen": 41936288, "step": 19435 }, { "epoch": 3.171288743882545, "grad_norm": 0.3007301390171051, "learning_rate": 0.0009895914764407755, "loss": 0.0939, "num_input_tokens_seen": 41946848, "step": 19440 }, { "epoch": 3.1721044045677, "grad_norm": 0.03558652475476265, "learning_rate": 0.0009895770234230996, "loss": 0.0364, "num_input_tokens_seen": 41957184, "step": 19445 }, { "epoch": 3.1729200652528546, "grad_norm": 0.26430195569992065, "learning_rate": 0.0009895625604835244, "loss": 0.0601, "num_input_tokens_seen": 41967872, "step": 19450 }, { "epoch": 3.17373572593801, "grad_norm": 0.09238547086715698, "learning_rate": 0.0009895480876223428, "loss": 0.0679, "num_input_tokens_seen": 41979168, "step": 19455 }, { "epoch": 3.1745513866231647, "grad_norm": 0.12186761945486069, "learning_rate": 0.000989533604839848, "loss": 0.1686, "num_input_tokens_seen": 41989408, "step": 19460 }, { "epoch": 3.1753670473083195, "grad_norm": 0.02422953210771084, "learning_rate": 0.0009895191121363338, "loss": 0.0982, "num_input_tokens_seen": 42000992, "step": 19465 }, { "epoch": 3.176182707993475, "grad_norm": 0.02566608414053917, "learning_rate": 0.0009895046095120938, "loss": 0.0169, "num_input_tokens_seen": 42013120, "step": 19470 }, { "epoch": 3.1769983686786296, "grad_norm": 0.014058174565434456, "learning_rate": 0.0009894900969674221, "loss": 0.0522, "num_input_tokens_seen": 42024576, "step": 19475 }, { "epoch": 3.177814029363785, "grad_norm": 0.15957419574260712, "learning_rate": 0.0009894755745026124, "loss": 0.1375, "num_input_tokens_seen": 42035584, "step": 19480 }, { "epoch": 3.1786296900489397, "grad_norm": 0.10306224972009659, "learning_rate": 0.0009894610421179594, "loss": 0.0647, "num_input_tokens_seen": 42045568, "step": 19485 }, { "epoch": 3.1794453507340945, "grad_norm": 0.012239839881658554, "learning_rate": 0.0009894464998137572, "loss": 0.1268, "num_input_tokens_seen": 42055584, "step": 19490 }, { "epoch": 3.1802610114192498, "grad_norm": 0.143980473279953, "learning_rate": 0.000989431947590301, "loss": 0.1304, "num_input_tokens_seen": 42067200, "step": 19495 }, { "epoch": 3.1810766721044046, "grad_norm": 0.39404723048210144, "learning_rate": 0.0009894173854478854, "loss": 0.2973, "num_input_tokens_seen": 42077216, "step": 19500 }, { "epoch": 3.1818923327895594, "grad_norm": 0.23682059347629547, "learning_rate": 0.0009894028133868055, "loss": 0.0818, "num_input_tokens_seen": 42087712, "step": 19505 }, { "epoch": 3.1827079934747147, "grad_norm": 0.12177720665931702, "learning_rate": 0.000989388231407357, "loss": 0.2016, "num_input_tokens_seen": 42096800, "step": 19510 }, { "epoch": 3.1835236541598695, "grad_norm": 0.04504339396953583, "learning_rate": 0.000989373639509835, "loss": 0.1545, "num_input_tokens_seen": 42107072, "step": 19515 }, { "epoch": 3.1843393148450243, "grad_norm": 0.05149910971522331, "learning_rate": 0.0009893590376945354, "loss": 0.1078, "num_input_tokens_seen": 42119296, "step": 19520 }, { "epoch": 3.1851549755301796, "grad_norm": 0.04976373910903931, "learning_rate": 0.000989344425961754, "loss": 0.1569, "num_input_tokens_seen": 42130720, "step": 19525 }, { "epoch": 3.1859706362153344, "grad_norm": 0.08386904001235962, "learning_rate": 0.000989329804311787, "loss": 0.2267, "num_input_tokens_seen": 42143008, "step": 19530 }, { "epoch": 3.186786296900489, "grad_norm": 0.02012326568365097, "learning_rate": 0.000989315172744931, "loss": 0.0463, "num_input_tokens_seen": 42153920, "step": 19535 }, { "epoch": 3.1876019575856445, "grad_norm": 0.009358236566185951, "learning_rate": 0.0009893005312614823, "loss": 0.0838, "num_input_tokens_seen": 42163648, "step": 19540 }, { "epoch": 3.1884176182707993, "grad_norm": 0.13989722728729248, "learning_rate": 0.0009892858798617374, "loss": 0.0643, "num_input_tokens_seen": 42173728, "step": 19545 }, { "epoch": 3.189233278955954, "grad_norm": 0.363875150680542, "learning_rate": 0.0009892712185459935, "loss": 0.1261, "num_input_tokens_seen": 42184640, "step": 19550 }, { "epoch": 3.1900489396411094, "grad_norm": 0.0844980925321579, "learning_rate": 0.0009892565473145476, "loss": 0.0639, "num_input_tokens_seen": 42194496, "step": 19555 }, { "epoch": 3.190864600326264, "grad_norm": 0.022796163335442543, "learning_rate": 0.0009892418661676973, "loss": 0.0314, "num_input_tokens_seen": 42205600, "step": 19560 }, { "epoch": 3.1916802610114194, "grad_norm": 0.013570157811045647, "learning_rate": 0.0009892271751057399, "loss": 0.1843, "num_input_tokens_seen": 42216960, "step": 19565 }, { "epoch": 3.1924959216965743, "grad_norm": 0.2201593816280365, "learning_rate": 0.000989212474128973, "loss": 0.0617, "num_input_tokens_seen": 42227488, "step": 19570 }, { "epoch": 3.193311582381729, "grad_norm": 0.29099833965301514, "learning_rate": 0.0009891977632376949, "loss": 0.0482, "num_input_tokens_seen": 42239200, "step": 19575 }, { "epoch": 3.1941272430668843, "grad_norm": 0.029572706669569016, "learning_rate": 0.0009891830424322034, "loss": 0.0524, "num_input_tokens_seen": 42249568, "step": 19580 }, { "epoch": 3.194942903752039, "grad_norm": 0.3603842556476593, "learning_rate": 0.000989168311712797, "loss": 0.0571, "num_input_tokens_seen": 42261600, "step": 19585 }, { "epoch": 3.195758564437194, "grad_norm": 0.2792888283729553, "learning_rate": 0.0009891535710797744, "loss": 0.1407, "num_input_tokens_seen": 42272128, "step": 19590 }, { "epoch": 3.1965742251223492, "grad_norm": 0.015375965274870396, "learning_rate": 0.0009891388205334338, "loss": 0.1697, "num_input_tokens_seen": 42282752, "step": 19595 }, { "epoch": 3.197389885807504, "grad_norm": 0.02478569932281971, "learning_rate": 0.0009891240600740747, "loss": 0.0944, "num_input_tokens_seen": 42293600, "step": 19600 }, { "epoch": 3.198205546492659, "grad_norm": 0.15989695489406586, "learning_rate": 0.000989109289701996, "loss": 0.1333, "num_input_tokens_seen": 42304960, "step": 19605 }, { "epoch": 3.199021207177814, "grad_norm": 0.03559863567352295, "learning_rate": 0.000989094509417497, "loss": 0.077, "num_input_tokens_seen": 42314208, "step": 19610 }, { "epoch": 3.199836867862969, "grad_norm": 0.10061795264482498, "learning_rate": 0.0009890797192208774, "loss": 0.0364, "num_input_tokens_seen": 42325664, "step": 19615 }, { "epoch": 3.200652528548124, "grad_norm": 0.08416914939880371, "learning_rate": 0.0009890649191124368, "loss": 0.1063, "num_input_tokens_seen": 42335968, "step": 19620 }, { "epoch": 3.201468189233279, "grad_norm": 0.2974399924278259, "learning_rate": 0.000989050109092475, "loss": 0.1009, "num_input_tokens_seen": 42347200, "step": 19625 }, { "epoch": 3.202283849918434, "grad_norm": 0.23012153804302216, "learning_rate": 0.0009890352891612927, "loss": 0.1675, "num_input_tokens_seen": 42357664, "step": 19630 }, { "epoch": 3.203099510603589, "grad_norm": 0.005497765261679888, "learning_rate": 0.0009890204593191896, "loss": 0.0959, "num_input_tokens_seen": 42368576, "step": 19635 }, { "epoch": 3.203915171288744, "grad_norm": 0.04624853655695915, "learning_rate": 0.0009890056195664668, "loss": 0.0752, "num_input_tokens_seen": 42379424, "step": 19640 }, { "epoch": 3.2047308319738987, "grad_norm": 0.26433175802230835, "learning_rate": 0.0009889907699034246, "loss": 0.1107, "num_input_tokens_seen": 42389696, "step": 19645 }, { "epoch": 3.205546492659054, "grad_norm": 0.02200070209801197, "learning_rate": 0.000988975910330364, "loss": 0.1722, "num_input_tokens_seen": 42400576, "step": 19650 }, { "epoch": 3.206362153344209, "grad_norm": 0.11286628246307373, "learning_rate": 0.0009889610408475864, "loss": 0.0789, "num_input_tokens_seen": 42412480, "step": 19655 }, { "epoch": 3.2071778140293636, "grad_norm": 0.021310703828930855, "learning_rate": 0.000988946161455393, "loss": 0.0434, "num_input_tokens_seen": 42423808, "step": 19660 }, { "epoch": 3.207993474714519, "grad_norm": 0.09360131621360779, "learning_rate": 0.0009889312721540855, "loss": 0.0783, "num_input_tokens_seen": 42434336, "step": 19665 }, { "epoch": 3.2088091353996737, "grad_norm": 0.013711540028452873, "learning_rate": 0.0009889163729439653, "loss": 0.0616, "num_input_tokens_seen": 42445344, "step": 19670 }, { "epoch": 3.2096247960848285, "grad_norm": 0.24944034218788147, "learning_rate": 0.0009889014638253346, "loss": 0.2007, "num_input_tokens_seen": 42456288, "step": 19675 }, { "epoch": 3.210440456769984, "grad_norm": 0.032694924622774124, "learning_rate": 0.0009888865447984956, "loss": 0.0351, "num_input_tokens_seen": 42467200, "step": 19680 }, { "epoch": 3.2112561174551386, "grad_norm": 0.1983887106180191, "learning_rate": 0.0009888716158637505, "loss": 0.1485, "num_input_tokens_seen": 42477248, "step": 19685 }, { "epoch": 3.2120717781402934, "grad_norm": 0.047511663287878036, "learning_rate": 0.000988856677021402, "loss": 0.0907, "num_input_tokens_seen": 42487328, "step": 19690 }, { "epoch": 3.2128874388254487, "grad_norm": 0.4561445415019989, "learning_rate": 0.0009888417282717529, "loss": 0.2249, "num_input_tokens_seen": 42498528, "step": 19695 }, { "epoch": 3.2137030995106035, "grad_norm": 0.1878419816493988, "learning_rate": 0.000988826769615106, "loss": 0.1104, "num_input_tokens_seen": 42508960, "step": 19700 }, { "epoch": 3.2145187601957588, "grad_norm": 0.02542862296104431, "learning_rate": 0.0009888118010517642, "loss": 0.0923, "num_input_tokens_seen": 42520864, "step": 19705 }, { "epoch": 3.2153344208809136, "grad_norm": 0.033721037209033966, "learning_rate": 0.0009887968225820315, "loss": 0.0696, "num_input_tokens_seen": 42532032, "step": 19710 }, { "epoch": 3.2161500815660684, "grad_norm": 0.01637588068842888, "learning_rate": 0.0009887818342062106, "loss": 0.249, "num_input_tokens_seen": 42542880, "step": 19715 }, { "epoch": 3.2169657422512237, "grad_norm": 0.1705736517906189, "learning_rate": 0.0009887668359246063, "loss": 0.1879, "num_input_tokens_seen": 42554016, "step": 19720 }, { "epoch": 3.2177814029363785, "grad_norm": 0.11977442353963852, "learning_rate": 0.0009887518277375217, "loss": 0.1345, "num_input_tokens_seen": 42564736, "step": 19725 }, { "epoch": 3.2185970636215333, "grad_norm": 0.08343325555324554, "learning_rate": 0.0009887368096452617, "loss": 0.0575, "num_input_tokens_seen": 42575616, "step": 19730 }, { "epoch": 3.2194127243066886, "grad_norm": 0.1298862099647522, "learning_rate": 0.0009887217816481298, "loss": 0.1225, "num_input_tokens_seen": 42585632, "step": 19735 }, { "epoch": 3.2202283849918434, "grad_norm": 0.3058036267757416, "learning_rate": 0.0009887067437464312, "loss": 0.1664, "num_input_tokens_seen": 42595648, "step": 19740 }, { "epoch": 3.221044045676998, "grad_norm": 0.11011570692062378, "learning_rate": 0.0009886916959404703, "loss": 0.1214, "num_input_tokens_seen": 42605536, "step": 19745 }, { "epoch": 3.2218597063621535, "grad_norm": 0.08198998123407364, "learning_rate": 0.0009886766382305526, "loss": 0.0778, "num_input_tokens_seen": 42617280, "step": 19750 }, { "epoch": 3.2226753670473083, "grad_norm": 0.2720610797405243, "learning_rate": 0.0009886615706169825, "loss": 0.1713, "num_input_tokens_seen": 42628992, "step": 19755 }, { "epoch": 3.223491027732463, "grad_norm": 0.09863653779029846, "learning_rate": 0.0009886464931000661, "loss": 0.0756, "num_input_tokens_seen": 42639424, "step": 19760 }, { "epoch": 3.2243066884176184, "grad_norm": 0.14290325343608856, "learning_rate": 0.0009886314056801084, "loss": 0.0904, "num_input_tokens_seen": 42650720, "step": 19765 }, { "epoch": 3.225122349102773, "grad_norm": 0.10260216146707535, "learning_rate": 0.0009886163083574154, "loss": 0.059, "num_input_tokens_seen": 42661440, "step": 19770 }, { "epoch": 3.225938009787928, "grad_norm": 0.07296804338693619, "learning_rate": 0.000988601201132293, "loss": 0.1792, "num_input_tokens_seen": 42673216, "step": 19775 }, { "epoch": 3.2267536704730833, "grad_norm": 0.14618900418281555, "learning_rate": 0.0009885860840050478, "loss": 0.1453, "num_input_tokens_seen": 42684864, "step": 19780 }, { "epoch": 3.227569331158238, "grad_norm": 0.005429369863122702, "learning_rate": 0.0009885709569759852, "loss": 0.0127, "num_input_tokens_seen": 42695584, "step": 19785 }, { "epoch": 3.2283849918433933, "grad_norm": 0.19854576885700226, "learning_rate": 0.0009885558200454128, "loss": 0.0779, "num_input_tokens_seen": 42706208, "step": 19790 }, { "epoch": 3.229200652528548, "grad_norm": 0.1451471447944641, "learning_rate": 0.0009885406732136367, "loss": 0.1808, "num_input_tokens_seen": 42717504, "step": 19795 }, { "epoch": 3.230016313213703, "grad_norm": 0.15058696269989014, "learning_rate": 0.0009885255164809644, "loss": 0.1352, "num_input_tokens_seen": 42729088, "step": 19800 }, { "epoch": 3.2308319738988582, "grad_norm": 0.12624840438365936, "learning_rate": 0.0009885103498477026, "loss": 0.1846, "num_input_tokens_seen": 42739968, "step": 19805 }, { "epoch": 3.231647634584013, "grad_norm": 0.1825597882270813, "learning_rate": 0.0009884951733141586, "loss": 0.0474, "num_input_tokens_seen": 42752608, "step": 19810 }, { "epoch": 3.232463295269168, "grad_norm": 0.02511531300842762, "learning_rate": 0.0009884799868806406, "loss": 0.0746, "num_input_tokens_seen": 42763008, "step": 19815 }, { "epoch": 3.233278955954323, "grad_norm": 0.4271232485771179, "learning_rate": 0.000988464790547456, "loss": 0.1685, "num_input_tokens_seen": 42773152, "step": 19820 }, { "epoch": 3.234094616639478, "grad_norm": 0.3347690999507904, "learning_rate": 0.0009884495843149124, "loss": 0.145, "num_input_tokens_seen": 42784576, "step": 19825 }, { "epoch": 3.2349102773246328, "grad_norm": 0.028980210423469543, "learning_rate": 0.0009884343681833185, "loss": 0.0671, "num_input_tokens_seen": 42795104, "step": 19830 }, { "epoch": 3.235725938009788, "grad_norm": 0.1877697855234146, "learning_rate": 0.0009884191421529825, "loss": 0.1117, "num_input_tokens_seen": 42806784, "step": 19835 }, { "epoch": 3.236541598694943, "grad_norm": 0.01746250130236149, "learning_rate": 0.000988403906224213, "loss": 0.3354, "num_input_tokens_seen": 42817632, "step": 19840 }, { "epoch": 3.237357259380098, "grad_norm": 0.2586572766304016, "learning_rate": 0.0009883886603973188, "loss": 0.1645, "num_input_tokens_seen": 42829120, "step": 19845 }, { "epoch": 3.238172920065253, "grad_norm": 0.14209625124931335, "learning_rate": 0.0009883734046726086, "loss": 0.1205, "num_input_tokens_seen": 42839936, "step": 19850 }, { "epoch": 3.2389885807504077, "grad_norm": 0.014838643372058868, "learning_rate": 0.0009883581390503922, "loss": 0.1385, "num_input_tokens_seen": 42851168, "step": 19855 }, { "epoch": 3.239804241435563, "grad_norm": 0.14577189087867737, "learning_rate": 0.0009883428635309784, "loss": 0.0498, "num_input_tokens_seen": 42862336, "step": 19860 }, { "epoch": 3.240619902120718, "grad_norm": 0.0610969215631485, "learning_rate": 0.0009883275781146768, "loss": 0.0823, "num_input_tokens_seen": 42871840, "step": 19865 }, { "epoch": 3.2414355628058726, "grad_norm": 0.11508188396692276, "learning_rate": 0.0009883122828017977, "loss": 0.1139, "num_input_tokens_seen": 42883104, "step": 19870 }, { "epoch": 3.242251223491028, "grad_norm": 0.07366909831762314, "learning_rate": 0.0009882969775926505, "loss": 0.2003, "num_input_tokens_seen": 42894112, "step": 19875 }, { "epoch": 3.2430668841761827, "grad_norm": 0.04922463744878769, "learning_rate": 0.0009882816624875454, "loss": 0.1718, "num_input_tokens_seen": 42906048, "step": 19880 }, { "epoch": 3.2438825448613375, "grad_norm": 0.024903669953346252, "learning_rate": 0.0009882663374867933, "loss": 0.1147, "num_input_tokens_seen": 42917280, "step": 19885 }, { "epoch": 3.244698205546493, "grad_norm": 0.035866592079401016, "learning_rate": 0.0009882510025907042, "loss": 0.0488, "num_input_tokens_seen": 42928032, "step": 19890 }, { "epoch": 3.2455138662316476, "grad_norm": 0.20037434995174408, "learning_rate": 0.0009882356577995894, "loss": 0.1908, "num_input_tokens_seen": 42938592, "step": 19895 }, { "epoch": 3.2463295269168024, "grad_norm": 0.0859912559390068, "learning_rate": 0.0009882203031137595, "loss": 0.0793, "num_input_tokens_seen": 42949376, "step": 19900 }, { "epoch": 3.2471451876019577, "grad_norm": 0.1569044589996338, "learning_rate": 0.000988204938533526, "loss": 0.1642, "num_input_tokens_seen": 42961312, "step": 19905 }, { "epoch": 3.2479608482871125, "grad_norm": 0.03795454278588295, "learning_rate": 0.0009881895640591997, "loss": 0.0689, "num_input_tokens_seen": 42971072, "step": 19910 }, { "epoch": 3.2487765089722673, "grad_norm": 0.15967807173728943, "learning_rate": 0.0009881741796910928, "loss": 0.0654, "num_input_tokens_seen": 42982144, "step": 19915 }, { "epoch": 3.2495921696574226, "grad_norm": 0.17215892672538757, "learning_rate": 0.0009881587854295168, "loss": 0.1427, "num_input_tokens_seen": 42992672, "step": 19920 }, { "epoch": 3.2504078303425774, "grad_norm": 0.06646841764450073, "learning_rate": 0.0009881433812747838, "loss": 0.0571, "num_input_tokens_seen": 43005184, "step": 19925 }, { "epoch": 3.2512234910277327, "grad_norm": 0.555322527885437, "learning_rate": 0.000988127967227206, "loss": 0.152, "num_input_tokens_seen": 43016032, "step": 19930 }, { "epoch": 3.2520391517128875, "grad_norm": 0.034235671162605286, "learning_rate": 0.0009881125432870956, "loss": 0.0217, "num_input_tokens_seen": 43026944, "step": 19935 }, { "epoch": 3.2528548123980423, "grad_norm": 0.03825150430202484, "learning_rate": 0.0009880971094547652, "loss": 0.0557, "num_input_tokens_seen": 43037216, "step": 19940 }, { "epoch": 3.2536704730831976, "grad_norm": 0.11131853610277176, "learning_rate": 0.0009880816657305278, "loss": 0.2084, "num_input_tokens_seen": 43047200, "step": 19945 }, { "epoch": 3.2544861337683524, "grad_norm": 0.08202892541885376, "learning_rate": 0.0009880662121146964, "loss": 0.0266, "num_input_tokens_seen": 43057056, "step": 19950 }, { "epoch": 3.255301794453507, "grad_norm": 0.31110596656799316, "learning_rate": 0.0009880507486075838, "loss": 0.1447, "num_input_tokens_seen": 43067968, "step": 19955 }, { "epoch": 3.2561174551386625, "grad_norm": 0.013478612527251244, "learning_rate": 0.0009880352752095038, "loss": 0.0748, "num_input_tokens_seen": 43079488, "step": 19960 }, { "epoch": 3.2569331158238173, "grad_norm": 0.044147297739982605, "learning_rate": 0.0009880197919207698, "loss": 0.0707, "num_input_tokens_seen": 43091904, "step": 19965 }, { "epoch": 3.257748776508972, "grad_norm": 0.0808190405368805, "learning_rate": 0.0009880042987416957, "loss": 0.0614, "num_input_tokens_seen": 43103456, "step": 19970 }, { "epoch": 3.2585644371941274, "grad_norm": 0.049794089049100876, "learning_rate": 0.0009879887956725953, "loss": 0.2647, "num_input_tokens_seen": 43114656, "step": 19975 }, { "epoch": 3.259380097879282, "grad_norm": 0.035072099417448044, "learning_rate": 0.0009879732827137828, "loss": 0.0974, "num_input_tokens_seen": 43125888, "step": 19980 }, { "epoch": 3.2601957585644374, "grad_norm": 0.023805363103747368, "learning_rate": 0.0009879577598655728, "loss": 0.1017, "num_input_tokens_seen": 43136896, "step": 19985 }, { "epoch": 3.2610114192495923, "grad_norm": 0.30112189054489136, "learning_rate": 0.0009879422271282798, "loss": 0.1875, "num_input_tokens_seen": 43147840, "step": 19990 }, { "epoch": 3.261827079934747, "grad_norm": 0.07104147225618362, "learning_rate": 0.0009879266845022187, "loss": 0.174, "num_input_tokens_seen": 43159488, "step": 19995 }, { "epoch": 3.262642740619902, "grad_norm": 0.009224711917340755, "learning_rate": 0.0009879111319877041, "loss": 0.155, "num_input_tokens_seen": 43169664, "step": 20000 }, { "epoch": 3.263458401305057, "grad_norm": 0.08163163810968399, "learning_rate": 0.0009878955695850516, "loss": 0.1174, "num_input_tokens_seen": 43180608, "step": 20005 }, { "epoch": 3.264274061990212, "grad_norm": 0.14063313603401184, "learning_rate": 0.0009878799972945762, "loss": 0.2159, "num_input_tokens_seen": 43192000, "step": 20010 }, { "epoch": 3.2650897226753672, "grad_norm": 0.1994280070066452, "learning_rate": 0.000987864415116594, "loss": 0.1589, "num_input_tokens_seen": 43201664, "step": 20015 }, { "epoch": 3.265905383360522, "grad_norm": 0.029008809477090836, "learning_rate": 0.0009878488230514206, "loss": 0.2314, "num_input_tokens_seen": 43211936, "step": 20020 }, { "epoch": 3.266721044045677, "grad_norm": 0.12221764028072357, "learning_rate": 0.0009878332210993717, "loss": 0.0622, "num_input_tokens_seen": 43222368, "step": 20025 }, { "epoch": 3.267536704730832, "grad_norm": 0.26277509331703186, "learning_rate": 0.0009878176092607638, "loss": 0.1054, "num_input_tokens_seen": 43233184, "step": 20030 }, { "epoch": 3.268352365415987, "grad_norm": 0.10239958763122559, "learning_rate": 0.0009878019875359132, "loss": 0.1953, "num_input_tokens_seen": 43242176, "step": 20035 }, { "epoch": 3.2691680261011418, "grad_norm": 0.13120250403881073, "learning_rate": 0.0009877863559251366, "loss": 0.1454, "num_input_tokens_seen": 43252768, "step": 20040 }, { "epoch": 3.269983686786297, "grad_norm": 0.018589971587061882, "learning_rate": 0.0009877707144287505, "loss": 0.0404, "num_input_tokens_seen": 43263520, "step": 20045 }, { "epoch": 3.270799347471452, "grad_norm": 0.027139538899064064, "learning_rate": 0.0009877550630470722, "loss": 0.0489, "num_input_tokens_seen": 43275904, "step": 20050 }, { "epoch": 3.2716150081566067, "grad_norm": 0.014216907322406769, "learning_rate": 0.000987739401780419, "loss": 0.1373, "num_input_tokens_seen": 43286912, "step": 20055 }, { "epoch": 3.272430668841762, "grad_norm": 0.23312470316886902, "learning_rate": 0.0009877237306291076, "loss": 0.0713, "num_input_tokens_seen": 43298592, "step": 20060 }, { "epoch": 3.2732463295269167, "grad_norm": 0.0692359060049057, "learning_rate": 0.0009877080495934564, "loss": 0.0389, "num_input_tokens_seen": 43308768, "step": 20065 }, { "epoch": 3.274061990212072, "grad_norm": 0.1647033542394638, "learning_rate": 0.0009876923586737828, "loss": 0.1046, "num_input_tokens_seen": 43319872, "step": 20070 }, { "epoch": 3.274877650897227, "grad_norm": 0.27640584111213684, "learning_rate": 0.000987676657870405, "loss": 0.1488, "num_input_tokens_seen": 43330656, "step": 20075 }, { "epoch": 3.2756933115823816, "grad_norm": 0.15489503741264343, "learning_rate": 0.0009876609471836408, "loss": 0.149, "num_input_tokens_seen": 43342432, "step": 20080 }, { "epoch": 3.2765089722675365, "grad_norm": 0.019808344542980194, "learning_rate": 0.000987645226613809, "loss": 0.0899, "num_input_tokens_seen": 43353536, "step": 20085 }, { "epoch": 3.2773246329526917, "grad_norm": 0.2305270880460739, "learning_rate": 0.0009876294961612283, "loss": 0.1162, "num_input_tokens_seen": 43363968, "step": 20090 }, { "epoch": 3.2781402936378465, "grad_norm": 0.2396644800901413, "learning_rate": 0.0009876137558262168, "loss": 0.1231, "num_input_tokens_seen": 43374880, "step": 20095 }, { "epoch": 3.278955954323002, "grad_norm": 0.1311480849981308, "learning_rate": 0.0009875980056090943, "loss": 0.0945, "num_input_tokens_seen": 43385376, "step": 20100 }, { "epoch": 3.2797716150081566, "grad_norm": 0.12157776951789856, "learning_rate": 0.0009875822455101795, "loss": 0.0866, "num_input_tokens_seen": 43395488, "step": 20105 }, { "epoch": 3.2805872756933114, "grad_norm": 0.20856699347496033, "learning_rate": 0.000987566475529792, "loss": 0.2334, "num_input_tokens_seen": 43406688, "step": 20110 }, { "epoch": 3.2814029363784667, "grad_norm": 0.16164426505565643, "learning_rate": 0.0009875506956682513, "loss": 0.0477, "num_input_tokens_seen": 43418016, "step": 20115 }, { "epoch": 3.2822185970636215, "grad_norm": 0.02845783904194832, "learning_rate": 0.0009875349059258773, "loss": 0.0247, "num_input_tokens_seen": 43429696, "step": 20120 }, { "epoch": 3.2830342577487763, "grad_norm": 0.012877179309725761, "learning_rate": 0.00098751910630299, "loss": 0.1311, "num_input_tokens_seen": 43440224, "step": 20125 }, { "epoch": 3.2838499184339316, "grad_norm": 0.06847991794347763, "learning_rate": 0.0009875032967999096, "loss": 0.0433, "num_input_tokens_seen": 43450240, "step": 20130 }, { "epoch": 3.2846655791190864, "grad_norm": 0.3901269733905792, "learning_rate": 0.0009874874774169562, "loss": 0.1489, "num_input_tokens_seen": 43461472, "step": 20135 }, { "epoch": 3.2854812398042412, "grad_norm": 0.17134247720241547, "learning_rate": 0.0009874716481544509, "loss": 0.0909, "num_input_tokens_seen": 43471360, "step": 20140 }, { "epoch": 3.2862969004893965, "grad_norm": 0.014369341544806957, "learning_rate": 0.0009874558090127142, "loss": 0.0842, "num_input_tokens_seen": 43481696, "step": 20145 }, { "epoch": 3.2871125611745513, "grad_norm": 0.028087224811315536, "learning_rate": 0.0009874399599920669, "loss": 0.1663, "num_input_tokens_seen": 43493184, "step": 20150 }, { "epoch": 3.2879282218597066, "grad_norm": 0.02049064077436924, "learning_rate": 0.0009874241010928307, "loss": 0.0399, "num_input_tokens_seen": 43504480, "step": 20155 }, { "epoch": 3.2887438825448614, "grad_norm": 0.027095932513475418, "learning_rate": 0.0009874082323153266, "loss": 0.1668, "num_input_tokens_seen": 43515776, "step": 20160 }, { "epoch": 3.289559543230016, "grad_norm": 0.07035677134990692, "learning_rate": 0.0009873923536598765, "loss": 0.0632, "num_input_tokens_seen": 43527744, "step": 20165 }, { "epoch": 3.2903752039151715, "grad_norm": 0.2741357684135437, "learning_rate": 0.000987376465126802, "loss": 0.2684, "num_input_tokens_seen": 43537472, "step": 20170 }, { "epoch": 3.2911908646003263, "grad_norm": 0.045753687620162964, "learning_rate": 0.0009873605667164252, "loss": 0.1322, "num_input_tokens_seen": 43549408, "step": 20175 }, { "epoch": 3.292006525285481, "grad_norm": 0.017948150634765625, "learning_rate": 0.0009873446584290682, "loss": 0.0573, "num_input_tokens_seen": 43560448, "step": 20180 }, { "epoch": 3.2928221859706364, "grad_norm": 0.18306821584701538, "learning_rate": 0.0009873287402650535, "loss": 0.1842, "num_input_tokens_seen": 43572608, "step": 20185 }, { "epoch": 3.293637846655791, "grad_norm": 0.009299108758568764, "learning_rate": 0.0009873128122247035, "loss": 0.0395, "num_input_tokens_seen": 43582592, "step": 20190 }, { "epoch": 3.294453507340946, "grad_norm": 0.08880362659692764, "learning_rate": 0.0009872968743083414, "loss": 0.1787, "num_input_tokens_seen": 43594144, "step": 20195 }, { "epoch": 3.2952691680261013, "grad_norm": 0.010650860145688057, "learning_rate": 0.0009872809265162898, "loss": 0.186, "num_input_tokens_seen": 43606112, "step": 20200 }, { "epoch": 3.296084828711256, "grad_norm": 0.06544356793165207, "learning_rate": 0.000987264968848872, "loss": 0.0763, "num_input_tokens_seen": 43617440, "step": 20205 }, { "epoch": 3.2969004893964113, "grad_norm": 0.024027137085795403, "learning_rate": 0.0009872490013064117, "loss": 0.0339, "num_input_tokens_seen": 43626912, "step": 20210 }, { "epoch": 3.297716150081566, "grad_norm": 0.0910574197769165, "learning_rate": 0.000987233023889232, "loss": 0.0502, "num_input_tokens_seen": 43636864, "step": 20215 }, { "epoch": 3.298531810766721, "grad_norm": 0.10073552280664444, "learning_rate": 0.000987217036597657, "loss": 0.169, "num_input_tokens_seen": 43649440, "step": 20220 }, { "epoch": 3.299347471451876, "grad_norm": 0.10896526277065277, "learning_rate": 0.000987201039432011, "loss": 0.0605, "num_input_tokens_seen": 43660256, "step": 20225 }, { "epoch": 3.300163132137031, "grad_norm": 0.009215278550982475, "learning_rate": 0.0009871850323926177, "loss": 0.0805, "num_input_tokens_seen": 43670080, "step": 20230 }, { "epoch": 3.300978792822186, "grad_norm": 0.07386329770088196, "learning_rate": 0.0009871690154798017, "loss": 0.131, "num_input_tokens_seen": 43681216, "step": 20235 }, { "epoch": 3.301794453507341, "grad_norm": 0.006736678536981344, "learning_rate": 0.0009871529886938874, "loss": 0.051, "num_input_tokens_seen": 43692160, "step": 20240 }, { "epoch": 3.302610114192496, "grad_norm": 0.014095074497163296, "learning_rate": 0.0009871369520352, "loss": 0.0657, "num_input_tokens_seen": 43703072, "step": 20245 }, { "epoch": 3.3034257748776508, "grad_norm": 0.25055989623069763, "learning_rate": 0.0009871209055040643, "loss": 0.1716, "num_input_tokens_seen": 43715200, "step": 20250 }, { "epoch": 3.304241435562806, "grad_norm": 0.07020489871501923, "learning_rate": 0.0009871048491008052, "loss": 0.0618, "num_input_tokens_seen": 43726464, "step": 20255 }, { "epoch": 3.305057096247961, "grad_norm": 0.028803810477256775, "learning_rate": 0.0009870887828257486, "loss": 0.1483, "num_input_tokens_seen": 43736768, "step": 20260 }, { "epoch": 3.3058727569331157, "grad_norm": 0.10179125517606735, "learning_rate": 0.00098707270667922, "loss": 0.1012, "num_input_tokens_seen": 43746976, "step": 20265 }, { "epoch": 3.306688417618271, "grad_norm": 0.16739678382873535, "learning_rate": 0.000987056620661545, "loss": 0.051, "num_input_tokens_seen": 43758016, "step": 20270 }, { "epoch": 3.3075040783034257, "grad_norm": 0.35529154539108276, "learning_rate": 0.0009870405247730497, "loss": 0.228, "num_input_tokens_seen": 43769248, "step": 20275 }, { "epoch": 3.3083197389885806, "grad_norm": 0.301084041595459, "learning_rate": 0.0009870244190140602, "loss": 0.131, "num_input_tokens_seen": 43780480, "step": 20280 }, { "epoch": 3.309135399673736, "grad_norm": 0.04685066267848015, "learning_rate": 0.000987008303384903, "loss": 0.0305, "num_input_tokens_seen": 43790368, "step": 20285 }, { "epoch": 3.3099510603588906, "grad_norm": 0.23070085048675537, "learning_rate": 0.000986992177885905, "loss": 0.1079, "num_input_tokens_seen": 43801024, "step": 20290 }, { "epoch": 3.310766721044046, "grad_norm": 0.17997124791145325, "learning_rate": 0.0009869760425173927, "loss": 0.2046, "num_input_tokens_seen": 43811648, "step": 20295 }, { "epoch": 3.3115823817292007, "grad_norm": 0.06736693531274796, "learning_rate": 0.000986959897279693, "loss": 0.1092, "num_input_tokens_seen": 43821408, "step": 20300 }, { "epoch": 3.3123980424143555, "grad_norm": 0.02195006050169468, "learning_rate": 0.0009869437421731332, "loss": 0.1409, "num_input_tokens_seen": 43832704, "step": 20305 }, { "epoch": 3.3132137030995104, "grad_norm": 0.14123927056789398, "learning_rate": 0.0009869275771980405, "loss": 0.236, "num_input_tokens_seen": 43843616, "step": 20310 }, { "epoch": 3.3140293637846656, "grad_norm": 0.05863885581493378, "learning_rate": 0.000986911402354743, "loss": 0.0286, "num_input_tokens_seen": 43853632, "step": 20315 }, { "epoch": 3.3148450244698204, "grad_norm": 0.3729930520057678, "learning_rate": 0.0009868952176435683, "loss": 0.2039, "num_input_tokens_seen": 43862464, "step": 20320 }, { "epoch": 3.3156606851549757, "grad_norm": 0.06673592329025269, "learning_rate": 0.0009868790230648443, "loss": 0.1247, "num_input_tokens_seen": 43874816, "step": 20325 }, { "epoch": 3.3164763458401305, "grad_norm": 0.17178292572498322, "learning_rate": 0.0009868628186188993, "loss": 0.1355, "num_input_tokens_seen": 43885664, "step": 20330 }, { "epoch": 3.3172920065252853, "grad_norm": 0.053232546895742416, "learning_rate": 0.0009868466043060616, "loss": 0.0695, "num_input_tokens_seen": 43896064, "step": 20335 }, { "epoch": 3.3181076672104406, "grad_norm": 0.14313098788261414, "learning_rate": 0.00098683038012666, "loss": 0.1094, "num_input_tokens_seen": 43906944, "step": 20340 }, { "epoch": 3.3189233278955954, "grad_norm": 0.17783254384994507, "learning_rate": 0.0009868141460810226, "loss": 0.0965, "num_input_tokens_seen": 43917536, "step": 20345 }, { "epoch": 3.3197389885807502, "grad_norm": 0.06175949424505234, "learning_rate": 0.0009867979021694795, "loss": 0.147, "num_input_tokens_seen": 43927328, "step": 20350 }, { "epoch": 3.3205546492659055, "grad_norm": 0.06311400979757309, "learning_rate": 0.0009867816483923593, "loss": 0.0801, "num_input_tokens_seen": 43938080, "step": 20355 }, { "epoch": 3.3213703099510603, "grad_norm": 0.018011409789323807, "learning_rate": 0.0009867653847499913, "loss": 0.0178, "num_input_tokens_seen": 43948736, "step": 20360 }, { "epoch": 3.322185970636215, "grad_norm": 0.04775621369481087, "learning_rate": 0.0009867491112427055, "loss": 0.1504, "num_input_tokens_seen": 43956640, "step": 20365 }, { "epoch": 3.3230016313213704, "grad_norm": 0.009588596411049366, "learning_rate": 0.0009867328278708313, "loss": 0.0534, "num_input_tokens_seen": 43967264, "step": 20370 }, { "epoch": 3.323817292006525, "grad_norm": 0.05249223858118057, "learning_rate": 0.0009867165346346988, "loss": 0.1235, "num_input_tokens_seen": 43978304, "step": 20375 }, { "epoch": 3.3246329526916805, "grad_norm": 0.11108996719121933, "learning_rate": 0.0009867002315346383, "loss": 0.1813, "num_input_tokens_seen": 43988192, "step": 20380 }, { "epoch": 3.3254486133768353, "grad_norm": 0.3311072289943695, "learning_rate": 0.0009866839185709805, "loss": 0.1808, "num_input_tokens_seen": 43999648, "step": 20385 }, { "epoch": 3.32626427406199, "grad_norm": 0.15181860327720642, "learning_rate": 0.0009866675957440553, "loss": 0.1027, "num_input_tokens_seen": 44010240, "step": 20390 }, { "epoch": 3.3270799347471454, "grad_norm": 0.06140878424048424, "learning_rate": 0.0009866512630541942, "loss": 0.1012, "num_input_tokens_seen": 44020256, "step": 20395 }, { "epoch": 3.3278955954323, "grad_norm": 0.22549058496952057, "learning_rate": 0.0009866349205017277, "loss": 0.2377, "num_input_tokens_seen": 44030624, "step": 20400 }, { "epoch": 3.328711256117455, "grad_norm": 0.510720431804657, "learning_rate": 0.0009866185680869873, "loss": 0.1167, "num_input_tokens_seen": 44041984, "step": 20405 }, { "epoch": 3.3295269168026103, "grad_norm": 0.028794415295124054, "learning_rate": 0.0009866022058103042, "loss": 0.139, "num_input_tokens_seen": 44052704, "step": 20410 }, { "epoch": 3.330342577487765, "grad_norm": 0.21347418427467346, "learning_rate": 0.0009865858336720102, "loss": 0.0711, "num_input_tokens_seen": 44063328, "step": 20415 }, { "epoch": 3.33115823817292, "grad_norm": 0.22976835072040558, "learning_rate": 0.000986569451672437, "loss": 0.2015, "num_input_tokens_seen": 44073504, "step": 20420 }, { "epoch": 3.331973898858075, "grad_norm": 0.13875390589237213, "learning_rate": 0.0009865530598119163, "loss": 0.1549, "num_input_tokens_seen": 44085056, "step": 20425 }, { "epoch": 3.33278955954323, "grad_norm": 0.10439455509185791, "learning_rate": 0.000986536658090781, "loss": 0.0916, "num_input_tokens_seen": 44096544, "step": 20430 }, { "epoch": 3.3336052202283852, "grad_norm": 0.023876899853348732, "learning_rate": 0.0009865202465093631, "loss": 0.052, "num_input_tokens_seen": 44107296, "step": 20435 }, { "epoch": 3.33442088091354, "grad_norm": 0.037222541868686676, "learning_rate": 0.000986503825067995, "loss": 0.0988, "num_input_tokens_seen": 44117888, "step": 20440 }, { "epoch": 3.335236541598695, "grad_norm": 0.12048997730016708, "learning_rate": 0.0009864873937670098, "loss": 0.1215, "num_input_tokens_seen": 44128768, "step": 20445 }, { "epoch": 3.3360522022838497, "grad_norm": 0.1649116575717926, "learning_rate": 0.0009864709526067404, "loss": 0.063, "num_input_tokens_seen": 44139840, "step": 20450 }, { "epoch": 3.336867862969005, "grad_norm": 0.1496172398328781, "learning_rate": 0.0009864545015875199, "loss": 0.1838, "num_input_tokens_seen": 44150944, "step": 20455 }, { "epoch": 3.3376835236541598, "grad_norm": 0.2561221122741699, "learning_rate": 0.000986438040709682, "loss": 0.137, "num_input_tokens_seen": 44162176, "step": 20460 }, { "epoch": 3.338499184339315, "grad_norm": 0.3229144811630249, "learning_rate": 0.00098642156997356, "loss": 0.2363, "num_input_tokens_seen": 44172320, "step": 20465 }, { "epoch": 3.33931484502447, "grad_norm": 0.33167365193367004, "learning_rate": 0.0009864050893794878, "loss": 0.124, "num_input_tokens_seen": 44181792, "step": 20470 }, { "epoch": 3.3401305057096247, "grad_norm": 0.12265154719352722, "learning_rate": 0.0009863885989277994, "loss": 0.0668, "num_input_tokens_seen": 44191904, "step": 20475 }, { "epoch": 3.34094616639478, "grad_norm": 0.05573216825723648, "learning_rate": 0.0009863720986188291, "loss": 0.236, "num_input_tokens_seen": 44203136, "step": 20480 }, { "epoch": 3.3417618270799347, "grad_norm": 0.2283083200454712, "learning_rate": 0.0009863555884529114, "loss": 0.1327, "num_input_tokens_seen": 44213120, "step": 20485 }, { "epoch": 3.3425774877650896, "grad_norm": 0.036833275109529495, "learning_rate": 0.0009863390684303804, "loss": 0.0315, "num_input_tokens_seen": 44223808, "step": 20490 }, { "epoch": 3.343393148450245, "grad_norm": 0.1686730533838272, "learning_rate": 0.0009863225385515714, "loss": 0.105, "num_input_tokens_seen": 44233696, "step": 20495 }, { "epoch": 3.3442088091353996, "grad_norm": 0.007665099576115608, "learning_rate": 0.000986305998816819, "loss": 0.0719, "num_input_tokens_seen": 44244768, "step": 20500 }, { "epoch": 3.3450244698205545, "grad_norm": 0.24534624814987183, "learning_rate": 0.000986289449226459, "loss": 0.0818, "num_input_tokens_seen": 44257440, "step": 20505 }, { "epoch": 3.3458401305057097, "grad_norm": 0.08357571810483932, "learning_rate": 0.000986272889780826, "loss": 0.0724, "num_input_tokens_seen": 44267072, "step": 20510 }, { "epoch": 3.3466557911908645, "grad_norm": 0.05552374944090843, "learning_rate": 0.000986256320480256, "loss": 0.1012, "num_input_tokens_seen": 44277824, "step": 20515 }, { "epoch": 3.34747145187602, "grad_norm": 0.06279558688402176, "learning_rate": 0.0009862397413250852, "loss": 0.0712, "num_input_tokens_seen": 44289536, "step": 20520 }, { "epoch": 3.3482871125611746, "grad_norm": 0.1191486343741417, "learning_rate": 0.0009862231523156489, "loss": 0.0283, "num_input_tokens_seen": 44300128, "step": 20525 }, { "epoch": 3.3491027732463294, "grad_norm": 0.24852430820465088, "learning_rate": 0.0009862065534522837, "loss": 0.1774, "num_input_tokens_seen": 44311968, "step": 20530 }, { "epoch": 3.3499184339314847, "grad_norm": 0.4310777187347412, "learning_rate": 0.000986189944735326, "loss": 0.2391, "num_input_tokens_seen": 44322880, "step": 20535 }, { "epoch": 3.3507340946166395, "grad_norm": 0.018584955483675003, "learning_rate": 0.000986173326165112, "loss": 0.1365, "num_input_tokens_seen": 44333856, "step": 20540 }, { "epoch": 3.3515497553017943, "grad_norm": 0.32223519682884216, "learning_rate": 0.000986156697741979, "loss": 0.0954, "num_input_tokens_seen": 44345792, "step": 20545 }, { "epoch": 3.3523654159869496, "grad_norm": 0.023187018930912018, "learning_rate": 0.0009861400594662637, "loss": 0.0697, "num_input_tokens_seen": 44356288, "step": 20550 }, { "epoch": 3.3531810766721044, "grad_norm": 0.09370926767587662, "learning_rate": 0.0009861234113383035, "loss": 0.055, "num_input_tokens_seen": 44366272, "step": 20555 }, { "epoch": 3.3539967373572592, "grad_norm": 0.16531574726104736, "learning_rate": 0.0009861067533584356, "loss": 0.2461, "num_input_tokens_seen": 44377216, "step": 20560 }, { "epoch": 3.3548123980424145, "grad_norm": 0.26989296078681946, "learning_rate": 0.0009860900855269976, "loss": 0.1353, "num_input_tokens_seen": 44387904, "step": 20565 }, { "epoch": 3.3556280587275693, "grad_norm": 0.47458159923553467, "learning_rate": 0.0009860734078443276, "loss": 0.1, "num_input_tokens_seen": 44399360, "step": 20570 }, { "epoch": 3.356443719412724, "grad_norm": 0.12648791074752808, "learning_rate": 0.0009860567203107632, "loss": 0.0404, "num_input_tokens_seen": 44411008, "step": 20575 }, { "epoch": 3.3572593800978794, "grad_norm": 0.1176278293132782, "learning_rate": 0.0009860400229266427, "loss": 0.0429, "num_input_tokens_seen": 44422368, "step": 20580 }, { "epoch": 3.358075040783034, "grad_norm": 0.00954756885766983, "learning_rate": 0.0009860233156923047, "loss": 0.0343, "num_input_tokens_seen": 44433952, "step": 20585 }, { "epoch": 3.358890701468189, "grad_norm": 0.027827974408864975, "learning_rate": 0.0009860065986080876, "loss": 0.1153, "num_input_tokens_seen": 44444928, "step": 20590 }, { "epoch": 3.3597063621533443, "grad_norm": 0.10029228031635284, "learning_rate": 0.00098598987167433, "loss": 0.1043, "num_input_tokens_seen": 44454912, "step": 20595 }, { "epoch": 3.360522022838499, "grad_norm": 0.26266270875930786, "learning_rate": 0.0009859731348913713, "loss": 0.087, "num_input_tokens_seen": 44464960, "step": 20600 }, { "epoch": 3.3613376835236544, "grad_norm": 0.34491756558418274, "learning_rate": 0.0009859563882595507, "loss": 0.0804, "num_input_tokens_seen": 44476384, "step": 20605 }, { "epoch": 3.362153344208809, "grad_norm": 0.3061334192752838, "learning_rate": 0.0009859396317792074, "loss": 0.076, "num_input_tokens_seen": 44486848, "step": 20610 }, { "epoch": 3.362969004893964, "grad_norm": 0.34796616435050964, "learning_rate": 0.0009859228654506807, "loss": 0.1258, "num_input_tokens_seen": 44496992, "step": 20615 }, { "epoch": 3.3637846655791193, "grad_norm": 0.4032030701637268, "learning_rate": 0.0009859060892743108, "loss": 0.195, "num_input_tokens_seen": 44507936, "step": 20620 }, { "epoch": 3.364600326264274, "grad_norm": 0.024919243529438972, "learning_rate": 0.0009858893032504378, "loss": 0.1616, "num_input_tokens_seen": 44518208, "step": 20625 }, { "epoch": 3.365415986949429, "grad_norm": 0.03653990849852562, "learning_rate": 0.0009858725073794016, "loss": 0.1798, "num_input_tokens_seen": 44527808, "step": 20630 }, { "epoch": 3.366231647634584, "grad_norm": 0.008715344592928886, "learning_rate": 0.0009858557016615423, "loss": 0.0748, "num_input_tokens_seen": 44538816, "step": 20635 }, { "epoch": 3.367047308319739, "grad_norm": 0.0267187487334013, "learning_rate": 0.0009858388860972012, "loss": 0.0607, "num_input_tokens_seen": 44549984, "step": 20640 }, { "epoch": 3.367862969004894, "grad_norm": 0.13579556345939636, "learning_rate": 0.0009858220606867188, "loss": 0.0912, "num_input_tokens_seen": 44560896, "step": 20645 }, { "epoch": 3.368678629690049, "grad_norm": 0.1383434385061264, "learning_rate": 0.000985805225430436, "loss": 0.1498, "num_input_tokens_seen": 44571296, "step": 20650 }, { "epoch": 3.369494290375204, "grad_norm": 0.1948719024658203, "learning_rate": 0.0009857883803286937, "loss": 0.1229, "num_input_tokens_seen": 44581632, "step": 20655 }, { "epoch": 3.370309951060359, "grad_norm": 0.12205937504768372, "learning_rate": 0.0009857715253818338, "loss": 0.1749, "num_input_tokens_seen": 44593408, "step": 20660 }, { "epoch": 3.371125611745514, "grad_norm": 0.09529534727334976, "learning_rate": 0.000985754660590198, "loss": 0.1719, "num_input_tokens_seen": 44603456, "step": 20665 }, { "epoch": 3.3719412724306688, "grad_norm": 0.07062441855669022, "learning_rate": 0.0009857377859541275, "loss": 0.1032, "num_input_tokens_seen": 44613760, "step": 20670 }, { "epoch": 3.3727569331158236, "grad_norm": 0.047079894691705704, "learning_rate": 0.0009857209014739645, "loss": 0.1247, "num_input_tokens_seen": 44625312, "step": 20675 }, { "epoch": 3.373572593800979, "grad_norm": 0.09550262242555618, "learning_rate": 0.0009857040071500512, "loss": 0.1387, "num_input_tokens_seen": 44636640, "step": 20680 }, { "epoch": 3.3743882544861337, "grad_norm": 0.23679779469966888, "learning_rate": 0.0009856871029827303, "loss": 0.1233, "num_input_tokens_seen": 44647808, "step": 20685 }, { "epoch": 3.375203915171289, "grad_norm": 0.1307252049446106, "learning_rate": 0.0009856701889723438, "loss": 0.1777, "num_input_tokens_seen": 44659232, "step": 20690 }, { "epoch": 3.3760195758564437, "grad_norm": 0.12352076917886734, "learning_rate": 0.0009856532651192351, "loss": 0.0916, "num_input_tokens_seen": 44669632, "step": 20695 }, { "epoch": 3.3768352365415986, "grad_norm": 0.07193929702043533, "learning_rate": 0.0009856363314237468, "loss": 0.0966, "num_input_tokens_seen": 44679200, "step": 20700 }, { "epoch": 3.377650897226754, "grad_norm": 0.0639791488647461, "learning_rate": 0.0009856193878862221, "loss": 0.0816, "num_input_tokens_seen": 44689952, "step": 20705 }, { "epoch": 3.3784665579119086, "grad_norm": 0.020089639350771904, "learning_rate": 0.0009856024345070045, "loss": 0.2074, "num_input_tokens_seen": 44700256, "step": 20710 }, { "epoch": 3.3792822185970635, "grad_norm": 0.09342822432518005, "learning_rate": 0.0009855854712864376, "loss": 0.0697, "num_input_tokens_seen": 44711296, "step": 20715 }, { "epoch": 3.3800978792822187, "grad_norm": 0.2638692855834961, "learning_rate": 0.000985568498224865, "loss": 0.203, "num_input_tokens_seen": 44721952, "step": 20720 }, { "epoch": 3.3809135399673735, "grad_norm": 0.03347161412239075, "learning_rate": 0.0009855515153226308, "loss": 0.089, "num_input_tokens_seen": 44732224, "step": 20725 }, { "epoch": 3.3817292006525284, "grad_norm": 0.013189471326768398, "learning_rate": 0.0009855345225800792, "loss": 0.0188, "num_input_tokens_seen": 44742208, "step": 20730 }, { "epoch": 3.3825448613376836, "grad_norm": 0.03436797112226486, "learning_rate": 0.0009855175199975546, "loss": 0.0381, "num_input_tokens_seen": 44752608, "step": 20735 }, { "epoch": 3.3833605220228384, "grad_norm": 0.15107502043247223, "learning_rate": 0.0009855005075754015, "loss": 0.2633, "num_input_tokens_seen": 44761536, "step": 20740 }, { "epoch": 3.3841761827079937, "grad_norm": 0.07098552584648132, "learning_rate": 0.0009854834853139647, "loss": 0.0456, "num_input_tokens_seen": 44773280, "step": 20745 }, { "epoch": 3.3849918433931485, "grad_norm": 0.07078447937965393, "learning_rate": 0.0009854664532135892, "loss": 0.038, "num_input_tokens_seen": 44783648, "step": 20750 }, { "epoch": 3.3858075040783033, "grad_norm": 0.04884570464491844, "learning_rate": 0.0009854494112746203, "loss": 0.0396, "num_input_tokens_seen": 44795072, "step": 20755 }, { "epoch": 3.3866231647634586, "grad_norm": 0.5071955323219299, "learning_rate": 0.000985432359497403, "loss": 0.2411, "num_input_tokens_seen": 44806720, "step": 20760 }, { "epoch": 3.3874388254486134, "grad_norm": 0.08907990157604218, "learning_rate": 0.0009854152978822834, "loss": 0.0957, "num_input_tokens_seen": 44818016, "step": 20765 }, { "epoch": 3.3882544861337682, "grad_norm": 0.044619422405958176, "learning_rate": 0.0009853982264296068, "loss": 0.2157, "num_input_tokens_seen": 44828288, "step": 20770 }, { "epoch": 3.3890701468189235, "grad_norm": 0.06012643128633499, "learning_rate": 0.0009853811451397195, "loss": 0.1456, "num_input_tokens_seen": 44839904, "step": 20775 }, { "epoch": 3.3898858075040783, "grad_norm": 0.18308354914188385, "learning_rate": 0.0009853640540129674, "loss": 0.1918, "num_input_tokens_seen": 44849696, "step": 20780 }, { "epoch": 3.390701468189233, "grad_norm": 0.015501677058637142, "learning_rate": 0.0009853469530496971, "loss": 0.1271, "num_input_tokens_seen": 44861440, "step": 20785 }, { "epoch": 3.3915171288743884, "grad_norm": 0.0387810580432415, "learning_rate": 0.000985329842250255, "loss": 0.1263, "num_input_tokens_seen": 44871616, "step": 20790 }, { "epoch": 3.392332789559543, "grad_norm": 0.044051121920347214, "learning_rate": 0.000985312721614988, "loss": 0.0247, "num_input_tokens_seen": 44881664, "step": 20795 }, { "epoch": 3.393148450244698, "grad_norm": 0.07020793855190277, "learning_rate": 0.0009852955911442431, "loss": 0.1735, "num_input_tokens_seen": 44893568, "step": 20800 }, { "epoch": 3.3939641109298533, "grad_norm": 0.040416814386844635, "learning_rate": 0.0009852784508383673, "loss": 0.123, "num_input_tokens_seen": 44905440, "step": 20805 }, { "epoch": 3.394779771615008, "grad_norm": 0.14210517704486847, "learning_rate": 0.0009852613006977081, "loss": 0.0976, "num_input_tokens_seen": 44916608, "step": 20810 }, { "epoch": 3.395595432300163, "grad_norm": 0.07477036863565445, "learning_rate": 0.0009852441407226132, "loss": 0.1732, "num_input_tokens_seen": 44926912, "step": 20815 }, { "epoch": 3.396411092985318, "grad_norm": 0.13453570008277893, "learning_rate": 0.00098522697091343, "loss": 0.0618, "num_input_tokens_seen": 44938336, "step": 20820 }, { "epoch": 3.397226753670473, "grad_norm": 0.22666631639003754, "learning_rate": 0.0009852097912705067, "loss": 0.17, "num_input_tokens_seen": 44949536, "step": 20825 }, { "epoch": 3.3980424143556283, "grad_norm": 0.039935484528541565, "learning_rate": 0.0009851926017941917, "loss": 0.0938, "num_input_tokens_seen": 44960224, "step": 20830 }, { "epoch": 3.398858075040783, "grad_norm": 0.13182282447814941, "learning_rate": 0.0009851754024848328, "loss": 0.0339, "num_input_tokens_seen": 44972064, "step": 20835 }, { "epoch": 3.399673735725938, "grad_norm": 0.09955941140651703, "learning_rate": 0.0009851581933427792, "loss": 0.2265, "num_input_tokens_seen": 44981344, "step": 20840 }, { "epoch": 3.400489396411093, "grad_norm": 0.3201892077922821, "learning_rate": 0.000985140974368379, "loss": 0.0835, "num_input_tokens_seen": 44990048, "step": 20845 }, { "epoch": 3.401305057096248, "grad_norm": 0.12802691757678986, "learning_rate": 0.0009851237455619818, "loss": 0.1331, "num_input_tokens_seen": 44999680, "step": 20850 }, { "epoch": 3.402120717781403, "grad_norm": 0.12686687707901, "learning_rate": 0.0009851065069239361, "loss": 0.1641, "num_input_tokens_seen": 45010496, "step": 20855 }, { "epoch": 3.402936378466558, "grad_norm": 0.33500126004219055, "learning_rate": 0.0009850892584545921, "loss": 0.274, "num_input_tokens_seen": 45019456, "step": 20860 }, { "epoch": 3.403752039151713, "grad_norm": 0.22078731656074524, "learning_rate": 0.0009850720001542985, "loss": 0.0871, "num_input_tokens_seen": 45030080, "step": 20865 }, { "epoch": 3.4045676998368677, "grad_norm": 0.17461831867694855, "learning_rate": 0.0009850547320234058, "loss": 0.0918, "num_input_tokens_seen": 45039744, "step": 20870 }, { "epoch": 3.405383360522023, "grad_norm": 0.09240314364433289, "learning_rate": 0.0009850374540622633, "loss": 0.107, "num_input_tokens_seen": 45051296, "step": 20875 }, { "epoch": 3.4061990212071778, "grad_norm": 0.2283192127943039, "learning_rate": 0.0009850201662712217, "loss": 0.2721, "num_input_tokens_seen": 45061856, "step": 20880 }, { "epoch": 3.407014681892333, "grad_norm": 0.02627626061439514, "learning_rate": 0.0009850028686506313, "loss": 0.0632, "num_input_tokens_seen": 45072672, "step": 20885 }, { "epoch": 3.407830342577488, "grad_norm": 0.04120069742202759, "learning_rate": 0.000984985561200842, "loss": 0.1596, "num_input_tokens_seen": 45083136, "step": 20890 }, { "epoch": 3.4086460032626427, "grad_norm": 0.0899464339017868, "learning_rate": 0.0009849682439222055, "loss": 0.1067, "num_input_tokens_seen": 45093728, "step": 20895 }, { "epoch": 3.4094616639477975, "grad_norm": 0.05037546902894974, "learning_rate": 0.000984950916815072, "loss": 0.1463, "num_input_tokens_seen": 45104352, "step": 20900 }, { "epoch": 3.4102773246329527, "grad_norm": 0.11913888901472092, "learning_rate": 0.0009849335798797932, "loss": 0.0663, "num_input_tokens_seen": 45115232, "step": 20905 }, { "epoch": 3.4110929853181076, "grad_norm": 0.24528612196445465, "learning_rate": 0.0009849162331167201, "loss": 0.2262, "num_input_tokens_seen": 45126496, "step": 20910 }, { "epoch": 3.411908646003263, "grad_norm": 0.2762809991836548, "learning_rate": 0.0009848988765262044, "loss": 0.3311, "num_input_tokens_seen": 45136640, "step": 20915 }, { "epoch": 3.4127243066884176, "grad_norm": 0.0873592421412468, "learning_rate": 0.0009848815101085977, "loss": 0.0857, "num_input_tokens_seen": 45146624, "step": 20920 }, { "epoch": 3.4135399673735725, "grad_norm": 0.22041839361190796, "learning_rate": 0.0009848641338642524, "loss": 0.097, "num_input_tokens_seen": 45158528, "step": 20925 }, { "epoch": 3.4143556280587277, "grad_norm": 0.07119708508253098, "learning_rate": 0.00098484674779352, "loss": 0.0967, "num_input_tokens_seen": 45169504, "step": 20930 }, { "epoch": 3.4151712887438825, "grad_norm": 0.15038251876831055, "learning_rate": 0.0009848293518967533, "loss": 0.093, "num_input_tokens_seen": 45178816, "step": 20935 }, { "epoch": 3.4159869494290374, "grad_norm": 0.021397694945335388, "learning_rate": 0.0009848119461743049, "loss": 0.0764, "num_input_tokens_seen": 45188608, "step": 20940 }, { "epoch": 3.4168026101141926, "grad_norm": 0.02735428512096405, "learning_rate": 0.000984794530626527, "loss": 0.0728, "num_input_tokens_seen": 45199200, "step": 20945 }, { "epoch": 3.4176182707993474, "grad_norm": 0.04284452646970749, "learning_rate": 0.0009847771052537732, "loss": 0.0321, "num_input_tokens_seen": 45209632, "step": 20950 }, { "epoch": 3.4184339314845023, "grad_norm": 0.1716538518667221, "learning_rate": 0.0009847596700563966, "loss": 0.0596, "num_input_tokens_seen": 45220704, "step": 20955 }, { "epoch": 3.4192495921696575, "grad_norm": 0.23990774154663086, "learning_rate": 0.00098474222503475, "loss": 0.1428, "num_input_tokens_seen": 45232032, "step": 20960 }, { "epoch": 3.4200652528548123, "grad_norm": 0.011622985824942589, "learning_rate": 0.0009847247701891874, "loss": 0.0294, "num_input_tokens_seen": 45242272, "step": 20965 }, { "epoch": 3.4208809135399676, "grad_norm": 0.0393514484167099, "learning_rate": 0.0009847073055200624, "loss": 0.0506, "num_input_tokens_seen": 45252992, "step": 20970 }, { "epoch": 3.4216965742251224, "grad_norm": 0.22967559099197388, "learning_rate": 0.0009846898310277288, "loss": 0.195, "num_input_tokens_seen": 45262112, "step": 20975 }, { "epoch": 3.4225122349102772, "grad_norm": 0.04496794566512108, "learning_rate": 0.000984672346712541, "loss": 0.076, "num_input_tokens_seen": 45272928, "step": 20980 }, { "epoch": 3.4233278955954325, "grad_norm": 0.027481194585561752, "learning_rate": 0.0009846548525748533, "loss": 0.1146, "num_input_tokens_seen": 45283008, "step": 20985 }, { "epoch": 3.4241435562805873, "grad_norm": 0.16030284762382507, "learning_rate": 0.0009846373486150201, "loss": 0.0665, "num_input_tokens_seen": 45294528, "step": 20990 }, { "epoch": 3.424959216965742, "grad_norm": 0.22145883738994598, "learning_rate": 0.0009846198348333964, "loss": 0.1024, "num_input_tokens_seen": 45304704, "step": 20995 }, { "epoch": 3.4257748776508974, "grad_norm": 0.11130783706903458, "learning_rate": 0.0009846023112303369, "loss": 0.066, "num_input_tokens_seen": 45315808, "step": 21000 }, { "epoch": 3.426590538336052, "grad_norm": 0.015251759439706802, "learning_rate": 0.0009845847778061968, "loss": 0.0869, "num_input_tokens_seen": 45326368, "step": 21005 }, { "epoch": 3.427406199021207, "grad_norm": 0.13934676349163055, "learning_rate": 0.0009845672345613313, "loss": 0.3595, "num_input_tokens_seen": 45338720, "step": 21010 }, { "epoch": 3.4282218597063623, "grad_norm": 0.04819369688630104, "learning_rate": 0.0009845496814960962, "loss": 0.0563, "num_input_tokens_seen": 45349312, "step": 21015 }, { "epoch": 3.429037520391517, "grad_norm": 0.015418825671076775, "learning_rate": 0.0009845321186108468, "loss": 0.0139, "num_input_tokens_seen": 45359200, "step": 21020 }, { "epoch": 3.429853181076672, "grad_norm": 0.027279937639832497, "learning_rate": 0.0009845145459059397, "loss": 0.1699, "num_input_tokens_seen": 45370080, "step": 21025 }, { "epoch": 3.430668841761827, "grad_norm": 0.33519336581230164, "learning_rate": 0.0009844969633817306, "loss": 0.2864, "num_input_tokens_seen": 45381120, "step": 21030 }, { "epoch": 3.431484502446982, "grad_norm": 0.21078824996948242, "learning_rate": 0.000984479371038576, "loss": 0.1314, "num_input_tokens_seen": 45391680, "step": 21035 }, { "epoch": 3.432300163132137, "grad_norm": 0.11252864450216293, "learning_rate": 0.0009844617688768323, "loss": 0.172, "num_input_tokens_seen": 45403680, "step": 21040 }, { "epoch": 3.433115823817292, "grad_norm": 0.052799422293901443, "learning_rate": 0.000984444156896856, "loss": 0.1883, "num_input_tokens_seen": 45413792, "step": 21045 }, { "epoch": 3.433931484502447, "grad_norm": 0.03641320765018463, "learning_rate": 0.0009844265350990047, "loss": 0.0433, "num_input_tokens_seen": 45424448, "step": 21050 }, { "epoch": 3.434747145187602, "grad_norm": 0.034446459263563156, "learning_rate": 0.000984408903483635, "loss": 0.0909, "num_input_tokens_seen": 45435648, "step": 21055 }, { "epoch": 3.435562805872757, "grad_norm": 0.24737633764743805, "learning_rate": 0.0009843912620511042, "loss": 0.2122, "num_input_tokens_seen": 45447712, "step": 21060 }, { "epoch": 3.436378466557912, "grad_norm": 0.1329614222049713, "learning_rate": 0.00098437361080177, "loss": 0.2306, "num_input_tokens_seen": 45458624, "step": 21065 }, { "epoch": 3.437194127243067, "grad_norm": 0.23497426509857178, "learning_rate": 0.0009843559497359903, "loss": 0.0798, "num_input_tokens_seen": 45469888, "step": 21070 }, { "epoch": 3.438009787928222, "grad_norm": 0.029102738946676254, "learning_rate": 0.0009843382788541227, "loss": 0.0475, "num_input_tokens_seen": 45481152, "step": 21075 }, { "epoch": 3.4388254486133767, "grad_norm": 0.2662644684314728, "learning_rate": 0.0009843205981565253, "loss": 0.2584, "num_input_tokens_seen": 45491744, "step": 21080 }, { "epoch": 3.439641109298532, "grad_norm": 0.0457543320953846, "learning_rate": 0.0009843029076435567, "loss": 0.0942, "num_input_tokens_seen": 45503200, "step": 21085 }, { "epoch": 3.4404567699836868, "grad_norm": 0.301805317401886, "learning_rate": 0.0009842852073155754, "loss": 0.2383, "num_input_tokens_seen": 45514080, "step": 21090 }, { "epoch": 3.4412724306688416, "grad_norm": 0.0727965384721756, "learning_rate": 0.00098426749717294, "loss": 0.2714, "num_input_tokens_seen": 45525344, "step": 21095 }, { "epoch": 3.442088091353997, "grad_norm": 0.08845868706703186, "learning_rate": 0.0009842497772160092, "loss": 0.075, "num_input_tokens_seen": 45536416, "step": 21100 }, { "epoch": 3.4429037520391517, "grad_norm": 0.02879498526453972, "learning_rate": 0.0009842320474451427, "loss": 0.0665, "num_input_tokens_seen": 45547584, "step": 21105 }, { "epoch": 3.443719412724307, "grad_norm": 0.21388547122478485, "learning_rate": 0.0009842143078606991, "loss": 0.1559, "num_input_tokens_seen": 45557408, "step": 21110 }, { "epoch": 3.4445350734094617, "grad_norm": 0.012092086486518383, "learning_rate": 0.0009841965584630385, "loss": 0.1198, "num_input_tokens_seen": 45568288, "step": 21115 }, { "epoch": 3.4453507340946166, "grad_norm": 0.038375016301870346, "learning_rate": 0.0009841787992525203, "loss": 0.1292, "num_input_tokens_seen": 45578368, "step": 21120 }, { "epoch": 3.4461663947797714, "grad_norm": 0.06722944229841232, "learning_rate": 0.0009841610302295048, "loss": 0.1718, "num_input_tokens_seen": 45589440, "step": 21125 }, { "epoch": 3.4469820554649266, "grad_norm": 0.05471692234277725, "learning_rate": 0.0009841432513943516, "loss": 0.1012, "num_input_tokens_seen": 45600992, "step": 21130 }, { "epoch": 3.4477977161500815, "grad_norm": 0.019768936559557915, "learning_rate": 0.0009841254627474213, "loss": 0.0614, "num_input_tokens_seen": 45612672, "step": 21135 }, { "epoch": 3.4486133768352367, "grad_norm": 0.03371129930019379, "learning_rate": 0.000984107664289074, "loss": 0.0468, "num_input_tokens_seen": 45623232, "step": 21140 }, { "epoch": 3.4494290375203915, "grad_norm": 0.028715256601572037, "learning_rate": 0.0009840898560196712, "loss": 0.1304, "num_input_tokens_seen": 45634944, "step": 21145 }, { "epoch": 3.4502446982055464, "grad_norm": 0.21854211390018463, "learning_rate": 0.000984072037939573, "loss": 0.1332, "num_input_tokens_seen": 45646976, "step": 21150 }, { "epoch": 3.4510603588907016, "grad_norm": 0.10130687057971954, "learning_rate": 0.000984054210049141, "loss": 0.0583, "num_input_tokens_seen": 45658912, "step": 21155 }, { "epoch": 3.4518760195758564, "grad_norm": 0.02355033904314041, "learning_rate": 0.0009840363723487365, "loss": 0.1057, "num_input_tokens_seen": 45668704, "step": 21160 }, { "epoch": 3.4526916802610113, "grad_norm": 0.11190728843212128, "learning_rate": 0.0009840185248387208, "loss": 0.0735, "num_input_tokens_seen": 45678880, "step": 21165 }, { "epoch": 3.4535073409461665, "grad_norm": 0.04235926270484924, "learning_rate": 0.0009840006675194558, "loss": 0.0232, "num_input_tokens_seen": 45688480, "step": 21170 }, { "epoch": 3.4543230016313213, "grad_norm": 0.051719971001148224, "learning_rate": 0.000983982800391303, "loss": 0.1718, "num_input_tokens_seen": 45699488, "step": 21175 }, { "epoch": 3.455138662316476, "grad_norm": 0.008737016469240189, "learning_rate": 0.0009839649234546248, "loss": 0.08, "num_input_tokens_seen": 45710464, "step": 21180 }, { "epoch": 3.4559543230016314, "grad_norm": 0.02019837126135826, "learning_rate": 0.0009839470367097836, "loss": 0.1023, "num_input_tokens_seen": 45719296, "step": 21185 }, { "epoch": 3.4567699836867862, "grad_norm": 0.09250687062740326, "learning_rate": 0.0009839291401571417, "loss": 0.0996, "num_input_tokens_seen": 45730048, "step": 21190 }, { "epoch": 3.4575856443719415, "grad_norm": 0.00748001504689455, "learning_rate": 0.0009839112337970619, "loss": 0.0999, "num_input_tokens_seen": 45740416, "step": 21195 }, { "epoch": 3.4584013050570963, "grad_norm": 0.21730181574821472, "learning_rate": 0.0009838933176299072, "loss": 0.1565, "num_input_tokens_seen": 45750976, "step": 21200 }, { "epoch": 3.459216965742251, "grad_norm": 0.13255657255649567, "learning_rate": 0.0009838753916560404, "loss": 0.0694, "num_input_tokens_seen": 45761920, "step": 21205 }, { "epoch": 3.4600326264274064, "grad_norm": 0.015047597698867321, "learning_rate": 0.000983857455875825, "loss": 0.0601, "num_input_tokens_seen": 45771200, "step": 21210 }, { "epoch": 3.460848287112561, "grad_norm": 0.011840114369988441, "learning_rate": 0.0009838395102896244, "loss": 0.0952, "num_input_tokens_seen": 45781440, "step": 21215 }, { "epoch": 3.461663947797716, "grad_norm": 0.027201639488339424, "learning_rate": 0.0009838215548978024, "loss": 0.2223, "num_input_tokens_seen": 45791936, "step": 21220 }, { "epoch": 3.4624796084828713, "grad_norm": 0.02236240915954113, "learning_rate": 0.0009838035897007226, "loss": 0.1305, "num_input_tokens_seen": 45803008, "step": 21225 }, { "epoch": 3.463295269168026, "grad_norm": 0.04895614832639694, "learning_rate": 0.0009837856146987496, "loss": 0.0834, "num_input_tokens_seen": 45812640, "step": 21230 }, { "epoch": 3.464110929853181, "grad_norm": 0.019846901297569275, "learning_rate": 0.0009837676298922473, "loss": 0.1204, "num_input_tokens_seen": 45823936, "step": 21235 }, { "epoch": 3.464926590538336, "grad_norm": 0.06821836531162262, "learning_rate": 0.0009837496352815803, "loss": 0.0972, "num_input_tokens_seen": 45833792, "step": 21240 }, { "epoch": 3.465742251223491, "grad_norm": 0.12537789344787598, "learning_rate": 0.000983731630867113, "loss": 0.1592, "num_input_tokens_seen": 45845184, "step": 21245 }, { "epoch": 3.466557911908646, "grad_norm": 0.04127345606684685, "learning_rate": 0.0009837136166492109, "loss": 0.0991, "num_input_tokens_seen": 45856384, "step": 21250 }, { "epoch": 3.467373572593801, "grad_norm": 0.07141025364398956, "learning_rate": 0.0009836955926282385, "loss": 0.1935, "num_input_tokens_seen": 45866944, "step": 21255 }, { "epoch": 3.468189233278956, "grad_norm": 0.22872596979141235, "learning_rate": 0.0009836775588045613, "loss": 0.1322, "num_input_tokens_seen": 45876864, "step": 21260 }, { "epoch": 3.4690048939641107, "grad_norm": 0.01511832233518362, "learning_rate": 0.0009836595151785448, "loss": 0.0844, "num_input_tokens_seen": 45888128, "step": 21265 }, { "epoch": 3.469820554649266, "grad_norm": 0.16741567850112915, "learning_rate": 0.0009836414617505548, "loss": 0.0955, "num_input_tokens_seen": 45899872, "step": 21270 }, { "epoch": 3.470636215334421, "grad_norm": 0.033205289393663406, "learning_rate": 0.000983623398520957, "loss": 0.0272, "num_input_tokens_seen": 45910080, "step": 21275 }, { "epoch": 3.471451876019576, "grad_norm": 0.09035614132881165, "learning_rate": 0.0009836053254901173, "loss": 0.044, "num_input_tokens_seen": 45921280, "step": 21280 }, { "epoch": 3.472267536704731, "grad_norm": 0.025018323212862015, "learning_rate": 0.0009835872426584024, "loss": 0.14, "num_input_tokens_seen": 45931424, "step": 21285 }, { "epoch": 3.4730831973898857, "grad_norm": 0.2466650903224945, "learning_rate": 0.0009835691500261784, "loss": 0.1823, "num_input_tokens_seen": 45942144, "step": 21290 }, { "epoch": 3.473898858075041, "grad_norm": 0.06468857824802399, "learning_rate": 0.0009835510475938124, "loss": 0.0997, "num_input_tokens_seen": 45951808, "step": 21295 }, { "epoch": 3.4747145187601958, "grad_norm": 0.07884536683559418, "learning_rate": 0.0009835329353616708, "loss": 0.0531, "num_input_tokens_seen": 45961952, "step": 21300 }, { "epoch": 3.4755301794453506, "grad_norm": 0.07850110530853271, "learning_rate": 0.000983514813330121, "loss": 0.0645, "num_input_tokens_seen": 45973184, "step": 21305 }, { "epoch": 3.476345840130506, "grad_norm": 0.14517685770988464, "learning_rate": 0.00098349668149953, "loss": 0.074, "num_input_tokens_seen": 45984576, "step": 21310 }, { "epoch": 3.4771615008156607, "grad_norm": 0.06265454739332199, "learning_rate": 0.0009834785398702653, "loss": 0.2407, "num_input_tokens_seen": 45995296, "step": 21315 }, { "epoch": 3.4779771615008155, "grad_norm": 0.06418195366859436, "learning_rate": 0.0009834603884426947, "loss": 0.1461, "num_input_tokens_seen": 46005056, "step": 21320 }, { "epoch": 3.4787928221859707, "grad_norm": 0.0186470914632082, "learning_rate": 0.000983442227217186, "loss": 0.034, "num_input_tokens_seen": 46016288, "step": 21325 }, { "epoch": 3.4796084828711256, "grad_norm": 0.05757369101047516, "learning_rate": 0.0009834240561941072, "loss": 0.1369, "num_input_tokens_seen": 46026720, "step": 21330 }, { "epoch": 3.480424143556281, "grad_norm": 0.1285952627658844, "learning_rate": 0.000983405875373827, "loss": 0.1197, "num_input_tokens_seen": 46036512, "step": 21335 }, { "epoch": 3.4812398042414356, "grad_norm": 0.392232209444046, "learning_rate": 0.0009833876847567132, "loss": 0.1521, "num_input_tokens_seen": 46046720, "step": 21340 }, { "epoch": 3.4820554649265905, "grad_norm": 0.035316839814186096, "learning_rate": 0.0009833694843431346, "loss": 0.1633, "num_input_tokens_seen": 46054656, "step": 21345 }, { "epoch": 3.4828711256117453, "grad_norm": 0.09889949858188629, "learning_rate": 0.0009833512741334604, "loss": 0.05, "num_input_tokens_seen": 46064672, "step": 21350 }, { "epoch": 3.4836867862969005, "grad_norm": 0.09070184081792831, "learning_rate": 0.0009833330541280595, "loss": 0.234, "num_input_tokens_seen": 46075648, "step": 21355 }, { "epoch": 3.4845024469820554, "grad_norm": 0.25338008999824524, "learning_rate": 0.0009833148243273012, "loss": 0.2217, "num_input_tokens_seen": 46086272, "step": 21360 }, { "epoch": 3.4853181076672106, "grad_norm": 0.21512499451637268, "learning_rate": 0.0009832965847315547, "loss": 0.1127, "num_input_tokens_seen": 46097312, "step": 21365 }, { "epoch": 3.4861337683523654, "grad_norm": 0.02201324701309204, "learning_rate": 0.00098327833534119, "loss": 0.1292, "num_input_tokens_seen": 46108864, "step": 21370 }, { "epoch": 3.4869494290375203, "grad_norm": 0.0452757403254509, "learning_rate": 0.0009832600761565764, "loss": 0.1468, "num_input_tokens_seen": 46118560, "step": 21375 }, { "epoch": 3.4877650897226755, "grad_norm": 0.2989788055419922, "learning_rate": 0.0009832418071780845, "loss": 0.1376, "num_input_tokens_seen": 46130176, "step": 21380 }, { "epoch": 3.4885807504078303, "grad_norm": 0.24722062051296234, "learning_rate": 0.0009832235284060842, "loss": 0.1063, "num_input_tokens_seen": 46140544, "step": 21385 }, { "epoch": 3.489396411092985, "grad_norm": 0.25683891773223877, "learning_rate": 0.0009832052398409464, "loss": 0.045, "num_input_tokens_seen": 46151136, "step": 21390 }, { "epoch": 3.4902120717781404, "grad_norm": 0.059739794582128525, "learning_rate": 0.000983186941483041, "loss": 0.0595, "num_input_tokens_seen": 46162656, "step": 21395 }, { "epoch": 3.4910277324632952, "grad_norm": 0.08448109030723572, "learning_rate": 0.0009831686333327397, "loss": 0.0696, "num_input_tokens_seen": 46174144, "step": 21400 }, { "epoch": 3.49184339314845, "grad_norm": 0.40453746914863586, "learning_rate": 0.0009831503153904127, "loss": 0.198, "num_input_tokens_seen": 46185312, "step": 21405 }, { "epoch": 3.4926590538336053, "grad_norm": 0.2741551101207733, "learning_rate": 0.000983131987656432, "loss": 0.3102, "num_input_tokens_seen": 46197952, "step": 21410 }, { "epoch": 3.49347471451876, "grad_norm": 0.1125497967004776, "learning_rate": 0.0009831136501311684, "loss": 0.1285, "num_input_tokens_seen": 46210112, "step": 21415 }, { "epoch": 3.4942903752039154, "grad_norm": 0.15472069382667542, "learning_rate": 0.000983095302814994, "loss": 0.1178, "num_input_tokens_seen": 46221792, "step": 21420 }, { "epoch": 3.49510603588907, "grad_norm": 0.04142550751566887, "learning_rate": 0.0009830769457082804, "loss": 0.0765, "num_input_tokens_seen": 46232352, "step": 21425 }, { "epoch": 3.495921696574225, "grad_norm": 0.1576572060585022, "learning_rate": 0.0009830585788113994, "loss": 0.1153, "num_input_tokens_seen": 46241408, "step": 21430 }, { "epoch": 3.4967373572593803, "grad_norm": 0.7239755392074585, "learning_rate": 0.0009830402021247238, "loss": 0.1244, "num_input_tokens_seen": 46252256, "step": 21435 }, { "epoch": 3.497553017944535, "grad_norm": 0.05123426020145416, "learning_rate": 0.0009830218156486256, "loss": 0.0459, "num_input_tokens_seen": 46262560, "step": 21440 }, { "epoch": 3.49836867862969, "grad_norm": 0.237247496843338, "learning_rate": 0.0009830034193834777, "loss": 0.12, "num_input_tokens_seen": 46272384, "step": 21445 }, { "epoch": 3.499184339314845, "grad_norm": 0.14821790158748627, "learning_rate": 0.0009829850133296527, "loss": 0.0583, "num_input_tokens_seen": 46282912, "step": 21450 }, { "epoch": 3.5, "grad_norm": 0.23827241361141205, "learning_rate": 0.0009829665974875237, "loss": 0.3054, "num_input_tokens_seen": 46293568, "step": 21455 }, { "epoch": 3.500815660685155, "grad_norm": 0.2410009652376175, "learning_rate": 0.0009829481718574638, "loss": 0.1068, "num_input_tokens_seen": 46303776, "step": 21460 }, { "epoch": 3.50163132137031, "grad_norm": 0.15220727026462555, "learning_rate": 0.0009829297364398466, "loss": 0.0975, "num_input_tokens_seen": 46314752, "step": 21465 }, { "epoch": 3.502446982055465, "grad_norm": 0.2097858190536499, "learning_rate": 0.0009829112912350456, "loss": 0.112, "num_input_tokens_seen": 46326016, "step": 21470 }, { "epoch": 3.50326264274062, "grad_norm": 0.04197875037789345, "learning_rate": 0.000982892836243435, "loss": 0.1185, "num_input_tokens_seen": 46336608, "step": 21475 }, { "epoch": 3.504078303425775, "grad_norm": 0.04261185601353645, "learning_rate": 0.000982874371465388, "loss": 0.0999, "num_input_tokens_seen": 46346848, "step": 21480 }, { "epoch": 3.50489396411093, "grad_norm": 0.04833027720451355, "learning_rate": 0.0009828558969012795, "loss": 0.183, "num_input_tokens_seen": 46357312, "step": 21485 }, { "epoch": 3.5057096247960846, "grad_norm": 0.046633776277303696, "learning_rate": 0.0009828374125514837, "loss": 0.0853, "num_input_tokens_seen": 46368256, "step": 21490 }, { "epoch": 3.50652528548124, "grad_norm": 0.152083620429039, "learning_rate": 0.0009828189184163752, "loss": 0.0603, "num_input_tokens_seen": 46379616, "step": 21495 }, { "epoch": 3.5073409461663947, "grad_norm": 0.06856576353311539, "learning_rate": 0.0009828004144963288, "loss": 0.2622, "num_input_tokens_seen": 46390464, "step": 21500 }, { "epoch": 3.50815660685155, "grad_norm": 0.026581063866615295, "learning_rate": 0.0009827819007917195, "loss": 0.1375, "num_input_tokens_seen": 46401952, "step": 21505 }, { "epoch": 3.5089722675367048, "grad_norm": 0.1864786148071289, "learning_rate": 0.0009827633773029228, "loss": 0.0691, "num_input_tokens_seen": 46412256, "step": 21510 }, { "epoch": 3.5097879282218596, "grad_norm": 0.33901888132095337, "learning_rate": 0.0009827448440303135, "loss": 0.2557, "num_input_tokens_seen": 46423776, "step": 21515 }, { "epoch": 3.5106035889070144, "grad_norm": 0.2910694181919098, "learning_rate": 0.0009827263009742678, "loss": 0.1801, "num_input_tokens_seen": 46433184, "step": 21520 }, { "epoch": 3.5114192495921697, "grad_norm": 0.073338583111763, "learning_rate": 0.000982707748135161, "loss": 0.068, "num_input_tokens_seen": 46443552, "step": 21525 }, { "epoch": 3.5122349102773245, "grad_norm": 0.0231319610029459, "learning_rate": 0.0009826891855133693, "loss": 0.0649, "num_input_tokens_seen": 46454432, "step": 21530 }, { "epoch": 3.5130505709624797, "grad_norm": 0.1458815038204193, "learning_rate": 0.000982670613109269, "loss": 0.1035, "num_input_tokens_seen": 46464416, "step": 21535 }, { "epoch": 3.5138662316476346, "grad_norm": 0.18037347495555878, "learning_rate": 0.0009826520309232365, "loss": 0.1237, "num_input_tokens_seen": 46475168, "step": 21540 }, { "epoch": 3.5146818923327894, "grad_norm": 0.09783005714416504, "learning_rate": 0.0009826334389556482, "loss": 0.094, "num_input_tokens_seen": 46485728, "step": 21545 }, { "epoch": 3.5154975530179446, "grad_norm": 0.014355110935866833, "learning_rate": 0.000982614837206881, "loss": 0.0405, "num_input_tokens_seen": 46496800, "step": 21550 }, { "epoch": 3.5163132137030995, "grad_norm": 0.04569780454039574, "learning_rate": 0.000982596225677312, "loss": 0.1141, "num_input_tokens_seen": 46508192, "step": 21555 }, { "epoch": 3.5171288743882547, "grad_norm": 0.04446453973650932, "learning_rate": 0.0009825776043673182, "loss": 0.2343, "num_input_tokens_seen": 46518144, "step": 21560 }, { "epoch": 3.5179445350734095, "grad_norm": 0.042683396488428116, "learning_rate": 0.000982558973277277, "loss": 0.0327, "num_input_tokens_seen": 46529472, "step": 21565 }, { "epoch": 3.5187601957585644, "grad_norm": 0.17508842051029205, "learning_rate": 0.0009825403324075662, "loss": 0.0757, "num_input_tokens_seen": 46540064, "step": 21570 }, { "epoch": 3.519575856443719, "grad_norm": 0.07077736407518387, "learning_rate": 0.0009825216817585633, "loss": 0.0869, "num_input_tokens_seen": 46549760, "step": 21575 }, { "epoch": 3.5203915171288744, "grad_norm": 0.13378828763961792, "learning_rate": 0.0009825030213306463, "loss": 0.2107, "num_input_tokens_seen": 46559200, "step": 21580 }, { "epoch": 3.5212071778140293, "grad_norm": 0.12505577504634857, "learning_rate": 0.0009824843511241936, "loss": 0.0643, "num_input_tokens_seen": 46568640, "step": 21585 }, { "epoch": 3.5220228384991845, "grad_norm": 0.21770580112934113, "learning_rate": 0.0009824656711395834, "loss": 0.1854, "num_input_tokens_seen": 46579456, "step": 21590 }, { "epoch": 3.5228384991843393, "grad_norm": 0.06577157974243164, "learning_rate": 0.0009824469813771945, "loss": 0.0638, "num_input_tokens_seen": 46591392, "step": 21595 }, { "epoch": 3.523654159869494, "grad_norm": 0.013933411799371243, "learning_rate": 0.0009824282818374052, "loss": 0.092, "num_input_tokens_seen": 46601824, "step": 21600 }, { "epoch": 3.5244698205546494, "grad_norm": 0.2521439790725708, "learning_rate": 0.000982409572520595, "loss": 0.1258, "num_input_tokens_seen": 46613568, "step": 21605 }, { "epoch": 3.5252854812398042, "grad_norm": 0.026784474030137062, "learning_rate": 0.0009823908534271426, "loss": 0.0435, "num_input_tokens_seen": 46623392, "step": 21610 }, { "epoch": 3.5261011419249595, "grad_norm": 0.29943710565567017, "learning_rate": 0.0009823721245574278, "loss": 0.1717, "num_input_tokens_seen": 46633888, "step": 21615 }, { "epoch": 3.5269168026101143, "grad_norm": 0.03245735540986061, "learning_rate": 0.0009823533859118299, "loss": 0.1059, "num_input_tokens_seen": 46645120, "step": 21620 }, { "epoch": 3.527732463295269, "grad_norm": 0.1805766075849533, "learning_rate": 0.0009823346374907287, "loss": 0.1294, "num_input_tokens_seen": 46655616, "step": 21625 }, { "epoch": 3.528548123980424, "grad_norm": 0.019752616062760353, "learning_rate": 0.000982315879294504, "loss": 0.0587, "num_input_tokens_seen": 46667040, "step": 21630 }, { "epoch": 3.529363784665579, "grad_norm": 0.19607168436050415, "learning_rate": 0.0009822971113235366, "loss": 0.1263, "num_input_tokens_seen": 46677568, "step": 21635 }, { "epoch": 3.530179445350734, "grad_norm": 0.17643755674362183, "learning_rate": 0.0009822783335782061, "loss": 0.0255, "num_input_tokens_seen": 46687456, "step": 21640 }, { "epoch": 3.5309951060358893, "grad_norm": 0.03207607939839363, "learning_rate": 0.0009822595460588935, "loss": 0.0931, "num_input_tokens_seen": 46698848, "step": 21645 }, { "epoch": 3.531810766721044, "grad_norm": 0.3559918999671936, "learning_rate": 0.0009822407487659792, "loss": 0.2216, "num_input_tokens_seen": 46707808, "step": 21650 }, { "epoch": 3.532626427406199, "grad_norm": 0.4256376624107361, "learning_rate": 0.0009822219416998445, "loss": 0.1334, "num_input_tokens_seen": 46717568, "step": 21655 }, { "epoch": 3.5334420880913537, "grad_norm": 0.35481536388397217, "learning_rate": 0.0009822031248608704, "loss": 0.1886, "num_input_tokens_seen": 46728768, "step": 21660 }, { "epoch": 3.534257748776509, "grad_norm": 0.0037591701839119196, "learning_rate": 0.0009821842982494383, "loss": 0.1587, "num_input_tokens_seen": 46738688, "step": 21665 }, { "epoch": 3.535073409461664, "grad_norm": 0.04193272814154625, "learning_rate": 0.0009821654618659297, "loss": 0.0767, "num_input_tokens_seen": 46749472, "step": 21670 }, { "epoch": 3.535889070146819, "grad_norm": 0.25011444091796875, "learning_rate": 0.0009821466157107263, "loss": 0.2151, "num_input_tokens_seen": 46760192, "step": 21675 }, { "epoch": 3.536704730831974, "grad_norm": 0.022408533841371536, "learning_rate": 0.0009821277597842101, "loss": 0.1316, "num_input_tokens_seen": 46772480, "step": 21680 }, { "epoch": 3.5375203915171287, "grad_norm": 0.20478790998458862, "learning_rate": 0.0009821088940867632, "loss": 0.1691, "num_input_tokens_seen": 46783648, "step": 21685 }, { "epoch": 3.538336052202284, "grad_norm": 0.01956280879676342, "learning_rate": 0.0009820900186187681, "loss": 0.0929, "num_input_tokens_seen": 46796000, "step": 21690 }, { "epoch": 3.539151712887439, "grad_norm": 0.2637084424495697, "learning_rate": 0.0009820711333806068, "loss": 0.1666, "num_input_tokens_seen": 46807104, "step": 21695 }, { "epoch": 3.539967373572594, "grad_norm": 0.29831311106681824, "learning_rate": 0.000982052238372663, "loss": 0.2095, "num_input_tokens_seen": 46816512, "step": 21700 }, { "epoch": 3.540783034257749, "grad_norm": 0.016275297850370407, "learning_rate": 0.0009820333335953187, "loss": 0.1492, "num_input_tokens_seen": 46827040, "step": 21705 }, { "epoch": 3.5415986949429037, "grad_norm": 0.01925618015229702, "learning_rate": 0.0009820144190489574, "loss": 0.06, "num_input_tokens_seen": 46836192, "step": 21710 }, { "epoch": 3.5424143556280585, "grad_norm": 0.21569260954856873, "learning_rate": 0.0009819954947339624, "loss": 0.1697, "num_input_tokens_seen": 46848224, "step": 21715 }, { "epoch": 3.5432300163132138, "grad_norm": 0.22966501116752625, "learning_rate": 0.0009819765606507173, "loss": 0.2074, "num_input_tokens_seen": 46858368, "step": 21720 }, { "epoch": 3.5440456769983686, "grad_norm": 0.04934801533818245, "learning_rate": 0.0009819576167996058, "loss": 0.1869, "num_input_tokens_seen": 46868224, "step": 21725 }, { "epoch": 3.544861337683524, "grad_norm": 0.13582730293273926, "learning_rate": 0.000981938663181012, "loss": 0.2172, "num_input_tokens_seen": 46879808, "step": 21730 }, { "epoch": 3.5456769983686787, "grad_norm": 0.33926254510879517, "learning_rate": 0.0009819196997953195, "loss": 0.1889, "num_input_tokens_seen": 46890752, "step": 21735 }, { "epoch": 3.5464926590538335, "grad_norm": 0.15687373280525208, "learning_rate": 0.000981900726642913, "loss": 0.0824, "num_input_tokens_seen": 46902144, "step": 21740 }, { "epoch": 3.5473083197389887, "grad_norm": 0.10445967316627502, "learning_rate": 0.0009818817437241768, "loss": 0.0966, "num_input_tokens_seen": 46913952, "step": 21745 }, { "epoch": 3.5481239804241436, "grad_norm": 0.07214030623435974, "learning_rate": 0.000981862751039496, "loss": 0.0816, "num_input_tokens_seen": 46924064, "step": 21750 }, { "epoch": 3.5489396411092984, "grad_norm": 0.12183009833097458, "learning_rate": 0.000981843748589255, "loss": 0.1136, "num_input_tokens_seen": 46935808, "step": 21755 }, { "epoch": 3.5497553017944536, "grad_norm": 0.26651546359062195, "learning_rate": 0.0009818247363738396, "loss": 0.1183, "num_input_tokens_seen": 46947872, "step": 21760 }, { "epoch": 3.5505709624796085, "grad_norm": 0.057084642350673676, "learning_rate": 0.0009818057143936344, "loss": 0.049, "num_input_tokens_seen": 46959872, "step": 21765 }, { "epoch": 3.5513866231647633, "grad_norm": 0.23655331134796143, "learning_rate": 0.000981786682649025, "loss": 0.222, "num_input_tokens_seen": 46970240, "step": 21770 }, { "epoch": 3.5522022838499185, "grad_norm": 0.3807028532028198, "learning_rate": 0.0009817676411403976, "loss": 0.1415, "num_input_tokens_seen": 46980256, "step": 21775 }, { "epoch": 3.5530179445350734, "grad_norm": 0.06155622750520706, "learning_rate": 0.0009817485898681378, "loss": 0.1771, "num_input_tokens_seen": 46990304, "step": 21780 }, { "epoch": 3.5538336052202286, "grad_norm": 0.218609020113945, "learning_rate": 0.0009817295288326315, "loss": 0.1787, "num_input_tokens_seen": 47000096, "step": 21785 }, { "epoch": 3.5546492659053834, "grad_norm": 0.19485388696193695, "learning_rate": 0.0009817104580342653, "loss": 0.2099, "num_input_tokens_seen": 47011776, "step": 21790 }, { "epoch": 3.5554649265905383, "grad_norm": 0.034824732691049576, "learning_rate": 0.0009816913774734254, "loss": 0.121, "num_input_tokens_seen": 47022464, "step": 21795 }, { "epoch": 3.556280587275693, "grad_norm": 0.24314482510089874, "learning_rate": 0.0009816722871504987, "loss": 0.1287, "num_input_tokens_seen": 47032224, "step": 21800 }, { "epoch": 3.5570962479608483, "grad_norm": 0.03600064292550087, "learning_rate": 0.0009816531870658722, "loss": 0.0517, "num_input_tokens_seen": 47042880, "step": 21805 }, { "epoch": 3.557911908646003, "grad_norm": 0.0426083467900753, "learning_rate": 0.0009816340772199328, "loss": 0.0721, "num_input_tokens_seen": 47054208, "step": 21810 }, { "epoch": 3.5587275693311584, "grad_norm": 0.012417069636285305, "learning_rate": 0.0009816149576130678, "loss": 0.1294, "num_input_tokens_seen": 47064512, "step": 21815 }, { "epoch": 3.5595432300163132, "grad_norm": 0.10296936333179474, "learning_rate": 0.0009815958282456648, "loss": 0.1107, "num_input_tokens_seen": 47076000, "step": 21820 }, { "epoch": 3.560358890701468, "grad_norm": 0.10328549891710281, "learning_rate": 0.0009815766891181112, "loss": 0.0969, "num_input_tokens_seen": 47087296, "step": 21825 }, { "epoch": 3.5611745513866233, "grad_norm": 0.08359768241643906, "learning_rate": 0.0009815575402307953, "loss": 0.1611, "num_input_tokens_seen": 47099136, "step": 21830 }, { "epoch": 3.561990212071778, "grad_norm": 0.2594926655292511, "learning_rate": 0.0009815383815841047, "loss": 0.1483, "num_input_tokens_seen": 47109600, "step": 21835 }, { "epoch": 3.5628058727569334, "grad_norm": 0.07332587242126465, "learning_rate": 0.0009815192131784282, "loss": 0.1677, "num_input_tokens_seen": 47120992, "step": 21840 }, { "epoch": 3.563621533442088, "grad_norm": 0.14021438360214233, "learning_rate": 0.0009815000350141539, "loss": 0.194, "num_input_tokens_seen": 47132576, "step": 21845 }, { "epoch": 3.564437194127243, "grad_norm": 0.1485007256269455, "learning_rate": 0.0009814808470916705, "loss": 0.2045, "num_input_tokens_seen": 47143232, "step": 21850 }, { "epoch": 3.565252854812398, "grad_norm": 0.3131265342235565, "learning_rate": 0.0009814616494113668, "loss": 0.1787, "num_input_tokens_seen": 47152896, "step": 21855 }, { "epoch": 3.566068515497553, "grad_norm": 0.11759250611066818, "learning_rate": 0.0009814424419736323, "loss": 0.064, "num_input_tokens_seen": 47164640, "step": 21860 }, { "epoch": 3.566884176182708, "grad_norm": 0.07363845407962799, "learning_rate": 0.0009814232247788556, "loss": 0.17, "num_input_tokens_seen": 47175072, "step": 21865 }, { "epoch": 3.567699836867863, "grad_norm": 0.24259096384048462, "learning_rate": 0.0009814039978274269, "loss": 0.0675, "num_input_tokens_seen": 47184768, "step": 21870 }, { "epoch": 3.568515497553018, "grad_norm": 0.055216096341609955, "learning_rate": 0.0009813847611197352, "loss": 0.1437, "num_input_tokens_seen": 47194624, "step": 21875 }, { "epoch": 3.569331158238173, "grad_norm": 0.1896030753850937, "learning_rate": 0.0009813655146561709, "loss": 0.113, "num_input_tokens_seen": 47205024, "step": 21880 }, { "epoch": 3.5701468189233276, "grad_norm": 0.11027665436267853, "learning_rate": 0.0009813462584371236, "loss": 0.1247, "num_input_tokens_seen": 47216544, "step": 21885 }, { "epoch": 3.570962479608483, "grad_norm": 0.11065191775560379, "learning_rate": 0.0009813269924629838, "loss": 0.1304, "num_input_tokens_seen": 47228096, "step": 21890 }, { "epoch": 3.5717781402936377, "grad_norm": 0.09954856336116791, "learning_rate": 0.000981307716734142, "loss": 0.0834, "num_input_tokens_seen": 47239776, "step": 21895 }, { "epoch": 3.572593800978793, "grad_norm": 0.0794321596622467, "learning_rate": 0.0009812884312509883, "loss": 0.1291, "num_input_tokens_seen": 47251392, "step": 21900 }, { "epoch": 3.573409461663948, "grad_norm": 0.11455259472131729, "learning_rate": 0.0009812691360139144, "loss": 0.192, "num_input_tokens_seen": 47262080, "step": 21905 }, { "epoch": 3.5742251223491026, "grad_norm": 0.14087459444999695, "learning_rate": 0.000981249831023311, "loss": 0.1007, "num_input_tokens_seen": 47272704, "step": 21910 }, { "epoch": 3.575040783034258, "grad_norm": 0.032544784247875214, "learning_rate": 0.000981230516279569, "loss": 0.1585, "num_input_tokens_seen": 47283136, "step": 21915 }, { "epoch": 3.5758564437194127, "grad_norm": 0.045426297932863235, "learning_rate": 0.0009812111917830801, "loss": 0.1507, "num_input_tokens_seen": 47294656, "step": 21920 }, { "epoch": 3.576672104404568, "grad_norm": 0.20326849818229675, "learning_rate": 0.000981191857534236, "loss": 0.0993, "num_input_tokens_seen": 47304608, "step": 21925 }, { "epoch": 3.5774877650897228, "grad_norm": 0.19702641665935516, "learning_rate": 0.0009811725135334287, "loss": 0.1146, "num_input_tokens_seen": 47314848, "step": 21930 }, { "epoch": 3.5783034257748776, "grad_norm": 0.19827289879322052, "learning_rate": 0.0009811531597810497, "loss": 0.0895, "num_input_tokens_seen": 47325248, "step": 21935 }, { "epoch": 3.5791190864600324, "grad_norm": 0.2922540605068207, "learning_rate": 0.0009811337962774916, "loss": 0.0551, "num_input_tokens_seen": 47336832, "step": 21940 }, { "epoch": 3.5799347471451877, "grad_norm": 0.09413319081068039, "learning_rate": 0.0009811144230231468, "loss": 0.0959, "num_input_tokens_seen": 47347392, "step": 21945 }, { "epoch": 3.5807504078303425, "grad_norm": 0.030188152566552162, "learning_rate": 0.0009810950400184078, "loss": 0.1037, "num_input_tokens_seen": 47358688, "step": 21950 }, { "epoch": 3.5815660685154977, "grad_norm": 0.12798836827278137, "learning_rate": 0.0009810756472636677, "loss": 0.0589, "num_input_tokens_seen": 47369536, "step": 21955 }, { "epoch": 3.5823817292006526, "grad_norm": 0.07745285332202911, "learning_rate": 0.000981056244759319, "loss": 0.0511, "num_input_tokens_seen": 47380288, "step": 21960 }, { "epoch": 3.5831973898858074, "grad_norm": 0.04094652086496353, "learning_rate": 0.0009810368325057555, "loss": 0.0653, "num_input_tokens_seen": 47390304, "step": 21965 }, { "epoch": 3.5840130505709626, "grad_norm": 0.08738607913255692, "learning_rate": 0.0009810174105033703, "loss": 0.1829, "num_input_tokens_seen": 47401696, "step": 21970 }, { "epoch": 3.5848287112561175, "grad_norm": 0.01020030863583088, "learning_rate": 0.000980997978752557, "loss": 0.0745, "num_input_tokens_seen": 47412000, "step": 21975 }, { "epoch": 3.5856443719412723, "grad_norm": 0.012071529403328896, "learning_rate": 0.0009809785372537094, "loss": 0.0801, "num_input_tokens_seen": 47423072, "step": 21980 }, { "epoch": 3.5864600326264275, "grad_norm": 0.11362315714359283, "learning_rate": 0.0009809590860072217, "loss": 0.0783, "num_input_tokens_seen": 47433056, "step": 21985 }, { "epoch": 3.5872756933115824, "grad_norm": 0.16817206144332886, "learning_rate": 0.0009809396250134881, "loss": 0.048, "num_input_tokens_seen": 47444768, "step": 21990 }, { "epoch": 3.588091353996737, "grad_norm": 0.012135513126850128, "learning_rate": 0.0009809201542729028, "loss": 0.0869, "num_input_tokens_seen": 47455456, "step": 21995 }, { "epoch": 3.5889070146818924, "grad_norm": 0.01094899419695139, "learning_rate": 0.0009809006737858603, "loss": 0.0659, "num_input_tokens_seen": 47464640, "step": 22000 }, { "epoch": 3.5897226753670473, "grad_norm": 0.22788214683532715, "learning_rate": 0.0009808811835527557, "loss": 0.0572, "num_input_tokens_seen": 47475872, "step": 22005 }, { "epoch": 3.5905383360522025, "grad_norm": 0.36213254928588867, "learning_rate": 0.000980861683573984, "loss": 0.1111, "num_input_tokens_seen": 47487648, "step": 22010 }, { "epoch": 3.5913539967373573, "grad_norm": 0.014669449999928474, "learning_rate": 0.00098084217384994, "loss": 0.0581, "num_input_tokens_seen": 47499872, "step": 22015 }, { "epoch": 3.592169657422512, "grad_norm": 0.1435958445072174, "learning_rate": 0.0009808226543810198, "loss": 0.1888, "num_input_tokens_seen": 47510112, "step": 22020 }, { "epoch": 3.592985318107667, "grad_norm": 0.0931192934513092, "learning_rate": 0.0009808031251676182, "loss": 0.1387, "num_input_tokens_seen": 47521120, "step": 22025 }, { "epoch": 3.5938009787928222, "grad_norm": 0.029048530384898186, "learning_rate": 0.0009807835862101313, "loss": 0.0632, "num_input_tokens_seen": 47532096, "step": 22030 }, { "epoch": 3.594616639477977, "grad_norm": 0.323615700006485, "learning_rate": 0.0009807640375089552, "loss": 0.1341, "num_input_tokens_seen": 47542400, "step": 22035 }, { "epoch": 3.5954323001631323, "grad_norm": 0.024663671851158142, "learning_rate": 0.000980744479064486, "loss": 0.1472, "num_input_tokens_seen": 47553184, "step": 22040 }, { "epoch": 3.596247960848287, "grad_norm": 0.24778079986572266, "learning_rate": 0.00098072491087712, "loss": 0.0723, "num_input_tokens_seen": 47563616, "step": 22045 }, { "epoch": 3.597063621533442, "grad_norm": 0.19076694548130035, "learning_rate": 0.0009807053329472539, "loss": 0.0832, "num_input_tokens_seen": 47575488, "step": 22050 }, { "epoch": 3.597879282218597, "grad_norm": 0.016913898289203644, "learning_rate": 0.0009806857452752844, "loss": 0.0588, "num_input_tokens_seen": 47586912, "step": 22055 }, { "epoch": 3.598694942903752, "grad_norm": 0.011805090121924877, "learning_rate": 0.0009806661478616084, "loss": 0.0373, "num_input_tokens_seen": 47597536, "step": 22060 }, { "epoch": 3.5995106035889073, "grad_norm": 0.3173375129699707, "learning_rate": 0.000980646540706623, "loss": 0.1357, "num_input_tokens_seen": 47609664, "step": 22065 }, { "epoch": 3.600326264274062, "grad_norm": 0.3606938123703003, "learning_rate": 0.0009806269238107261, "loss": 0.1998, "num_input_tokens_seen": 47619616, "step": 22070 }, { "epoch": 3.601141924959217, "grad_norm": 0.276654452085495, "learning_rate": 0.0009806072971743148, "loss": 0.0819, "num_input_tokens_seen": 47631808, "step": 22075 }, { "epoch": 3.6019575856443717, "grad_norm": 0.11738751083612442, "learning_rate": 0.000980587660797787, "loss": 0.0927, "num_input_tokens_seen": 47643808, "step": 22080 }, { "epoch": 3.602773246329527, "grad_norm": 0.271005243062973, "learning_rate": 0.00098056801468154, "loss": 0.0855, "num_input_tokens_seen": 47655808, "step": 22085 }, { "epoch": 3.603588907014682, "grad_norm": 0.1622883677482605, "learning_rate": 0.0009805483588259732, "loss": 0.0636, "num_input_tokens_seen": 47666848, "step": 22090 }, { "epoch": 3.604404567699837, "grad_norm": 0.16403160989284515, "learning_rate": 0.000980528693231484, "loss": 0.1015, "num_input_tokens_seen": 47678368, "step": 22095 }, { "epoch": 3.605220228384992, "grad_norm": 0.015675485134124756, "learning_rate": 0.0009805090178984712, "loss": 0.0458, "num_input_tokens_seen": 47688896, "step": 22100 }, { "epoch": 3.6060358890701467, "grad_norm": 0.4606676697731018, "learning_rate": 0.0009804893328273336, "loss": 0.1414, "num_input_tokens_seen": 47700608, "step": 22105 }, { "epoch": 3.6068515497553015, "grad_norm": 0.05162198841571808, "learning_rate": 0.0009804696380184704, "loss": 0.158, "num_input_tokens_seen": 47711392, "step": 22110 }, { "epoch": 3.607667210440457, "grad_norm": 0.13467980921268463, "learning_rate": 0.0009804499334722801, "loss": 0.0952, "num_input_tokens_seen": 47721632, "step": 22115 }, { "epoch": 3.6084828711256116, "grad_norm": 0.12729661166667938, "learning_rate": 0.0009804302191891625, "loss": 0.0657, "num_input_tokens_seen": 47732416, "step": 22120 }, { "epoch": 3.609298531810767, "grad_norm": 0.10975393652915955, "learning_rate": 0.0009804104951695173, "loss": 0.3005, "num_input_tokens_seen": 47742912, "step": 22125 }, { "epoch": 3.6101141924959217, "grad_norm": 0.325187087059021, "learning_rate": 0.0009803907614137435, "loss": 0.1112, "num_input_tokens_seen": 47753120, "step": 22130 }, { "epoch": 3.6109298531810765, "grad_norm": 0.011061440221965313, "learning_rate": 0.0009803710179222419, "loss": 0.0873, "num_input_tokens_seen": 47764448, "step": 22135 }, { "epoch": 3.6117455138662318, "grad_norm": 0.33052414655685425, "learning_rate": 0.000980351264695412, "loss": 0.1659, "num_input_tokens_seen": 47775136, "step": 22140 }, { "epoch": 3.6125611745513866, "grad_norm": 0.020290156826376915, "learning_rate": 0.0009803315017336545, "loss": 0.058, "num_input_tokens_seen": 47785760, "step": 22145 }, { "epoch": 3.613376835236542, "grad_norm": 0.32145053148269653, "learning_rate": 0.0009803117290373697, "loss": 0.0977, "num_input_tokens_seen": 47796960, "step": 22150 }, { "epoch": 3.6141924959216967, "grad_norm": 0.13012543320655823, "learning_rate": 0.0009802919466069585, "loss": 0.0642, "num_input_tokens_seen": 47807040, "step": 22155 }, { "epoch": 3.6150081566068515, "grad_norm": 0.04984763264656067, "learning_rate": 0.0009802721544428215, "loss": 0.0591, "num_input_tokens_seen": 47818560, "step": 22160 }, { "epoch": 3.6158238172920063, "grad_norm": 0.024310573935508728, "learning_rate": 0.0009802523525453601, "loss": 0.0508, "num_input_tokens_seen": 47828256, "step": 22165 }, { "epoch": 3.6166394779771616, "grad_norm": 0.11786495894193649, "learning_rate": 0.0009802325409149757, "loss": 0.0754, "num_input_tokens_seen": 47839328, "step": 22170 }, { "epoch": 3.6174551386623164, "grad_norm": 0.10442333668470383, "learning_rate": 0.0009802127195520697, "loss": 0.0523, "num_input_tokens_seen": 47850880, "step": 22175 }, { "epoch": 3.6182707993474716, "grad_norm": 0.007943031378090382, "learning_rate": 0.0009801928884570434, "loss": 0.06, "num_input_tokens_seen": 47861120, "step": 22180 }, { "epoch": 3.6190864600326265, "grad_norm": 0.34523555636405945, "learning_rate": 0.0009801730476302992, "loss": 0.0606, "num_input_tokens_seen": 47872032, "step": 22185 }, { "epoch": 3.6199021207177813, "grad_norm": 0.012854296714067459, "learning_rate": 0.000980153197072239, "loss": 0.0603, "num_input_tokens_seen": 47883072, "step": 22190 }, { "epoch": 3.6207177814029365, "grad_norm": 0.20778623223304749, "learning_rate": 0.0009801333367832651, "loss": 0.0616, "num_input_tokens_seen": 47893408, "step": 22195 }, { "epoch": 3.6215334420880914, "grad_norm": 0.4646817445755005, "learning_rate": 0.0009801134667637803, "loss": 0.1556, "num_input_tokens_seen": 47903680, "step": 22200 }, { "epoch": 3.622349102773246, "grad_norm": 0.09540849924087524, "learning_rate": 0.0009800935870141868, "loss": 0.1095, "num_input_tokens_seen": 47915232, "step": 22205 }, { "epoch": 3.6231647634584014, "grad_norm": 0.14768311381340027, "learning_rate": 0.0009800736975348878, "loss": 0.126, "num_input_tokens_seen": 47927232, "step": 22210 }, { "epoch": 3.6239804241435563, "grad_norm": 0.00815974734723568, "learning_rate": 0.0009800537983262862, "loss": 0.1839, "num_input_tokens_seen": 47937824, "step": 22215 }, { "epoch": 3.624796084828711, "grad_norm": 0.01215081661939621, "learning_rate": 0.0009800338893887857, "loss": 0.0914, "num_input_tokens_seen": 47945792, "step": 22220 }, { "epoch": 3.6256117455138663, "grad_norm": 0.1170039102435112, "learning_rate": 0.000980013970722789, "loss": 0.055, "num_input_tokens_seen": 47956960, "step": 22225 }, { "epoch": 3.626427406199021, "grad_norm": 0.21183598041534424, "learning_rate": 0.0009799940423287005, "loss": 0.1026, "num_input_tokens_seen": 47967488, "step": 22230 }, { "epoch": 3.6272430668841764, "grad_norm": 0.09954554587602615, "learning_rate": 0.000979974104206924, "loss": 0.0399, "num_input_tokens_seen": 47977312, "step": 22235 }, { "epoch": 3.6280587275693312, "grad_norm": 0.1701282262802124, "learning_rate": 0.0009799541563578632, "loss": 0.1231, "num_input_tokens_seen": 47987744, "step": 22240 }, { "epoch": 3.628874388254486, "grad_norm": 0.13297243416309357, "learning_rate": 0.0009799341987819224, "loss": 0.1644, "num_input_tokens_seen": 47998496, "step": 22245 }, { "epoch": 3.629690048939641, "grad_norm": 0.042690031230449677, "learning_rate": 0.0009799142314795065, "loss": 0.1198, "num_input_tokens_seen": 48010144, "step": 22250 }, { "epoch": 3.630505709624796, "grad_norm": 0.02187974750995636, "learning_rate": 0.0009798942544510198, "loss": 0.0666, "num_input_tokens_seen": 48021760, "step": 22255 }, { "epoch": 3.631321370309951, "grad_norm": 0.1917685568332672, "learning_rate": 0.000979874267696867, "loss": 0.0673, "num_input_tokens_seen": 48032544, "step": 22260 }, { "epoch": 3.632137030995106, "grad_norm": 0.14201650023460388, "learning_rate": 0.0009798542712174537, "loss": 0.0612, "num_input_tokens_seen": 48043808, "step": 22265 }, { "epoch": 3.632952691680261, "grad_norm": 0.01867818646132946, "learning_rate": 0.0009798342650131845, "loss": 0.1134, "num_input_tokens_seen": 48055840, "step": 22270 }, { "epoch": 3.633768352365416, "grad_norm": 0.21375758945941925, "learning_rate": 0.0009798142490844656, "loss": 0.2288, "num_input_tokens_seen": 48065664, "step": 22275 }, { "epoch": 3.634584013050571, "grad_norm": 0.23075224459171295, "learning_rate": 0.0009797942234317022, "loss": 0.0608, "num_input_tokens_seen": 48075104, "step": 22280 }, { "epoch": 3.635399673735726, "grad_norm": 0.009332277812063694, "learning_rate": 0.0009797741880553, "loss": 0.0887, "num_input_tokens_seen": 48086944, "step": 22285 }, { "epoch": 3.636215334420881, "grad_norm": 0.05546322464942932, "learning_rate": 0.0009797541429556653, "loss": 0.0567, "num_input_tokens_seen": 48097408, "step": 22290 }, { "epoch": 3.637030995106036, "grad_norm": 0.08566584438085556, "learning_rate": 0.0009797340881332044, "loss": 0.0917, "num_input_tokens_seen": 48109664, "step": 22295 }, { "epoch": 3.637846655791191, "grad_norm": 0.062336668372154236, "learning_rate": 0.0009797140235883236, "loss": 0.0414, "num_input_tokens_seen": 48120960, "step": 22300 }, { "epoch": 3.6386623164763456, "grad_norm": 0.014260306023061275, "learning_rate": 0.0009796939493214294, "loss": 0.0671, "num_input_tokens_seen": 48130176, "step": 22305 }, { "epoch": 3.639477977161501, "grad_norm": 0.00947236642241478, "learning_rate": 0.000979673865332929, "loss": 0.0742, "num_input_tokens_seen": 48141344, "step": 22310 }, { "epoch": 3.6402936378466557, "grad_norm": 0.0033442594576627016, "learning_rate": 0.0009796537716232289, "loss": 0.2434, "num_input_tokens_seen": 48151360, "step": 22315 }, { "epoch": 3.641109298531811, "grad_norm": 0.144778311252594, "learning_rate": 0.000979633668192737, "loss": 0.0844, "num_input_tokens_seen": 48163488, "step": 22320 }, { "epoch": 3.641924959216966, "grad_norm": 0.022786879912018776, "learning_rate": 0.0009796135550418602, "loss": 0.0753, "num_input_tokens_seen": 48173600, "step": 22325 }, { "epoch": 3.6427406199021206, "grad_norm": 0.02024921402335167, "learning_rate": 0.0009795934321710062, "loss": 0.0761, "num_input_tokens_seen": 48184416, "step": 22330 }, { "epoch": 3.6435562805872754, "grad_norm": 0.3580096662044525, "learning_rate": 0.0009795732995805829, "loss": 0.1956, "num_input_tokens_seen": 48196640, "step": 22335 }, { "epoch": 3.6443719412724307, "grad_norm": 0.15329070389270782, "learning_rate": 0.0009795531572709983, "loss": 0.2529, "num_input_tokens_seen": 48207104, "step": 22340 }, { "epoch": 3.6451876019575855, "grad_norm": 0.29911723732948303, "learning_rate": 0.0009795330052426608, "loss": 0.0996, "num_input_tokens_seen": 48216512, "step": 22345 }, { "epoch": 3.6460032626427408, "grad_norm": 0.08988039940595627, "learning_rate": 0.0009795128434959785, "loss": 0.0469, "num_input_tokens_seen": 48224320, "step": 22350 }, { "epoch": 3.6468189233278956, "grad_norm": 0.08285453915596008, "learning_rate": 0.00097949267203136, "loss": 0.0477, "num_input_tokens_seen": 48236000, "step": 22355 }, { "epoch": 3.6476345840130504, "grad_norm": 0.07392103224992752, "learning_rate": 0.0009794724908492143, "loss": 0.0686, "num_input_tokens_seen": 48246624, "step": 22360 }, { "epoch": 3.6484502446982057, "grad_norm": 0.2411106377840042, "learning_rate": 0.0009794522999499503, "loss": 0.1117, "num_input_tokens_seen": 48258240, "step": 22365 }, { "epoch": 3.6492659053833605, "grad_norm": 0.14189265668392181, "learning_rate": 0.0009794320993339772, "loss": 0.1807, "num_input_tokens_seen": 48269504, "step": 22370 }, { "epoch": 3.6500815660685157, "grad_norm": 0.18825192749500275, "learning_rate": 0.0009794118890017046, "loss": 0.1139, "num_input_tokens_seen": 48280192, "step": 22375 }, { "epoch": 3.6508972267536706, "grad_norm": 0.1059550791978836, "learning_rate": 0.0009793916689535417, "loss": 0.1414, "num_input_tokens_seen": 48292000, "step": 22380 }, { "epoch": 3.6517128874388254, "grad_norm": 0.21567970514297485, "learning_rate": 0.0009793714391898984, "loss": 0.1296, "num_input_tokens_seen": 48302560, "step": 22385 }, { "epoch": 3.65252854812398, "grad_norm": 0.44998466968536377, "learning_rate": 0.000979351199711185, "loss": 0.1213, "num_input_tokens_seen": 48312672, "step": 22390 }, { "epoch": 3.6533442088091355, "grad_norm": 0.1450589895248413, "learning_rate": 0.0009793309505178112, "loss": 0.0819, "num_input_tokens_seen": 48323552, "step": 22395 }, { "epoch": 3.6541598694942903, "grad_norm": 0.014210057444870472, "learning_rate": 0.000979310691610188, "loss": 0.0549, "num_input_tokens_seen": 48333984, "step": 22400 }, { "epoch": 3.6549755301794455, "grad_norm": 0.16024717688560486, "learning_rate": 0.0009792904229887253, "loss": 0.087, "num_input_tokens_seen": 48343200, "step": 22405 }, { "epoch": 3.6557911908646004, "grad_norm": 0.2967562675476074, "learning_rate": 0.0009792701446538342, "loss": 0.1384, "num_input_tokens_seen": 48354208, "step": 22410 }, { "epoch": 3.656606851549755, "grad_norm": 0.0832614079117775, "learning_rate": 0.0009792498566059255, "loss": 0.0603, "num_input_tokens_seen": 48364704, "step": 22415 }, { "epoch": 3.6574225122349104, "grad_norm": 0.030475346371531487, "learning_rate": 0.0009792295588454106, "loss": 0.0498, "num_input_tokens_seen": 48375904, "step": 22420 }, { "epoch": 3.6582381729200653, "grad_norm": 0.3510100543498993, "learning_rate": 0.0009792092513727006, "loss": 0.0782, "num_input_tokens_seen": 48385952, "step": 22425 }, { "epoch": 3.65905383360522, "grad_norm": 0.04821424558758736, "learning_rate": 0.0009791889341882075, "loss": 0.1041, "num_input_tokens_seen": 48397344, "step": 22430 }, { "epoch": 3.6598694942903753, "grad_norm": 0.2667387127876282, "learning_rate": 0.0009791686072923424, "loss": 0.1576, "num_input_tokens_seen": 48407968, "step": 22435 }, { "epoch": 3.66068515497553, "grad_norm": 0.03599264472723007, "learning_rate": 0.0009791482706855178, "loss": 0.0309, "num_input_tokens_seen": 48419552, "step": 22440 }, { "epoch": 3.661500815660685, "grad_norm": 0.03535604104399681, "learning_rate": 0.0009791279243681456, "loss": 0.0332, "num_input_tokens_seen": 48430592, "step": 22445 }, { "epoch": 3.6623164763458402, "grad_norm": 0.021337002515792847, "learning_rate": 0.0009791075683406383, "loss": 0.353, "num_input_tokens_seen": 48441440, "step": 22450 }, { "epoch": 3.663132137030995, "grad_norm": 0.19581550359725952, "learning_rate": 0.0009790872026034082, "loss": 0.1812, "num_input_tokens_seen": 48452832, "step": 22455 }, { "epoch": 3.6639477977161503, "grad_norm": 0.17172972857952118, "learning_rate": 0.0009790668271568684, "loss": 0.1571, "num_input_tokens_seen": 48463584, "step": 22460 }, { "epoch": 3.664763458401305, "grad_norm": 0.04200759530067444, "learning_rate": 0.0009790464420014312, "loss": 0.0649, "num_input_tokens_seen": 48474592, "step": 22465 }, { "epoch": 3.66557911908646, "grad_norm": 0.10835733264684677, "learning_rate": 0.0009790260471375105, "loss": 0.1974, "num_input_tokens_seen": 48484000, "step": 22470 }, { "epoch": 3.6663947797716148, "grad_norm": 0.08503836393356323, "learning_rate": 0.0009790056425655193, "loss": 0.118, "num_input_tokens_seen": 48495200, "step": 22475 }, { "epoch": 3.66721044045677, "grad_norm": 0.04061872139573097, "learning_rate": 0.0009789852282858708, "loss": 0.0745, "num_input_tokens_seen": 48506496, "step": 22480 }, { "epoch": 3.668026101141925, "grad_norm": 0.1223311722278595, "learning_rate": 0.0009789648042989793, "loss": 0.1393, "num_input_tokens_seen": 48517472, "step": 22485 }, { "epoch": 3.66884176182708, "grad_norm": 0.02928619645535946, "learning_rate": 0.0009789443706052583, "loss": 0.0604, "num_input_tokens_seen": 48528480, "step": 22490 }, { "epoch": 3.669657422512235, "grad_norm": 0.05828101187944412, "learning_rate": 0.000978923927205122, "loss": 0.0642, "num_input_tokens_seen": 48540288, "step": 22495 }, { "epoch": 3.6704730831973897, "grad_norm": 0.023443002253770828, "learning_rate": 0.0009789034740989848, "loss": 0.1695, "num_input_tokens_seen": 48551488, "step": 22500 }, { "epoch": 3.671288743882545, "grad_norm": 0.11195553839206696, "learning_rate": 0.0009788830112872611, "loss": 0.0986, "num_input_tokens_seen": 48562144, "step": 22505 }, { "epoch": 3.6721044045677, "grad_norm": 0.02752220258116722, "learning_rate": 0.0009788625387703658, "loss": 0.0818, "num_input_tokens_seen": 48574592, "step": 22510 }, { "epoch": 3.672920065252855, "grad_norm": 0.08413148671388626, "learning_rate": 0.0009788420565487136, "loss": 0.0286, "num_input_tokens_seen": 48585184, "step": 22515 }, { "epoch": 3.67373572593801, "grad_norm": 0.2985360026359558, "learning_rate": 0.0009788215646227196, "loss": 0.2863, "num_input_tokens_seen": 48596064, "step": 22520 }, { "epoch": 3.6745513866231647, "grad_norm": 0.05592694878578186, "learning_rate": 0.0009788010629927992, "loss": 0.0926, "num_input_tokens_seen": 48607744, "step": 22525 }, { "epoch": 3.6753670473083195, "grad_norm": 0.02296856790781021, "learning_rate": 0.000978780551659368, "loss": 0.137, "num_input_tokens_seen": 48618080, "step": 22530 }, { "epoch": 3.676182707993475, "grad_norm": 0.036889709532260895, "learning_rate": 0.0009787600306228415, "loss": 0.0724, "num_input_tokens_seen": 48629184, "step": 22535 }, { "epoch": 3.6769983686786296, "grad_norm": 0.14342361688613892, "learning_rate": 0.0009787394998836355, "loss": 0.1689, "num_input_tokens_seen": 48641248, "step": 22540 }, { "epoch": 3.677814029363785, "grad_norm": 0.07336661219596863, "learning_rate": 0.0009787189594421663, "loss": 0.0874, "num_input_tokens_seen": 48652736, "step": 22545 }, { "epoch": 3.6786296900489397, "grad_norm": 0.0910760834813118, "learning_rate": 0.00097869840929885, "loss": 0.1086, "num_input_tokens_seen": 48663968, "step": 22550 }, { "epoch": 3.6794453507340945, "grad_norm": 0.18680618703365326, "learning_rate": 0.0009786778494541033, "loss": 0.2617, "num_input_tokens_seen": 48673376, "step": 22555 }, { "epoch": 3.6802610114192493, "grad_norm": 0.1408645659685135, "learning_rate": 0.0009786572799083426, "loss": 0.055, "num_input_tokens_seen": 48684640, "step": 22560 }, { "epoch": 3.6810766721044046, "grad_norm": 0.22366338968276978, "learning_rate": 0.000978636700661985, "loss": 0.0602, "num_input_tokens_seen": 48695648, "step": 22565 }, { "epoch": 3.6818923327895594, "grad_norm": 0.1184091717004776, "learning_rate": 0.0009786161117154475, "loss": 0.1328, "num_input_tokens_seen": 48704896, "step": 22570 }, { "epoch": 3.6827079934747147, "grad_norm": 0.07041114568710327, "learning_rate": 0.0009785955130691471, "loss": 0.1069, "num_input_tokens_seen": 48715872, "step": 22575 }, { "epoch": 3.6835236541598695, "grad_norm": 0.05920056253671646, "learning_rate": 0.0009785749047235017, "loss": 0.0326, "num_input_tokens_seen": 48727168, "step": 22580 }, { "epoch": 3.6843393148450243, "grad_norm": 0.023530038073658943, "learning_rate": 0.0009785542866789288, "loss": 0.0473, "num_input_tokens_seen": 48738592, "step": 22585 }, { "epoch": 3.6851549755301796, "grad_norm": 0.2996993958950043, "learning_rate": 0.000978533658935846, "loss": 0.0921, "num_input_tokens_seen": 48748832, "step": 22590 }, { "epoch": 3.6859706362153344, "grad_norm": 0.060156457126140594, "learning_rate": 0.0009785130214946716, "loss": 0.1612, "num_input_tokens_seen": 48760224, "step": 22595 }, { "epoch": 3.6867862969004896, "grad_norm": 0.023070821538567543, "learning_rate": 0.0009784923743558238, "loss": 0.0336, "num_input_tokens_seen": 48770272, "step": 22600 }, { "epoch": 3.6876019575856445, "grad_norm": 0.2717708945274353, "learning_rate": 0.000978471717519721, "loss": 0.2277, "num_input_tokens_seen": 48780192, "step": 22605 }, { "epoch": 3.6884176182707993, "grad_norm": 0.2964619994163513, "learning_rate": 0.0009784510509867818, "loss": 0.1072, "num_input_tokens_seen": 48791296, "step": 22610 }, { "epoch": 3.689233278955954, "grad_norm": 0.07812714576721191, "learning_rate": 0.0009784303747574254, "loss": 0.1365, "num_input_tokens_seen": 48802816, "step": 22615 }, { "epoch": 3.6900489396411094, "grad_norm": 0.03864899277687073, "learning_rate": 0.0009784096888320703, "loss": 0.1689, "num_input_tokens_seen": 48812224, "step": 22620 }, { "epoch": 3.690864600326264, "grad_norm": 0.21585066616535187, "learning_rate": 0.000978388993211136, "loss": 0.1253, "num_input_tokens_seen": 48822976, "step": 22625 }, { "epoch": 3.6916802610114194, "grad_norm": 0.019149012863636017, "learning_rate": 0.0009783682878950416, "loss": 0.0763, "num_input_tokens_seen": 48833056, "step": 22630 }, { "epoch": 3.6924959216965743, "grad_norm": 0.16254763305187225, "learning_rate": 0.0009783475728842074, "loss": 0.0443, "num_input_tokens_seen": 48843808, "step": 22635 }, { "epoch": 3.693311582381729, "grad_norm": 0.11372499167919159, "learning_rate": 0.0009783268481790527, "loss": 0.0468, "num_input_tokens_seen": 48854528, "step": 22640 }, { "epoch": 3.6941272430668843, "grad_norm": 0.05084644630551338, "learning_rate": 0.0009783061137799975, "loss": 0.1891, "num_input_tokens_seen": 48865504, "step": 22645 }, { "epoch": 3.694942903752039, "grad_norm": 0.14557941257953644, "learning_rate": 0.000978285369687462, "loss": 0.1149, "num_input_tokens_seen": 48875456, "step": 22650 }, { "epoch": 3.695758564437194, "grad_norm": 0.10588455945253372, "learning_rate": 0.000978264615901867, "loss": 0.044, "num_input_tokens_seen": 48886240, "step": 22655 }, { "epoch": 3.6965742251223492, "grad_norm": 0.10484561324119568, "learning_rate": 0.0009782438524236327, "loss": 0.0664, "num_input_tokens_seen": 48897824, "step": 22660 }, { "epoch": 3.697389885807504, "grad_norm": 0.011320313438773155, "learning_rate": 0.00097822307925318, "loss": 0.0423, "num_input_tokens_seen": 48907264, "step": 22665 }, { "epoch": 3.698205546492659, "grad_norm": 0.01726059801876545, "learning_rate": 0.00097820229639093, "loss": 0.064, "num_input_tokens_seen": 48918240, "step": 22670 }, { "epoch": 3.699021207177814, "grad_norm": 0.04096648097038269, "learning_rate": 0.0009781815038373042, "loss": 0.0678, "num_input_tokens_seen": 48929088, "step": 22675 }, { "epoch": 3.699836867862969, "grad_norm": 0.2480023354291916, "learning_rate": 0.000978160701592723, "loss": 0.0569, "num_input_tokens_seen": 48940128, "step": 22680 }, { "epoch": 3.700652528548124, "grad_norm": 0.3255230784416199, "learning_rate": 0.000978139889657609, "loss": 0.2856, "num_input_tokens_seen": 48951232, "step": 22685 }, { "epoch": 3.701468189233279, "grad_norm": 0.1955031454563141, "learning_rate": 0.0009781190680323833, "loss": 0.1139, "num_input_tokens_seen": 48962432, "step": 22690 }, { "epoch": 3.702283849918434, "grad_norm": 0.19064848124980927, "learning_rate": 0.0009780982367174683, "loss": 0.1676, "num_input_tokens_seen": 48972480, "step": 22695 }, { "epoch": 3.7030995106035887, "grad_norm": 0.03315630182623863, "learning_rate": 0.000978077395713286, "loss": 0.0335, "num_input_tokens_seen": 48984256, "step": 22700 }, { "epoch": 3.703915171288744, "grad_norm": 0.3165643513202667, "learning_rate": 0.0009780565450202587, "loss": 0.1125, "num_input_tokens_seen": 48995712, "step": 22705 }, { "epoch": 3.7047308319738987, "grad_norm": 0.20892781019210815, "learning_rate": 0.0009780356846388091, "loss": 0.1488, "num_input_tokens_seen": 49007392, "step": 22710 }, { "epoch": 3.705546492659054, "grad_norm": 0.17165358364582062, "learning_rate": 0.00097801481456936, "loss": 0.0886, "num_input_tokens_seen": 49017504, "step": 22715 }, { "epoch": 3.706362153344209, "grad_norm": 0.030747707933187485, "learning_rate": 0.0009779939348123342, "loss": 0.0796, "num_input_tokens_seen": 49028000, "step": 22720 }, { "epoch": 3.7071778140293636, "grad_norm": 0.33692312240600586, "learning_rate": 0.000977973045368155, "loss": 0.137, "num_input_tokens_seen": 49037472, "step": 22725 }, { "epoch": 3.707993474714519, "grad_norm": 0.1625063121318817, "learning_rate": 0.0009779521462372457, "loss": 0.0426, "num_input_tokens_seen": 49049472, "step": 22730 }, { "epoch": 3.7088091353996737, "grad_norm": 0.21079306304454803, "learning_rate": 0.0009779312374200298, "loss": 0.0406, "num_input_tokens_seen": 49059744, "step": 22735 }, { "epoch": 3.709624796084829, "grad_norm": 0.007290721870958805, "learning_rate": 0.0009779103189169309, "loss": 0.1712, "num_input_tokens_seen": 49070240, "step": 22740 }, { "epoch": 3.710440456769984, "grad_norm": 0.05688055604696274, "learning_rate": 0.0009778893907283733, "loss": 0.0962, "num_input_tokens_seen": 49080768, "step": 22745 }, { "epoch": 3.7112561174551386, "grad_norm": 0.12140754610300064, "learning_rate": 0.000977868452854781, "loss": 0.041, "num_input_tokens_seen": 49092608, "step": 22750 }, { "epoch": 3.7120717781402934, "grad_norm": 0.0837424248456955, "learning_rate": 0.000977847505296578, "loss": 0.1249, "num_input_tokens_seen": 49104096, "step": 22755 }, { "epoch": 3.7128874388254487, "grad_norm": 0.3037433326244354, "learning_rate": 0.0009778265480541895, "loss": 0.1216, "num_input_tokens_seen": 49115072, "step": 22760 }, { "epoch": 3.7137030995106035, "grad_norm": 0.21703459322452545, "learning_rate": 0.0009778055811280396, "loss": 0.1207, "num_input_tokens_seen": 49125824, "step": 22765 }, { "epoch": 3.7145187601957588, "grad_norm": 0.06578963994979858, "learning_rate": 0.0009777846045185535, "loss": 0.0514, "num_input_tokens_seen": 49135712, "step": 22770 }, { "epoch": 3.7153344208809136, "grad_norm": 0.1484934240579605, "learning_rate": 0.0009777636182261562, "loss": 0.2558, "num_input_tokens_seen": 49147616, "step": 22775 }, { "epoch": 3.7161500815660684, "grad_norm": 0.00914775114506483, "learning_rate": 0.0009777426222512733, "loss": 0.0384, "num_input_tokens_seen": 49157984, "step": 22780 }, { "epoch": 3.7169657422512232, "grad_norm": 0.5723288059234619, "learning_rate": 0.0009777216165943298, "loss": 0.1001, "num_input_tokens_seen": 49168896, "step": 22785 }, { "epoch": 3.7177814029363785, "grad_norm": 0.2424352616071701, "learning_rate": 0.0009777006012557522, "loss": 0.2385, "num_input_tokens_seen": 49178752, "step": 22790 }, { "epoch": 3.7185970636215333, "grad_norm": 0.153018981218338, "learning_rate": 0.0009776795762359654, "loss": 0.0973, "num_input_tokens_seen": 49188384, "step": 22795 }, { "epoch": 3.7194127243066886, "grad_norm": 0.1670449674129486, "learning_rate": 0.0009776585415353963, "loss": 0.1564, "num_input_tokens_seen": 49199552, "step": 22800 }, { "epoch": 3.7202283849918434, "grad_norm": 0.031436704099178314, "learning_rate": 0.0009776374971544708, "loss": 0.0491, "num_input_tokens_seen": 49210336, "step": 22805 }, { "epoch": 3.721044045676998, "grad_norm": 0.13451409339904785, "learning_rate": 0.0009776164430936153, "loss": 0.0809, "num_input_tokens_seen": 49222208, "step": 22810 }, { "epoch": 3.7218597063621535, "grad_norm": 0.005694043356925249, "learning_rate": 0.000977595379353257, "loss": 0.0605, "num_input_tokens_seen": 49231872, "step": 22815 }, { "epoch": 3.7226753670473083, "grad_norm": 0.029418673366308212, "learning_rate": 0.0009775743059338223, "loss": 0.0765, "num_input_tokens_seen": 49242592, "step": 22820 }, { "epoch": 3.7234910277324635, "grad_norm": 0.04785500094294548, "learning_rate": 0.0009775532228357385, "loss": 0.2715, "num_input_tokens_seen": 49252480, "step": 22825 }, { "epoch": 3.7243066884176184, "grad_norm": 0.10465153306722641, "learning_rate": 0.0009775321300594328, "loss": 0.0562, "num_input_tokens_seen": 49263136, "step": 22830 }, { "epoch": 3.725122349102773, "grad_norm": 0.18826577067375183, "learning_rate": 0.0009775110276053327, "loss": 0.0937, "num_input_tokens_seen": 49273088, "step": 22835 }, { "epoch": 3.725938009787928, "grad_norm": 0.3600449860095978, "learning_rate": 0.000977489915473866, "loss": 0.0872, "num_input_tokens_seen": 49283360, "step": 22840 }, { "epoch": 3.7267536704730833, "grad_norm": 0.19091886281967163, "learning_rate": 0.0009774687936654602, "loss": 0.0794, "num_input_tokens_seen": 49291584, "step": 22845 }, { "epoch": 3.727569331158238, "grad_norm": 0.09567143768072128, "learning_rate": 0.0009774476621805437, "loss": 0.0825, "num_input_tokens_seen": 49301920, "step": 22850 }, { "epoch": 3.7283849918433933, "grad_norm": 0.02915286272764206, "learning_rate": 0.0009774265210195446, "loss": 0.0185, "num_input_tokens_seen": 49313056, "step": 22855 }, { "epoch": 3.729200652528548, "grad_norm": 0.05675291270017624, "learning_rate": 0.0009774053701828913, "loss": 0.0973, "num_input_tokens_seen": 49323232, "step": 22860 }, { "epoch": 3.730016313213703, "grad_norm": 0.07143950462341309, "learning_rate": 0.0009773842096710127, "loss": 0.0558, "num_input_tokens_seen": 49333536, "step": 22865 }, { "epoch": 3.7308319738988582, "grad_norm": 0.06837920099496841, "learning_rate": 0.0009773630394843374, "loss": 0.0795, "num_input_tokens_seen": 49344384, "step": 22870 }, { "epoch": 3.731647634584013, "grad_norm": 0.1514849215745926, "learning_rate": 0.0009773418596232945, "loss": 0.0473, "num_input_tokens_seen": 49356544, "step": 22875 }, { "epoch": 3.732463295269168, "grad_norm": 0.16093115508556366, "learning_rate": 0.0009773206700883135, "loss": 0.0695, "num_input_tokens_seen": 49368256, "step": 22880 }, { "epoch": 3.733278955954323, "grad_norm": 0.38132551312446594, "learning_rate": 0.0009772994708798232, "loss": 0.078, "num_input_tokens_seen": 49378752, "step": 22885 }, { "epoch": 3.734094616639478, "grad_norm": 0.17820590734481812, "learning_rate": 0.000977278261998254, "loss": 0.0346, "num_input_tokens_seen": 49388352, "step": 22890 }, { "epoch": 3.7349102773246328, "grad_norm": 0.001393240294419229, "learning_rate": 0.0009772570434440353, "loss": 0.1457, "num_input_tokens_seen": 49398944, "step": 22895 }, { "epoch": 3.735725938009788, "grad_norm": 0.19079141318798065, "learning_rate": 0.000977235815217597, "loss": 0.189, "num_input_tokens_seen": 49409152, "step": 22900 }, { "epoch": 3.736541598694943, "grad_norm": 0.10821019113063812, "learning_rate": 0.0009772145773193695, "loss": 0.0608, "num_input_tokens_seen": 49420000, "step": 22905 }, { "epoch": 3.737357259380098, "grad_norm": 0.027368539944291115, "learning_rate": 0.0009771933297497831, "loss": 0.1291, "num_input_tokens_seen": 49429568, "step": 22910 }, { "epoch": 3.738172920065253, "grad_norm": 0.01051405817270279, "learning_rate": 0.0009771720725092687, "loss": 0.0551, "num_input_tokens_seen": 49441056, "step": 22915 }, { "epoch": 3.7389885807504077, "grad_norm": 0.004634975455701351, "learning_rate": 0.000977150805598257, "loss": 0.1087, "num_input_tokens_seen": 49452704, "step": 22920 }, { "epoch": 3.7398042414355626, "grad_norm": 0.13914591073989868, "learning_rate": 0.0009771295290171788, "loss": 0.1149, "num_input_tokens_seen": 49463584, "step": 22925 }, { "epoch": 3.740619902120718, "grad_norm": 0.02511669136583805, "learning_rate": 0.0009771082427664655, "loss": 0.1349, "num_input_tokens_seen": 49474336, "step": 22930 }, { "epoch": 3.7414355628058726, "grad_norm": 0.16538390517234802, "learning_rate": 0.0009770869468465483, "loss": 0.1357, "num_input_tokens_seen": 49485056, "step": 22935 }, { "epoch": 3.742251223491028, "grad_norm": 0.07204695791006088, "learning_rate": 0.000977065641257859, "loss": 0.1759, "num_input_tokens_seen": 49495008, "step": 22940 }, { "epoch": 3.7430668841761827, "grad_norm": 0.00432331208139658, "learning_rate": 0.000977044326000829, "loss": 0.1739, "num_input_tokens_seen": 49505856, "step": 22945 }, { "epoch": 3.7438825448613375, "grad_norm": 0.021803587675094604, "learning_rate": 0.0009770230010758907, "loss": 0.1445, "num_input_tokens_seen": 49516992, "step": 22950 }, { "epoch": 3.744698205546493, "grad_norm": 0.05121257156133652, "learning_rate": 0.0009770016664834762, "loss": 0.0325, "num_input_tokens_seen": 49528256, "step": 22955 }, { "epoch": 3.7455138662316476, "grad_norm": 0.11353214085102081, "learning_rate": 0.000976980322224018, "loss": 0.0628, "num_input_tokens_seen": 49539456, "step": 22960 }, { "epoch": 3.746329526916803, "grad_norm": 0.07152816653251648, "learning_rate": 0.0009769589682979481, "loss": 0.0784, "num_input_tokens_seen": 49551296, "step": 22965 }, { "epoch": 3.7471451876019577, "grad_norm": 0.0191242266446352, "learning_rate": 0.0009769376047056998, "loss": 0.017, "num_input_tokens_seen": 49563104, "step": 22970 }, { "epoch": 3.7479608482871125, "grad_norm": 0.01414796058088541, "learning_rate": 0.0009769162314477058, "loss": 0.0172, "num_input_tokens_seen": 49574528, "step": 22975 }, { "epoch": 3.7487765089722673, "grad_norm": 0.2957731783390045, "learning_rate": 0.0009768948485243997, "loss": 0.1124, "num_input_tokens_seen": 49585056, "step": 22980 }, { "epoch": 3.7495921696574226, "grad_norm": 0.008859802968800068, "learning_rate": 0.0009768734559362142, "loss": 0.0777, "num_input_tokens_seen": 49597504, "step": 22985 }, { "epoch": 3.7504078303425774, "grad_norm": 0.004433393012732267, "learning_rate": 0.0009768520536835832, "loss": 0.0429, "num_input_tokens_seen": 49607392, "step": 22990 }, { "epoch": 3.7512234910277327, "grad_norm": 0.17656779289245605, "learning_rate": 0.0009768306417669405, "loss": 0.1314, "num_input_tokens_seen": 49618976, "step": 22995 }, { "epoch": 3.7520391517128875, "grad_norm": 0.3884519934654236, "learning_rate": 0.00097680922018672, "loss": 0.1933, "num_input_tokens_seen": 49629728, "step": 23000 }, { "epoch": 3.7528548123980423, "grad_norm": 0.04513560235500336, "learning_rate": 0.0009767877889433555, "loss": 0.0668, "num_input_tokens_seen": 49640832, "step": 23005 }, { "epoch": 3.753670473083197, "grad_norm": 0.3775264620780945, "learning_rate": 0.0009767663480372817, "loss": 0.1249, "num_input_tokens_seen": 49653056, "step": 23010 }, { "epoch": 3.7544861337683524, "grad_norm": 0.19494973123073578, "learning_rate": 0.0009767448974689332, "loss": 0.1174, "num_input_tokens_seen": 49664800, "step": 23015 }, { "epoch": 3.755301794453507, "grad_norm": 0.1403975635766983, "learning_rate": 0.0009767234372387444, "loss": 0.0716, "num_input_tokens_seen": 49675584, "step": 23020 }, { "epoch": 3.7561174551386625, "grad_norm": 0.11382707953453064, "learning_rate": 0.0009767019673471505, "loss": 0.0754, "num_input_tokens_seen": 49686720, "step": 23025 }, { "epoch": 3.7569331158238173, "grad_norm": 0.04909807816147804, "learning_rate": 0.0009766804877945864, "loss": 0.2532, "num_input_tokens_seen": 49697088, "step": 23030 }, { "epoch": 3.757748776508972, "grad_norm": 0.045643746852874756, "learning_rate": 0.0009766589985814875, "loss": 0.02, "num_input_tokens_seen": 49708000, "step": 23035 }, { "epoch": 3.7585644371941274, "grad_norm": 0.00902261957526207, "learning_rate": 0.0009766374997082893, "loss": 0.1165, "num_input_tokens_seen": 49718816, "step": 23040 }, { "epoch": 3.759380097879282, "grad_norm": 0.33467841148376465, "learning_rate": 0.0009766159911754277, "loss": 0.115, "num_input_tokens_seen": 49730208, "step": 23045 }, { "epoch": 3.7601957585644374, "grad_norm": 0.17694014310836792, "learning_rate": 0.0009765944729833382, "loss": 0.1291, "num_input_tokens_seen": 49741376, "step": 23050 }, { "epoch": 3.7610114192495923, "grad_norm": 0.23579668998718262, "learning_rate": 0.0009765729451324573, "loss": 0.0811, "num_input_tokens_seen": 49752224, "step": 23055 }, { "epoch": 3.761827079934747, "grad_norm": 0.0607072189450264, "learning_rate": 0.000976551407623221, "loss": 0.1751, "num_input_tokens_seen": 49762752, "step": 23060 }, { "epoch": 3.762642740619902, "grad_norm": 0.03212082386016846, "learning_rate": 0.0009765298604560657, "loss": 0.199, "num_input_tokens_seen": 49773536, "step": 23065 }, { "epoch": 3.763458401305057, "grad_norm": 0.038626305758953094, "learning_rate": 0.0009765083036314284, "loss": 0.0312, "num_input_tokens_seen": 49784832, "step": 23070 }, { "epoch": 3.764274061990212, "grad_norm": 0.014276145026087761, "learning_rate": 0.0009764867371497459, "loss": 0.077, "num_input_tokens_seen": 49796320, "step": 23075 }, { "epoch": 3.7650897226753672, "grad_norm": 0.009642582386732101, "learning_rate": 0.000976465161011455, "loss": 0.0663, "num_input_tokens_seen": 49806016, "step": 23080 }, { "epoch": 3.765905383360522, "grad_norm": 0.017863111570477486, "learning_rate": 0.0009764435752169933, "loss": 0.1287, "num_input_tokens_seen": 49817632, "step": 23085 }, { "epoch": 3.766721044045677, "grad_norm": 0.01759001798927784, "learning_rate": 0.0009764219797667982, "loss": 0.1506, "num_input_tokens_seen": 49828416, "step": 23090 }, { "epoch": 3.767536704730832, "grad_norm": 0.059844184666872025, "learning_rate": 0.0009764003746613073, "loss": 0.1552, "num_input_tokens_seen": 49839104, "step": 23095 }, { "epoch": 3.768352365415987, "grad_norm": 0.22938752174377441, "learning_rate": 0.0009763787599009583, "loss": 0.1499, "num_input_tokens_seen": 49849120, "step": 23100 }, { "epoch": 3.7691680261011418, "grad_norm": 0.033294398337602615, "learning_rate": 0.0009763571354861895, "loss": 0.0431, "num_input_tokens_seen": 49860128, "step": 23105 }, { "epoch": 3.769983686786297, "grad_norm": 0.2399105727672577, "learning_rate": 0.0009763355014174391, "loss": 0.1975, "num_input_tokens_seen": 49870752, "step": 23110 }, { "epoch": 3.770799347471452, "grad_norm": 0.06083809211850166, "learning_rate": 0.0009763138576951454, "loss": 0.0466, "num_input_tokens_seen": 49882720, "step": 23115 }, { "epoch": 3.7716150081566067, "grad_norm": 0.17513562738895416, "learning_rate": 0.0009762922043197471, "loss": 0.0678, "num_input_tokens_seen": 49894144, "step": 23120 }, { "epoch": 3.772430668841762, "grad_norm": 0.15750084817409515, "learning_rate": 0.0009762705412916831, "loss": 0.1083, "num_input_tokens_seen": 49904736, "step": 23125 }, { "epoch": 3.7732463295269167, "grad_norm": 0.040882594883441925, "learning_rate": 0.0009762488686113924, "loss": 0.061, "num_input_tokens_seen": 49916320, "step": 23130 }, { "epoch": 3.774061990212072, "grad_norm": 0.030770165845751762, "learning_rate": 0.0009762271862793143, "loss": 0.1684, "num_input_tokens_seen": 49927776, "step": 23135 }, { "epoch": 3.774877650897227, "grad_norm": 0.1521388590335846, "learning_rate": 0.000976205494295888, "loss": 0.1242, "num_input_tokens_seen": 49938592, "step": 23140 }, { "epoch": 3.7756933115823816, "grad_norm": 0.024314727634191513, "learning_rate": 0.0009761837926615533, "loss": 0.1707, "num_input_tokens_seen": 49949728, "step": 23145 }, { "epoch": 3.7765089722675365, "grad_norm": 0.26760736107826233, "learning_rate": 0.00097616208137675, "loss": 0.1442, "num_input_tokens_seen": 49960320, "step": 23150 }, { "epoch": 3.7773246329526917, "grad_norm": 0.06872869282960892, "learning_rate": 0.000976140360441918, "loss": 0.0968, "num_input_tokens_seen": 49970720, "step": 23155 }, { "epoch": 3.7781402936378465, "grad_norm": 0.23058588802814484, "learning_rate": 0.0009761186298574975, "loss": 0.0613, "num_input_tokens_seen": 49981280, "step": 23160 }, { "epoch": 3.778955954323002, "grad_norm": 0.17939208447933197, "learning_rate": 0.0009760968896239291, "loss": 0.1269, "num_input_tokens_seen": 49991872, "step": 23165 }, { "epoch": 3.7797716150081566, "grad_norm": 0.007740521337836981, "learning_rate": 0.0009760751397416532, "loss": 0.0447, "num_input_tokens_seen": 50002496, "step": 23170 }, { "epoch": 3.7805872756933114, "grad_norm": 0.06097311154007912, "learning_rate": 0.0009760533802111107, "loss": 0.0344, "num_input_tokens_seen": 50013152, "step": 23175 }, { "epoch": 3.7814029363784667, "grad_norm": 0.013580359518527985, "learning_rate": 0.0009760316110327426, "loss": 0.1166, "num_input_tokens_seen": 50024608, "step": 23180 }, { "epoch": 3.7822185970636215, "grad_norm": 0.026289189234375954, "learning_rate": 0.00097600983220699, "loss": 0.1978, "num_input_tokens_seen": 50035136, "step": 23185 }, { "epoch": 3.7830342577487768, "grad_norm": 0.2907015383243561, "learning_rate": 0.0009759880437342941, "loss": 0.1001, "num_input_tokens_seen": 50045184, "step": 23190 }, { "epoch": 3.7838499184339316, "grad_norm": 0.10428736358880997, "learning_rate": 0.0009759662456150967, "loss": 0.0392, "num_input_tokens_seen": 50055936, "step": 23195 }, { "epoch": 3.7846655791190864, "grad_norm": 0.06437470018863678, "learning_rate": 0.0009759444378498397, "loss": 0.0544, "num_input_tokens_seen": 50067552, "step": 23200 }, { "epoch": 3.7854812398042412, "grad_norm": 0.279992938041687, "learning_rate": 0.0009759226204389646, "loss": 0.1078, "num_input_tokens_seen": 50077984, "step": 23205 }, { "epoch": 3.7862969004893965, "grad_norm": 0.1573600023984909, "learning_rate": 0.0009759007933829141, "loss": 0.0666, "num_input_tokens_seen": 50087616, "step": 23210 }, { "epoch": 3.7871125611745513, "grad_norm": 0.14312948286533356, "learning_rate": 0.0009758789566821302, "loss": 0.0719, "num_input_tokens_seen": 50099232, "step": 23215 }, { "epoch": 3.7879282218597066, "grad_norm": 0.08059336990118027, "learning_rate": 0.0009758571103370556, "loss": 0.0446, "num_input_tokens_seen": 50109280, "step": 23220 }, { "epoch": 3.7887438825448614, "grad_norm": 0.04139859229326248, "learning_rate": 0.000975835254348133, "loss": 0.1663, "num_input_tokens_seen": 50120640, "step": 23225 }, { "epoch": 3.789559543230016, "grad_norm": 0.0923565924167633, "learning_rate": 0.0009758133887158053, "loss": 0.0346, "num_input_tokens_seen": 50131264, "step": 23230 }, { "epoch": 3.790375203915171, "grad_norm": 0.1526326984167099, "learning_rate": 0.0009757915134405155, "loss": 0.1485, "num_input_tokens_seen": 50142240, "step": 23235 }, { "epoch": 3.7911908646003263, "grad_norm": 0.04075642675161362, "learning_rate": 0.0009757696285227073, "loss": 0.0744, "num_input_tokens_seen": 50153472, "step": 23240 }, { "epoch": 3.792006525285481, "grad_norm": 0.12175858020782471, "learning_rate": 0.000975747733962824, "loss": 0.046, "num_input_tokens_seen": 50163040, "step": 23245 }, { "epoch": 3.7928221859706364, "grad_norm": 0.0805397480726242, "learning_rate": 0.0009757258297613095, "loss": 0.108, "num_input_tokens_seen": 50173440, "step": 23250 }, { "epoch": 3.793637846655791, "grad_norm": 0.2212839424610138, "learning_rate": 0.0009757039159186072, "loss": 0.0841, "num_input_tokens_seen": 50184128, "step": 23255 }, { "epoch": 3.794453507340946, "grad_norm": 0.1437491625547409, "learning_rate": 0.0009756819924351618, "loss": 0.0625, "num_input_tokens_seen": 50195072, "step": 23260 }, { "epoch": 3.7952691680261013, "grad_norm": 0.3002660870552063, "learning_rate": 0.0009756600593114174, "loss": 0.1137, "num_input_tokens_seen": 50205472, "step": 23265 }, { "epoch": 3.796084828711256, "grad_norm": 0.17316478490829468, "learning_rate": 0.0009756381165478183, "loss": 0.1532, "num_input_tokens_seen": 50216736, "step": 23270 }, { "epoch": 3.7969004893964113, "grad_norm": 0.009191228076815605, "learning_rate": 0.0009756161641448095, "loss": 0.047, "num_input_tokens_seen": 50227168, "step": 23275 }, { "epoch": 3.797716150081566, "grad_norm": 0.1876332312822342, "learning_rate": 0.0009755942021028356, "loss": 0.2183, "num_input_tokens_seen": 50237216, "step": 23280 }, { "epoch": 3.798531810766721, "grad_norm": 0.19151124358177185, "learning_rate": 0.0009755722304223422, "loss": 0.1763, "num_input_tokens_seen": 50248320, "step": 23285 }, { "epoch": 3.799347471451876, "grad_norm": 0.009156274609267712, "learning_rate": 0.000975550249103774, "loss": 0.0076, "num_input_tokens_seen": 50259072, "step": 23290 }, { "epoch": 3.800163132137031, "grad_norm": 0.13892458379268646, "learning_rate": 0.0009755282581475768, "loss": 0.1069, "num_input_tokens_seen": 50270400, "step": 23295 }, { "epoch": 3.800978792822186, "grad_norm": 0.07700437307357788, "learning_rate": 0.0009755062575541962, "loss": 0.0958, "num_input_tokens_seen": 50281536, "step": 23300 }, { "epoch": 3.801794453507341, "grad_norm": 0.0869818851351738, "learning_rate": 0.000975484247324078, "loss": 0.0294, "num_input_tokens_seen": 50292128, "step": 23305 }, { "epoch": 3.802610114192496, "grad_norm": 0.008653923869132996, "learning_rate": 0.0009754622274576684, "loss": 0.0455, "num_input_tokens_seen": 50303456, "step": 23310 }, { "epoch": 3.8034257748776508, "grad_norm": 0.04622099921107292, "learning_rate": 0.0009754401979554136, "loss": 0.0369, "num_input_tokens_seen": 50315008, "step": 23315 }, { "epoch": 3.804241435562806, "grad_norm": 0.2947373688220978, "learning_rate": 0.00097541815881776, "loss": 0.1732, "num_input_tokens_seen": 50325664, "step": 23320 }, { "epoch": 3.805057096247961, "grad_norm": 0.03993939980864525, "learning_rate": 0.0009753961100451544, "loss": 0.0489, "num_input_tokens_seen": 50336288, "step": 23325 }, { "epoch": 3.8058727569331157, "grad_norm": 0.3531322181224823, "learning_rate": 0.0009753740516380433, "loss": 0.121, "num_input_tokens_seen": 50346688, "step": 23330 }, { "epoch": 3.806688417618271, "grad_norm": 0.01671215333044529, "learning_rate": 0.0009753519835968743, "loss": 0.031, "num_input_tokens_seen": 50357728, "step": 23335 }, { "epoch": 3.8075040783034257, "grad_norm": 0.11535752564668655, "learning_rate": 0.0009753299059220941, "loss": 0.0314, "num_input_tokens_seen": 50369696, "step": 23340 }, { "epoch": 3.8083197389885806, "grad_norm": 0.20632320642471313, "learning_rate": 0.0009753078186141506, "loss": 0.1181, "num_input_tokens_seen": 50380064, "step": 23345 }, { "epoch": 3.809135399673736, "grad_norm": 0.011096244677901268, "learning_rate": 0.0009752857216734909, "loss": 0.083, "num_input_tokens_seen": 50390464, "step": 23350 }, { "epoch": 3.8099510603588906, "grad_norm": 0.43469545245170593, "learning_rate": 0.0009752636151005633, "loss": 0.129, "num_input_tokens_seen": 50401792, "step": 23355 }, { "epoch": 3.810766721044046, "grad_norm": 0.42306724190711975, "learning_rate": 0.0009752414988958156, "loss": 0.1193, "num_input_tokens_seen": 50411552, "step": 23360 }, { "epoch": 3.8115823817292007, "grad_norm": 0.07697244733572006, "learning_rate": 0.000975219373059696, "loss": 0.1119, "num_input_tokens_seen": 50421504, "step": 23365 }, { "epoch": 3.8123980424143555, "grad_norm": 0.3269035518169403, "learning_rate": 0.000975197237592653, "loss": 0.3384, "num_input_tokens_seen": 50433312, "step": 23370 }, { "epoch": 3.8132137030995104, "grad_norm": 0.32158389687538147, "learning_rate": 0.000975175092495135, "loss": 0.5309, "num_input_tokens_seen": 50443904, "step": 23375 }, { "epoch": 3.8140293637846656, "grad_norm": 0.09350975602865219, "learning_rate": 0.0009751529377675911, "loss": 0.0547, "num_input_tokens_seen": 50455424, "step": 23380 }, { "epoch": 3.8148450244698204, "grad_norm": 0.03345005586743355, "learning_rate": 0.00097513077341047, "loss": 0.0948, "num_input_tokens_seen": 50465952, "step": 23385 }, { "epoch": 3.8156606851549757, "grad_norm": 0.11232996731996536, "learning_rate": 0.0009751085994242212, "loss": 0.1311, "num_input_tokens_seen": 50478112, "step": 23390 }, { "epoch": 3.8164763458401305, "grad_norm": 0.2909262180328369, "learning_rate": 0.0009750864158092938, "loss": 0.1414, "num_input_tokens_seen": 50489856, "step": 23395 }, { "epoch": 3.8172920065252853, "grad_norm": 0.2786071300506592, "learning_rate": 0.0009750642225661375, "loss": 0.17, "num_input_tokens_seen": 50499776, "step": 23400 }, { "epoch": 3.8181076672104406, "grad_norm": 0.3075200915336609, "learning_rate": 0.0009750420196952021, "loss": 0.1739, "num_input_tokens_seen": 50509312, "step": 23405 }, { "epoch": 3.8189233278955954, "grad_norm": 0.4347343444824219, "learning_rate": 0.0009750198071969376, "loss": 0.1409, "num_input_tokens_seen": 50519968, "step": 23410 }, { "epoch": 3.8197389885807507, "grad_norm": 0.29417672753334045, "learning_rate": 0.0009749975850717941, "loss": 0.1131, "num_input_tokens_seen": 50531328, "step": 23415 }, { "epoch": 3.8205546492659055, "grad_norm": 0.07240878790616989, "learning_rate": 0.0009749753533202218, "loss": 0.0461, "num_input_tokens_seen": 50541280, "step": 23420 }, { "epoch": 3.8213703099510603, "grad_norm": 0.05643896013498306, "learning_rate": 0.0009749531119426716, "loss": 0.1252, "num_input_tokens_seen": 50553280, "step": 23425 }, { "epoch": 3.822185970636215, "grad_norm": 0.14703184366226196, "learning_rate": 0.000974930860939594, "loss": 0.0904, "num_input_tokens_seen": 50563808, "step": 23430 }, { "epoch": 3.8230016313213704, "grad_norm": 0.06538685411214828, "learning_rate": 0.0009749086003114399, "loss": 0.1353, "num_input_tokens_seen": 50575008, "step": 23435 }, { "epoch": 3.823817292006525, "grad_norm": 0.03718522936105728, "learning_rate": 0.0009748863300586605, "loss": 0.1444, "num_input_tokens_seen": 50586016, "step": 23440 }, { "epoch": 3.8246329526916805, "grad_norm": 0.15217286348342896, "learning_rate": 0.0009748640501817074, "loss": 0.0983, "num_input_tokens_seen": 50597440, "step": 23445 }, { "epoch": 3.8254486133768353, "grad_norm": 0.18148769438266754, "learning_rate": 0.0009748417606810319, "loss": 0.0995, "num_input_tokens_seen": 50608320, "step": 23450 }, { "epoch": 3.82626427406199, "grad_norm": 0.16997303068637848, "learning_rate": 0.0009748194615570857, "loss": 0.171, "num_input_tokens_seen": 50618144, "step": 23455 }, { "epoch": 3.827079934747145, "grad_norm": 0.023161446675658226, "learning_rate": 0.0009747971528103207, "loss": 0.0909, "num_input_tokens_seen": 50626304, "step": 23460 }, { "epoch": 3.8278955954323, "grad_norm": 0.06477691233158112, "learning_rate": 0.0009747748344411891, "loss": 0.2119, "num_input_tokens_seen": 50637856, "step": 23465 }, { "epoch": 3.828711256117455, "grad_norm": 0.38486188650131226, "learning_rate": 0.0009747525064501433, "loss": 0.193, "num_input_tokens_seen": 50648416, "step": 23470 }, { "epoch": 3.8295269168026103, "grad_norm": 0.1968144029378891, "learning_rate": 0.0009747301688376355, "loss": 0.1342, "num_input_tokens_seen": 50660320, "step": 23475 }, { "epoch": 3.830342577487765, "grad_norm": 0.13785341382026672, "learning_rate": 0.0009747078216041187, "loss": 0.0563, "num_input_tokens_seen": 50670848, "step": 23480 }, { "epoch": 3.83115823817292, "grad_norm": 0.1333019733428955, "learning_rate": 0.0009746854647500457, "loss": 0.2277, "num_input_tokens_seen": 50681792, "step": 23485 }, { "epoch": 3.831973898858075, "grad_norm": 0.19084607064723969, "learning_rate": 0.0009746630982758695, "loss": 0.1501, "num_input_tokens_seen": 50691168, "step": 23490 }, { "epoch": 3.83278955954323, "grad_norm": 0.03187166899442673, "learning_rate": 0.0009746407221820435, "loss": 0.0458, "num_input_tokens_seen": 50701760, "step": 23495 }, { "epoch": 3.8336052202283852, "grad_norm": 0.0460643395781517, "learning_rate": 0.0009746183364690212, "loss": 0.0652, "num_input_tokens_seen": 50712224, "step": 23500 }, { "epoch": 3.83442088091354, "grad_norm": 0.17877747118473053, "learning_rate": 0.0009745959411372561, "loss": 0.239, "num_input_tokens_seen": 50725056, "step": 23505 }, { "epoch": 3.835236541598695, "grad_norm": 0.05689656734466553, "learning_rate": 0.0009745735361872023, "loss": 0.0751, "num_input_tokens_seen": 50736800, "step": 23510 }, { "epoch": 3.8360522022838497, "grad_norm": 0.031051717698574066, "learning_rate": 0.0009745511216193137, "loss": 0.0285, "num_input_tokens_seen": 50747968, "step": 23515 }, { "epoch": 3.836867862969005, "grad_norm": 0.033384256064891815, "learning_rate": 0.0009745286974340445, "loss": 0.0933, "num_input_tokens_seen": 50759264, "step": 23520 }, { "epoch": 3.8376835236541598, "grad_norm": 0.07597340643405914, "learning_rate": 0.0009745062636318495, "loss": 0.0473, "num_input_tokens_seen": 50771456, "step": 23525 }, { "epoch": 3.838499184339315, "grad_norm": 0.19461698830127716, "learning_rate": 0.0009744838202131829, "loss": 0.1165, "num_input_tokens_seen": 50782048, "step": 23530 }, { "epoch": 3.83931484502447, "grad_norm": 0.1583615243434906, "learning_rate": 0.0009744613671784999, "loss": 0.1085, "num_input_tokens_seen": 50792704, "step": 23535 }, { "epoch": 3.8401305057096247, "grad_norm": 0.03587063401937485, "learning_rate": 0.0009744389045282554, "loss": 0.0606, "num_input_tokens_seen": 50803872, "step": 23540 }, { "epoch": 3.84094616639478, "grad_norm": 0.017446406185626984, "learning_rate": 0.0009744164322629046, "loss": 0.061, "num_input_tokens_seen": 50814688, "step": 23545 }, { "epoch": 3.8417618270799347, "grad_norm": 0.7785932421684265, "learning_rate": 0.0009743939503829027, "loss": 0.3442, "num_input_tokens_seen": 50824448, "step": 23550 }, { "epoch": 3.8425774877650896, "grad_norm": 0.07299356162548065, "learning_rate": 0.0009743714588887059, "loss": 0.031, "num_input_tokens_seen": 50834944, "step": 23555 }, { "epoch": 3.843393148450245, "grad_norm": 0.019956795498728752, "learning_rate": 0.0009743489577807696, "loss": 0.0854, "num_input_tokens_seen": 50845664, "step": 23560 }, { "epoch": 3.8442088091353996, "grad_norm": 0.023237688466906548, "learning_rate": 0.0009743264470595499, "loss": 0.1027, "num_input_tokens_seen": 50856832, "step": 23565 }, { "epoch": 3.8450244698205545, "grad_norm": 0.03229168429970741, "learning_rate": 0.0009743039267255031, "loss": 0.0699, "num_input_tokens_seen": 50867936, "step": 23570 }, { "epoch": 3.8458401305057097, "grad_norm": 0.05835679545998573, "learning_rate": 0.0009742813967790855, "loss": 0.1439, "num_input_tokens_seen": 50878720, "step": 23575 }, { "epoch": 3.8466557911908645, "grad_norm": 0.23732000589370728, "learning_rate": 0.0009742588572207538, "loss": 0.0841, "num_input_tokens_seen": 50888832, "step": 23580 }, { "epoch": 3.84747145187602, "grad_norm": 0.03452681377530098, "learning_rate": 0.0009742363080509647, "loss": 0.0928, "num_input_tokens_seen": 50899616, "step": 23585 }, { "epoch": 3.8482871125611746, "grad_norm": 0.06478162109851837, "learning_rate": 0.000974213749270175, "loss": 0.0317, "num_input_tokens_seen": 50910976, "step": 23590 }, { "epoch": 3.8491027732463294, "grad_norm": 0.009894883260130882, "learning_rate": 0.0009741911808788422, "loss": 0.0534, "num_input_tokens_seen": 50921120, "step": 23595 }, { "epoch": 3.8499184339314843, "grad_norm": 0.03167342394590378, "learning_rate": 0.0009741686028774236, "loss": 0.1172, "num_input_tokens_seen": 50933088, "step": 23600 }, { "epoch": 3.8507340946166395, "grad_norm": 0.030892131850123405, "learning_rate": 0.0009741460152663768, "loss": 0.1121, "num_input_tokens_seen": 50944352, "step": 23605 }, { "epoch": 3.8515497553017943, "grad_norm": 0.18825654685497284, "learning_rate": 0.0009741234180461593, "loss": 0.1522, "num_input_tokens_seen": 50953952, "step": 23610 }, { "epoch": 3.8523654159869496, "grad_norm": 0.0546070858836174, "learning_rate": 0.0009741008112172293, "loss": 0.1309, "num_input_tokens_seen": 50964608, "step": 23615 }, { "epoch": 3.8531810766721044, "grad_norm": 0.02363422140479088, "learning_rate": 0.0009740781947800452, "loss": 0.0941, "num_input_tokens_seen": 50975936, "step": 23620 }, { "epoch": 3.8539967373572592, "grad_norm": 0.5084974765777588, "learning_rate": 0.0009740555687350648, "loss": 0.0854, "num_input_tokens_seen": 50987616, "step": 23625 }, { "epoch": 3.8548123980424145, "grad_norm": 0.1427038013935089, "learning_rate": 0.0009740329330827471, "loss": 0.2238, "num_input_tokens_seen": 50999232, "step": 23630 }, { "epoch": 3.8556280587275693, "grad_norm": 0.02333742193877697, "learning_rate": 0.0009740102878235505, "loss": 0.1202, "num_input_tokens_seen": 51009472, "step": 23635 }, { "epoch": 3.8564437194127246, "grad_norm": 0.02186654694378376, "learning_rate": 0.0009739876329579343, "loss": 0.1625, "num_input_tokens_seen": 51019168, "step": 23640 }, { "epoch": 3.8572593800978794, "grad_norm": 0.09074044227600098, "learning_rate": 0.0009739649684863572, "loss": 0.0441, "num_input_tokens_seen": 51030304, "step": 23645 }, { "epoch": 3.858075040783034, "grad_norm": 0.08764736354351044, "learning_rate": 0.0009739422944092789, "loss": 0.0643, "num_input_tokens_seen": 51041824, "step": 23650 }, { "epoch": 3.858890701468189, "grad_norm": 0.03463083505630493, "learning_rate": 0.0009739196107271586, "loss": 0.158, "num_input_tokens_seen": 51052096, "step": 23655 }, { "epoch": 3.8597063621533443, "grad_norm": 0.17095983028411865, "learning_rate": 0.0009738969174404562, "loss": 0.2874, "num_input_tokens_seen": 51061216, "step": 23660 }, { "epoch": 3.860522022838499, "grad_norm": 0.08554663509130478, "learning_rate": 0.0009738742145496318, "loss": 0.0539, "num_input_tokens_seen": 51071104, "step": 23665 }, { "epoch": 3.8613376835236544, "grad_norm": 0.04276026785373688, "learning_rate": 0.000973851502055145, "loss": 0.2047, "num_input_tokens_seen": 51081888, "step": 23670 }, { "epoch": 3.862153344208809, "grad_norm": 0.07063541561365128, "learning_rate": 0.0009738287799574565, "loss": 0.1157, "num_input_tokens_seen": 51092096, "step": 23675 }, { "epoch": 3.862969004893964, "grad_norm": 0.042133551090955734, "learning_rate": 0.0009738060482570268, "loss": 0.1163, "num_input_tokens_seen": 51103328, "step": 23680 }, { "epoch": 3.863784665579119, "grad_norm": 0.0867886021733284, "learning_rate": 0.0009737833069543163, "loss": 0.216, "num_input_tokens_seen": 51113760, "step": 23685 }, { "epoch": 3.864600326264274, "grad_norm": 0.21544049680233002, "learning_rate": 0.0009737605560497862, "loss": 0.2185, "num_input_tokens_seen": 51124672, "step": 23690 }, { "epoch": 3.865415986949429, "grad_norm": 0.10628072917461395, "learning_rate": 0.0009737377955438973, "loss": 0.0407, "num_input_tokens_seen": 51134432, "step": 23695 }, { "epoch": 3.866231647634584, "grad_norm": 0.06566362828016281, "learning_rate": 0.000973715025437111, "loss": 0.0896, "num_input_tokens_seen": 51146720, "step": 23700 }, { "epoch": 3.867047308319739, "grad_norm": 0.01717267371714115, "learning_rate": 0.0009736922457298889, "loss": 0.0235, "num_input_tokens_seen": 51158304, "step": 23705 }, { "epoch": 3.867862969004894, "grad_norm": 0.20249362289905548, "learning_rate": 0.0009736694564226924, "loss": 0.0803, "num_input_tokens_seen": 51169600, "step": 23710 }, { "epoch": 3.868678629690049, "grad_norm": 0.061586104333400726, "learning_rate": 0.0009736466575159835, "loss": 0.1088, "num_input_tokens_seen": 51180064, "step": 23715 }, { "epoch": 3.869494290375204, "grad_norm": 0.12015491724014282, "learning_rate": 0.0009736238490102243, "loss": 0.1441, "num_input_tokens_seen": 51191264, "step": 23720 }, { "epoch": 3.870309951060359, "grad_norm": 0.15340359508991241, "learning_rate": 0.0009736010309058769, "loss": 0.081, "num_input_tokens_seen": 51201280, "step": 23725 }, { "epoch": 3.871125611745514, "grad_norm": 0.04334424436092377, "learning_rate": 0.0009735782032034038, "loss": 0.1492, "num_input_tokens_seen": 51212704, "step": 23730 }, { "epoch": 3.8719412724306688, "grad_norm": 0.2308911830186844, "learning_rate": 0.0009735553659032674, "loss": 0.2042, "num_input_tokens_seen": 51223168, "step": 23735 }, { "epoch": 3.8727569331158236, "grad_norm": 0.17102345824241638, "learning_rate": 0.000973532519005931, "loss": 0.1492, "num_input_tokens_seen": 51233984, "step": 23740 }, { "epoch": 3.873572593800979, "grad_norm": 0.2291397750377655, "learning_rate": 0.0009735096625118574, "loss": 0.0985, "num_input_tokens_seen": 51244992, "step": 23745 }, { "epoch": 3.8743882544861337, "grad_norm": 0.043828945606946945, "learning_rate": 0.0009734867964215099, "loss": 0.0638, "num_input_tokens_seen": 51254720, "step": 23750 }, { "epoch": 3.875203915171289, "grad_norm": 0.12774845957756042, "learning_rate": 0.0009734639207353516, "loss": 0.1755, "num_input_tokens_seen": 51264928, "step": 23755 }, { "epoch": 3.8760195758564437, "grad_norm": 0.1162806898355484, "learning_rate": 0.0009734410354538464, "loss": 0.0886, "num_input_tokens_seen": 51275680, "step": 23760 }, { "epoch": 3.8768352365415986, "grad_norm": 0.14220871031284332, "learning_rate": 0.0009734181405774581, "loss": 0.1806, "num_input_tokens_seen": 51286656, "step": 23765 }, { "epoch": 3.877650897226754, "grad_norm": 0.022244248539209366, "learning_rate": 0.0009733952361066505, "loss": 0.1095, "num_input_tokens_seen": 51297568, "step": 23770 }, { "epoch": 3.8784665579119086, "grad_norm": 0.6627296805381775, "learning_rate": 0.0009733723220418877, "loss": 0.2441, "num_input_tokens_seen": 51307168, "step": 23775 }, { "epoch": 3.8792822185970635, "grad_norm": 0.07143672555685043, "learning_rate": 0.0009733493983836345, "loss": 0.0574, "num_input_tokens_seen": 51318464, "step": 23780 }, { "epoch": 3.8800978792822187, "grad_norm": 0.04589008167386055, "learning_rate": 0.0009733264651323553, "loss": 0.1622, "num_input_tokens_seen": 51328608, "step": 23785 }, { "epoch": 3.8809135399673735, "grad_norm": 0.08878227323293686, "learning_rate": 0.0009733035222885149, "loss": 0.0495, "num_input_tokens_seen": 51340224, "step": 23790 }, { "epoch": 3.8817292006525284, "grad_norm": 0.095435231924057, "learning_rate": 0.000973280569852578, "loss": 0.0657, "num_input_tokens_seen": 51350432, "step": 23795 }, { "epoch": 3.8825448613376836, "grad_norm": 0.46010127663612366, "learning_rate": 0.00097325760782501, "loss": 0.1968, "num_input_tokens_seen": 51361696, "step": 23800 }, { "epoch": 3.8833605220228384, "grad_norm": 0.07100843638181686, "learning_rate": 0.0009732346362062763, "loss": 0.0642, "num_input_tokens_seen": 51373344, "step": 23805 }, { "epoch": 3.8841761827079937, "grad_norm": 0.2558983564376831, "learning_rate": 0.0009732116549968421, "loss": 0.0794, "num_input_tokens_seen": 51384224, "step": 23810 }, { "epoch": 3.8849918433931485, "grad_norm": 0.043228037655353546, "learning_rate": 0.0009731886641971737, "loss": 0.093, "num_input_tokens_seen": 51394272, "step": 23815 }, { "epoch": 3.8858075040783033, "grad_norm": 0.018683865666389465, "learning_rate": 0.0009731656638077367, "loss": 0.3759, "num_input_tokens_seen": 51405248, "step": 23820 }, { "epoch": 3.886623164763458, "grad_norm": 0.10302640497684479, "learning_rate": 0.0009731426538289971, "loss": 0.1078, "num_input_tokens_seen": 51415648, "step": 23825 }, { "epoch": 3.8874388254486134, "grad_norm": 0.02997562289237976, "learning_rate": 0.0009731196342614214, "loss": 0.1012, "num_input_tokens_seen": 51427328, "step": 23830 }, { "epoch": 3.8882544861337682, "grad_norm": 0.09169988334178925, "learning_rate": 0.0009730966051054763, "loss": 0.0854, "num_input_tokens_seen": 51438560, "step": 23835 }, { "epoch": 3.8890701468189235, "grad_norm": 0.05968739837408066, "learning_rate": 0.0009730735663616281, "loss": 0.1731, "num_input_tokens_seen": 51450624, "step": 23840 }, { "epoch": 3.8898858075040783, "grad_norm": 0.09480788558721542, "learning_rate": 0.0009730505180303441, "loss": 0.0432, "num_input_tokens_seen": 51461120, "step": 23845 }, { "epoch": 3.890701468189233, "grad_norm": 0.280889093875885, "learning_rate": 0.0009730274601120913, "loss": 0.1684, "num_input_tokens_seen": 51471808, "step": 23850 }, { "epoch": 3.8915171288743884, "grad_norm": 0.07659041881561279, "learning_rate": 0.0009730043926073369, "loss": 0.1549, "num_input_tokens_seen": 51481664, "step": 23855 }, { "epoch": 3.892332789559543, "grad_norm": 0.06041976436972618, "learning_rate": 0.0009729813155165484, "loss": 0.0815, "num_input_tokens_seen": 51492480, "step": 23860 }, { "epoch": 3.8931484502446985, "grad_norm": 0.17896243929862976, "learning_rate": 0.0009729582288401934, "loss": 0.115, "num_input_tokens_seen": 51503104, "step": 23865 }, { "epoch": 3.8939641109298533, "grad_norm": 0.13407690823078156, "learning_rate": 0.0009729351325787402, "loss": 0.1577, "num_input_tokens_seen": 51513632, "step": 23870 }, { "epoch": 3.894779771615008, "grad_norm": 0.018490616232156754, "learning_rate": 0.0009729120267326564, "loss": 0.1152, "num_input_tokens_seen": 51524192, "step": 23875 }, { "epoch": 3.895595432300163, "grad_norm": 0.014588012360036373, "learning_rate": 0.0009728889113024103, "loss": 0.0322, "num_input_tokens_seen": 51535808, "step": 23880 }, { "epoch": 3.896411092985318, "grad_norm": 0.01506887562572956, "learning_rate": 0.0009728657862884707, "loss": 0.0472, "num_input_tokens_seen": 51547200, "step": 23885 }, { "epoch": 3.897226753670473, "grad_norm": 0.16400204598903656, "learning_rate": 0.0009728426516913061, "loss": 0.126, "num_input_tokens_seen": 51558912, "step": 23890 }, { "epoch": 3.8980424143556283, "grad_norm": 0.013204592280089855, "learning_rate": 0.0009728195075113851, "loss": 0.1135, "num_input_tokens_seen": 51569152, "step": 23895 }, { "epoch": 3.898858075040783, "grad_norm": 0.03372429683804512, "learning_rate": 0.000972796353749177, "loss": 0.0941, "num_input_tokens_seen": 51578656, "step": 23900 }, { "epoch": 3.899673735725938, "grad_norm": 0.0906345471739769, "learning_rate": 0.0009727731904051513, "loss": 0.1759, "num_input_tokens_seen": 51590720, "step": 23905 }, { "epoch": 3.9004893964110927, "grad_norm": 0.33975720405578613, "learning_rate": 0.0009727500174797769, "loss": 0.156, "num_input_tokens_seen": 51601760, "step": 23910 }, { "epoch": 3.901305057096248, "grad_norm": 0.0500725582242012, "learning_rate": 0.0009727268349735237, "loss": 0.0944, "num_input_tokens_seen": 51613408, "step": 23915 }, { "epoch": 3.902120717781403, "grad_norm": 0.19835978746414185, "learning_rate": 0.0009727036428868616, "loss": 0.1434, "num_input_tokens_seen": 51625440, "step": 23920 }, { "epoch": 3.902936378466558, "grad_norm": 0.024962209165096283, "learning_rate": 0.0009726804412202604, "loss": 0.1786, "num_input_tokens_seen": 51635712, "step": 23925 }, { "epoch": 3.903752039151713, "grad_norm": 0.07299522310495377, "learning_rate": 0.0009726572299741904, "loss": 0.1003, "num_input_tokens_seen": 51647104, "step": 23930 }, { "epoch": 3.9045676998368677, "grad_norm": 0.08222479373216629, "learning_rate": 0.0009726340091491221, "loss": 0.0757, "num_input_tokens_seen": 51657792, "step": 23935 }, { "epoch": 3.905383360522023, "grad_norm": 0.0386415533721447, "learning_rate": 0.000972610778745526, "loss": 0.0398, "num_input_tokens_seen": 51666976, "step": 23940 }, { "epoch": 3.9061990212071778, "grad_norm": 0.157911479473114, "learning_rate": 0.0009725875387638729, "loss": 0.1307, "num_input_tokens_seen": 51678240, "step": 23945 }, { "epoch": 3.907014681892333, "grad_norm": 0.09003937989473343, "learning_rate": 0.0009725642892046339, "loss": 0.0936, "num_input_tokens_seen": 51688320, "step": 23950 }, { "epoch": 3.907830342577488, "grad_norm": 0.14532308280467987, "learning_rate": 0.00097254103006828, "loss": 0.0668, "num_input_tokens_seen": 51699072, "step": 23955 }, { "epoch": 3.9086460032626427, "grad_norm": 0.48094627261161804, "learning_rate": 0.0009725177613552827, "loss": 0.1571, "num_input_tokens_seen": 51709824, "step": 23960 }, { "epoch": 3.9094616639477975, "grad_norm": 0.03152914717793465, "learning_rate": 0.0009724944830661135, "loss": 0.151, "num_input_tokens_seen": 51719776, "step": 23965 }, { "epoch": 3.9102773246329527, "grad_norm": 0.19058245420455933, "learning_rate": 0.0009724711952012442, "loss": 0.0609, "num_input_tokens_seen": 51729856, "step": 23970 }, { "epoch": 3.9110929853181076, "grad_norm": 0.19879646599292755, "learning_rate": 0.0009724478977611469, "loss": 0.1084, "num_input_tokens_seen": 51739840, "step": 23975 }, { "epoch": 3.911908646003263, "grad_norm": 0.17769742012023926, "learning_rate": 0.0009724245907462934, "loss": 0.1844, "num_input_tokens_seen": 51751584, "step": 23980 }, { "epoch": 3.9127243066884176, "grad_norm": 0.1687610000371933, "learning_rate": 0.0009724012741571563, "loss": 0.0698, "num_input_tokens_seen": 51762752, "step": 23985 }, { "epoch": 3.9135399673735725, "grad_norm": 0.009640276432037354, "learning_rate": 0.000972377947994208, "loss": 0.0349, "num_input_tokens_seen": 51773824, "step": 23990 }, { "epoch": 3.9143556280587277, "grad_norm": 0.3404175639152527, "learning_rate": 0.0009723546122579217, "loss": 0.0871, "num_input_tokens_seen": 51785888, "step": 23995 }, { "epoch": 3.9151712887438825, "grad_norm": 0.19050191342830658, "learning_rate": 0.0009723312669487696, "loss": 0.1133, "num_input_tokens_seen": 51796352, "step": 24000 }, { "epoch": 3.9159869494290374, "grad_norm": 0.5547030568122864, "learning_rate": 0.0009723079120672254, "loss": 0.1605, "num_input_tokens_seen": 51806880, "step": 24005 }, { "epoch": 3.9168026101141926, "grad_norm": 4.843212127685547, "learning_rate": 0.0009722845476137621, "loss": 0.3922, "num_input_tokens_seen": 51816416, "step": 24010 }, { "epoch": 3.9176182707993474, "grad_norm": 0.5055539011955261, "learning_rate": 0.0009722611735888532, "loss": 0.2511, "num_input_tokens_seen": 51827808, "step": 24015 }, { "epoch": 3.9184339314845023, "grad_norm": 0.09190112352371216, "learning_rate": 0.0009722377899929727, "loss": 0.0667, "num_input_tokens_seen": 51838784, "step": 24020 }, { "epoch": 3.9192495921696575, "grad_norm": 0.007673450279980898, "learning_rate": 0.0009722143968265942, "loss": 0.0724, "num_input_tokens_seen": 51849472, "step": 24025 }, { "epoch": 3.9200652528548123, "grad_norm": 0.3366202116012573, "learning_rate": 0.0009721909940901918, "loss": 0.3023, "num_input_tokens_seen": 51861280, "step": 24030 }, { "epoch": 3.9208809135399676, "grad_norm": 0.03231438994407654, "learning_rate": 0.0009721675817842402, "loss": 0.1527, "num_input_tokens_seen": 51872192, "step": 24035 }, { "epoch": 3.9216965742251224, "grad_norm": 0.09222828596830368, "learning_rate": 0.0009721441599092133, "loss": 0.0958, "num_input_tokens_seen": 51883392, "step": 24040 }, { "epoch": 3.9225122349102772, "grad_norm": 0.13727694749832153, "learning_rate": 0.0009721207284655862, "loss": 0.222, "num_input_tokens_seen": 51895104, "step": 24045 }, { "epoch": 3.923327895595432, "grad_norm": 0.026204874739050865, "learning_rate": 0.0009720972874538334, "loss": 0.1125, "num_input_tokens_seen": 51907200, "step": 24050 }, { "epoch": 3.9241435562805873, "grad_norm": 0.3605559170246124, "learning_rate": 0.0009720738368744304, "loss": 0.2623, "num_input_tokens_seen": 51919040, "step": 24055 }, { "epoch": 3.924959216965742, "grad_norm": 0.13947243988513947, "learning_rate": 0.0009720503767278522, "loss": 0.1135, "num_input_tokens_seen": 51930272, "step": 24060 }, { "epoch": 3.9257748776508974, "grad_norm": 0.0637165904045105, "learning_rate": 0.0009720269070145742, "loss": 0.0454, "num_input_tokens_seen": 51940352, "step": 24065 }, { "epoch": 3.926590538336052, "grad_norm": 0.1416083574295044, "learning_rate": 0.000972003427735072, "loss": 0.0758, "num_input_tokens_seen": 51951104, "step": 24070 }, { "epoch": 3.927406199021207, "grad_norm": 0.14216937124729156, "learning_rate": 0.0009719799388898219, "loss": 0.2473, "num_input_tokens_seen": 51962688, "step": 24075 }, { "epoch": 3.9282218597063623, "grad_norm": 0.11263450980186462, "learning_rate": 0.0009719564404792993, "loss": 0.1215, "num_input_tokens_seen": 51972480, "step": 24080 }, { "epoch": 3.929037520391517, "grad_norm": 0.08934614807367325, "learning_rate": 0.0009719329325039807, "loss": 0.132, "num_input_tokens_seen": 51982880, "step": 24085 }, { "epoch": 3.9298531810766724, "grad_norm": 0.17413219809532166, "learning_rate": 0.0009719094149643426, "loss": 0.0516, "num_input_tokens_seen": 51993984, "step": 24090 }, { "epoch": 3.930668841761827, "grad_norm": 0.1729128211736679, "learning_rate": 0.0009718858878608617, "loss": 0.0646, "num_input_tokens_seen": 52005472, "step": 24095 }, { "epoch": 3.931484502446982, "grad_norm": 0.017950816079974174, "learning_rate": 0.0009718623511940145, "loss": 0.1366, "num_input_tokens_seen": 52015040, "step": 24100 }, { "epoch": 3.932300163132137, "grad_norm": 0.023168236017227173, "learning_rate": 0.0009718388049642781, "loss": 0.0732, "num_input_tokens_seen": 52025920, "step": 24105 }, { "epoch": 3.933115823817292, "grad_norm": 0.14352481067180634, "learning_rate": 0.00097181524917213, "loss": 0.1955, "num_input_tokens_seen": 52036640, "step": 24110 }, { "epoch": 3.933931484502447, "grad_norm": 0.019288597628474236, "learning_rate": 0.0009717916838180471, "loss": 0.0822, "num_input_tokens_seen": 52047296, "step": 24115 }, { "epoch": 3.934747145187602, "grad_norm": 0.09332055598497391, "learning_rate": 0.0009717681089025073, "loss": 0.1454, "num_input_tokens_seen": 52058688, "step": 24120 }, { "epoch": 3.935562805872757, "grad_norm": 0.06229764223098755, "learning_rate": 0.0009717445244259882, "loss": 0.0266, "num_input_tokens_seen": 52070912, "step": 24125 }, { "epoch": 3.936378466557912, "grad_norm": 0.1792927384376526, "learning_rate": 0.0009717209303889679, "loss": 0.0582, "num_input_tokens_seen": 52081216, "step": 24130 }, { "epoch": 3.9371941272430666, "grad_norm": 0.17280422151088715, "learning_rate": 0.0009716973267919246, "loss": 0.0826, "num_input_tokens_seen": 52091744, "step": 24135 }, { "epoch": 3.938009787928222, "grad_norm": 0.03176361694931984, "learning_rate": 0.0009716737136353365, "loss": 0.0727, "num_input_tokens_seen": 52102560, "step": 24140 }, { "epoch": 3.9388254486133767, "grad_norm": 0.26055094599723816, "learning_rate": 0.0009716500909196824, "loss": 0.0577, "num_input_tokens_seen": 52113152, "step": 24145 }, { "epoch": 3.939641109298532, "grad_norm": 0.16252781450748444, "learning_rate": 0.0009716264586454406, "loss": 0.1684, "num_input_tokens_seen": 52124000, "step": 24150 }, { "epoch": 3.9404567699836868, "grad_norm": 0.456612765789032, "learning_rate": 0.0009716028168130906, "loss": 0.1063, "num_input_tokens_seen": 52136128, "step": 24155 }, { "epoch": 3.9412724306688416, "grad_norm": 0.01321106031537056, "learning_rate": 0.000971579165423111, "loss": 0.0915, "num_input_tokens_seen": 52147392, "step": 24160 }, { "epoch": 3.942088091353997, "grad_norm": 0.014208265580236912, "learning_rate": 0.0009715555044759815, "loss": 0.1119, "num_input_tokens_seen": 52158688, "step": 24165 }, { "epoch": 3.9429037520391517, "grad_norm": 0.34182924032211304, "learning_rate": 0.0009715318339721814, "loss": 0.0864, "num_input_tokens_seen": 52169024, "step": 24170 }, { "epoch": 3.943719412724307, "grad_norm": 0.230413019657135, "learning_rate": 0.0009715081539121908, "loss": 0.0606, "num_input_tokens_seen": 52179200, "step": 24175 }, { "epoch": 3.9445350734094617, "grad_norm": 0.1131008118391037, "learning_rate": 0.0009714844642964891, "loss": 0.1439, "num_input_tokens_seen": 52190976, "step": 24180 }, { "epoch": 3.9453507340946166, "grad_norm": 0.0557442344725132, "learning_rate": 0.0009714607651255565, "loss": 0.121, "num_input_tokens_seen": 52202240, "step": 24185 }, { "epoch": 3.9461663947797714, "grad_norm": 0.0334637425839901, "learning_rate": 0.0009714370563998736, "loss": 0.1161, "num_input_tokens_seen": 52214176, "step": 24190 }, { "epoch": 3.9469820554649266, "grad_norm": 0.059169892221689224, "learning_rate": 0.0009714133381199205, "loss": 0.1388, "num_input_tokens_seen": 52225568, "step": 24195 }, { "epoch": 3.9477977161500815, "grad_norm": 0.06997410953044891, "learning_rate": 0.0009713896102861782, "loss": 0.0548, "num_input_tokens_seen": 52236000, "step": 24200 }, { "epoch": 3.9486133768352367, "grad_norm": 0.3897138833999634, "learning_rate": 0.0009713658728991274, "loss": 0.0737, "num_input_tokens_seen": 52246752, "step": 24205 }, { "epoch": 3.9494290375203915, "grad_norm": 0.39354151487350464, "learning_rate": 0.0009713421259592493, "loss": 0.1298, "num_input_tokens_seen": 52255168, "step": 24210 }, { "epoch": 3.9502446982055464, "grad_norm": 0.29306307435035706, "learning_rate": 0.0009713183694670249, "loss": 0.0568, "num_input_tokens_seen": 52265728, "step": 24215 }, { "epoch": 3.9510603588907016, "grad_norm": 0.18733108043670654, "learning_rate": 0.000971294603422936, "loss": 0.0565, "num_input_tokens_seen": 52276480, "step": 24220 }, { "epoch": 3.9518760195758564, "grad_norm": 0.027989938855171204, "learning_rate": 0.000971270827827464, "loss": 0.0378, "num_input_tokens_seen": 52287264, "step": 24225 }, { "epoch": 3.9526916802610113, "grad_norm": 0.33323073387145996, "learning_rate": 0.0009712470426810909, "loss": 0.2929, "num_input_tokens_seen": 52297056, "step": 24230 }, { "epoch": 3.9535073409461665, "grad_norm": 0.24140670895576477, "learning_rate": 0.0009712232479842986, "loss": 0.0644, "num_input_tokens_seen": 52306976, "step": 24235 }, { "epoch": 3.9543230016313213, "grad_norm": 0.2808579206466675, "learning_rate": 0.0009711994437375693, "loss": 0.0764, "num_input_tokens_seen": 52316736, "step": 24240 }, { "epoch": 3.955138662316476, "grad_norm": 0.013691402040421963, "learning_rate": 0.0009711756299413856, "loss": 0.0788, "num_input_tokens_seen": 52326496, "step": 24245 }, { "epoch": 3.9559543230016314, "grad_norm": 0.09495241940021515, "learning_rate": 0.0009711518065962302, "loss": 0.0902, "num_input_tokens_seen": 52338336, "step": 24250 }, { "epoch": 3.9567699836867862, "grad_norm": 0.015896232798695564, "learning_rate": 0.0009711279737025856, "loss": 0.0631, "num_input_tokens_seen": 52349056, "step": 24255 }, { "epoch": 3.9575856443719415, "grad_norm": 0.35698509216308594, "learning_rate": 0.0009711041312609349, "loss": 0.1384, "num_input_tokens_seen": 52360640, "step": 24260 }, { "epoch": 3.9584013050570963, "grad_norm": 0.017111321911215782, "learning_rate": 0.0009710802792717613, "loss": 0.0447, "num_input_tokens_seen": 52372608, "step": 24265 }, { "epoch": 3.959216965742251, "grad_norm": 0.3478092849254608, "learning_rate": 0.0009710564177355483, "loss": 0.2142, "num_input_tokens_seen": 52386016, "step": 24270 }, { "epoch": 3.960032626427406, "grad_norm": 0.05999065190553665, "learning_rate": 0.0009710325466527794, "loss": 0.2207, "num_input_tokens_seen": 52397088, "step": 24275 }, { "epoch": 3.960848287112561, "grad_norm": 0.16255341470241547, "learning_rate": 0.0009710086660239386, "loss": 0.1429, "num_input_tokens_seen": 52408704, "step": 24280 }, { "epoch": 3.961663947797716, "grad_norm": 0.0768781378865242, "learning_rate": 0.0009709847758495094, "loss": 0.0511, "num_input_tokens_seen": 52419680, "step": 24285 }, { "epoch": 3.9624796084828713, "grad_norm": 0.2642399072647095, "learning_rate": 0.0009709608761299763, "loss": 0.0782, "num_input_tokens_seen": 52430816, "step": 24290 }, { "epoch": 3.963295269168026, "grad_norm": 0.0479847714304924, "learning_rate": 0.0009709369668658237, "loss": 0.0251, "num_input_tokens_seen": 52442368, "step": 24295 }, { "epoch": 3.964110929853181, "grad_norm": 0.06506838649511337, "learning_rate": 0.0009709130480575359, "loss": 0.1518, "num_input_tokens_seen": 52452160, "step": 24300 }, { "epoch": 3.964926590538336, "grad_norm": 0.03802900016307831, "learning_rate": 0.0009708891197055978, "loss": 0.0629, "num_input_tokens_seen": 52464064, "step": 24305 }, { "epoch": 3.965742251223491, "grad_norm": 0.13694965839385986, "learning_rate": 0.0009708651818104943, "loss": 0.0432, "num_input_tokens_seen": 52475552, "step": 24310 }, { "epoch": 3.9665579119086463, "grad_norm": 0.031155675649642944, "learning_rate": 0.0009708412343727106, "loss": 0.1315, "num_input_tokens_seen": 52486112, "step": 24315 }, { "epoch": 3.967373572593801, "grad_norm": 0.07723990827798843, "learning_rate": 0.000970817277392732, "loss": 0.0341, "num_input_tokens_seen": 52497216, "step": 24320 }, { "epoch": 3.968189233278956, "grad_norm": 0.024341948330402374, "learning_rate": 0.000970793310871044, "loss": 0.0806, "num_input_tokens_seen": 52507616, "step": 24325 }, { "epoch": 3.9690048939641107, "grad_norm": 0.07085871696472168, "learning_rate": 0.0009707693348081323, "loss": 0.1129, "num_input_tokens_seen": 52518624, "step": 24330 }, { "epoch": 3.969820554649266, "grad_norm": 0.06642138957977295, "learning_rate": 0.0009707453492044829, "loss": 0.2105, "num_input_tokens_seen": 52530624, "step": 24335 }, { "epoch": 3.970636215334421, "grad_norm": 0.7213853597640991, "learning_rate": 0.0009707213540605817, "loss": 0.1408, "num_input_tokens_seen": 52540704, "step": 24340 }, { "epoch": 3.971451876019576, "grad_norm": 0.15944816172122955, "learning_rate": 0.0009706973493769152, "loss": 0.2442, "num_input_tokens_seen": 52550912, "step": 24345 }, { "epoch": 3.972267536704731, "grad_norm": 0.1875671148300171, "learning_rate": 0.0009706733351539696, "loss": 0.0982, "num_input_tokens_seen": 52562112, "step": 24350 }, { "epoch": 3.9730831973898857, "grad_norm": 0.0812494158744812, "learning_rate": 0.0009706493113922318, "loss": 0.063, "num_input_tokens_seen": 52573184, "step": 24355 }, { "epoch": 3.9738988580750405, "grad_norm": 0.033762574195861816, "learning_rate": 0.000970625278092189, "loss": 0.0526, "num_input_tokens_seen": 52584704, "step": 24360 }, { "epoch": 3.9747145187601958, "grad_norm": 0.08540665358304977, "learning_rate": 0.0009706012352543276, "loss": 0.073, "num_input_tokens_seen": 52595648, "step": 24365 }, { "epoch": 3.9755301794453506, "grad_norm": 0.02744572050869465, "learning_rate": 0.0009705771828791353, "loss": 0.0905, "num_input_tokens_seen": 52605728, "step": 24370 }, { "epoch": 3.976345840130506, "grad_norm": 0.05221731588244438, "learning_rate": 0.0009705531209670993, "loss": 0.0625, "num_input_tokens_seen": 52615936, "step": 24375 }, { "epoch": 3.9771615008156607, "grad_norm": 0.006962298881262541, "learning_rate": 0.0009705290495187073, "loss": 0.0343, "num_input_tokens_seen": 52626560, "step": 24380 }, { "epoch": 3.9779771615008155, "grad_norm": 0.12329696863889694, "learning_rate": 0.0009705049685344474, "loss": 0.0985, "num_input_tokens_seen": 52636160, "step": 24385 }, { "epoch": 3.9787928221859707, "grad_norm": 0.14080482721328735, "learning_rate": 0.0009704808780148074, "loss": 0.0635, "num_input_tokens_seen": 52648544, "step": 24390 }, { "epoch": 3.9796084828711256, "grad_norm": 0.05773625522851944, "learning_rate": 0.0009704567779602754, "loss": 0.0154, "num_input_tokens_seen": 52659328, "step": 24395 }, { "epoch": 3.980424143556281, "grad_norm": 0.316988468170166, "learning_rate": 0.0009704326683713402, "loss": 0.1524, "num_input_tokens_seen": 52669408, "step": 24400 }, { "epoch": 3.9812398042414356, "grad_norm": 0.4685678482055664, "learning_rate": 0.00097040854924849, "loss": 0.1127, "num_input_tokens_seen": 52679232, "step": 24405 }, { "epoch": 3.9820554649265905, "grad_norm": 0.164290189743042, "learning_rate": 0.0009703844205922139, "loss": 0.0962, "num_input_tokens_seen": 52691008, "step": 24410 }, { "epoch": 3.9828711256117453, "grad_norm": 0.20805250108242035, "learning_rate": 0.0009703602824030007, "loss": 0.1594, "num_input_tokens_seen": 52701728, "step": 24415 }, { "epoch": 3.9836867862969005, "grad_norm": 0.06583689898252487, "learning_rate": 0.0009703361346813398, "loss": 0.0701, "num_input_tokens_seen": 52712480, "step": 24420 }, { "epoch": 3.9845024469820554, "grad_norm": 0.0669354498386383, "learning_rate": 0.0009703119774277205, "loss": 0.0166, "num_input_tokens_seen": 52722048, "step": 24425 }, { "epoch": 3.9853181076672106, "grad_norm": 0.06806125491857529, "learning_rate": 0.0009702878106426321, "loss": 0.2666, "num_input_tokens_seen": 52731264, "step": 24430 }, { "epoch": 3.9861337683523654, "grad_norm": 0.32292285561561584, "learning_rate": 0.0009702636343265649, "loss": 0.15, "num_input_tokens_seen": 52742144, "step": 24435 }, { "epoch": 3.9869494290375203, "grad_norm": 0.19130632281303406, "learning_rate": 0.0009702394484800084, "loss": 0.1667, "num_input_tokens_seen": 52752352, "step": 24440 }, { "epoch": 3.9877650897226755, "grad_norm": 0.15561111271381378, "learning_rate": 0.000970215253103453, "loss": 0.1446, "num_input_tokens_seen": 52762464, "step": 24445 }, { "epoch": 3.9885807504078303, "grad_norm": 0.03692816570401192, "learning_rate": 0.0009701910481973889, "loss": 0.0747, "num_input_tokens_seen": 52773312, "step": 24450 }, { "epoch": 3.9893964110929856, "grad_norm": 0.02050204388797283, "learning_rate": 0.0009701668337623069, "loss": 0.0408, "num_input_tokens_seen": 52784608, "step": 24455 }, { "epoch": 3.9902120717781404, "grad_norm": 0.24797934293746948, "learning_rate": 0.0009701426097986974, "loss": 0.1083, "num_input_tokens_seen": 52795040, "step": 24460 }, { "epoch": 3.9910277324632952, "grad_norm": 0.012386571615934372, "learning_rate": 0.0009701183763070516, "loss": 0.0266, "num_input_tokens_seen": 52805056, "step": 24465 }, { "epoch": 3.99184339314845, "grad_norm": 0.3520895540714264, "learning_rate": 0.0009700941332878605, "loss": 0.2748, "num_input_tokens_seen": 52815872, "step": 24470 }, { "epoch": 3.9926590538336053, "grad_norm": 0.26927512884140015, "learning_rate": 0.0009700698807416153, "loss": 0.3077, "num_input_tokens_seen": 52827264, "step": 24475 }, { "epoch": 3.99347471451876, "grad_norm": 0.07021971046924591, "learning_rate": 0.0009700456186688078, "loss": 0.091, "num_input_tokens_seen": 52838720, "step": 24480 }, { "epoch": 3.9942903752039154, "grad_norm": 0.18801428377628326, "learning_rate": 0.0009700213470699295, "loss": 0.1289, "num_input_tokens_seen": 52849344, "step": 24485 }, { "epoch": 3.99510603588907, "grad_norm": 0.23362679779529572, "learning_rate": 0.0009699970659454723, "loss": 0.1978, "num_input_tokens_seen": 52859872, "step": 24490 }, { "epoch": 3.995921696574225, "grad_norm": 0.05110766738653183, "learning_rate": 0.0009699727752959284, "loss": 0.1485, "num_input_tokens_seen": 52869472, "step": 24495 }, { "epoch": 3.99673735725938, "grad_norm": 0.007008140906691551, "learning_rate": 0.00096994847512179, "loss": 0.0583, "num_input_tokens_seen": 52879936, "step": 24500 }, { "epoch": 3.997553017944535, "grad_norm": 0.032286640256643295, "learning_rate": 0.0009699241654235495, "loss": 0.056, "num_input_tokens_seen": 52891264, "step": 24505 }, { "epoch": 3.99836867862969, "grad_norm": 0.21897633373737335, "learning_rate": 0.0009698998462016997, "loss": 0.0781, "num_input_tokens_seen": 52903488, "step": 24510 }, { "epoch": 3.999184339314845, "grad_norm": 0.06787893176078796, "learning_rate": 0.0009698755174567333, "loss": 0.0984, "num_input_tokens_seen": 52915712, "step": 24515 }, { "epoch": 4.0, "grad_norm": 0.006321717519313097, "learning_rate": 0.0009698511791891435, "loss": 0.0523, "num_input_tokens_seen": 52924864, "step": 24520 }, { "epoch": 4.0, "eval_loss": 0.12727603316307068, "eval_runtime": 104.3199, "eval_samples_per_second": 26.122, "eval_steps_per_second": 6.538, "num_input_tokens_seen": 52924864, "step": 24520 }, { "epoch": 4.000815660685155, "grad_norm": 0.10601882636547089, "learning_rate": 0.0009698268313994236, "loss": 0.0416, "num_input_tokens_seen": 52934880, "step": 24525 }, { "epoch": 4.00163132137031, "grad_norm": 0.19732458889484406, "learning_rate": 0.0009698024740880668, "loss": 0.2092, "num_input_tokens_seen": 52947104, "step": 24530 }, { "epoch": 4.002446982055465, "grad_norm": 0.06999547779560089, "learning_rate": 0.0009697781072555672, "loss": 0.0997, "num_input_tokens_seen": 52957376, "step": 24535 }, { "epoch": 4.00326264274062, "grad_norm": 0.40887579321861267, "learning_rate": 0.0009697537309024181, "loss": 0.2025, "num_input_tokens_seen": 52969056, "step": 24540 }, { "epoch": 4.004078303425775, "grad_norm": 0.06993808597326279, "learning_rate": 0.0009697293450291136, "loss": 0.0516, "num_input_tokens_seen": 52980832, "step": 24545 }, { "epoch": 4.00489396411093, "grad_norm": 0.07739964872598648, "learning_rate": 0.0009697049496361481, "loss": 0.0745, "num_input_tokens_seen": 52991520, "step": 24550 }, { "epoch": 4.005709624796085, "grad_norm": 0.022353854030370712, "learning_rate": 0.000969680544724016, "loss": 0.0501, "num_input_tokens_seen": 53003136, "step": 24555 }, { "epoch": 4.006525285481239, "grad_norm": 0.26448503136634827, "learning_rate": 0.0009696561302932117, "loss": 0.1522, "num_input_tokens_seen": 53014880, "step": 24560 }, { "epoch": 4.007340946166395, "grad_norm": 0.013454681262373924, "learning_rate": 0.0009696317063442303, "loss": 0.0632, "num_input_tokens_seen": 53025088, "step": 24565 }, { "epoch": 4.00815660685155, "grad_norm": 0.06075728312134743, "learning_rate": 0.0009696072728775664, "loss": 0.1127, "num_input_tokens_seen": 53035744, "step": 24570 }, { "epoch": 4.008972267536705, "grad_norm": 0.057915687561035156, "learning_rate": 0.0009695828298937155, "loss": 0.0787, "num_input_tokens_seen": 53046240, "step": 24575 }, { "epoch": 4.00978792822186, "grad_norm": 0.04714732617139816, "learning_rate": 0.0009695583773931728, "loss": 0.0344, "num_input_tokens_seen": 53057824, "step": 24580 }, { "epoch": 4.010603588907014, "grad_norm": 0.05536492168903351, "learning_rate": 0.000969533915376434, "loss": 0.2383, "num_input_tokens_seen": 53068288, "step": 24585 }, { "epoch": 4.011419249592169, "grad_norm": 0.022904792800545692, "learning_rate": 0.0009695094438439947, "loss": 0.0573, "num_input_tokens_seen": 53079808, "step": 24590 }, { "epoch": 4.012234910277325, "grad_norm": 0.12712028622627258, "learning_rate": 0.000969484962796351, "loss": 0.0664, "num_input_tokens_seen": 53090176, "step": 24595 }, { "epoch": 4.01305057096248, "grad_norm": 0.023165058344602585, "learning_rate": 0.0009694604722339987, "loss": 0.0826, "num_input_tokens_seen": 53101344, "step": 24600 }, { "epoch": 4.013866231647635, "grad_norm": 0.018889570608735085, "learning_rate": 0.0009694359721574345, "loss": 0.1299, "num_input_tokens_seen": 53113280, "step": 24605 }, { "epoch": 4.014681892332789, "grad_norm": 0.02854148857295513, "learning_rate": 0.0009694114625671548, "loss": 0.2324, "num_input_tokens_seen": 53122976, "step": 24610 }, { "epoch": 4.015497553017944, "grad_norm": 0.011138183064758778, "learning_rate": 0.0009693869434636564, "loss": 0.0435, "num_input_tokens_seen": 53134208, "step": 24615 }, { "epoch": 4.0163132137031, "grad_norm": 0.1200600266456604, "learning_rate": 0.000969362414847436, "loss": 0.0935, "num_input_tokens_seen": 53144192, "step": 24620 }, { "epoch": 4.017128874388255, "grad_norm": 0.17180626094341278, "learning_rate": 0.0009693378767189909, "loss": 0.1214, "num_input_tokens_seen": 53154976, "step": 24625 }, { "epoch": 4.0179445350734095, "grad_norm": 0.025815125554800034, "learning_rate": 0.0009693133290788184, "loss": 0.0175, "num_input_tokens_seen": 53166080, "step": 24630 }, { "epoch": 4.018760195758564, "grad_norm": 0.0840173065662384, "learning_rate": 0.0009692887719274159, "loss": 0.0601, "num_input_tokens_seen": 53175648, "step": 24635 }, { "epoch": 4.019575856443719, "grad_norm": 0.3115505874156952, "learning_rate": 0.0009692642052652811, "loss": 0.1466, "num_input_tokens_seen": 53187168, "step": 24640 }, { "epoch": 4.020391517128874, "grad_norm": 0.08636072278022766, "learning_rate": 0.0009692396290929118, "loss": 0.088, "num_input_tokens_seen": 53197728, "step": 24645 }, { "epoch": 4.02120717781403, "grad_norm": 0.27797725796699524, "learning_rate": 0.0009692150434108061, "loss": 0.1652, "num_input_tokens_seen": 53208320, "step": 24650 }, { "epoch": 4.0220228384991845, "grad_norm": 0.04580129310488701, "learning_rate": 0.0009691904482194625, "loss": 0.0802, "num_input_tokens_seen": 53219968, "step": 24655 }, { "epoch": 4.022838499184339, "grad_norm": 0.027268704026937485, "learning_rate": 0.000969165843519379, "loss": 0.0541, "num_input_tokens_seen": 53229504, "step": 24660 }, { "epoch": 4.023654159869494, "grad_norm": 0.009743648581206799, "learning_rate": 0.0009691412293110546, "loss": 0.0359, "num_input_tokens_seen": 53240576, "step": 24665 }, { "epoch": 4.024469820554649, "grad_norm": 0.007040457334369421, "learning_rate": 0.0009691166055949881, "loss": 0.12, "num_input_tokens_seen": 53250784, "step": 24670 }, { "epoch": 4.025285481239805, "grad_norm": 0.016454510390758514, "learning_rate": 0.0009690919723716785, "loss": 0.276, "num_input_tokens_seen": 53260768, "step": 24675 }, { "epoch": 4.0261011419249595, "grad_norm": 0.045097168534994125, "learning_rate": 0.000969067329641625, "loss": 0.0526, "num_input_tokens_seen": 53272224, "step": 24680 }, { "epoch": 4.026916802610114, "grad_norm": 0.06507758051156998, "learning_rate": 0.000969042677405327, "loss": 0.0151, "num_input_tokens_seen": 53283744, "step": 24685 }, { "epoch": 4.027732463295269, "grad_norm": 0.15810927748680115, "learning_rate": 0.0009690180156632839, "loss": 0.1287, "num_input_tokens_seen": 53295488, "step": 24690 }, { "epoch": 4.028548123980424, "grad_norm": 0.0405065082013607, "learning_rate": 0.000968993344415996, "loss": 0.0203, "num_input_tokens_seen": 53306208, "step": 24695 }, { "epoch": 4.029363784665579, "grad_norm": 0.16484472155570984, "learning_rate": 0.0009689686636639629, "loss": 0.0675, "num_input_tokens_seen": 53316896, "step": 24700 }, { "epoch": 4.0301794453507345, "grad_norm": 0.3420546352863312, "learning_rate": 0.000968943973407685, "loss": 0.1018, "num_input_tokens_seen": 53328384, "step": 24705 }, { "epoch": 4.030995106035889, "grad_norm": 0.05358998849987984, "learning_rate": 0.0009689192736476624, "loss": 0.0494, "num_input_tokens_seen": 53339360, "step": 24710 }, { "epoch": 4.031810766721044, "grad_norm": 0.26518210768699646, "learning_rate": 0.000968894564384396, "loss": 0.1023, "num_input_tokens_seen": 53350656, "step": 24715 }, { "epoch": 4.032626427406199, "grad_norm": 0.017618702724575996, "learning_rate": 0.0009688698456183863, "loss": 0.1192, "num_input_tokens_seen": 53361600, "step": 24720 }, { "epoch": 4.033442088091354, "grad_norm": 0.07377619296312332, "learning_rate": 0.0009688451173501345, "loss": 0.2517, "num_input_tokens_seen": 53373024, "step": 24725 }, { "epoch": 4.034257748776509, "grad_norm": 0.027281904593110085, "learning_rate": 0.0009688203795801415, "loss": 0.0425, "num_input_tokens_seen": 53383872, "step": 24730 }, { "epoch": 4.035073409461664, "grad_norm": 0.03449869900941849, "learning_rate": 0.0009687956323089088, "loss": 0.0635, "num_input_tokens_seen": 53395392, "step": 24735 }, { "epoch": 4.035889070146819, "grad_norm": 0.24531051516532898, "learning_rate": 0.000968770875536938, "loss": 0.1237, "num_input_tokens_seen": 53406464, "step": 24740 }, { "epoch": 4.036704730831974, "grad_norm": 0.08671460300683975, "learning_rate": 0.0009687461092647308, "loss": 0.0299, "num_input_tokens_seen": 53417120, "step": 24745 }, { "epoch": 4.037520391517129, "grad_norm": 0.3040340542793274, "learning_rate": 0.0009687213334927888, "loss": 0.1765, "num_input_tokens_seen": 53428128, "step": 24750 }, { "epoch": 4.0383360522022835, "grad_norm": 0.06941133737564087, "learning_rate": 0.0009686965482216145, "loss": 0.1766, "num_input_tokens_seen": 53438656, "step": 24755 }, { "epoch": 4.039151712887439, "grad_norm": 0.1264486461877823, "learning_rate": 0.00096867175345171, "loss": 0.1142, "num_input_tokens_seen": 53449824, "step": 24760 }, { "epoch": 4.039967373572594, "grad_norm": 0.057845428586006165, "learning_rate": 0.0009686469491835779, "loss": 0.047, "num_input_tokens_seen": 53460384, "step": 24765 }, { "epoch": 4.040783034257749, "grad_norm": 0.2089562714099884, "learning_rate": 0.0009686221354177209, "loss": 0.0944, "num_input_tokens_seen": 53472608, "step": 24770 }, { "epoch": 4.041598694942904, "grad_norm": 0.12175298482179642, "learning_rate": 0.0009685973121546417, "loss": 0.1989, "num_input_tokens_seen": 53484128, "step": 24775 }, { "epoch": 4.0424143556280585, "grad_norm": 0.02800212986767292, "learning_rate": 0.0009685724793948436, "loss": 0.0514, "num_input_tokens_seen": 53494560, "step": 24780 }, { "epoch": 4.043230016313213, "grad_norm": 0.4970827102661133, "learning_rate": 0.0009685476371388298, "loss": 0.0691, "num_input_tokens_seen": 53506112, "step": 24785 }, { "epoch": 4.044045676998369, "grad_norm": 0.07728522270917892, "learning_rate": 0.0009685227853871037, "loss": 0.0762, "num_input_tokens_seen": 53516896, "step": 24790 }, { "epoch": 4.044861337683524, "grad_norm": 0.06438777595758438, "learning_rate": 0.000968497924140169, "loss": 0.156, "num_input_tokens_seen": 53527360, "step": 24795 }, { "epoch": 4.045676998368679, "grad_norm": 0.021527748554944992, "learning_rate": 0.0009684730533985296, "loss": 0.1165, "num_input_tokens_seen": 53537376, "step": 24800 }, { "epoch": 4.0464926590538335, "grad_norm": 0.03212462738156319, "learning_rate": 0.0009684481731626895, "loss": 0.1602, "num_input_tokens_seen": 53548128, "step": 24805 }, { "epoch": 4.047308319738988, "grad_norm": 0.1865341067314148, "learning_rate": 0.0009684232834331528, "loss": 0.0942, "num_input_tokens_seen": 53559104, "step": 24810 }, { "epoch": 4.048123980424143, "grad_norm": 0.08570602536201477, "learning_rate": 0.000968398384210424, "loss": 0.1614, "num_input_tokens_seen": 53569536, "step": 24815 }, { "epoch": 4.048939641109299, "grad_norm": 0.016816744580864906, "learning_rate": 0.0009683734754950078, "loss": 0.0303, "num_input_tokens_seen": 53580320, "step": 24820 }, { "epoch": 4.049755301794454, "grad_norm": 0.25714829564094543, "learning_rate": 0.0009683485572874089, "loss": 0.1741, "num_input_tokens_seen": 53590656, "step": 24825 }, { "epoch": 4.0505709624796085, "grad_norm": 0.2475864142179489, "learning_rate": 0.0009683236295881324, "loss": 0.1764, "num_input_tokens_seen": 53601600, "step": 24830 }, { "epoch": 4.051386623164763, "grad_norm": 0.08192267268896103, "learning_rate": 0.0009682986923976834, "loss": 0.0579, "num_input_tokens_seen": 53612384, "step": 24835 }, { "epoch": 4.052202283849918, "grad_norm": 0.033257972449064255, "learning_rate": 0.0009682737457165673, "loss": 0.1704, "num_input_tokens_seen": 53622272, "step": 24840 }, { "epoch": 4.053017944535074, "grad_norm": 0.008494735695421696, "learning_rate": 0.0009682487895452898, "loss": 0.0993, "num_input_tokens_seen": 53633408, "step": 24845 }, { "epoch": 4.053833605220229, "grad_norm": 0.27779561281204224, "learning_rate": 0.0009682238238843565, "loss": 0.1618, "num_input_tokens_seen": 53644352, "step": 24850 }, { "epoch": 4.054649265905383, "grad_norm": 0.03326565399765968, "learning_rate": 0.0009681988487342735, "loss": 0.0514, "num_input_tokens_seen": 53655328, "step": 24855 }, { "epoch": 4.055464926590538, "grad_norm": 0.07106968760490417, "learning_rate": 0.0009681738640955466, "loss": 0.0364, "num_input_tokens_seen": 53666208, "step": 24860 }, { "epoch": 4.056280587275693, "grad_norm": 0.0074151367880403996, "learning_rate": 0.0009681488699686827, "loss": 0.1129, "num_input_tokens_seen": 53676800, "step": 24865 }, { "epoch": 4.057096247960848, "grad_norm": 0.032533202320337296, "learning_rate": 0.000968123866354188, "loss": 0.1006, "num_input_tokens_seen": 53688448, "step": 24870 }, { "epoch": 4.057911908646004, "grad_norm": 0.17371034622192383, "learning_rate": 0.0009680988532525693, "loss": 0.0634, "num_input_tokens_seen": 53700320, "step": 24875 }, { "epoch": 4.058727569331158, "grad_norm": 0.016324106603860855, "learning_rate": 0.0009680738306643335, "loss": 0.0723, "num_input_tokens_seen": 53711552, "step": 24880 }, { "epoch": 4.059543230016313, "grad_norm": 0.0885259360074997, "learning_rate": 0.0009680487985899878, "loss": 0.0363, "num_input_tokens_seen": 53722496, "step": 24885 }, { "epoch": 4.060358890701468, "grad_norm": 0.1811944991350174, "learning_rate": 0.0009680237570300392, "loss": 0.0338, "num_input_tokens_seen": 53733696, "step": 24890 }, { "epoch": 4.061174551386623, "grad_norm": 0.030723657459020615, "learning_rate": 0.0009679987059849956, "loss": 0.0621, "num_input_tokens_seen": 53744320, "step": 24895 }, { "epoch": 4.061990212071779, "grad_norm": 0.008955578319728374, "learning_rate": 0.0009679736454553645, "loss": 0.1588, "num_input_tokens_seen": 53757376, "step": 24900 }, { "epoch": 4.062805872756933, "grad_norm": 0.04930857568979263, "learning_rate": 0.0009679485754416538, "loss": 0.052, "num_input_tokens_seen": 53765856, "step": 24905 }, { "epoch": 4.063621533442088, "grad_norm": 0.3661515712738037, "learning_rate": 0.0009679234959443717, "loss": 0.1314, "num_input_tokens_seen": 53777824, "step": 24910 }, { "epoch": 4.064437194127243, "grad_norm": 0.11676975339651108, "learning_rate": 0.0009678984069640262, "loss": 0.1151, "num_input_tokens_seen": 53788992, "step": 24915 }, { "epoch": 4.065252854812398, "grad_norm": 0.015325689688324928, "learning_rate": 0.000967873308501126, "loss": 0.1413, "num_input_tokens_seen": 53801056, "step": 24920 }, { "epoch": 4.066068515497553, "grad_norm": 0.2761600911617279, "learning_rate": 0.0009678482005561795, "loss": 0.309, "num_input_tokens_seen": 53812096, "step": 24925 }, { "epoch": 4.066884176182708, "grad_norm": 0.09583926945924759, "learning_rate": 0.0009678230831296959, "loss": 0.1595, "num_input_tokens_seen": 53823104, "step": 24930 }, { "epoch": 4.067699836867863, "grad_norm": 0.06730013340711594, "learning_rate": 0.000967797956222184, "loss": 0.0445, "num_input_tokens_seen": 53834272, "step": 24935 }, { "epoch": 4.068515497553018, "grad_norm": 0.045218661427497864, "learning_rate": 0.000967772819834153, "loss": 0.104, "num_input_tokens_seen": 53844544, "step": 24940 }, { "epoch": 4.069331158238173, "grad_norm": 0.014737669378519058, "learning_rate": 0.0009677476739661124, "loss": 0.2276, "num_input_tokens_seen": 53856768, "step": 24945 }, { "epoch": 4.070146818923328, "grad_norm": 0.22313740849494934, "learning_rate": 0.0009677225186185719, "loss": 0.1925, "num_input_tokens_seen": 53866944, "step": 24950 }, { "epoch": 4.0709624796084825, "grad_norm": 0.07140855491161346, "learning_rate": 0.0009676973537920411, "loss": 0.0309, "num_input_tokens_seen": 53877920, "step": 24955 }, { "epoch": 4.071778140293638, "grad_norm": 0.1447368711233139, "learning_rate": 0.0009676721794870302, "loss": 0.1665, "num_input_tokens_seen": 53888960, "step": 24960 }, { "epoch": 4.072593800978793, "grad_norm": 0.25620612502098083, "learning_rate": 0.0009676469957040492, "loss": 0.1921, "num_input_tokens_seen": 53899040, "step": 24965 }, { "epoch": 4.073409461663948, "grad_norm": 0.15624554455280304, "learning_rate": 0.0009676218024436087, "loss": 0.0798, "num_input_tokens_seen": 53909824, "step": 24970 }, { "epoch": 4.074225122349103, "grad_norm": 0.020270979031920433, "learning_rate": 0.0009675965997062192, "loss": 0.0455, "num_input_tokens_seen": 53920448, "step": 24975 }, { "epoch": 4.075040783034257, "grad_norm": 0.08254649490118027, "learning_rate": 0.0009675713874923912, "loss": 0.0397, "num_input_tokens_seen": 53930048, "step": 24980 }, { "epoch": 4.075856443719413, "grad_norm": 0.05702510103583336, "learning_rate": 0.0009675461658026361, "loss": 0.0786, "num_input_tokens_seen": 53940480, "step": 24985 }, { "epoch": 4.076672104404568, "grad_norm": 0.09859247505664825, "learning_rate": 0.0009675209346374647, "loss": 0.2502, "num_input_tokens_seen": 53951168, "step": 24990 }, { "epoch": 4.077487765089723, "grad_norm": 0.024416720494627953, "learning_rate": 0.0009674956939973885, "loss": 0.0294, "num_input_tokens_seen": 53961664, "step": 24995 }, { "epoch": 4.078303425774878, "grad_norm": 0.26302993297576904, "learning_rate": 0.0009674704438829189, "loss": 0.1044, "num_input_tokens_seen": 53971968, "step": 25000 }, { "epoch": 4.079119086460032, "grad_norm": 0.03912968188524246, "learning_rate": 0.0009674451842945679, "loss": 0.1025, "num_input_tokens_seen": 53982688, "step": 25005 }, { "epoch": 4.079934747145187, "grad_norm": 0.7045699954032898, "learning_rate": 0.0009674199152328472, "loss": 0.4122, "num_input_tokens_seen": 53993568, "step": 25010 }, { "epoch": 4.080750407830343, "grad_norm": 0.020701715722680092, "learning_rate": 0.0009673946366982689, "loss": 0.0646, "num_input_tokens_seen": 54004064, "step": 25015 }, { "epoch": 4.081566068515498, "grad_norm": 0.061216384172439575, "learning_rate": 0.0009673693486913453, "loss": 0.0468, "num_input_tokens_seen": 54014784, "step": 25020 }, { "epoch": 4.082381729200653, "grad_norm": 0.06756249070167542, "learning_rate": 0.000967344051212589, "loss": 0.1098, "num_input_tokens_seen": 54024416, "step": 25025 }, { "epoch": 4.083197389885807, "grad_norm": 0.031191300600767136, "learning_rate": 0.0009673187442625126, "loss": 0.1058, "num_input_tokens_seen": 54036480, "step": 25030 }, { "epoch": 4.084013050570962, "grad_norm": 0.043119218200445175, "learning_rate": 0.0009672934278416292, "loss": 0.0548, "num_input_tokens_seen": 54046528, "step": 25035 }, { "epoch": 4.084828711256117, "grad_norm": 0.01362586859613657, "learning_rate": 0.0009672681019504514, "loss": 0.0424, "num_input_tokens_seen": 54057536, "step": 25040 }, { "epoch": 4.085644371941273, "grad_norm": 0.19945666193962097, "learning_rate": 0.0009672427665894929, "loss": 0.0648, "num_input_tokens_seen": 54067744, "step": 25045 }, { "epoch": 4.0864600326264275, "grad_norm": 0.28742578625679016, "learning_rate": 0.0009672174217592671, "loss": 0.1821, "num_input_tokens_seen": 54078400, "step": 25050 }, { "epoch": 4.087275693311582, "grad_norm": 0.01776294969022274, "learning_rate": 0.0009671920674602874, "loss": 0.0677, "num_input_tokens_seen": 54088384, "step": 25055 }, { "epoch": 4.088091353996737, "grad_norm": 0.2157251089811325, "learning_rate": 0.0009671667036930678, "loss": 0.1497, "num_input_tokens_seen": 54098464, "step": 25060 }, { "epoch": 4.088907014681892, "grad_norm": 0.12279714643955231, "learning_rate": 0.0009671413304581224, "loss": 0.046, "num_input_tokens_seen": 54110240, "step": 25065 }, { "epoch": 4.089722675367048, "grad_norm": 0.016327911987900734, "learning_rate": 0.0009671159477559652, "loss": 0.0543, "num_input_tokens_seen": 54122208, "step": 25070 }, { "epoch": 4.0905383360522025, "grad_norm": 0.0545714907348156, "learning_rate": 0.0009670905555871108, "loss": 0.1086, "num_input_tokens_seen": 54133184, "step": 25075 }, { "epoch": 4.091353996737357, "grad_norm": 0.022817470133304596, "learning_rate": 0.0009670651539520737, "loss": 0.1056, "num_input_tokens_seen": 54144000, "step": 25080 }, { "epoch": 4.092169657422512, "grad_norm": 0.020105836912989616, "learning_rate": 0.0009670397428513688, "loss": 0.0467, "num_input_tokens_seen": 54155904, "step": 25085 }, { "epoch": 4.092985318107667, "grad_norm": 0.28786158561706543, "learning_rate": 0.000967014322285511, "loss": 0.1207, "num_input_tokens_seen": 54166400, "step": 25090 }, { "epoch": 4.093800978792822, "grad_norm": 0.10848649591207504, "learning_rate": 0.0009669888922550154, "loss": 0.1342, "num_input_tokens_seen": 54176928, "step": 25095 }, { "epoch": 4.0946166394779775, "grad_norm": 0.029086461290717125, "learning_rate": 0.0009669634527603977, "loss": 0.0517, "num_input_tokens_seen": 54188928, "step": 25100 }, { "epoch": 4.095432300163132, "grad_norm": 0.3767969012260437, "learning_rate": 0.000966938003802173, "loss": 0.0606, "num_input_tokens_seen": 54200192, "step": 25105 }, { "epoch": 4.096247960848287, "grad_norm": 0.22158771753311157, "learning_rate": 0.0009669125453808573, "loss": 0.103, "num_input_tokens_seen": 54211040, "step": 25110 }, { "epoch": 4.097063621533442, "grad_norm": 0.06871854513883591, "learning_rate": 0.0009668870774969668, "loss": 0.0187, "num_input_tokens_seen": 54222560, "step": 25115 }, { "epoch": 4.097879282218597, "grad_norm": 0.00790423434227705, "learning_rate": 0.0009668616001510173, "loss": 0.0914, "num_input_tokens_seen": 54233792, "step": 25120 }, { "epoch": 4.0986949429037525, "grad_norm": 0.1413654237985611, "learning_rate": 0.0009668361133435252, "loss": 0.126, "num_input_tokens_seen": 54245632, "step": 25125 }, { "epoch": 4.099510603588907, "grad_norm": 0.08008521795272827, "learning_rate": 0.0009668106170750071, "loss": 0.1374, "num_input_tokens_seen": 54255776, "step": 25130 }, { "epoch": 4.100326264274062, "grad_norm": 0.35652026534080505, "learning_rate": 0.0009667851113459795, "loss": 0.1463, "num_input_tokens_seen": 54266720, "step": 25135 }, { "epoch": 4.101141924959217, "grad_norm": 0.05076137185096741, "learning_rate": 0.0009667595961569595, "loss": 0.202, "num_input_tokens_seen": 54277408, "step": 25140 }, { "epoch": 4.101957585644372, "grad_norm": 0.05224034562706947, "learning_rate": 0.0009667340715084641, "loss": 0.1355, "num_input_tokens_seen": 54288608, "step": 25145 }, { "epoch": 4.102773246329527, "grad_norm": 0.016414156183600426, "learning_rate": 0.0009667085374010107, "loss": 0.0597, "num_input_tokens_seen": 54298848, "step": 25150 }, { "epoch": 4.103588907014682, "grad_norm": 0.010601280257105827, "learning_rate": 0.0009666829938351169, "loss": 0.0886, "num_input_tokens_seen": 54309088, "step": 25155 }, { "epoch": 4.104404567699837, "grad_norm": 0.02552439644932747, "learning_rate": 0.0009666574408113, "loss": 0.0656, "num_input_tokens_seen": 54318048, "step": 25160 }, { "epoch": 4.105220228384992, "grad_norm": 0.05170471593737602, "learning_rate": 0.0009666318783300782, "loss": 0.0475, "num_input_tokens_seen": 54328928, "step": 25165 }, { "epoch": 4.106035889070147, "grad_norm": 0.13868382573127747, "learning_rate": 0.0009666063063919693, "loss": 0.0674, "num_input_tokens_seen": 54340224, "step": 25170 }, { "epoch": 4.1068515497553015, "grad_norm": 0.12029463052749634, "learning_rate": 0.0009665807249974917, "loss": 0.0516, "num_input_tokens_seen": 54351584, "step": 25175 }, { "epoch": 4.107667210440456, "grad_norm": 0.02942846529185772, "learning_rate": 0.0009665551341471639, "loss": 0.1123, "num_input_tokens_seen": 54360896, "step": 25180 }, { "epoch": 4.108482871125612, "grad_norm": 0.01946329139173031, "learning_rate": 0.0009665295338415044, "loss": 0.1048, "num_input_tokens_seen": 54373024, "step": 25185 }, { "epoch": 4.109298531810767, "grad_norm": 0.025805486366152763, "learning_rate": 0.0009665039240810319, "loss": 0.061, "num_input_tokens_seen": 54384800, "step": 25190 }, { "epoch": 4.110114192495922, "grad_norm": 0.2747875154018402, "learning_rate": 0.0009664783048662658, "loss": 0.0558, "num_input_tokens_seen": 54395392, "step": 25195 }, { "epoch": 4.1109298531810765, "grad_norm": 0.04171789437532425, "learning_rate": 0.0009664526761977249, "loss": 0.1369, "num_input_tokens_seen": 54406848, "step": 25200 }, { "epoch": 4.111745513866231, "grad_norm": 0.17319516837596893, "learning_rate": 0.0009664270380759289, "loss": 0.2112, "num_input_tokens_seen": 54418944, "step": 25205 }, { "epoch": 4.112561174551387, "grad_norm": 0.08778028190135956, "learning_rate": 0.0009664013905013971, "loss": 0.0948, "num_input_tokens_seen": 54429792, "step": 25210 }, { "epoch": 4.113376835236542, "grad_norm": 0.18914636969566345, "learning_rate": 0.0009663757334746497, "loss": 0.0718, "num_input_tokens_seen": 54440800, "step": 25215 }, { "epoch": 4.114192495921697, "grad_norm": 0.034624796360731125, "learning_rate": 0.0009663500669962063, "loss": 0.1077, "num_input_tokens_seen": 54452608, "step": 25220 }, { "epoch": 4.1150081566068515, "grad_norm": 0.1814250648021698, "learning_rate": 0.0009663243910665872, "loss": 0.0399, "num_input_tokens_seen": 54463488, "step": 25225 }, { "epoch": 4.115823817292006, "grad_norm": 0.011065627448260784, "learning_rate": 0.0009662987056863128, "loss": 0.0498, "num_input_tokens_seen": 54474656, "step": 25230 }, { "epoch": 4.116639477977161, "grad_norm": 0.03705109655857086, "learning_rate": 0.0009662730108559034, "loss": 0.1093, "num_input_tokens_seen": 54483744, "step": 25235 }, { "epoch": 4.117455138662317, "grad_norm": 0.02399521879851818, "learning_rate": 0.0009662473065758801, "loss": 0.0472, "num_input_tokens_seen": 54494976, "step": 25240 }, { "epoch": 4.118270799347472, "grad_norm": 0.2585715651512146, "learning_rate": 0.0009662215928467636, "loss": 0.0411, "num_input_tokens_seen": 54506368, "step": 25245 }, { "epoch": 4.1190864600326265, "grad_norm": 0.314626008272171, "learning_rate": 0.000966195869669075, "loss": 0.0651, "num_input_tokens_seen": 54517920, "step": 25250 }, { "epoch": 4.119902120717781, "grad_norm": 0.058214277029037476, "learning_rate": 0.0009661701370433358, "loss": 0.0455, "num_input_tokens_seen": 54528416, "step": 25255 }, { "epoch": 4.120717781402936, "grad_norm": 0.4382697343826294, "learning_rate": 0.0009661443949700674, "loss": 0.1008, "num_input_tokens_seen": 54539328, "step": 25260 }, { "epoch": 4.121533442088092, "grad_norm": 0.06901302188634872, "learning_rate": 0.0009661186434497915, "loss": 0.0201, "num_input_tokens_seen": 54549376, "step": 25265 }, { "epoch": 4.122349102773247, "grad_norm": 0.09416428953409195, "learning_rate": 0.0009660928824830299, "loss": 0.0587, "num_input_tokens_seen": 54560064, "step": 25270 }, { "epoch": 4.123164763458401, "grad_norm": 0.023836754262447357, "learning_rate": 0.0009660671120703048, "loss": 0.1078, "num_input_tokens_seen": 54570848, "step": 25275 }, { "epoch": 4.123980424143556, "grad_norm": 0.007587221916764975, "learning_rate": 0.0009660413322121384, "loss": 0.2007, "num_input_tokens_seen": 54581216, "step": 25280 }, { "epoch": 4.124796084828711, "grad_norm": 0.06471949815750122, "learning_rate": 0.0009660155429090531, "loss": 0.1016, "num_input_tokens_seen": 54592896, "step": 25285 }, { "epoch": 4.125611745513866, "grad_norm": 0.08148226141929626, "learning_rate": 0.0009659897441615717, "loss": 0.2952, "num_input_tokens_seen": 54603296, "step": 25290 }, { "epoch": 4.126427406199022, "grad_norm": 0.0327051542699337, "learning_rate": 0.000965963935970217, "loss": 0.1216, "num_input_tokens_seen": 54613696, "step": 25295 }, { "epoch": 4.127243066884176, "grad_norm": 0.26715996861457825, "learning_rate": 0.0009659381183355121, "loss": 0.1101, "num_input_tokens_seen": 54625184, "step": 25300 }, { "epoch": 4.128058727569331, "grad_norm": 0.24433206021785736, "learning_rate": 0.0009659122912579801, "loss": 0.0762, "num_input_tokens_seen": 54636064, "step": 25305 }, { "epoch": 4.128874388254486, "grad_norm": 0.04172002896666527, "learning_rate": 0.0009658864547381445, "loss": 0.0547, "num_input_tokens_seen": 54646336, "step": 25310 }, { "epoch": 4.129690048939641, "grad_norm": 0.011472988873720169, "learning_rate": 0.0009658606087765288, "loss": 0.1969, "num_input_tokens_seen": 54657920, "step": 25315 }, { "epoch": 4.130505709624796, "grad_norm": 0.12703658640384674, "learning_rate": 0.0009658347533736569, "loss": 0.1543, "num_input_tokens_seen": 54669088, "step": 25320 }, { "epoch": 4.131321370309951, "grad_norm": 0.018232373520731926, "learning_rate": 0.0009658088885300528, "loss": 0.0953, "num_input_tokens_seen": 54678304, "step": 25325 }, { "epoch": 4.132137030995106, "grad_norm": 0.26807183027267456, "learning_rate": 0.0009657830142462406, "loss": 0.1074, "num_input_tokens_seen": 54688768, "step": 25330 }, { "epoch": 4.132952691680261, "grad_norm": 0.01655859686434269, "learning_rate": 0.0009657571305227449, "loss": 0.0531, "num_input_tokens_seen": 54699872, "step": 25335 }, { "epoch": 4.133768352365416, "grad_norm": 0.13570939004421234, "learning_rate": 0.0009657312373600899, "loss": 0.0634, "num_input_tokens_seen": 54709600, "step": 25340 }, { "epoch": 4.134584013050571, "grad_norm": 0.10058213770389557, "learning_rate": 0.0009657053347588005, "loss": 0.2194, "num_input_tokens_seen": 54720960, "step": 25345 }, { "epoch": 4.135399673735726, "grad_norm": 0.08489968627691269, "learning_rate": 0.0009656794227194019, "loss": 0.0539, "num_input_tokens_seen": 54731520, "step": 25350 }, { "epoch": 4.136215334420881, "grad_norm": 0.17520399391651154, "learning_rate": 0.0009656535012424189, "loss": 0.1046, "num_input_tokens_seen": 54742432, "step": 25355 }, { "epoch": 4.137030995106036, "grad_norm": 0.4783601462841034, "learning_rate": 0.000965627570328377, "loss": 0.1682, "num_input_tokens_seen": 54753248, "step": 25360 }, { "epoch": 4.137846655791191, "grad_norm": 0.04738840088248253, "learning_rate": 0.0009656016299778017, "loss": 0.0543, "num_input_tokens_seen": 54763584, "step": 25365 }, { "epoch": 4.138662316476346, "grad_norm": 0.2905915081501007, "learning_rate": 0.0009655756801912188, "loss": 0.1306, "num_input_tokens_seen": 54774240, "step": 25370 }, { "epoch": 4.1394779771615005, "grad_norm": 0.0538397952914238, "learning_rate": 0.000965549720969154, "loss": 0.1106, "num_input_tokens_seen": 54785056, "step": 25375 }, { "epoch": 4.140293637846656, "grad_norm": 0.16023701429367065, "learning_rate": 0.0009655237523121336, "loss": 0.0648, "num_input_tokens_seen": 54796704, "step": 25380 }, { "epoch": 4.141109298531811, "grad_norm": 0.09773515164852142, "learning_rate": 0.0009654977742206837, "loss": 0.0519, "num_input_tokens_seen": 54807872, "step": 25385 }, { "epoch": 4.141924959216966, "grad_norm": 0.21291978657245636, "learning_rate": 0.000965471786695331, "loss": 0.1499, "num_input_tokens_seen": 54818624, "step": 25390 }, { "epoch": 4.142740619902121, "grad_norm": 0.30301910638809204, "learning_rate": 0.0009654457897366021, "loss": 0.1738, "num_input_tokens_seen": 54828864, "step": 25395 }, { "epoch": 4.143556280587275, "grad_norm": 0.12408158928155899, "learning_rate": 0.0009654197833450235, "loss": 0.0846, "num_input_tokens_seen": 54840576, "step": 25400 }, { "epoch": 4.14437194127243, "grad_norm": 0.08526446670293808, "learning_rate": 0.0009653937675211229, "loss": 0.051, "num_input_tokens_seen": 54852064, "step": 25405 }, { "epoch": 4.145187601957586, "grad_norm": 0.1611376702785492, "learning_rate": 0.000965367742265427, "loss": 0.0999, "num_input_tokens_seen": 54862400, "step": 25410 }, { "epoch": 4.146003262642741, "grad_norm": 0.02126043103635311, "learning_rate": 0.0009653417075784635, "loss": 0.0441, "num_input_tokens_seen": 54873728, "step": 25415 }, { "epoch": 4.146818923327896, "grad_norm": 0.09502722322940826, "learning_rate": 0.0009653156634607601, "loss": 0.0848, "num_input_tokens_seen": 54885024, "step": 25420 }, { "epoch": 4.14763458401305, "grad_norm": 0.06004756689071655, "learning_rate": 0.0009652896099128443, "loss": 0.0715, "num_input_tokens_seen": 54895424, "step": 25425 }, { "epoch": 4.148450244698205, "grad_norm": 0.2930968105792999, "learning_rate": 0.0009652635469352443, "loss": 0.1358, "num_input_tokens_seen": 54906944, "step": 25430 }, { "epoch": 4.149265905383361, "grad_norm": 0.020675642415881157, "learning_rate": 0.0009652374745284884, "loss": 0.1213, "num_input_tokens_seen": 54916736, "step": 25435 }, { "epoch": 4.150081566068516, "grad_norm": 0.4541945457458496, "learning_rate": 0.0009652113926931048, "loss": 0.1811, "num_input_tokens_seen": 54927232, "step": 25440 }, { "epoch": 4.150897226753671, "grad_norm": 0.0489797443151474, "learning_rate": 0.0009651853014296223, "loss": 0.1274, "num_input_tokens_seen": 54937024, "step": 25445 }, { "epoch": 4.151712887438825, "grad_norm": 0.030233897268772125, "learning_rate": 0.0009651592007385694, "loss": 0.0824, "num_input_tokens_seen": 54947584, "step": 25450 }, { "epoch": 4.15252854812398, "grad_norm": 0.03191889822483063, "learning_rate": 0.0009651330906204752, "loss": 0.0361, "num_input_tokens_seen": 54959104, "step": 25455 }, { "epoch": 4.153344208809135, "grad_norm": 0.011361703276634216, "learning_rate": 0.0009651069710758689, "loss": 0.0869, "num_input_tokens_seen": 54970880, "step": 25460 }, { "epoch": 4.154159869494291, "grad_norm": 0.08558481931686401, "learning_rate": 0.0009650808421052798, "loss": 0.0764, "num_input_tokens_seen": 54982176, "step": 25465 }, { "epoch": 4.1549755301794455, "grad_norm": 0.17563608288764954, "learning_rate": 0.0009650547037092374, "loss": 0.1344, "num_input_tokens_seen": 54993248, "step": 25470 }, { "epoch": 4.1557911908646, "grad_norm": 0.06877048313617706, "learning_rate": 0.0009650285558882715, "loss": 0.3046, "num_input_tokens_seen": 55004864, "step": 25475 }, { "epoch": 4.156606851549755, "grad_norm": 0.04510578513145447, "learning_rate": 0.0009650023986429119, "loss": 0.0306, "num_input_tokens_seen": 55015552, "step": 25480 }, { "epoch": 4.15742251223491, "grad_norm": 0.27285414934158325, "learning_rate": 0.000964976231973689, "loss": 0.1472, "num_input_tokens_seen": 55027872, "step": 25485 }, { "epoch": 4.158238172920065, "grad_norm": 0.15133671462535858, "learning_rate": 0.0009649500558811328, "loss": 0.1079, "num_input_tokens_seen": 55039424, "step": 25490 }, { "epoch": 4.1590538336052205, "grad_norm": 0.15949246287345886, "learning_rate": 0.0009649238703657739, "loss": 0.0885, "num_input_tokens_seen": 55049440, "step": 25495 }, { "epoch": 4.159869494290375, "grad_norm": 0.03408646956086159, "learning_rate": 0.0009648976754281429, "loss": 0.0872, "num_input_tokens_seen": 55060064, "step": 25500 }, { "epoch": 4.16068515497553, "grad_norm": 0.25437799096107483, "learning_rate": 0.0009648714710687708, "loss": 0.1717, "num_input_tokens_seen": 55070368, "step": 25505 }, { "epoch": 4.161500815660685, "grad_norm": 0.008440135978162289, "learning_rate": 0.0009648452572881885, "loss": 0.0454, "num_input_tokens_seen": 55081760, "step": 25510 }, { "epoch": 4.16231647634584, "grad_norm": 0.04753102734684944, "learning_rate": 0.0009648190340869274, "loss": 0.0463, "num_input_tokens_seen": 55092928, "step": 25515 }, { "epoch": 4.1631321370309955, "grad_norm": 0.11574207246303558, "learning_rate": 0.000964792801465519, "loss": 0.0861, "num_input_tokens_seen": 55103360, "step": 25520 }, { "epoch": 4.16394779771615, "grad_norm": 0.0759081095457077, "learning_rate": 0.0009647665594244947, "loss": 0.0526, "num_input_tokens_seen": 55112864, "step": 25525 }, { "epoch": 4.164763458401305, "grad_norm": 0.11457517743110657, "learning_rate": 0.0009647403079643866, "loss": 0.1815, "num_input_tokens_seen": 55123744, "step": 25530 }, { "epoch": 4.16557911908646, "grad_norm": 0.11583174765110016, "learning_rate": 0.0009647140470857267, "loss": 0.0611, "num_input_tokens_seen": 55135072, "step": 25535 }, { "epoch": 4.166394779771615, "grad_norm": 0.3797663748264313, "learning_rate": 0.0009646877767890469, "loss": 0.1186, "num_input_tokens_seen": 55146304, "step": 25540 }, { "epoch": 4.16721044045677, "grad_norm": 0.2456364929676056, "learning_rate": 0.00096466149707488, "loss": 0.1357, "num_input_tokens_seen": 55156512, "step": 25545 }, { "epoch": 4.168026101141925, "grad_norm": 0.012389078736305237, "learning_rate": 0.0009646352079437582, "loss": 0.057, "num_input_tokens_seen": 55167808, "step": 25550 }, { "epoch": 4.16884176182708, "grad_norm": 0.6323313117027283, "learning_rate": 0.0009646089093962145, "loss": 0.1397, "num_input_tokens_seen": 55178048, "step": 25555 }, { "epoch": 4.169657422512235, "grad_norm": 0.3194624185562134, "learning_rate": 0.0009645826014327819, "loss": 0.0766, "num_input_tokens_seen": 55188896, "step": 25560 }, { "epoch": 4.17047308319739, "grad_norm": 0.08132106810808182, "learning_rate": 0.0009645562840539935, "loss": 0.078, "num_input_tokens_seen": 55198912, "step": 25565 }, { "epoch": 4.171288743882545, "grad_norm": 0.1452178955078125, "learning_rate": 0.0009645299572603827, "loss": 0.1232, "num_input_tokens_seen": 55209504, "step": 25570 }, { "epoch": 4.1721044045677, "grad_norm": 0.02837168239057064, "learning_rate": 0.000964503621052483, "loss": 0.2416, "num_input_tokens_seen": 55219456, "step": 25575 }, { "epoch": 4.172920065252855, "grad_norm": 0.22412027418613434, "learning_rate": 0.0009644772754308281, "loss": 0.1037, "num_input_tokens_seen": 55230400, "step": 25580 }, { "epoch": 4.17373572593801, "grad_norm": 0.15283769369125366, "learning_rate": 0.0009644509203959522, "loss": 0.0538, "num_input_tokens_seen": 55242048, "step": 25585 }, { "epoch": 4.174551386623165, "grad_norm": 0.06101622059941292, "learning_rate": 0.0009644245559483891, "loss": 0.0852, "num_input_tokens_seen": 55252256, "step": 25590 }, { "epoch": 4.1753670473083195, "grad_norm": 0.31945663690567017, "learning_rate": 0.0009643981820886731, "loss": 0.1152, "num_input_tokens_seen": 55262912, "step": 25595 }, { "epoch": 4.176182707993474, "grad_norm": 0.2228097915649414, "learning_rate": 0.0009643717988173389, "loss": 0.1422, "num_input_tokens_seen": 55273920, "step": 25600 }, { "epoch": 4.17699836867863, "grad_norm": 0.136541947722435, "learning_rate": 0.0009643454061349211, "loss": 0.0732, "num_input_tokens_seen": 55284992, "step": 25605 }, { "epoch": 4.177814029363785, "grad_norm": 0.07295294106006622, "learning_rate": 0.0009643190040419545, "loss": 0.2826, "num_input_tokens_seen": 55295392, "step": 25610 }, { "epoch": 4.17862969004894, "grad_norm": 0.2564244866371155, "learning_rate": 0.0009642925925389743, "loss": 0.3393, "num_input_tokens_seen": 55306048, "step": 25615 }, { "epoch": 4.1794453507340945, "grad_norm": 0.19424451887607574, "learning_rate": 0.0009642661716265156, "loss": 0.1063, "num_input_tokens_seen": 55315776, "step": 25620 }, { "epoch": 4.180261011419249, "grad_norm": 0.19137835502624512, "learning_rate": 0.0009642397413051142, "loss": 0.0954, "num_input_tokens_seen": 55326400, "step": 25625 }, { "epoch": 4.181076672104404, "grad_norm": 0.24608632922172546, "learning_rate": 0.0009642133015753054, "loss": 0.2832, "num_input_tokens_seen": 55337376, "step": 25630 }, { "epoch": 4.18189233278956, "grad_norm": 0.04915120452642441, "learning_rate": 0.0009641868524376252, "loss": 0.0838, "num_input_tokens_seen": 55347584, "step": 25635 }, { "epoch": 4.182707993474715, "grad_norm": 0.0421190969645977, "learning_rate": 0.0009641603938926093, "loss": 0.1133, "num_input_tokens_seen": 55358784, "step": 25640 }, { "epoch": 4.1835236541598695, "grad_norm": 0.09190967679023743, "learning_rate": 0.0009641339259407946, "loss": 0.0353, "num_input_tokens_seen": 55369248, "step": 25645 }, { "epoch": 4.184339314845024, "grad_norm": 0.03399736434221268, "learning_rate": 0.0009641074485827168, "loss": 0.0583, "num_input_tokens_seen": 55378784, "step": 25650 }, { "epoch": 4.185154975530179, "grad_norm": 0.02337212860584259, "learning_rate": 0.0009640809618189129, "loss": 0.1396, "num_input_tokens_seen": 55390048, "step": 25655 }, { "epoch": 4.185970636215335, "grad_norm": 0.10072391480207443, "learning_rate": 0.0009640544656499197, "loss": 0.136, "num_input_tokens_seen": 55401120, "step": 25660 }, { "epoch": 4.18678629690049, "grad_norm": 0.18243588507175446, "learning_rate": 0.0009640279600762738, "loss": 0.1119, "num_input_tokens_seen": 55412192, "step": 25665 }, { "epoch": 4.1876019575856445, "grad_norm": 0.16235780715942383, "learning_rate": 0.0009640014450985129, "loss": 0.273, "num_input_tokens_seen": 55422880, "step": 25670 }, { "epoch": 4.188417618270799, "grad_norm": 0.14665402472019196, "learning_rate": 0.0009639749207171739, "loss": 0.2189, "num_input_tokens_seen": 55434080, "step": 25675 }, { "epoch": 4.189233278955954, "grad_norm": 0.10478851199150085, "learning_rate": 0.0009639483869327946, "loss": 0.0945, "num_input_tokens_seen": 55444576, "step": 25680 }, { "epoch": 4.190048939641109, "grad_norm": 0.09666270017623901, "learning_rate": 0.0009639218437459125, "loss": 0.1206, "num_input_tokens_seen": 55455648, "step": 25685 }, { "epoch": 4.190864600326265, "grad_norm": 0.017156513407826424, "learning_rate": 0.000963895291157066, "loss": 0.1561, "num_input_tokens_seen": 55466304, "step": 25690 }, { "epoch": 4.191680261011419, "grad_norm": 0.09270573407411575, "learning_rate": 0.0009638687291667927, "loss": 0.1989, "num_input_tokens_seen": 55476032, "step": 25695 }, { "epoch": 4.192495921696574, "grad_norm": 0.09968163818120956, "learning_rate": 0.0009638421577756313, "loss": 0.0395, "num_input_tokens_seen": 55487072, "step": 25700 }, { "epoch": 4.193311582381729, "grad_norm": 0.0475512258708477, "learning_rate": 0.0009638155769841201, "loss": 0.0853, "num_input_tokens_seen": 55498272, "step": 25705 }, { "epoch": 4.194127243066884, "grad_norm": 0.03529131039977074, "learning_rate": 0.0009637889867927978, "loss": 0.0582, "num_input_tokens_seen": 55508800, "step": 25710 }, { "epoch": 4.19494290375204, "grad_norm": 0.12867122888565063, "learning_rate": 0.0009637623872022034, "loss": 0.0761, "num_input_tokens_seen": 55520128, "step": 25715 }, { "epoch": 4.195758564437194, "grad_norm": 0.003700670087710023, "learning_rate": 0.0009637357782128758, "loss": 0.0828, "num_input_tokens_seen": 55531776, "step": 25720 }, { "epoch": 4.196574225122349, "grad_norm": 0.10053911805152893, "learning_rate": 0.0009637091598253544, "loss": 0.0955, "num_input_tokens_seen": 55543040, "step": 25725 }, { "epoch": 4.197389885807504, "grad_norm": 0.04466938227415085, "learning_rate": 0.0009636825320401787, "loss": 0.0486, "num_input_tokens_seen": 55554080, "step": 25730 }, { "epoch": 4.198205546492659, "grad_norm": 0.011144734919071198, "learning_rate": 0.0009636558948578882, "loss": 0.0234, "num_input_tokens_seen": 55565408, "step": 25735 }, { "epoch": 4.199021207177814, "grad_norm": 0.01494020875543356, "learning_rate": 0.0009636292482790229, "loss": 0.1964, "num_input_tokens_seen": 55576224, "step": 25740 }, { "epoch": 4.199836867862969, "grad_norm": 0.018054723739624023, "learning_rate": 0.0009636025923041227, "loss": 0.0563, "num_input_tokens_seen": 55586912, "step": 25745 }, { "epoch": 4.200652528548124, "grad_norm": 0.0247498769313097, "learning_rate": 0.0009635759269337276, "loss": 0.0704, "num_input_tokens_seen": 55597696, "step": 25750 }, { "epoch": 4.201468189233279, "grad_norm": 0.12987296283245087, "learning_rate": 0.0009635492521683785, "loss": 0.1079, "num_input_tokens_seen": 55608096, "step": 25755 }, { "epoch": 4.202283849918434, "grad_norm": 0.03196963295340538, "learning_rate": 0.0009635225680086157, "loss": 0.0891, "num_input_tokens_seen": 55618112, "step": 25760 }, { "epoch": 4.203099510603589, "grad_norm": 0.012279840186238289, "learning_rate": 0.00096349587445498, "loss": 0.0528, "num_input_tokens_seen": 55629952, "step": 25765 }, { "epoch": 4.2039151712887435, "grad_norm": 0.20143692195415497, "learning_rate": 0.0009634691715080124, "loss": 0.1146, "num_input_tokens_seen": 55639808, "step": 25770 }, { "epoch": 4.204730831973899, "grad_norm": 0.06499030441045761, "learning_rate": 0.0009634424591682542, "loss": 0.1626, "num_input_tokens_seen": 55649824, "step": 25775 }, { "epoch": 4.205546492659054, "grad_norm": 0.18569932878017426, "learning_rate": 0.0009634157374362466, "loss": 0.0857, "num_input_tokens_seen": 55658944, "step": 25780 }, { "epoch": 4.206362153344209, "grad_norm": 0.15316970646381378, "learning_rate": 0.0009633890063125313, "loss": 0.0486, "num_input_tokens_seen": 55668096, "step": 25785 }, { "epoch": 4.207177814029364, "grad_norm": 0.007165601477026939, "learning_rate": 0.0009633622657976498, "loss": 0.1663, "num_input_tokens_seen": 55678496, "step": 25790 }, { "epoch": 4.2079934747145185, "grad_norm": 0.01177747081965208, "learning_rate": 0.0009633355158921441, "loss": 0.207, "num_input_tokens_seen": 55690304, "step": 25795 }, { "epoch": 4.208809135399674, "grad_norm": 0.07368795573711395, "learning_rate": 0.0009633087565965564, "loss": 0.066, "num_input_tokens_seen": 55700448, "step": 25800 }, { "epoch": 4.209624796084829, "grad_norm": 0.04800066724419594, "learning_rate": 0.0009632819879114291, "loss": 0.0748, "num_input_tokens_seen": 55711392, "step": 25805 }, { "epoch": 4.210440456769984, "grad_norm": 0.17369753122329712, "learning_rate": 0.0009632552098373045, "loss": 0.196, "num_input_tokens_seen": 55722336, "step": 25810 }, { "epoch": 4.211256117455139, "grad_norm": 0.022813379764556885, "learning_rate": 0.0009632284223747255, "loss": 0.0677, "num_input_tokens_seen": 55733056, "step": 25815 }, { "epoch": 4.212071778140293, "grad_norm": 0.04420700669288635, "learning_rate": 0.0009632016255242348, "loss": 0.0635, "num_input_tokens_seen": 55744192, "step": 25820 }, { "epoch": 4.212887438825448, "grad_norm": 0.07131849974393845, "learning_rate": 0.0009631748192863756, "loss": 0.0775, "num_input_tokens_seen": 55753440, "step": 25825 }, { "epoch": 4.213703099510604, "grad_norm": 0.03363104164600372, "learning_rate": 0.0009631480036616911, "loss": 0.0953, "num_input_tokens_seen": 55762720, "step": 25830 }, { "epoch": 4.214518760195759, "grad_norm": 0.05466218665242195, "learning_rate": 0.0009631211786507248, "loss": 0.0522, "num_input_tokens_seen": 55771968, "step": 25835 }, { "epoch": 4.215334420880914, "grad_norm": 0.02284003049135208, "learning_rate": 0.0009630943442540202, "loss": 0.2105, "num_input_tokens_seen": 55782048, "step": 25840 }, { "epoch": 4.216150081566068, "grad_norm": 0.46598702669143677, "learning_rate": 0.0009630675004721212, "loss": 0.2925, "num_input_tokens_seen": 55792864, "step": 25845 }, { "epoch": 4.216965742251223, "grad_norm": 0.21863995492458344, "learning_rate": 0.000963040647305572, "loss": 0.0909, "num_input_tokens_seen": 55804416, "step": 25850 }, { "epoch": 4.217781402936378, "grad_norm": 0.1444971114397049, "learning_rate": 0.0009630137847549166, "loss": 0.1113, "num_input_tokens_seen": 55815840, "step": 25855 }, { "epoch": 4.218597063621534, "grad_norm": 0.05662737414240837, "learning_rate": 0.0009629869128206997, "loss": 0.089, "num_input_tokens_seen": 55826496, "step": 25860 }, { "epoch": 4.219412724306689, "grad_norm": 0.07088807970285416, "learning_rate": 0.0009629600315034652, "loss": 0.0936, "num_input_tokens_seen": 55836192, "step": 25865 }, { "epoch": 4.220228384991843, "grad_norm": 0.02619568631052971, "learning_rate": 0.0009629331408037588, "loss": 0.0716, "num_input_tokens_seen": 55848256, "step": 25870 }, { "epoch": 4.221044045676998, "grad_norm": 0.050660304725170135, "learning_rate": 0.0009629062407221248, "loss": 0.1392, "num_input_tokens_seen": 55858432, "step": 25875 }, { "epoch": 4.221859706362153, "grad_norm": 0.033399470150470734, "learning_rate": 0.0009628793312591086, "loss": 0.0685, "num_input_tokens_seen": 55869248, "step": 25880 }, { "epoch": 4.222675367047309, "grad_norm": 0.09224677085876465, "learning_rate": 0.0009628524124152555, "loss": 0.0955, "num_input_tokens_seen": 55879232, "step": 25885 }, { "epoch": 4.2234910277324635, "grad_norm": 0.04470451921224594, "learning_rate": 0.0009628254841911113, "loss": 0.0594, "num_input_tokens_seen": 55889792, "step": 25890 }, { "epoch": 4.224306688417618, "grad_norm": 0.1628771424293518, "learning_rate": 0.0009627985465872214, "loss": 0.1982, "num_input_tokens_seen": 55901216, "step": 25895 }, { "epoch": 4.225122349102773, "grad_norm": 0.29836219549179077, "learning_rate": 0.0009627715996041319, "loss": 0.1425, "num_input_tokens_seen": 55911968, "step": 25900 }, { "epoch": 4.225938009787928, "grad_norm": 0.0919804647564888, "learning_rate": 0.0009627446432423888, "loss": 0.0967, "num_input_tokens_seen": 55921440, "step": 25905 }, { "epoch": 4.226753670473083, "grad_norm": 0.025671975687146187, "learning_rate": 0.0009627176775025385, "loss": 0.0525, "num_input_tokens_seen": 55931872, "step": 25910 }, { "epoch": 4.2275693311582385, "grad_norm": 0.11402413994073868, "learning_rate": 0.0009626907023851275, "loss": 0.1776, "num_input_tokens_seen": 55942208, "step": 25915 }, { "epoch": 4.228384991843393, "grad_norm": 0.14735448360443115, "learning_rate": 0.0009626637178907024, "loss": 0.1074, "num_input_tokens_seen": 55952352, "step": 25920 }, { "epoch": 4.229200652528548, "grad_norm": 0.03276858851313591, "learning_rate": 0.0009626367240198101, "loss": 0.0593, "num_input_tokens_seen": 55964288, "step": 25925 }, { "epoch": 4.230016313213703, "grad_norm": 0.20905552804470062, "learning_rate": 0.0009626097207729978, "loss": 0.0978, "num_input_tokens_seen": 55975648, "step": 25930 }, { "epoch": 4.230831973898858, "grad_norm": 0.12839220464229584, "learning_rate": 0.0009625827081508125, "loss": 0.1711, "num_input_tokens_seen": 55986368, "step": 25935 }, { "epoch": 4.231647634584013, "grad_norm": 0.005625420715659857, "learning_rate": 0.000962555686153802, "loss": 0.0093, "num_input_tokens_seen": 55996896, "step": 25940 }, { "epoch": 4.232463295269168, "grad_norm": 0.19467593729496002, "learning_rate": 0.0009625286547825136, "loss": 0.0622, "num_input_tokens_seen": 56006592, "step": 25945 }, { "epoch": 4.233278955954323, "grad_norm": 0.07500498741865158, "learning_rate": 0.0009625016140374952, "loss": 0.1537, "num_input_tokens_seen": 56016512, "step": 25950 }, { "epoch": 4.234094616639478, "grad_norm": 0.02065885066986084, "learning_rate": 0.0009624745639192949, "loss": 0.0533, "num_input_tokens_seen": 56025920, "step": 25955 }, { "epoch": 4.234910277324633, "grad_norm": 0.008839918300509453, "learning_rate": 0.0009624475044284609, "loss": 0.1495, "num_input_tokens_seen": 56034880, "step": 25960 }, { "epoch": 4.235725938009788, "grad_norm": 0.03758295252919197, "learning_rate": 0.0009624204355655416, "loss": 0.2348, "num_input_tokens_seen": 56045184, "step": 25965 }, { "epoch": 4.236541598694943, "grad_norm": 0.08983998745679855, "learning_rate": 0.0009623933573310855, "loss": 0.1742, "num_input_tokens_seen": 56055872, "step": 25970 }, { "epoch": 4.237357259380098, "grad_norm": 0.09549932926893234, "learning_rate": 0.0009623662697256414, "loss": 0.1658, "num_input_tokens_seen": 56068160, "step": 25975 }, { "epoch": 4.238172920065253, "grad_norm": 0.008782517164945602, "learning_rate": 0.0009623391727497584, "loss": 0.0257, "num_input_tokens_seen": 56078880, "step": 25980 }, { "epoch": 4.238988580750408, "grad_norm": 0.01383864413946867, "learning_rate": 0.0009623120664039855, "loss": 0.1021, "num_input_tokens_seen": 56090016, "step": 25985 }, { "epoch": 4.239804241435563, "grad_norm": 0.06465844064950943, "learning_rate": 0.000962284950688872, "loss": 0.0317, "num_input_tokens_seen": 56100576, "step": 25990 }, { "epoch": 4.240619902120717, "grad_norm": 0.06504257023334503, "learning_rate": 0.0009622578256049675, "loss": 0.1385, "num_input_tokens_seen": 56108512, "step": 25995 }, { "epoch": 4.241435562805873, "grad_norm": 0.07334943115711212, "learning_rate": 0.0009622306911528219, "loss": 0.1096, "num_input_tokens_seen": 56119360, "step": 26000 }, { "epoch": 4.242251223491028, "grad_norm": 0.03921614959836006, "learning_rate": 0.0009622035473329848, "loss": 0.0251, "num_input_tokens_seen": 56130944, "step": 26005 }, { "epoch": 4.243066884176183, "grad_norm": 0.05885370075702667, "learning_rate": 0.0009621763941460067, "loss": 0.0752, "num_input_tokens_seen": 56142496, "step": 26010 }, { "epoch": 4.2438825448613375, "grad_norm": 0.10634082555770874, "learning_rate": 0.0009621492315924375, "loss": 0.0891, "num_input_tokens_seen": 56153920, "step": 26015 }, { "epoch": 4.244698205546492, "grad_norm": 0.1679215431213379, "learning_rate": 0.0009621220596728278, "loss": 0.033, "num_input_tokens_seen": 56164640, "step": 26020 }, { "epoch": 4.245513866231648, "grad_norm": 0.06742087751626968, "learning_rate": 0.0009620948783877285, "loss": 0.0266, "num_input_tokens_seen": 56174336, "step": 26025 }, { "epoch": 4.246329526916803, "grad_norm": 0.029800092801451683, "learning_rate": 0.0009620676877376902, "loss": 0.0215, "num_input_tokens_seen": 56185120, "step": 26030 }, { "epoch": 4.247145187601958, "grad_norm": 0.013753470033407211, "learning_rate": 0.000962040487723264, "loss": 0.0376, "num_input_tokens_seen": 56195360, "step": 26035 }, { "epoch": 4.2479608482871125, "grad_norm": 0.006139541510492563, "learning_rate": 0.0009620132783450011, "loss": 0.0435, "num_input_tokens_seen": 56205440, "step": 26040 }, { "epoch": 4.248776508972267, "grad_norm": 0.11157204210758209, "learning_rate": 0.0009619860596034531, "loss": 0.2533, "num_input_tokens_seen": 56216256, "step": 26045 }, { "epoch": 4.249592169657422, "grad_norm": 0.13364621996879578, "learning_rate": 0.0009619588314991716, "loss": 0.1817, "num_input_tokens_seen": 56227712, "step": 26050 }, { "epoch": 4.250407830342578, "grad_norm": 0.02315470390021801, "learning_rate": 0.0009619315940327082, "loss": 0.0271, "num_input_tokens_seen": 56239296, "step": 26055 }, { "epoch": 4.251223491027733, "grad_norm": 0.020722759887576103, "learning_rate": 0.0009619043472046151, "loss": 0.1503, "num_input_tokens_seen": 56248800, "step": 26060 }, { "epoch": 4.2520391517128875, "grad_norm": 0.13340431451797485, "learning_rate": 0.0009618770910154444, "loss": 0.1147, "num_input_tokens_seen": 56260576, "step": 26065 }, { "epoch": 4.252854812398042, "grad_norm": 0.045745156705379486, "learning_rate": 0.0009618498254657486, "loss": 0.1478, "num_input_tokens_seen": 56271680, "step": 26070 }, { "epoch": 4.253670473083197, "grad_norm": 0.03793095797300339, "learning_rate": 0.00096182255055608, "loss": 0.2014, "num_input_tokens_seen": 56282400, "step": 26075 }, { "epoch": 4.254486133768353, "grad_norm": 0.07550855726003647, "learning_rate": 0.0009617952662869918, "loss": 0.103, "num_input_tokens_seen": 56292640, "step": 26080 }, { "epoch": 4.255301794453508, "grad_norm": 0.0617767870426178, "learning_rate": 0.0009617679726590366, "loss": 0.0324, "num_input_tokens_seen": 56304064, "step": 26085 }, { "epoch": 4.2561174551386625, "grad_norm": 0.2718863785266876, "learning_rate": 0.0009617406696727676, "loss": 0.16, "num_input_tokens_seen": 56315008, "step": 26090 }, { "epoch": 4.256933115823817, "grad_norm": 0.0740048885345459, "learning_rate": 0.0009617133573287382, "loss": 0.0663, "num_input_tokens_seen": 56325344, "step": 26095 }, { "epoch": 4.257748776508972, "grad_norm": 0.0031481743790209293, "learning_rate": 0.0009616860356275019, "loss": 0.0327, "num_input_tokens_seen": 56335904, "step": 26100 }, { "epoch": 4.258564437194127, "grad_norm": 0.01902507059276104, "learning_rate": 0.0009616587045696124, "loss": 0.0436, "num_input_tokens_seen": 56347200, "step": 26105 }, { "epoch": 4.259380097879283, "grad_norm": 0.1126399040222168, "learning_rate": 0.0009616313641556235, "loss": 0.0567, "num_input_tokens_seen": 56358976, "step": 26110 }, { "epoch": 4.260195758564437, "grad_norm": 0.21379083395004272, "learning_rate": 0.0009616040143860896, "loss": 0.128, "num_input_tokens_seen": 56369344, "step": 26115 }, { "epoch": 4.261011419249592, "grad_norm": 0.03041026182472706, "learning_rate": 0.0009615766552615645, "loss": 0.1547, "num_input_tokens_seen": 56380896, "step": 26120 }, { "epoch": 4.261827079934747, "grad_norm": 0.024570845067501068, "learning_rate": 0.0009615492867826032, "loss": 0.1976, "num_input_tokens_seen": 56392768, "step": 26125 }, { "epoch": 4.262642740619902, "grad_norm": 0.13344892859458923, "learning_rate": 0.00096152190894976, "loss": 0.0765, "num_input_tokens_seen": 56403648, "step": 26130 }, { "epoch": 4.263458401305057, "grad_norm": 0.03469875454902649, "learning_rate": 0.0009614945217635897, "loss": 0.0482, "num_input_tokens_seen": 56415072, "step": 26135 }, { "epoch": 4.264274061990212, "grad_norm": 0.07122458517551422, "learning_rate": 0.0009614671252246476, "loss": 0.0994, "num_input_tokens_seen": 56424672, "step": 26140 }, { "epoch": 4.265089722675367, "grad_norm": 0.017191950231790543, "learning_rate": 0.0009614397193334887, "loss": 0.149, "num_input_tokens_seen": 56435008, "step": 26145 }, { "epoch": 4.265905383360522, "grad_norm": 0.0890747532248497, "learning_rate": 0.0009614123040906686, "loss": 0.0575, "num_input_tokens_seen": 56445472, "step": 26150 }, { "epoch": 4.266721044045677, "grad_norm": 0.005226694047451019, "learning_rate": 0.0009613848794967428, "loss": 0.0893, "num_input_tokens_seen": 56456480, "step": 26155 }, { "epoch": 4.267536704730832, "grad_norm": 0.19118745625019073, "learning_rate": 0.0009613574455522671, "loss": 0.043, "num_input_tokens_seen": 56466272, "step": 26160 }, { "epoch": 4.268352365415987, "grad_norm": 0.07536280900239944, "learning_rate": 0.0009613300022577974, "loss": 0.1838, "num_input_tokens_seen": 56477664, "step": 26165 }, { "epoch": 4.269168026101142, "grad_norm": 0.44081366062164307, "learning_rate": 0.00096130254961389, "loss": 0.2856, "num_input_tokens_seen": 56489536, "step": 26170 }, { "epoch": 4.269983686786297, "grad_norm": 0.2881818115711212, "learning_rate": 0.0009612750876211014, "loss": 0.3022, "num_input_tokens_seen": 56499392, "step": 26175 }, { "epoch": 4.270799347471452, "grad_norm": 0.23953905701637268, "learning_rate": 0.0009612476162799878, "loss": 0.1056, "num_input_tokens_seen": 56510144, "step": 26180 }, { "epoch": 4.271615008156607, "grad_norm": 0.029476171359419823, "learning_rate": 0.0009612201355911061, "loss": 0.098, "num_input_tokens_seen": 56521024, "step": 26185 }, { "epoch": 4.2724306688417615, "grad_norm": 0.0396023653447628, "learning_rate": 0.0009611926455550135, "loss": 0.0201, "num_input_tokens_seen": 56532256, "step": 26190 }, { "epoch": 4.273246329526917, "grad_norm": 0.26259082555770874, "learning_rate": 0.0009611651461722666, "loss": 0.1628, "num_input_tokens_seen": 56541888, "step": 26195 }, { "epoch": 4.274061990212072, "grad_norm": 0.008079355582594872, "learning_rate": 0.0009611376374434231, "loss": 0.0999, "num_input_tokens_seen": 56551072, "step": 26200 }, { "epoch": 4.274877650897227, "grad_norm": 0.06699647009372711, "learning_rate": 0.0009611101193690403, "loss": 0.0592, "num_input_tokens_seen": 56560832, "step": 26205 }, { "epoch": 4.275693311582382, "grad_norm": 0.03974662348628044, "learning_rate": 0.0009610825919496761, "loss": 0.0884, "num_input_tokens_seen": 56571008, "step": 26210 }, { "epoch": 4.2765089722675365, "grad_norm": 0.3732370436191559, "learning_rate": 0.0009610550551858881, "loss": 0.2252, "num_input_tokens_seen": 56581504, "step": 26215 }, { "epoch": 4.277324632952691, "grad_norm": 0.16455523669719696, "learning_rate": 0.0009610275090782347, "loss": 0.1004, "num_input_tokens_seen": 56591072, "step": 26220 }, { "epoch": 4.278140293637847, "grad_norm": 0.16191259026527405, "learning_rate": 0.0009609999536272738, "loss": 0.0668, "num_input_tokens_seen": 56602688, "step": 26225 }, { "epoch": 4.278955954323002, "grad_norm": 0.31982356309890747, "learning_rate": 0.0009609723888335641, "loss": 0.0857, "num_input_tokens_seen": 56612512, "step": 26230 }, { "epoch": 4.279771615008157, "grad_norm": 0.04833389073610306, "learning_rate": 0.0009609448146976642, "loss": 0.1475, "num_input_tokens_seen": 56623328, "step": 26235 }, { "epoch": 4.280587275693311, "grad_norm": 0.10816501080989838, "learning_rate": 0.0009609172312201328, "loss": 0.1005, "num_input_tokens_seen": 56634848, "step": 26240 }, { "epoch": 4.281402936378466, "grad_norm": 0.022151626646518707, "learning_rate": 0.000960889638401529, "loss": 0.1442, "num_input_tokens_seen": 56648032, "step": 26245 }, { "epoch": 4.282218597063622, "grad_norm": 0.2465866357088089, "learning_rate": 0.0009608620362424121, "loss": 0.2242, "num_input_tokens_seen": 56659968, "step": 26250 }, { "epoch": 4.283034257748777, "grad_norm": 0.013975062407553196, "learning_rate": 0.0009608344247433412, "loss": 0.0362, "num_input_tokens_seen": 56671392, "step": 26255 }, { "epoch": 4.283849918433932, "grad_norm": 0.1647869199514389, "learning_rate": 0.0009608068039048763, "loss": 0.0968, "num_input_tokens_seen": 56681664, "step": 26260 }, { "epoch": 4.284665579119086, "grad_norm": 0.07556545734405518, "learning_rate": 0.0009607791737275769, "loss": 0.0977, "num_input_tokens_seen": 56693024, "step": 26265 }, { "epoch": 4.285481239804241, "grad_norm": 0.024229027330875397, "learning_rate": 0.0009607515342120028, "loss": 0.1231, "num_input_tokens_seen": 56704000, "step": 26270 }, { "epoch": 4.286296900489396, "grad_norm": 0.01008087769150734, "learning_rate": 0.0009607238853587144, "loss": 0.1553, "num_input_tokens_seen": 56713632, "step": 26275 }, { "epoch": 4.287112561174552, "grad_norm": 0.044155705720186234, "learning_rate": 0.0009606962271682722, "loss": 0.0325, "num_input_tokens_seen": 56725184, "step": 26280 }, { "epoch": 4.287928221859707, "grad_norm": 0.05729542300105095, "learning_rate": 0.0009606685596412364, "loss": 0.0705, "num_input_tokens_seen": 56735840, "step": 26285 }, { "epoch": 4.288743882544861, "grad_norm": 0.08739973604679108, "learning_rate": 0.0009606408827781679, "loss": 0.0861, "num_input_tokens_seen": 56747424, "step": 26290 }, { "epoch": 4.289559543230016, "grad_norm": 0.20126527547836304, "learning_rate": 0.0009606131965796274, "loss": 0.098, "num_input_tokens_seen": 56757568, "step": 26295 }, { "epoch": 4.290375203915171, "grad_norm": 0.023548198863863945, "learning_rate": 0.0009605855010461761, "loss": 0.0954, "num_input_tokens_seen": 56767456, "step": 26300 }, { "epoch": 4.291190864600326, "grad_norm": 0.17590487003326416, "learning_rate": 0.0009605577961783756, "loss": 0.093, "num_input_tokens_seen": 56778784, "step": 26305 }, { "epoch": 4.2920065252854815, "grad_norm": 0.019137341529130936, "learning_rate": 0.0009605300819767869, "loss": 0.1363, "num_input_tokens_seen": 56789696, "step": 26310 }, { "epoch": 4.292822185970636, "grad_norm": 0.05824754759669304, "learning_rate": 0.000960502358441972, "loss": 0.183, "num_input_tokens_seen": 56800000, "step": 26315 }, { "epoch": 4.293637846655791, "grad_norm": 0.05928918346762657, "learning_rate": 0.0009604746255744925, "loss": 0.0995, "num_input_tokens_seen": 56811648, "step": 26320 }, { "epoch": 4.294453507340946, "grad_norm": 0.05164933577179909, "learning_rate": 0.0009604468833749105, "loss": 0.151, "num_input_tokens_seen": 56822848, "step": 26325 }, { "epoch": 4.295269168026101, "grad_norm": 0.47522300481796265, "learning_rate": 0.0009604191318437885, "loss": 0.2465, "num_input_tokens_seen": 56833760, "step": 26330 }, { "epoch": 4.2960848287112565, "grad_norm": 0.04436272755265236, "learning_rate": 0.0009603913709816886, "loss": 0.11, "num_input_tokens_seen": 56845248, "step": 26335 }, { "epoch": 4.296900489396411, "grad_norm": 0.07085258513689041, "learning_rate": 0.0009603636007891735, "loss": 0.0631, "num_input_tokens_seen": 56855744, "step": 26340 }, { "epoch": 4.297716150081566, "grad_norm": 0.00735789118334651, "learning_rate": 0.0009603358212668061, "loss": 0.0812, "num_input_tokens_seen": 56866272, "step": 26345 }, { "epoch": 4.298531810766721, "grad_norm": 0.13049672544002533, "learning_rate": 0.0009603080324151492, "loss": 0.0574, "num_input_tokens_seen": 56876704, "step": 26350 }, { "epoch": 4.299347471451876, "grad_norm": 0.2287766933441162, "learning_rate": 0.0009602802342347661, "loss": 0.1043, "num_input_tokens_seen": 56887392, "step": 26355 }, { "epoch": 4.300163132137031, "grad_norm": 0.08889561891555786, "learning_rate": 0.0009602524267262203, "loss": 0.0464, "num_input_tokens_seen": 56897120, "step": 26360 }, { "epoch": 4.300978792822186, "grad_norm": 0.06518486142158508, "learning_rate": 0.0009602246098900749, "loss": 0.1029, "num_input_tokens_seen": 56908320, "step": 26365 }, { "epoch": 4.301794453507341, "grad_norm": 0.34472137689590454, "learning_rate": 0.0009601967837268941, "loss": 0.0979, "num_input_tokens_seen": 56918944, "step": 26370 }, { "epoch": 4.302610114192496, "grad_norm": 0.1568017303943634, "learning_rate": 0.0009601689482372417, "loss": 0.0798, "num_input_tokens_seen": 56929184, "step": 26375 }, { "epoch": 4.303425774877651, "grad_norm": 0.007233389187604189, "learning_rate": 0.0009601411034216818, "loss": 0.0573, "num_input_tokens_seen": 56939232, "step": 26380 }, { "epoch": 4.304241435562806, "grad_norm": 0.23331192135810852, "learning_rate": 0.0009601132492807787, "loss": 0.0663, "num_input_tokens_seen": 56950656, "step": 26385 }, { "epoch": 4.30505709624796, "grad_norm": 0.019311008974909782, "learning_rate": 0.000960085385815097, "loss": 0.0477, "num_input_tokens_seen": 56961952, "step": 26390 }, { "epoch": 4.305872756933116, "grad_norm": 0.14969076216220856, "learning_rate": 0.0009600575130252012, "loss": 0.0805, "num_input_tokens_seen": 56972576, "step": 26395 }, { "epoch": 4.306688417618271, "grad_norm": 0.48947593569755554, "learning_rate": 0.0009600296309116563, "loss": 0.145, "num_input_tokens_seen": 56982848, "step": 26400 }, { "epoch": 4.307504078303426, "grad_norm": 0.12700121104717255, "learning_rate": 0.0009600017394750274, "loss": 0.0504, "num_input_tokens_seen": 56993312, "step": 26405 }, { "epoch": 4.308319738988581, "grad_norm": 0.03944782167673111, "learning_rate": 0.0009599738387158794, "loss": 0.0171, "num_input_tokens_seen": 57004832, "step": 26410 }, { "epoch": 4.309135399673735, "grad_norm": 0.13977976143360138, "learning_rate": 0.0009599459286347783, "loss": 0.2345, "num_input_tokens_seen": 57015424, "step": 26415 }, { "epoch": 4.309951060358891, "grad_norm": 0.010792110115289688, "learning_rate": 0.0009599180092322894, "loss": 0.1879, "num_input_tokens_seen": 57027360, "step": 26420 }, { "epoch": 4.310766721044046, "grad_norm": 0.0110248364508152, "learning_rate": 0.0009598900805089786, "loss": 0.06, "num_input_tokens_seen": 57038752, "step": 26425 }, { "epoch": 4.311582381729201, "grad_norm": 0.0762624517083168, "learning_rate": 0.0009598621424654119, "loss": 0.0667, "num_input_tokens_seen": 57050208, "step": 26430 }, { "epoch": 4.3123980424143555, "grad_norm": 0.0991593673825264, "learning_rate": 0.0009598341951021557, "loss": 0.1169, "num_input_tokens_seen": 57060768, "step": 26435 }, { "epoch": 4.31321370309951, "grad_norm": 0.16670478880405426, "learning_rate": 0.0009598062384197759, "loss": 0.1032, "num_input_tokens_seen": 57071712, "step": 26440 }, { "epoch": 4.314029363784665, "grad_norm": 0.01614641398191452, "learning_rate": 0.0009597782724188395, "loss": 0.072, "num_input_tokens_seen": 57082656, "step": 26445 }, { "epoch": 4.314845024469821, "grad_norm": 0.18557684123516083, "learning_rate": 0.0009597502970999132, "loss": 0.0773, "num_input_tokens_seen": 57092288, "step": 26450 }, { "epoch": 4.315660685154976, "grad_norm": 0.018934570252895355, "learning_rate": 0.0009597223124635639, "loss": 0.1762, "num_input_tokens_seen": 57102688, "step": 26455 }, { "epoch": 4.3164763458401305, "grad_norm": 0.2445397824048996, "learning_rate": 0.0009596943185103586, "loss": 0.1432, "num_input_tokens_seen": 57114080, "step": 26460 }, { "epoch": 4.317292006525285, "grad_norm": 0.03462572023272514, "learning_rate": 0.0009596663152408648, "loss": 0.0559, "num_input_tokens_seen": 57123552, "step": 26465 }, { "epoch": 4.31810766721044, "grad_norm": 0.12840338051319122, "learning_rate": 0.0009596383026556501, "loss": 0.0163, "num_input_tokens_seen": 57133664, "step": 26470 }, { "epoch": 4.318923327895595, "grad_norm": 0.10195285081863403, "learning_rate": 0.000959610280755282, "loss": 0.175, "num_input_tokens_seen": 57143648, "step": 26475 }, { "epoch": 4.319738988580751, "grad_norm": 0.7600626945495605, "learning_rate": 0.0009595822495403286, "loss": 0.1068, "num_input_tokens_seen": 57152896, "step": 26480 }, { "epoch": 4.3205546492659055, "grad_norm": 0.04918425902724266, "learning_rate": 0.0009595542090113579, "loss": 0.1369, "num_input_tokens_seen": 57162400, "step": 26485 }, { "epoch": 4.32137030995106, "grad_norm": 0.36300233006477356, "learning_rate": 0.0009595261591689381, "loss": 0.1331, "num_input_tokens_seen": 57172736, "step": 26490 }, { "epoch": 4.322185970636215, "grad_norm": 0.44943392276763916, "learning_rate": 0.0009594981000136377, "loss": 0.123, "num_input_tokens_seen": 57183392, "step": 26495 }, { "epoch": 4.32300163132137, "grad_norm": 0.2966060936450958, "learning_rate": 0.0009594700315460254, "loss": 0.1606, "num_input_tokens_seen": 57194080, "step": 26500 }, { "epoch": 4.323817292006526, "grad_norm": 0.22001191973686218, "learning_rate": 0.0009594419537666701, "loss": 0.1117, "num_input_tokens_seen": 57205216, "step": 26505 }, { "epoch": 4.3246329526916805, "grad_norm": 0.04149933159351349, "learning_rate": 0.0009594138666761407, "loss": 0.1006, "num_input_tokens_seen": 57216704, "step": 26510 }, { "epoch": 4.325448613376835, "grad_norm": 0.2898261845111847, "learning_rate": 0.0009593857702750065, "loss": 0.1117, "num_input_tokens_seen": 57228704, "step": 26515 }, { "epoch": 4.32626427406199, "grad_norm": 0.24761654436588287, "learning_rate": 0.0009593576645638369, "loss": 0.1437, "num_input_tokens_seen": 57237056, "step": 26520 }, { "epoch": 4.327079934747145, "grad_norm": 0.029689989984035492, "learning_rate": 0.0009593295495432015, "loss": 0.0963, "num_input_tokens_seen": 57247296, "step": 26525 }, { "epoch": 4.327895595432301, "grad_norm": 0.04988129064440727, "learning_rate": 0.00095930142521367, "loss": 0.1126, "num_input_tokens_seen": 57257888, "step": 26530 }, { "epoch": 4.328711256117455, "grad_norm": 0.06792977452278137, "learning_rate": 0.0009592732915758127, "loss": 0.0192, "num_input_tokens_seen": 57267904, "step": 26535 }, { "epoch": 4.32952691680261, "grad_norm": 0.04278651997447014, "learning_rate": 0.0009592451486301991, "loss": 0.0635, "num_input_tokens_seen": 57279040, "step": 26540 }, { "epoch": 4.330342577487765, "grad_norm": 0.059347234666347504, "learning_rate": 0.0009592169963774004, "loss": 0.0467, "num_input_tokens_seen": 57291232, "step": 26545 }, { "epoch": 4.33115823817292, "grad_norm": 0.011570718139410019, "learning_rate": 0.0009591888348179865, "loss": 0.0415, "num_input_tokens_seen": 57302784, "step": 26550 }, { "epoch": 4.331973898858075, "grad_norm": 0.0646468997001648, "learning_rate": 0.0009591606639525283, "loss": 0.1049, "num_input_tokens_seen": 57314656, "step": 26555 }, { "epoch": 4.33278955954323, "grad_norm": 0.4404638707637787, "learning_rate": 0.0009591324837815969, "loss": 0.1643, "num_input_tokens_seen": 57325792, "step": 26560 }, { "epoch": 4.333605220228385, "grad_norm": 0.00760237593203783, "learning_rate": 0.0009591042943057631, "loss": 0.0339, "num_input_tokens_seen": 57336128, "step": 26565 }, { "epoch": 4.33442088091354, "grad_norm": 0.021946754306554794, "learning_rate": 0.0009590760955255985, "loss": 0.0645, "num_input_tokens_seen": 57346432, "step": 26570 }, { "epoch": 4.335236541598695, "grad_norm": 0.05866868421435356, "learning_rate": 0.0009590478874416744, "loss": 0.1834, "num_input_tokens_seen": 57357600, "step": 26575 }, { "epoch": 4.33605220228385, "grad_norm": 0.3842392563819885, "learning_rate": 0.0009590196700545626, "loss": 0.1677, "num_input_tokens_seen": 57367968, "step": 26580 }, { "epoch": 4.3368678629690045, "grad_norm": 0.021405866369605064, "learning_rate": 0.0009589914433648347, "loss": 0.1015, "num_input_tokens_seen": 57378080, "step": 26585 }, { "epoch": 4.33768352365416, "grad_norm": 0.04342464730143547, "learning_rate": 0.000958963207373063, "loss": 0.0651, "num_input_tokens_seen": 57389888, "step": 26590 }, { "epoch": 4.338499184339315, "grad_norm": 0.2572534680366516, "learning_rate": 0.0009589349620798197, "loss": 0.161, "num_input_tokens_seen": 57401664, "step": 26595 }, { "epoch": 4.33931484502447, "grad_norm": 0.3200789988040924, "learning_rate": 0.0009589067074856772, "loss": 0.1025, "num_input_tokens_seen": 57413120, "step": 26600 }, { "epoch": 4.340130505709625, "grad_norm": 0.016770942136645317, "learning_rate": 0.0009588784435912082, "loss": 0.0226, "num_input_tokens_seen": 57425024, "step": 26605 }, { "epoch": 4.3409461663947795, "grad_norm": 0.11740769445896149, "learning_rate": 0.0009588501703969852, "loss": 0.0426, "num_input_tokens_seen": 57435552, "step": 26610 }, { "epoch": 4.341761827079935, "grad_norm": 0.11614541709423065, "learning_rate": 0.0009588218879035815, "loss": 0.1844, "num_input_tokens_seen": 57446560, "step": 26615 }, { "epoch": 4.34257748776509, "grad_norm": 0.005883972160518169, "learning_rate": 0.0009587935961115701, "loss": 0.0583, "num_input_tokens_seen": 57457600, "step": 26620 }, { "epoch": 4.343393148450245, "grad_norm": 0.018213029950857162, "learning_rate": 0.0009587652950215247, "loss": 0.0235, "num_input_tokens_seen": 57467008, "step": 26625 }, { "epoch": 4.3442088091354, "grad_norm": 0.04565092921257019, "learning_rate": 0.0009587369846340184, "loss": 0.0612, "num_input_tokens_seen": 57476320, "step": 26630 }, { "epoch": 4.3450244698205545, "grad_norm": 0.03621744364500046, "learning_rate": 0.000958708664949625, "loss": 0.1356, "num_input_tokens_seen": 57488192, "step": 26635 }, { "epoch": 4.345840130505709, "grad_norm": 0.3558013439178467, "learning_rate": 0.0009586803359689189, "loss": 0.1281, "num_input_tokens_seen": 57499008, "step": 26640 }, { "epoch": 4.346655791190865, "grad_norm": 0.35529565811157227, "learning_rate": 0.0009586519976924739, "loss": 0.1677, "num_input_tokens_seen": 57510080, "step": 26645 }, { "epoch": 4.34747145187602, "grad_norm": 0.3363238275051117, "learning_rate": 0.0009586236501208642, "loss": 0.1342, "num_input_tokens_seen": 57518240, "step": 26650 }, { "epoch": 4.348287112561175, "grad_norm": 0.2017209827899933, "learning_rate": 0.0009585952932546644, "loss": 0.0591, "num_input_tokens_seen": 57527936, "step": 26655 }, { "epoch": 4.349102773246329, "grad_norm": 0.03162151575088501, "learning_rate": 0.0009585669270944493, "loss": 0.0596, "num_input_tokens_seen": 57538784, "step": 26660 }, { "epoch": 4.349918433931484, "grad_norm": 0.009448545053601265, "learning_rate": 0.0009585385516407936, "loss": 0.0125, "num_input_tokens_seen": 57550208, "step": 26665 }, { "epoch": 4.350734094616639, "grad_norm": 0.06576218456029892, "learning_rate": 0.0009585101668942726, "loss": 0.0683, "num_input_tokens_seen": 57561536, "step": 26670 }, { "epoch": 4.351549755301795, "grad_norm": 0.736595630645752, "learning_rate": 0.0009584817728554613, "loss": 0.1653, "num_input_tokens_seen": 57573440, "step": 26675 }, { "epoch": 4.35236541598695, "grad_norm": 0.14241448044776917, "learning_rate": 0.0009584533695249353, "loss": 0.1311, "num_input_tokens_seen": 57585056, "step": 26680 }, { "epoch": 4.353181076672104, "grad_norm": 0.05996230989694595, "learning_rate": 0.0009584249569032701, "loss": 0.1471, "num_input_tokens_seen": 57595136, "step": 26685 }, { "epoch": 4.353996737357259, "grad_norm": 0.18184806406497955, "learning_rate": 0.0009583965349910417, "loss": 0.076, "num_input_tokens_seen": 57605568, "step": 26690 }, { "epoch": 4.354812398042414, "grad_norm": 0.12142419815063477, "learning_rate": 0.0009583681037888259, "loss": 0.0337, "num_input_tokens_seen": 57616000, "step": 26695 }, { "epoch": 4.35562805872757, "grad_norm": 0.11618295311927795, "learning_rate": 0.0009583396632971991, "loss": 0.2426, "num_input_tokens_seen": 57625760, "step": 26700 }, { "epoch": 4.356443719412725, "grad_norm": 0.10647706687450409, "learning_rate": 0.0009583112135167376, "loss": 0.0873, "num_input_tokens_seen": 57635968, "step": 26705 }, { "epoch": 4.357259380097879, "grad_norm": 0.12725499272346497, "learning_rate": 0.0009582827544480177, "loss": 0.0475, "num_input_tokens_seen": 57647616, "step": 26710 }, { "epoch": 4.358075040783034, "grad_norm": 0.009684933349490166, "learning_rate": 0.0009582542860916166, "loss": 0.0462, "num_input_tokens_seen": 57658464, "step": 26715 }, { "epoch": 4.358890701468189, "grad_norm": 0.087120421230793, "learning_rate": 0.000958225808448111, "loss": 0.1968, "num_input_tokens_seen": 57669472, "step": 26720 }, { "epoch": 4.359706362153344, "grad_norm": 0.00963442213833332, "learning_rate": 0.0009581973215180782, "loss": 0.0602, "num_input_tokens_seen": 57681120, "step": 26725 }, { "epoch": 4.3605220228384995, "grad_norm": 0.05102827772498131, "learning_rate": 0.0009581688253020952, "loss": 0.165, "num_input_tokens_seen": 57691872, "step": 26730 }, { "epoch": 4.361337683523654, "grad_norm": 0.11385705322027206, "learning_rate": 0.00095814031980074, "loss": 0.1298, "num_input_tokens_seen": 57702816, "step": 26735 }, { "epoch": 4.362153344208809, "grad_norm": 0.29417338967323303, "learning_rate": 0.0009581118050145898, "loss": 0.3598, "num_input_tokens_seen": 57713888, "step": 26740 }, { "epoch": 4.362969004893964, "grad_norm": 0.02234242856502533, "learning_rate": 0.0009580832809442228, "loss": 0.0761, "num_input_tokens_seen": 57725984, "step": 26745 }, { "epoch": 4.363784665579119, "grad_norm": 0.04497283697128296, "learning_rate": 0.000958054747590217, "loss": 0.137, "num_input_tokens_seen": 57736160, "step": 26750 }, { "epoch": 4.364600326264274, "grad_norm": 0.17340140044689178, "learning_rate": 0.0009580262049531508, "loss": 0.1077, "num_input_tokens_seen": 57746656, "step": 26755 }, { "epoch": 4.365415986949429, "grad_norm": 0.10127101093530655, "learning_rate": 0.0009579976530336023, "loss": 0.0664, "num_input_tokens_seen": 57758432, "step": 26760 }, { "epoch": 4.366231647634584, "grad_norm": 0.05625090375542641, "learning_rate": 0.0009579690918321504, "loss": 0.0372, "num_input_tokens_seen": 57770240, "step": 26765 }, { "epoch": 4.367047308319739, "grad_norm": 0.013620378449559212, "learning_rate": 0.0009579405213493739, "loss": 0.1627, "num_input_tokens_seen": 57780864, "step": 26770 }, { "epoch": 4.367862969004894, "grad_norm": 0.25830385088920593, "learning_rate": 0.0009579119415858518, "loss": 0.1771, "num_input_tokens_seen": 57791456, "step": 26775 }, { "epoch": 4.368678629690049, "grad_norm": 0.01864568516612053, "learning_rate": 0.0009578833525421633, "loss": 0.0957, "num_input_tokens_seen": 57803584, "step": 26780 }, { "epoch": 4.369494290375204, "grad_norm": 0.047464534640312195, "learning_rate": 0.0009578547542188878, "loss": 0.059, "num_input_tokens_seen": 57814688, "step": 26785 }, { "epoch": 4.370309951060359, "grad_norm": 0.09426793456077576, "learning_rate": 0.0009578261466166049, "loss": 0.0806, "num_input_tokens_seen": 57825696, "step": 26790 }, { "epoch": 4.371125611745514, "grad_norm": 0.010405541397631168, "learning_rate": 0.0009577975297358943, "loss": 0.0867, "num_input_tokens_seen": 57836672, "step": 26795 }, { "epoch": 4.371941272430669, "grad_norm": 0.030167369171977043, "learning_rate": 0.0009577689035773359, "loss": 0.0733, "num_input_tokens_seen": 57847136, "step": 26800 }, { "epoch": 4.372756933115824, "grad_norm": 0.025368329137563705, "learning_rate": 0.0009577402681415102, "loss": 0.0959, "num_input_tokens_seen": 57857472, "step": 26805 }, { "epoch": 4.373572593800978, "grad_norm": 0.48700112104415894, "learning_rate": 0.0009577116234289971, "loss": 0.1173, "num_input_tokens_seen": 57868000, "step": 26810 }, { "epoch": 4.374388254486134, "grad_norm": 0.19687391817569733, "learning_rate": 0.0009576829694403772, "loss": 0.06, "num_input_tokens_seen": 57876896, "step": 26815 }, { "epoch": 4.375203915171289, "grad_norm": 0.0715906172990799, "learning_rate": 0.0009576543061762315, "loss": 0.0206, "num_input_tokens_seen": 57887744, "step": 26820 }, { "epoch": 4.376019575856444, "grad_norm": 0.025065617635846138, "learning_rate": 0.0009576256336371407, "loss": 0.0574, "num_input_tokens_seen": 57899008, "step": 26825 }, { "epoch": 4.376835236541599, "grad_norm": 0.0021913934033364058, "learning_rate": 0.0009575969518236857, "loss": 0.0423, "num_input_tokens_seen": 57908960, "step": 26830 }, { "epoch": 4.377650897226753, "grad_norm": 0.18651030957698822, "learning_rate": 0.0009575682607364482, "loss": 0.1584, "num_input_tokens_seen": 57919296, "step": 26835 }, { "epoch": 4.378466557911908, "grad_norm": 0.047549303621053696, "learning_rate": 0.0009575395603760095, "loss": 0.066, "num_input_tokens_seen": 57929024, "step": 26840 }, { "epoch": 4.379282218597064, "grad_norm": 0.33597493171691895, "learning_rate": 0.000957510850742951, "loss": 0.1057, "num_input_tokens_seen": 57939680, "step": 26845 }, { "epoch": 4.380097879282219, "grad_norm": 0.01669122278690338, "learning_rate": 0.0009574821318378547, "loss": 0.0431, "num_input_tokens_seen": 57951392, "step": 26850 }, { "epoch": 4.3809135399673735, "grad_norm": 0.031550005078315735, "learning_rate": 0.0009574534036613028, "loss": 0.0713, "num_input_tokens_seen": 57961920, "step": 26855 }, { "epoch": 4.381729200652528, "grad_norm": 0.18732404708862305, "learning_rate": 0.0009574246662138772, "loss": 0.0716, "num_input_tokens_seen": 57973472, "step": 26860 }, { "epoch": 4.382544861337683, "grad_norm": 0.09832603484392166, "learning_rate": 0.0009573959194961604, "loss": 0.0435, "num_input_tokens_seen": 57983776, "step": 26865 }, { "epoch": 4.383360522022839, "grad_norm": 0.00680912472307682, "learning_rate": 0.0009573671635087352, "loss": 0.0161, "num_input_tokens_seen": 57994368, "step": 26870 }, { "epoch": 4.384176182707994, "grad_norm": 0.015062980353832245, "learning_rate": 0.0009573383982521841, "loss": 0.0328, "num_input_tokens_seen": 58005280, "step": 26875 }, { "epoch": 4.3849918433931485, "grad_norm": 0.121588796377182, "learning_rate": 0.0009573096237270903, "loss": 0.1566, "num_input_tokens_seen": 58014976, "step": 26880 }, { "epoch": 4.385807504078303, "grad_norm": 0.07023459672927856, "learning_rate": 0.0009572808399340368, "loss": 0.0458, "num_input_tokens_seen": 58025792, "step": 26885 }, { "epoch": 4.386623164763458, "grad_norm": 0.07783626019954681, "learning_rate": 0.000957252046873607, "loss": 0.0879, "num_input_tokens_seen": 58035712, "step": 26890 }, { "epoch": 4.387438825448613, "grad_norm": 0.2602803707122803, "learning_rate": 0.0009572232445463843, "loss": 0.1401, "num_input_tokens_seen": 58047072, "step": 26895 }, { "epoch": 4.388254486133769, "grad_norm": 0.1164298802614212, "learning_rate": 0.0009571944329529526, "loss": 0.1106, "num_input_tokens_seen": 58057632, "step": 26900 }, { "epoch": 4.3890701468189235, "grad_norm": 0.15169483423233032, "learning_rate": 0.0009571656120938956, "loss": 0.0937, "num_input_tokens_seen": 58069824, "step": 26905 }, { "epoch": 4.389885807504078, "grad_norm": 0.2765485346317291, "learning_rate": 0.0009571367819697978, "loss": 0.1225, "num_input_tokens_seen": 58079936, "step": 26910 }, { "epoch": 4.390701468189233, "grad_norm": 0.07105875760316849, "learning_rate": 0.000957107942581243, "loss": 0.1035, "num_input_tokens_seen": 58090816, "step": 26915 }, { "epoch": 4.391517128874388, "grad_norm": 0.12565481662750244, "learning_rate": 0.0009570790939288159, "loss": 0.0348, "num_input_tokens_seen": 58101792, "step": 26920 }, { "epoch": 4.392332789559543, "grad_norm": 0.0789114311337471, "learning_rate": 0.0009570502360131011, "loss": 0.0775, "num_input_tokens_seen": 58112704, "step": 26925 }, { "epoch": 4.3931484502446985, "grad_norm": 0.3488839864730835, "learning_rate": 0.0009570213688346833, "loss": 0.0987, "num_input_tokens_seen": 58123712, "step": 26930 }, { "epoch": 4.393964110929853, "grad_norm": 0.3666483461856842, "learning_rate": 0.000956992492394148, "loss": 0.2112, "num_input_tokens_seen": 58136320, "step": 26935 }, { "epoch": 4.394779771615008, "grad_norm": 0.04762290045619011, "learning_rate": 0.00095696360669208, "loss": 0.1582, "num_input_tokens_seen": 58147680, "step": 26940 }, { "epoch": 4.395595432300163, "grad_norm": 0.020458528771996498, "learning_rate": 0.0009569347117290647, "loss": 0.0237, "num_input_tokens_seen": 58160736, "step": 26945 }, { "epoch": 4.396411092985318, "grad_norm": 0.013689234852790833, "learning_rate": 0.0009569058075056878, "loss": 0.1191, "num_input_tokens_seen": 58172480, "step": 26950 }, { "epoch": 4.397226753670473, "grad_norm": 0.04353228211402893, "learning_rate": 0.0009568768940225352, "loss": 0.1386, "num_input_tokens_seen": 58183872, "step": 26955 }, { "epoch": 4.398042414355628, "grad_norm": 0.19040422141551971, "learning_rate": 0.0009568479712801926, "loss": 0.041, "num_input_tokens_seen": 58195328, "step": 26960 }, { "epoch": 4.398858075040783, "grad_norm": 0.009100801311433315, "learning_rate": 0.0009568190392792464, "loss": 0.0448, "num_input_tokens_seen": 58205024, "step": 26965 }, { "epoch": 4.399673735725938, "grad_norm": 0.5477445721626282, "learning_rate": 0.000956790098020283, "loss": 0.0487, "num_input_tokens_seen": 58215712, "step": 26970 }, { "epoch": 4.400489396411093, "grad_norm": 0.07602336257696152, "learning_rate": 0.0009567611475038886, "loss": 0.0787, "num_input_tokens_seen": 58226976, "step": 26975 }, { "epoch": 4.401305057096248, "grad_norm": 0.5634533166885376, "learning_rate": 0.0009567321877306501, "loss": 0.1809, "num_input_tokens_seen": 58238528, "step": 26980 }, { "epoch": 4.402120717781403, "grad_norm": 0.08520092815160751, "learning_rate": 0.0009567032187011546, "loss": 0.1198, "num_input_tokens_seen": 58248640, "step": 26985 }, { "epoch": 4.402936378466558, "grad_norm": 0.31448301672935486, "learning_rate": 0.0009566742404159887, "loss": 0.2176, "num_input_tokens_seen": 58259456, "step": 26990 }, { "epoch": 4.403752039151713, "grad_norm": 0.636321485042572, "learning_rate": 0.0009566452528757402, "loss": 0.0819, "num_input_tokens_seen": 58270944, "step": 26995 }, { "epoch": 4.404567699836868, "grad_norm": 0.06585033982992172, "learning_rate": 0.0009566162560809963, "loss": 0.1645, "num_input_tokens_seen": 58282176, "step": 27000 }, { "epoch": 4.4053833605220225, "grad_norm": 0.07904354482889175, "learning_rate": 0.0009565872500323447, "loss": 0.1037, "num_input_tokens_seen": 58293568, "step": 27005 }, { "epoch": 4.406199021207178, "grad_norm": 0.26125428080558777, "learning_rate": 0.0009565582347303733, "loss": 0.0964, "num_input_tokens_seen": 58303392, "step": 27010 }, { "epoch": 4.407014681892333, "grad_norm": 0.18388380110263824, "learning_rate": 0.00095652921017567, "loss": 0.1634, "num_input_tokens_seen": 58315488, "step": 27015 }, { "epoch": 4.407830342577488, "grad_norm": 0.019667578861117363, "learning_rate": 0.0009565001763688233, "loss": 0.1106, "num_input_tokens_seen": 58327232, "step": 27020 }, { "epoch": 4.408646003262643, "grad_norm": 0.031216414645314217, "learning_rate": 0.0009564711333104213, "loss": 0.1675, "num_input_tokens_seen": 58338528, "step": 27025 }, { "epoch": 4.4094616639477975, "grad_norm": 0.2866635024547577, "learning_rate": 0.0009564420810010526, "loss": 0.1191, "num_input_tokens_seen": 58348960, "step": 27030 }, { "epoch": 4.410277324632952, "grad_norm": 0.0751042440533638, "learning_rate": 0.0009564130194413061, "loss": 0.0808, "num_input_tokens_seen": 58360096, "step": 27035 }, { "epoch": 4.411092985318108, "grad_norm": 0.062259331345558167, "learning_rate": 0.0009563839486317709, "loss": 0.0325, "num_input_tokens_seen": 58370752, "step": 27040 }, { "epoch": 4.411908646003263, "grad_norm": 0.012364414520561695, "learning_rate": 0.000956354868573036, "loss": 0.1722, "num_input_tokens_seen": 58382848, "step": 27045 }, { "epoch": 4.412724306688418, "grad_norm": 0.01138612162321806, "learning_rate": 0.0009563257792656908, "loss": 0.0342, "num_input_tokens_seen": 58393920, "step": 27050 }, { "epoch": 4.4135399673735725, "grad_norm": 0.1060384064912796, "learning_rate": 0.0009562966807103246, "loss": 0.0606, "num_input_tokens_seen": 58406208, "step": 27055 }, { "epoch": 4.414355628058727, "grad_norm": 0.07780595123767853, "learning_rate": 0.0009562675729075274, "loss": 0.02, "num_input_tokens_seen": 58418016, "step": 27060 }, { "epoch": 4.415171288743883, "grad_norm": 0.22014375030994415, "learning_rate": 0.0009562384558578891, "loss": 0.12, "num_input_tokens_seen": 58428928, "step": 27065 }, { "epoch": 4.415986949429038, "grad_norm": 0.04966195672750473, "learning_rate": 0.0009562093295619996, "loss": 0.158, "num_input_tokens_seen": 58440672, "step": 27070 }, { "epoch": 4.416802610114193, "grad_norm": 0.021149907261133194, "learning_rate": 0.0009561801940204493, "loss": 0.1177, "num_input_tokens_seen": 58452512, "step": 27075 }, { "epoch": 4.417618270799347, "grad_norm": 0.5484504103660583, "learning_rate": 0.0009561510492338287, "loss": 0.1342, "num_input_tokens_seen": 58462656, "step": 27080 }, { "epoch": 4.418433931484502, "grad_norm": 0.04352255165576935, "learning_rate": 0.0009561218952027286, "loss": 0.1195, "num_input_tokens_seen": 58473280, "step": 27085 }, { "epoch": 4.419249592169657, "grad_norm": 0.024366460740566254, "learning_rate": 0.0009560927319277395, "loss": 0.0883, "num_input_tokens_seen": 58483104, "step": 27090 }, { "epoch": 4.420065252854813, "grad_norm": 0.021009201183915138, "learning_rate": 0.0009560635594094524, "loss": 0.1471, "num_input_tokens_seen": 58494304, "step": 27095 }, { "epoch": 4.420880913539968, "grad_norm": 0.009694544598460197, "learning_rate": 0.000956034377648459, "loss": 0.2072, "num_input_tokens_seen": 58505440, "step": 27100 }, { "epoch": 4.421696574225122, "grad_norm": 0.2850847542285919, "learning_rate": 0.0009560051866453503, "loss": 0.0923, "num_input_tokens_seen": 58516032, "step": 27105 }, { "epoch": 4.422512234910277, "grad_norm": 0.015372009947896004, "learning_rate": 0.000955975986400718, "loss": 0.0465, "num_input_tokens_seen": 58526848, "step": 27110 }, { "epoch": 4.423327895595432, "grad_norm": 0.02016020007431507, "learning_rate": 0.000955946776915154, "loss": 0.0774, "num_input_tokens_seen": 58537120, "step": 27115 }, { "epoch": 4.424143556280587, "grad_norm": 0.33619624376296997, "learning_rate": 0.00095591755818925, "loss": 0.0885, "num_input_tokens_seen": 58547424, "step": 27120 }, { "epoch": 4.424959216965743, "grad_norm": 0.026671146973967552, "learning_rate": 0.0009558883302235984, "loss": 0.1378, "num_input_tokens_seen": 58558560, "step": 27125 }, { "epoch": 4.425774877650897, "grad_norm": 0.013660904951393604, "learning_rate": 0.0009558590930187913, "loss": 0.0701, "num_input_tokens_seen": 58569760, "step": 27130 }, { "epoch": 4.426590538336052, "grad_norm": 0.41237568855285645, "learning_rate": 0.0009558298465754216, "loss": 0.1474, "num_input_tokens_seen": 58580864, "step": 27135 }, { "epoch": 4.427406199021207, "grad_norm": 0.16437549889087677, "learning_rate": 0.0009558005908940816, "loss": 0.0595, "num_input_tokens_seen": 58592192, "step": 27140 }, { "epoch": 4.428221859706362, "grad_norm": 0.02606436051428318, "learning_rate": 0.0009557713259753647, "loss": 0.1298, "num_input_tokens_seen": 58603680, "step": 27145 }, { "epoch": 4.4290375203915175, "grad_norm": 0.30858924984931946, "learning_rate": 0.0009557420518198634, "loss": 0.122, "num_input_tokens_seen": 58614528, "step": 27150 }, { "epoch": 4.429853181076672, "grad_norm": 0.5089007616043091, "learning_rate": 0.0009557127684281714, "loss": 0.0624, "num_input_tokens_seen": 58624224, "step": 27155 }, { "epoch": 4.430668841761827, "grad_norm": 0.18792207539081573, "learning_rate": 0.000955683475800882, "loss": 0.2561, "num_input_tokens_seen": 58634656, "step": 27160 }, { "epoch": 4.431484502446982, "grad_norm": 0.013766072690486908, "learning_rate": 0.0009556541739385889, "loss": 0.2151, "num_input_tokens_seen": 58646368, "step": 27165 }, { "epoch": 4.432300163132137, "grad_norm": 0.19999128580093384, "learning_rate": 0.000955624862841886, "loss": 0.2036, "num_input_tokens_seen": 58657408, "step": 27170 }, { "epoch": 4.433115823817292, "grad_norm": 0.04755646735429764, "learning_rate": 0.0009555955425113672, "loss": 0.1305, "num_input_tokens_seen": 58667840, "step": 27175 }, { "epoch": 4.433931484502447, "grad_norm": 0.23142164945602417, "learning_rate": 0.0009555662129476266, "loss": 0.1368, "num_input_tokens_seen": 58679040, "step": 27180 }, { "epoch": 4.434747145187602, "grad_norm": 0.07983970642089844, "learning_rate": 0.0009555368741512589, "loss": 0.0816, "num_input_tokens_seen": 58690304, "step": 27185 }, { "epoch": 4.435562805872757, "grad_norm": 0.13339519500732422, "learning_rate": 0.0009555075261228586, "loss": 0.0748, "num_input_tokens_seen": 58700256, "step": 27190 }, { "epoch": 4.436378466557912, "grad_norm": 0.011461270973086357, "learning_rate": 0.0009554781688630204, "loss": 0.0707, "num_input_tokens_seen": 58711392, "step": 27195 }, { "epoch": 4.437194127243067, "grad_norm": 0.012431362643837929, "learning_rate": 0.0009554488023723394, "loss": 0.1109, "num_input_tokens_seen": 58722336, "step": 27200 }, { "epoch": 4.438009787928221, "grad_norm": 0.06176788732409477, "learning_rate": 0.0009554194266514105, "loss": 0.0672, "num_input_tokens_seen": 58733440, "step": 27205 }, { "epoch": 4.438825448613377, "grad_norm": 0.10728387534618378, "learning_rate": 0.0009553900417008292, "loss": 0.0855, "num_input_tokens_seen": 58744928, "step": 27210 }, { "epoch": 4.439641109298532, "grad_norm": 0.0385863296687603, "learning_rate": 0.000955360647521191, "loss": 0.0421, "num_input_tokens_seen": 58755648, "step": 27215 }, { "epoch": 4.440456769983687, "grad_norm": 0.033035214990377426, "learning_rate": 0.0009553312441130916, "loss": 0.0979, "num_input_tokens_seen": 58765824, "step": 27220 }, { "epoch": 4.441272430668842, "grad_norm": 0.11581233888864517, "learning_rate": 0.0009553018314771269, "loss": 0.1806, "num_input_tokens_seen": 58776160, "step": 27225 }, { "epoch": 4.442088091353996, "grad_norm": 0.02753908559679985, "learning_rate": 0.0009552724096138931, "loss": 0.0426, "num_input_tokens_seen": 58786880, "step": 27230 }, { "epoch": 4.442903752039152, "grad_norm": 0.23759329319000244, "learning_rate": 0.0009552429785239863, "loss": 0.0739, "num_input_tokens_seen": 58797280, "step": 27235 }, { "epoch": 4.443719412724307, "grad_norm": 0.028480688109993935, "learning_rate": 0.0009552135382080029, "loss": 0.1333, "num_input_tokens_seen": 58807712, "step": 27240 }, { "epoch": 4.444535073409462, "grad_norm": 0.34940534830093384, "learning_rate": 0.0009551840886665398, "loss": 0.0534, "num_input_tokens_seen": 58819488, "step": 27245 }, { "epoch": 4.445350734094617, "grad_norm": 0.00677880784496665, "learning_rate": 0.0009551546299001938, "loss": 0.0552, "num_input_tokens_seen": 58828800, "step": 27250 }, { "epoch": 4.446166394779771, "grad_norm": 0.05643823742866516, "learning_rate": 0.0009551251619095616, "loss": 0.1587, "num_input_tokens_seen": 58837888, "step": 27255 }, { "epoch": 4.446982055464926, "grad_norm": 0.22889064252376556, "learning_rate": 0.0009550956846952408, "loss": 0.2129, "num_input_tokens_seen": 58848320, "step": 27260 }, { "epoch": 4.447797716150082, "grad_norm": 0.02221514843404293, "learning_rate": 0.0009550661982578286, "loss": 0.033, "num_input_tokens_seen": 58859392, "step": 27265 }, { "epoch": 4.448613376835237, "grad_norm": 0.46305903792381287, "learning_rate": 0.0009550367025979225, "loss": 0.1376, "num_input_tokens_seen": 58870048, "step": 27270 }, { "epoch": 4.4494290375203915, "grad_norm": 0.23319874703884125, "learning_rate": 0.0009550071977161203, "loss": 0.2168, "num_input_tokens_seen": 58879904, "step": 27275 }, { "epoch": 4.450244698205546, "grad_norm": 0.26876041293144226, "learning_rate": 0.0009549776836130202, "loss": 0.1826, "num_input_tokens_seen": 58891104, "step": 27280 }, { "epoch": 4.451060358890701, "grad_norm": 0.013156954199075699, "learning_rate": 0.0009549481602892201, "loss": 0.1593, "num_input_tokens_seen": 58901376, "step": 27285 }, { "epoch": 4.451876019575856, "grad_norm": 0.0071356529369950294, "learning_rate": 0.0009549186277453184, "loss": 0.0489, "num_input_tokens_seen": 58914560, "step": 27290 }, { "epoch": 4.452691680261012, "grad_norm": 0.08076464384794235, "learning_rate": 0.0009548890859819138, "loss": 0.0486, "num_input_tokens_seen": 58925248, "step": 27295 }, { "epoch": 4.4535073409461665, "grad_norm": 0.06414244323968887, "learning_rate": 0.0009548595349996045, "loss": 0.069, "num_input_tokens_seen": 58935328, "step": 27300 }, { "epoch": 4.454323001631321, "grad_norm": 0.03880259022116661, "learning_rate": 0.0009548299747989897, "loss": 0.065, "num_input_tokens_seen": 58946528, "step": 27305 }, { "epoch": 4.455138662316476, "grad_norm": 0.13126346468925476, "learning_rate": 0.0009548004053806686, "loss": 0.1176, "num_input_tokens_seen": 58958688, "step": 27310 }, { "epoch": 4.455954323001631, "grad_norm": 0.37605220079421997, "learning_rate": 0.0009547708267452403, "loss": 0.207, "num_input_tokens_seen": 58969120, "step": 27315 }, { "epoch": 4.456769983686787, "grad_norm": 0.010259648784995079, "learning_rate": 0.0009547412388933042, "loss": 0.03, "num_input_tokens_seen": 58979296, "step": 27320 }, { "epoch": 4.4575856443719415, "grad_norm": 0.02998652495443821, "learning_rate": 0.0009547116418254601, "loss": 0.1154, "num_input_tokens_seen": 58990016, "step": 27325 }, { "epoch": 4.458401305057096, "grad_norm": 0.08258122950792313, "learning_rate": 0.0009546820355423077, "loss": 0.1072, "num_input_tokens_seen": 59001216, "step": 27330 }, { "epoch": 4.459216965742251, "grad_norm": 0.38865208625793457, "learning_rate": 0.0009546524200444471, "loss": 0.1575, "num_input_tokens_seen": 59012224, "step": 27335 }, { "epoch": 4.460032626427406, "grad_norm": 0.12662218511104584, "learning_rate": 0.0009546227953324784, "loss": 0.1688, "num_input_tokens_seen": 59022400, "step": 27340 }, { "epoch": 4.460848287112561, "grad_norm": 0.06498900055885315, "learning_rate": 0.000954593161407002, "loss": 0.046, "num_input_tokens_seen": 59032160, "step": 27345 }, { "epoch": 4.4616639477977165, "grad_norm": 0.020646408200263977, "learning_rate": 0.0009545635182686185, "loss": 0.0774, "num_input_tokens_seen": 59043264, "step": 27350 }, { "epoch": 4.462479608482871, "grad_norm": 0.07763660699129105, "learning_rate": 0.0009545338659179286, "loss": 0.1001, "num_input_tokens_seen": 59053344, "step": 27355 }, { "epoch": 4.463295269168026, "grad_norm": 0.20993928611278534, "learning_rate": 0.0009545042043555334, "loss": 0.0665, "num_input_tokens_seen": 59064832, "step": 27360 }, { "epoch": 4.464110929853181, "grad_norm": 0.19798365235328674, "learning_rate": 0.000954474533582034, "loss": 0.0335, "num_input_tokens_seen": 59076608, "step": 27365 }, { "epoch": 4.464926590538336, "grad_norm": 0.020919742062687874, "learning_rate": 0.0009544448535980315, "loss": 0.0799, "num_input_tokens_seen": 59088512, "step": 27370 }, { "epoch": 4.465742251223491, "grad_norm": 0.3673371374607086, "learning_rate": 0.0009544151644041275, "loss": 0.0565, "num_input_tokens_seen": 59098912, "step": 27375 }, { "epoch": 4.466557911908646, "grad_norm": 0.38302820920944214, "learning_rate": 0.0009543854660009237, "loss": 0.063, "num_input_tokens_seen": 59110240, "step": 27380 }, { "epoch": 4.467373572593801, "grad_norm": 0.13988201320171356, "learning_rate": 0.0009543557583890221, "loss": 0.2371, "num_input_tokens_seen": 59121184, "step": 27385 }, { "epoch": 4.468189233278956, "grad_norm": 0.005193037446588278, "learning_rate": 0.0009543260415690247, "loss": 0.0335, "num_input_tokens_seen": 59131584, "step": 27390 }, { "epoch": 4.469004893964111, "grad_norm": 0.3440542221069336, "learning_rate": 0.0009542963155415336, "loss": 0.0841, "num_input_tokens_seen": 59140672, "step": 27395 }, { "epoch": 4.4698205546492655, "grad_norm": 0.09182848036289215, "learning_rate": 0.0009542665803071515, "loss": 0.1761, "num_input_tokens_seen": 59150624, "step": 27400 }, { "epoch": 4.470636215334421, "grad_norm": 0.36571675539016724, "learning_rate": 0.0009542368358664806, "loss": 0.0817, "num_input_tokens_seen": 59161760, "step": 27405 }, { "epoch": 4.471451876019576, "grad_norm": 0.14988970756530762, "learning_rate": 0.0009542070822201244, "loss": 0.0312, "num_input_tokens_seen": 59172480, "step": 27410 }, { "epoch": 4.472267536704731, "grad_norm": 0.011259081773459911, "learning_rate": 0.0009541773193686851, "loss": 0.275, "num_input_tokens_seen": 59183936, "step": 27415 }, { "epoch": 4.473083197389886, "grad_norm": 0.5073511600494385, "learning_rate": 0.0009541475473127664, "loss": 0.1317, "num_input_tokens_seen": 59193664, "step": 27420 }, { "epoch": 4.4738988580750405, "grad_norm": 0.11972982436418533, "learning_rate": 0.0009541177660529715, "loss": 0.0303, "num_input_tokens_seen": 59206528, "step": 27425 }, { "epoch": 4.474714518760196, "grad_norm": 0.18764494359493256, "learning_rate": 0.0009540879755899041, "loss": 0.1177, "num_input_tokens_seen": 59217632, "step": 27430 }, { "epoch": 4.475530179445351, "grad_norm": 0.3757596015930176, "learning_rate": 0.0009540581759241676, "loss": 0.1833, "num_input_tokens_seen": 59228128, "step": 27435 }, { "epoch": 4.476345840130506, "grad_norm": 0.1986420601606369, "learning_rate": 0.0009540283670563663, "loss": 0.1075, "num_input_tokens_seen": 59238560, "step": 27440 }, { "epoch": 4.477161500815661, "grad_norm": 0.09201784431934357, "learning_rate": 0.0009539985489871041, "loss": 0.0719, "num_input_tokens_seen": 59250624, "step": 27445 }, { "epoch": 4.4779771615008155, "grad_norm": 0.10558003187179565, "learning_rate": 0.0009539687217169855, "loss": 0.2469, "num_input_tokens_seen": 59260992, "step": 27450 }, { "epoch": 4.47879282218597, "grad_norm": 0.19885654747486115, "learning_rate": 0.0009539388852466146, "loss": 0.1899, "num_input_tokens_seen": 59272384, "step": 27455 }, { "epoch": 4.479608482871126, "grad_norm": 0.12073806673288345, "learning_rate": 0.0009539090395765966, "loss": 0.0972, "num_input_tokens_seen": 59283168, "step": 27460 }, { "epoch": 4.480424143556281, "grad_norm": 0.06492199003696442, "learning_rate": 0.000953879184707536, "loss": 0.1159, "num_input_tokens_seen": 59293888, "step": 27465 }, { "epoch": 4.481239804241436, "grad_norm": 0.03415746986865997, "learning_rate": 0.0009538493206400378, "loss": 0.1075, "num_input_tokens_seen": 59304160, "step": 27470 }, { "epoch": 4.4820554649265905, "grad_norm": 0.26859527826309204, "learning_rate": 0.0009538194473747077, "loss": 0.0596, "num_input_tokens_seen": 59315712, "step": 27475 }, { "epoch": 4.482871125611745, "grad_norm": 0.07123342901468277, "learning_rate": 0.0009537895649121504, "loss": 0.0653, "num_input_tokens_seen": 59327168, "step": 27480 }, { "epoch": 4.4836867862969, "grad_norm": 0.036786098033189774, "learning_rate": 0.0009537596732529721, "loss": 0.0857, "num_input_tokens_seen": 59337280, "step": 27485 }, { "epoch": 4.484502446982056, "grad_norm": 0.17139144241809845, "learning_rate": 0.0009537297723977784, "loss": 0.0594, "num_input_tokens_seen": 59348224, "step": 27490 }, { "epoch": 4.485318107667211, "grad_norm": 0.11064877361059189, "learning_rate": 0.0009536998623471752, "loss": 0.1071, "num_input_tokens_seen": 59358912, "step": 27495 }, { "epoch": 4.486133768352365, "grad_norm": 0.09924093633890152, "learning_rate": 0.0009536699431017688, "loss": 0.0429, "num_input_tokens_seen": 59368512, "step": 27500 }, { "epoch": 4.48694942903752, "grad_norm": 0.034761808812618256, "learning_rate": 0.0009536400146621653, "loss": 0.1475, "num_input_tokens_seen": 59378944, "step": 27505 }, { "epoch": 4.487765089722675, "grad_norm": 0.12799222767353058, "learning_rate": 0.0009536100770289717, "loss": 0.0713, "num_input_tokens_seen": 59388928, "step": 27510 }, { "epoch": 4.488580750407831, "grad_norm": 0.06082871928811073, "learning_rate": 0.0009535801302027942, "loss": 0.0973, "num_input_tokens_seen": 59399872, "step": 27515 }, { "epoch": 4.489396411092986, "grad_norm": 0.14628073573112488, "learning_rate": 0.0009535501741842401, "loss": 0.0674, "num_input_tokens_seen": 59410016, "step": 27520 }, { "epoch": 4.49021207177814, "grad_norm": 0.05691082775592804, "learning_rate": 0.0009535202089739162, "loss": 0.0798, "num_input_tokens_seen": 59419104, "step": 27525 }, { "epoch": 4.491027732463295, "grad_norm": 0.24521081149578094, "learning_rate": 0.0009534902345724301, "loss": 0.0838, "num_input_tokens_seen": 59428384, "step": 27530 }, { "epoch": 4.49184339314845, "grad_norm": 0.23419110476970673, "learning_rate": 0.000953460250980389, "loss": 0.2692, "num_input_tokens_seen": 59438304, "step": 27535 }, { "epoch": 4.492659053833605, "grad_norm": 0.06884628534317017, "learning_rate": 0.0009534302581984007, "loss": 0.0509, "num_input_tokens_seen": 59448096, "step": 27540 }, { "epoch": 4.493474714518761, "grad_norm": 0.2032497227191925, "learning_rate": 0.000953400256227073, "loss": 0.0598, "num_input_tokens_seen": 59457120, "step": 27545 }, { "epoch": 4.494290375203915, "grad_norm": 0.033530332148075104, "learning_rate": 0.0009533702450670138, "loss": 0.1134, "num_input_tokens_seen": 59467776, "step": 27550 }, { "epoch": 4.49510603588907, "grad_norm": 0.013734309002757072, "learning_rate": 0.0009533402247188317, "loss": 0.0855, "num_input_tokens_seen": 59478432, "step": 27555 }, { "epoch": 4.495921696574225, "grad_norm": 0.2584000527858734, "learning_rate": 0.0009533101951831347, "loss": 0.0944, "num_input_tokens_seen": 59488576, "step": 27560 }, { "epoch": 4.49673735725938, "grad_norm": 0.020997680723667145, "learning_rate": 0.0009532801564605315, "loss": 0.1837, "num_input_tokens_seen": 59499744, "step": 27565 }, { "epoch": 4.497553017944535, "grad_norm": 0.033963337540626526, "learning_rate": 0.000953250108551631, "loss": 0.092, "num_input_tokens_seen": 59511168, "step": 27570 }, { "epoch": 4.49836867862969, "grad_norm": 0.06273580342531204, "learning_rate": 0.0009532200514570419, "loss": 0.0754, "num_input_tokens_seen": 59521792, "step": 27575 }, { "epoch": 4.499184339314845, "grad_norm": 1.150261640548706, "learning_rate": 0.0009531899851773737, "loss": 0.1627, "num_input_tokens_seen": 59533760, "step": 27580 }, { "epoch": 4.5, "grad_norm": 0.3091362714767456, "learning_rate": 0.0009531599097132354, "loss": 0.0692, "num_input_tokens_seen": 59544032, "step": 27585 }, { "epoch": 4.500815660685155, "grad_norm": 0.298798143863678, "learning_rate": 0.0009531298250652367, "loss": 0.1744, "num_input_tokens_seen": 59554848, "step": 27590 }, { "epoch": 4.50163132137031, "grad_norm": 0.2764005661010742, "learning_rate": 0.0009530997312339873, "loss": 0.0378, "num_input_tokens_seen": 59566784, "step": 27595 }, { "epoch": 4.502446982055465, "grad_norm": 0.18775354325771332, "learning_rate": 0.000953069628220097, "loss": 0.066, "num_input_tokens_seen": 59578304, "step": 27600 }, { "epoch": 4.50326264274062, "grad_norm": 0.02123364619910717, "learning_rate": 0.0009530395160241759, "loss": 0.0483, "num_input_tokens_seen": 59588800, "step": 27605 }, { "epoch": 4.504078303425775, "grad_norm": 0.04233346879482269, "learning_rate": 0.0009530093946468343, "loss": 0.1217, "num_input_tokens_seen": 59599744, "step": 27610 }, { "epoch": 4.50489396411093, "grad_norm": 0.07643648982048035, "learning_rate": 0.0009529792640886827, "loss": 0.0943, "num_input_tokens_seen": 59611232, "step": 27615 }, { "epoch": 4.505709624796085, "grad_norm": 0.317594051361084, "learning_rate": 0.0009529491243503316, "loss": 0.2318, "num_input_tokens_seen": 59622624, "step": 27620 }, { "epoch": 4.506525285481239, "grad_norm": 0.02191939949989319, "learning_rate": 0.000952918975432392, "loss": 0.2521, "num_input_tokens_seen": 59633088, "step": 27625 }, { "epoch": 4.507340946166395, "grad_norm": 0.05196700617671013, "learning_rate": 0.0009528888173354746, "loss": 0.2042, "num_input_tokens_seen": 59644512, "step": 27630 }, { "epoch": 4.50815660685155, "grad_norm": 0.3491312861442566, "learning_rate": 0.000952858650060191, "loss": 0.2346, "num_input_tokens_seen": 59654848, "step": 27635 }, { "epoch": 4.508972267536705, "grad_norm": 0.052667226642370224, "learning_rate": 0.0009528284736071522, "loss": 0.0752, "num_input_tokens_seen": 59665376, "step": 27640 }, { "epoch": 4.50978792822186, "grad_norm": 0.0517134852707386, "learning_rate": 0.00095279828797697, "loss": 0.0467, "num_input_tokens_seen": 59676928, "step": 27645 }, { "epoch": 4.510603588907014, "grad_norm": 0.03166373446583748, "learning_rate": 0.000952768093170256, "loss": 0.06, "num_input_tokens_seen": 59687392, "step": 27650 }, { "epoch": 4.511419249592169, "grad_norm": 0.05490044131875038, "learning_rate": 0.0009527378891876223, "loss": 0.0331, "num_input_tokens_seen": 59699008, "step": 27655 }, { "epoch": 4.512234910277325, "grad_norm": 0.12731541693210602, "learning_rate": 0.0009527076760296809, "loss": 0.0861, "num_input_tokens_seen": 59709760, "step": 27660 }, { "epoch": 4.51305057096248, "grad_norm": 0.00876610353589058, "learning_rate": 0.0009526774536970442, "loss": 0.0176, "num_input_tokens_seen": 59720416, "step": 27665 }, { "epoch": 4.513866231647635, "grad_norm": 0.023783892393112183, "learning_rate": 0.0009526472221903247, "loss": 0.0955, "num_input_tokens_seen": 59732224, "step": 27670 }, { "epoch": 4.514681892332789, "grad_norm": 0.10947423428297043, "learning_rate": 0.0009526169815101349, "loss": 0.1511, "num_input_tokens_seen": 59742016, "step": 27675 }, { "epoch": 4.515497553017944, "grad_norm": 0.033525265753269196, "learning_rate": 0.0009525867316570877, "loss": 0.1652, "num_input_tokens_seen": 59752448, "step": 27680 }, { "epoch": 4.5163132137031, "grad_norm": 0.3412551283836365, "learning_rate": 0.0009525564726317963, "loss": 0.1189, "num_input_tokens_seen": 59762560, "step": 27685 }, { "epoch": 4.517128874388255, "grad_norm": 0.30381548404693604, "learning_rate": 0.000952526204434874, "loss": 0.1528, "num_input_tokens_seen": 59774432, "step": 27690 }, { "epoch": 4.5179445350734095, "grad_norm": 0.050089091062545776, "learning_rate": 0.000952495927066934, "loss": 0.0337, "num_input_tokens_seen": 59785408, "step": 27695 }, { "epoch": 4.518760195758564, "grad_norm": 0.25242364406585693, "learning_rate": 0.00095246564052859, "loss": 0.1278, "num_input_tokens_seen": 59795840, "step": 27700 }, { "epoch": 4.519575856443719, "grad_norm": 0.06711065024137497, "learning_rate": 0.0009524353448204558, "loss": 0.091, "num_input_tokens_seen": 59806560, "step": 27705 }, { "epoch": 4.520391517128875, "grad_norm": 0.26348116993904114, "learning_rate": 0.0009524050399431454, "loss": 0.1, "num_input_tokens_seen": 59817056, "step": 27710 }, { "epoch": 4.52120717781403, "grad_norm": 0.2639275789260864, "learning_rate": 0.0009523747258972729, "loss": 0.1917, "num_input_tokens_seen": 59827904, "step": 27715 }, { "epoch": 4.5220228384991845, "grad_norm": 0.06595410406589508, "learning_rate": 0.0009523444026834528, "loss": 0.0678, "num_input_tokens_seen": 59839360, "step": 27720 }, { "epoch": 4.522838499184339, "grad_norm": 0.0727091059088707, "learning_rate": 0.0009523140703022995, "loss": 0.0429, "num_input_tokens_seen": 59849952, "step": 27725 }, { "epoch": 4.523654159869494, "grad_norm": 0.335652619600296, "learning_rate": 0.0009522837287544277, "loss": 0.0643, "num_input_tokens_seen": 59860384, "step": 27730 }, { "epoch": 4.524469820554649, "grad_norm": 0.05466686189174652, "learning_rate": 0.0009522533780404526, "loss": 0.1141, "num_input_tokens_seen": 59871584, "step": 27735 }, { "epoch": 4.525285481239804, "grad_norm": 0.9618749618530273, "learning_rate": 0.0009522230181609888, "loss": 0.1526, "num_input_tokens_seen": 59882784, "step": 27740 }, { "epoch": 4.5261011419249595, "grad_norm": 0.23050382733345032, "learning_rate": 0.000952192649116652, "loss": 0.0758, "num_input_tokens_seen": 59892448, "step": 27745 }, { "epoch": 4.526916802610114, "grad_norm": 0.023088475689291954, "learning_rate": 0.0009521622709080574, "loss": 0.0212, "num_input_tokens_seen": 59903616, "step": 27750 }, { "epoch": 4.527732463295269, "grad_norm": 0.012031014077365398, "learning_rate": 0.0009521318835358208, "loss": 0.0247, "num_input_tokens_seen": 59914336, "step": 27755 }, { "epoch": 4.528548123980424, "grad_norm": 0.532911479473114, "learning_rate": 0.000952101487000558, "loss": 0.1668, "num_input_tokens_seen": 59926240, "step": 27760 }, { "epoch": 4.529363784665579, "grad_norm": 0.18051278591156006, "learning_rate": 0.0009520710813028852, "loss": 0.067, "num_input_tokens_seen": 59937408, "step": 27765 }, { "epoch": 4.5301794453507345, "grad_norm": 0.14865101873874664, "learning_rate": 0.0009520406664434183, "loss": 0.1237, "num_input_tokens_seen": 59947872, "step": 27770 }, { "epoch": 4.530995106035889, "grad_norm": 0.010670283809304237, "learning_rate": 0.0009520102424227739, "loss": 0.0499, "num_input_tokens_seen": 59958592, "step": 27775 }, { "epoch": 4.531810766721044, "grad_norm": 0.06124710664153099, "learning_rate": 0.0009519798092415683, "loss": 0.0676, "num_input_tokens_seen": 59969568, "step": 27780 }, { "epoch": 4.532626427406199, "grad_norm": 0.017388418316841125, "learning_rate": 0.0009519493669004189, "loss": 0.0682, "num_input_tokens_seen": 59978496, "step": 27785 }, { "epoch": 4.533442088091354, "grad_norm": 0.08035194128751755, "learning_rate": 0.0009519189153999419, "loss": 0.0959, "num_input_tokens_seen": 59989760, "step": 27790 }, { "epoch": 4.5342577487765094, "grad_norm": 0.9899337887763977, "learning_rate": 0.0009518884547407549, "loss": 0.0851, "num_input_tokens_seen": 60001216, "step": 27795 }, { "epoch": 4.535073409461664, "grad_norm": 0.09079289436340332, "learning_rate": 0.0009518579849234752, "loss": 0.1676, "num_input_tokens_seen": 60010240, "step": 27800 }, { "epoch": 4.535889070146819, "grad_norm": 0.475882887840271, "learning_rate": 0.00095182750594872, "loss": 0.1955, "num_input_tokens_seen": 60021312, "step": 27805 }, { "epoch": 4.536704730831974, "grad_norm": 0.5861313343048096, "learning_rate": 0.0009517970178171074, "loss": 0.0653, "num_input_tokens_seen": 60030912, "step": 27810 }, { "epoch": 4.537520391517129, "grad_norm": 0.058709632605314255, "learning_rate": 0.000951766520529255, "loss": 0.0565, "num_input_tokens_seen": 60042144, "step": 27815 }, { "epoch": 4.5383360522022835, "grad_norm": 0.15842676162719727, "learning_rate": 0.0009517360140857809, "loss": 0.2073, "num_input_tokens_seen": 60054272, "step": 27820 }, { "epoch": 4.539151712887438, "grad_norm": 0.12224596738815308, "learning_rate": 0.0009517054984873035, "loss": 0.0392, "num_input_tokens_seen": 60066336, "step": 27825 }, { "epoch": 4.539967373572594, "grad_norm": 0.03720884397625923, "learning_rate": 0.0009516749737344412, "loss": 0.0513, "num_input_tokens_seen": 60077600, "step": 27830 }, { "epoch": 4.540783034257749, "grad_norm": 0.03292649984359741, "learning_rate": 0.0009516444398278125, "loss": 0.0178, "num_input_tokens_seen": 60088128, "step": 27835 }, { "epoch": 4.541598694942904, "grad_norm": 0.34509697556495667, "learning_rate": 0.0009516138967680363, "loss": 0.2017, "num_input_tokens_seen": 60099936, "step": 27840 }, { "epoch": 4.5424143556280585, "grad_norm": 0.06459648907184601, "learning_rate": 0.0009515833445557314, "loss": 0.0928, "num_input_tokens_seen": 60111136, "step": 27845 }, { "epoch": 4.543230016313213, "grad_norm": 0.039163604378700256, "learning_rate": 0.0009515527831915174, "loss": 0.2193, "num_input_tokens_seen": 60122080, "step": 27850 }, { "epoch": 4.544045676998369, "grad_norm": 0.02926347777247429, "learning_rate": 0.0009515222126760132, "loss": 0.0298, "num_input_tokens_seen": 60131840, "step": 27855 }, { "epoch": 4.544861337683524, "grad_norm": 0.016035068780183792, "learning_rate": 0.0009514916330098386, "loss": 0.0316, "num_input_tokens_seen": 60143136, "step": 27860 }, { "epoch": 4.545676998368679, "grad_norm": 0.41988199949264526, "learning_rate": 0.0009514610441936133, "loss": 0.1433, "num_input_tokens_seen": 60152992, "step": 27865 }, { "epoch": 4.5464926590538335, "grad_norm": 0.5281210541725159, "learning_rate": 0.0009514304462279574, "loss": 0.1545, "num_input_tokens_seen": 60163936, "step": 27870 }, { "epoch": 4.547308319738988, "grad_norm": 0.16280846297740936, "learning_rate": 0.0009513998391134906, "loss": 0.0625, "num_input_tokens_seen": 60175264, "step": 27875 }, { "epoch": 4.548123980424144, "grad_norm": 0.06196806579828262, "learning_rate": 0.0009513692228508336, "loss": 0.0912, "num_input_tokens_seen": 60185856, "step": 27880 }, { "epoch": 4.548939641109299, "grad_norm": 0.07203327864408493, "learning_rate": 0.0009513385974406066, "loss": 0.1174, "num_input_tokens_seen": 60196320, "step": 27885 }, { "epoch": 4.549755301794454, "grad_norm": 0.39255791902542114, "learning_rate": 0.0009513079628834305, "loss": 0.2093, "num_input_tokens_seen": 60207232, "step": 27890 }, { "epoch": 4.5505709624796085, "grad_norm": 0.17047370970249176, "learning_rate": 0.0009512773191799258, "loss": 0.1221, "num_input_tokens_seen": 60217728, "step": 27895 }, { "epoch": 4.551386623164763, "grad_norm": 0.05832361802458763, "learning_rate": 0.0009512466663307138, "loss": 0.0821, "num_input_tokens_seen": 60227616, "step": 27900 }, { "epoch": 4.552202283849918, "grad_norm": 0.07795868813991547, "learning_rate": 0.0009512160043364157, "loss": 0.0459, "num_input_tokens_seen": 60238528, "step": 27905 }, { "epoch": 4.553017944535073, "grad_norm": 0.373523086309433, "learning_rate": 0.0009511853331976527, "loss": 0.2657, "num_input_tokens_seen": 60250240, "step": 27910 }, { "epoch": 4.553833605220229, "grad_norm": 0.06490880995988846, "learning_rate": 0.0009511546529150467, "loss": 0.1595, "num_input_tokens_seen": 60261344, "step": 27915 }, { "epoch": 4.554649265905383, "grad_norm": 0.08953238278627396, "learning_rate": 0.0009511239634892195, "loss": 0.0465, "num_input_tokens_seen": 60272384, "step": 27920 }, { "epoch": 4.555464926590538, "grad_norm": 0.04774772748351097, "learning_rate": 0.0009510932649207926, "loss": 0.1101, "num_input_tokens_seen": 60283328, "step": 27925 }, { "epoch": 4.556280587275693, "grad_norm": 0.020910922437906265, "learning_rate": 0.0009510625572103886, "loss": 0.078, "num_input_tokens_seen": 60294720, "step": 27930 }, { "epoch": 4.557096247960848, "grad_norm": 0.055919099599123, "learning_rate": 0.0009510318403586297, "loss": 0.1233, "num_input_tokens_seen": 60305152, "step": 27935 }, { "epoch": 4.557911908646004, "grad_norm": 0.053642354905605316, "learning_rate": 0.0009510011143661382, "loss": 0.092, "num_input_tokens_seen": 60316288, "step": 27940 }, { "epoch": 4.558727569331158, "grad_norm": 0.06777095794677734, "learning_rate": 0.0009509703792335371, "loss": 0.1594, "num_input_tokens_seen": 60327104, "step": 27945 }, { "epoch": 4.559543230016313, "grad_norm": 0.07216033339500427, "learning_rate": 0.0009509396349614492, "loss": 0.0388, "num_input_tokens_seen": 60336288, "step": 27950 }, { "epoch": 4.560358890701468, "grad_norm": 0.43026602268218994, "learning_rate": 0.0009509088815504975, "loss": 0.0574, "num_input_tokens_seen": 60346656, "step": 27955 }, { "epoch": 4.561174551386623, "grad_norm": 0.37049925327301025, "learning_rate": 0.0009508781190013053, "loss": 0.1895, "num_input_tokens_seen": 60356800, "step": 27960 }, { "epoch": 4.561990212071779, "grad_norm": 0.2918992042541504, "learning_rate": 0.0009508473473144961, "loss": 0.0929, "num_input_tokens_seen": 60367488, "step": 27965 }, { "epoch": 4.562805872756933, "grad_norm": 0.086954265832901, "learning_rate": 0.0009508165664906933, "loss": 0.3417, "num_input_tokens_seen": 60377472, "step": 27970 }, { "epoch": 4.563621533442088, "grad_norm": 0.18810193240642548, "learning_rate": 0.000950785776530521, "loss": 0.1634, "num_input_tokens_seen": 60387168, "step": 27975 }, { "epoch": 4.564437194127243, "grad_norm": 0.31819218397140503, "learning_rate": 0.0009507549774346029, "loss": 0.156, "num_input_tokens_seen": 60396416, "step": 27980 }, { "epoch": 4.565252854812398, "grad_norm": 0.10246092826128006, "learning_rate": 0.0009507241692035635, "loss": 0.1482, "num_input_tokens_seen": 60408256, "step": 27985 }, { "epoch": 4.566068515497553, "grad_norm": 0.1291825920343399, "learning_rate": 0.0009506933518380272, "loss": 0.0856, "num_input_tokens_seen": 60419840, "step": 27990 }, { "epoch": 4.566884176182708, "grad_norm": 0.1122777983546257, "learning_rate": 0.0009506625253386181, "loss": 0.0501, "num_input_tokens_seen": 60431936, "step": 27995 }, { "epoch": 4.567699836867863, "grad_norm": 0.023731501772999763, "learning_rate": 0.0009506316897059614, "loss": 0.0425, "num_input_tokens_seen": 60442624, "step": 28000 }, { "epoch": 4.568515497553018, "grad_norm": 0.008432602509856224, "learning_rate": 0.0009506008449406818, "loss": 0.1376, "num_input_tokens_seen": 60452992, "step": 28005 }, { "epoch": 4.569331158238173, "grad_norm": 0.09145193547010422, "learning_rate": 0.0009505699910434043, "loss": 0.0987, "num_input_tokens_seen": 60463008, "step": 28010 }, { "epoch": 4.570146818923328, "grad_norm": 0.14021052420139313, "learning_rate": 0.0009505391280147545, "loss": 0.0776, "num_input_tokens_seen": 60473152, "step": 28015 }, { "epoch": 4.5709624796084825, "grad_norm": 0.051749154925346375, "learning_rate": 0.0009505082558553577, "loss": 0.0504, "num_input_tokens_seen": 60484576, "step": 28020 }, { "epoch": 4.571778140293638, "grad_norm": 0.14295457303524017, "learning_rate": 0.0009504773745658395, "loss": 0.0659, "num_input_tokens_seen": 60495904, "step": 28025 }, { "epoch": 4.572593800978793, "grad_norm": 0.18052180111408234, "learning_rate": 0.0009504464841468259, "loss": 0.0558, "num_input_tokens_seen": 60507520, "step": 28030 }, { "epoch": 4.573409461663948, "grad_norm": 0.32684656977653503, "learning_rate": 0.000950415584598943, "loss": 0.3463, "num_input_tokens_seen": 60517760, "step": 28035 }, { "epoch": 4.574225122349103, "grad_norm": 0.02932673878967762, "learning_rate": 0.0009503846759228167, "loss": 0.0531, "num_input_tokens_seen": 60528576, "step": 28040 }, { "epoch": 4.575040783034257, "grad_norm": 0.1805507093667984, "learning_rate": 0.0009503537581190736, "loss": 0.0702, "num_input_tokens_seen": 60538176, "step": 28045 }, { "epoch": 4.575856443719413, "grad_norm": 0.015057875774800777, "learning_rate": 0.0009503228311883402, "loss": 0.0504, "num_input_tokens_seen": 60549920, "step": 28050 }, { "epoch": 4.576672104404568, "grad_norm": 0.2520870268344879, "learning_rate": 0.0009502918951312436, "loss": 0.2374, "num_input_tokens_seen": 60560256, "step": 28055 }, { "epoch": 4.577487765089723, "grad_norm": 0.4409969747066498, "learning_rate": 0.0009502609499484104, "loss": 0.0882, "num_input_tokens_seen": 60571616, "step": 28060 }, { "epoch": 4.578303425774878, "grad_norm": 0.033272627741098404, "learning_rate": 0.0009502299956404679, "loss": 0.0149, "num_input_tokens_seen": 60582656, "step": 28065 }, { "epoch": 4.579119086460032, "grad_norm": 0.05307943373918533, "learning_rate": 0.0009501990322080433, "loss": 0.0772, "num_input_tokens_seen": 60594336, "step": 28070 }, { "epoch": 4.579934747145187, "grad_norm": 0.014589712955057621, "learning_rate": 0.0009501680596517641, "loss": 0.0815, "num_input_tokens_seen": 60603488, "step": 28075 }, { "epoch": 4.580750407830343, "grad_norm": 0.13704244792461395, "learning_rate": 0.0009501370779722582, "loss": 0.0811, "num_input_tokens_seen": 60615168, "step": 28080 }, { "epoch": 4.581566068515498, "grad_norm": 0.07694116234779358, "learning_rate": 0.0009501060871701534, "loss": 0.0314, "num_input_tokens_seen": 60626848, "step": 28085 }, { "epoch": 4.582381729200653, "grad_norm": 0.07556813955307007, "learning_rate": 0.0009500750872460778, "loss": 0.1362, "num_input_tokens_seen": 60636704, "step": 28090 }, { "epoch": 4.583197389885807, "grad_norm": 0.2576342523097992, "learning_rate": 0.0009500440782006594, "loss": 0.2279, "num_input_tokens_seen": 60648160, "step": 28095 }, { "epoch": 4.584013050570962, "grad_norm": 0.09015540033578873, "learning_rate": 0.000950013060034527, "loss": 0.1763, "num_input_tokens_seen": 60660256, "step": 28100 }, { "epoch": 4.584828711256117, "grad_norm": 0.43354761600494385, "learning_rate": 0.0009499820327483091, "loss": 0.08, "num_input_tokens_seen": 60671264, "step": 28105 }, { "epoch": 4.585644371941273, "grad_norm": 0.4234149754047394, "learning_rate": 0.0009499509963426342, "loss": 0.2154, "num_input_tokens_seen": 60681632, "step": 28110 }, { "epoch": 4.5864600326264275, "grad_norm": 0.09801032394170761, "learning_rate": 0.0009499199508181318, "loss": 0.1627, "num_input_tokens_seen": 60693280, "step": 28115 }, { "epoch": 4.587275693311582, "grad_norm": 0.028818046674132347, "learning_rate": 0.0009498888961754308, "loss": 0.0507, "num_input_tokens_seen": 60704032, "step": 28120 }, { "epoch": 4.588091353996737, "grad_norm": 0.1119341105222702, "learning_rate": 0.0009498578324151606, "loss": 0.0506, "num_input_tokens_seen": 60715392, "step": 28125 }, { "epoch": 4.588907014681892, "grad_norm": 0.21024644374847412, "learning_rate": 0.0009498267595379506, "loss": 0.1228, "num_input_tokens_seen": 60726208, "step": 28130 }, { "epoch": 4.589722675367048, "grad_norm": 0.029708225280046463, "learning_rate": 0.0009497956775444307, "loss": 0.1748, "num_input_tokens_seen": 60736512, "step": 28135 }, { "epoch": 4.5905383360522025, "grad_norm": 0.10178112983703613, "learning_rate": 0.0009497645864352309, "loss": 0.0572, "num_input_tokens_seen": 60748192, "step": 28140 }, { "epoch": 4.591353996737357, "grad_norm": 0.23327049612998962, "learning_rate": 0.0009497334862109812, "loss": 0.1509, "num_input_tokens_seen": 60758496, "step": 28145 }, { "epoch": 4.592169657422512, "grad_norm": 0.1691860556602478, "learning_rate": 0.0009497023768723119, "loss": 0.2637, "num_input_tokens_seen": 60768928, "step": 28150 }, { "epoch": 4.592985318107667, "grad_norm": 0.15249758958816528, "learning_rate": 0.0009496712584198532, "loss": 0.0474, "num_input_tokens_seen": 60779968, "step": 28155 }, { "epoch": 4.593800978792823, "grad_norm": 0.1041051596403122, "learning_rate": 0.0009496401308542363, "loss": 0.1744, "num_input_tokens_seen": 60790752, "step": 28160 }, { "epoch": 4.5946166394779775, "grad_norm": 0.16467103362083435, "learning_rate": 0.0009496089941760915, "loss": 0.0868, "num_input_tokens_seen": 60801376, "step": 28165 }, { "epoch": 4.595432300163132, "grad_norm": 0.11182987689971924, "learning_rate": 0.0009495778483860502, "loss": 0.1276, "num_input_tokens_seen": 60811808, "step": 28170 }, { "epoch": 4.596247960848287, "grad_norm": 0.08140698820352554, "learning_rate": 0.0009495466934847434, "loss": 0.1185, "num_input_tokens_seen": 60823072, "step": 28175 }, { "epoch": 4.597063621533442, "grad_norm": 0.44066882133483887, "learning_rate": 0.0009495155294728026, "loss": 0.2151, "num_input_tokens_seen": 60836096, "step": 28180 }, { "epoch": 4.597879282218597, "grad_norm": 0.09586780518293381, "learning_rate": 0.0009494843563508594, "loss": 0.0668, "num_input_tokens_seen": 60847680, "step": 28185 }, { "epoch": 4.598694942903752, "grad_norm": 0.07137401401996613, "learning_rate": 0.0009494531741195454, "loss": 0.071, "num_input_tokens_seen": 60859552, "step": 28190 }, { "epoch": 4.599510603588907, "grad_norm": 0.21888108551502228, "learning_rate": 0.0009494219827794928, "loss": 0.2128, "num_input_tokens_seen": 60870016, "step": 28195 }, { "epoch": 4.600326264274062, "grad_norm": 0.07460836321115494, "learning_rate": 0.0009493907823313334, "loss": 0.044, "num_input_tokens_seen": 60880096, "step": 28200 }, { "epoch": 4.601141924959217, "grad_norm": 0.018966104835271835, "learning_rate": 0.0009493595727756998, "loss": 0.2319, "num_input_tokens_seen": 60890976, "step": 28205 }, { "epoch": 4.601957585644372, "grad_norm": 0.11741123348474503, "learning_rate": 0.0009493283541132245, "loss": 0.0926, "num_input_tokens_seen": 60901824, "step": 28210 }, { "epoch": 4.602773246329527, "grad_norm": 0.19202911853790283, "learning_rate": 0.0009492971263445401, "loss": 0.0951, "num_input_tokens_seen": 60912832, "step": 28215 }, { "epoch": 4.603588907014682, "grad_norm": 0.329880952835083, "learning_rate": 0.0009492658894702792, "loss": 0.1464, "num_input_tokens_seen": 60924800, "step": 28220 }, { "epoch": 4.604404567699837, "grad_norm": 0.04760138690471649, "learning_rate": 0.0009492346434910753, "loss": 0.1413, "num_input_tokens_seen": 60934624, "step": 28225 }, { "epoch": 4.605220228384992, "grad_norm": 0.0375560000538826, "learning_rate": 0.0009492033884075615, "loss": 0.0936, "num_input_tokens_seen": 60944320, "step": 28230 }, { "epoch": 4.606035889070147, "grad_norm": 0.0230337455868721, "learning_rate": 0.000949172124220371, "loss": 0.0568, "num_input_tokens_seen": 60954656, "step": 28235 }, { "epoch": 4.6068515497553015, "grad_norm": 0.027361124753952026, "learning_rate": 0.0009491408509301378, "loss": 0.0586, "num_input_tokens_seen": 60965472, "step": 28240 }, { "epoch": 4.607667210440457, "grad_norm": 0.14101111888885498, "learning_rate": 0.0009491095685374954, "loss": 0.1584, "num_input_tokens_seen": 60977216, "step": 28245 }, { "epoch": 4.608482871125612, "grad_norm": 0.06582579761743546, "learning_rate": 0.0009490782770430777, "loss": 0.0327, "num_input_tokens_seen": 60988448, "step": 28250 }, { "epoch": 4.609298531810767, "grad_norm": 0.09056776762008667, "learning_rate": 0.0009490469764475191, "loss": 0.1582, "num_input_tokens_seen": 60998080, "step": 28255 }, { "epoch": 4.610114192495922, "grad_norm": 0.24751944839954376, "learning_rate": 0.0009490156667514541, "loss": 0.1159, "num_input_tokens_seen": 61009760, "step": 28260 }, { "epoch": 4.6109298531810765, "grad_norm": 0.0399743914604187, "learning_rate": 0.0009489843479555167, "loss": 0.04, "num_input_tokens_seen": 61020032, "step": 28265 }, { "epoch": 4.611745513866231, "grad_norm": 0.2925543487071991, "learning_rate": 0.000948953020060342, "loss": 0.137, "num_input_tokens_seen": 61030080, "step": 28270 }, { "epoch": 4.612561174551386, "grad_norm": 0.08580419421195984, "learning_rate": 0.0009489216830665649, "loss": 0.2296, "num_input_tokens_seen": 61041056, "step": 28275 }, { "epoch": 4.613376835236542, "grad_norm": 0.25643032789230347, "learning_rate": 0.0009488903369748203, "loss": 0.0933, "num_input_tokens_seen": 61051616, "step": 28280 }, { "epoch": 4.614192495921697, "grad_norm": 0.2385556548833847, "learning_rate": 0.0009488589817857435, "loss": 0.152, "num_input_tokens_seen": 61062368, "step": 28285 }, { "epoch": 4.6150081566068515, "grad_norm": 0.06179996207356453, "learning_rate": 0.0009488276174999702, "loss": 0.0256, "num_input_tokens_seen": 61072288, "step": 28290 }, { "epoch": 4.615823817292006, "grad_norm": 0.017545096576213837, "learning_rate": 0.0009487962441181357, "loss": 0.1244, "num_input_tokens_seen": 61083072, "step": 28295 }, { "epoch": 4.616639477977161, "grad_norm": 0.19974499940872192, "learning_rate": 0.0009487648616408762, "loss": 0.1108, "num_input_tokens_seen": 61093696, "step": 28300 }, { "epoch": 4.617455138662317, "grad_norm": 0.07213705778121948, "learning_rate": 0.0009487334700688273, "loss": 0.0713, "num_input_tokens_seen": 61105184, "step": 28305 }, { "epoch": 4.618270799347472, "grad_norm": 0.016088631004095078, "learning_rate": 0.0009487020694026254, "loss": 0.1797, "num_input_tokens_seen": 61116672, "step": 28310 }, { "epoch": 4.6190864600326265, "grad_norm": 0.0635932981967926, "learning_rate": 0.0009486706596429068, "loss": 0.038, "num_input_tokens_seen": 61128096, "step": 28315 }, { "epoch": 4.619902120717781, "grad_norm": 0.2019156962633133, "learning_rate": 0.0009486392407903082, "loss": 0.0932, "num_input_tokens_seen": 61139456, "step": 28320 }, { "epoch": 4.620717781402936, "grad_norm": 0.37027159333229065, "learning_rate": 0.000948607812845466, "loss": 0.2531, "num_input_tokens_seen": 61150112, "step": 28325 }, { "epoch": 4.621533442088092, "grad_norm": 0.09408716857433319, "learning_rate": 0.0009485763758090176, "loss": 0.0528, "num_input_tokens_seen": 61161280, "step": 28330 }, { "epoch": 4.622349102773247, "grad_norm": 0.1147380992770195, "learning_rate": 0.0009485449296815999, "loss": 0.1622, "num_input_tokens_seen": 61171616, "step": 28335 }, { "epoch": 4.623164763458401, "grad_norm": 0.3761114776134491, "learning_rate": 0.00094851347446385, "loss": 0.2054, "num_input_tokens_seen": 61182112, "step": 28340 }, { "epoch": 4.623980424143556, "grad_norm": 0.06285937130451202, "learning_rate": 0.0009484820101564058, "loss": 0.0385, "num_input_tokens_seen": 61193568, "step": 28345 }, { "epoch": 4.624796084828711, "grad_norm": 0.11572349071502686, "learning_rate": 0.0009484505367599045, "loss": 0.1166, "num_input_tokens_seen": 61204576, "step": 28350 }, { "epoch": 4.625611745513866, "grad_norm": 0.3199574649333954, "learning_rate": 0.0009484190542749844, "loss": 0.1668, "num_input_tokens_seen": 61215008, "step": 28355 }, { "epoch": 4.626427406199021, "grad_norm": 0.28772443532943726, "learning_rate": 0.0009483875627022831, "loss": 0.2416, "num_input_tokens_seen": 61226496, "step": 28360 }, { "epoch": 4.627243066884176, "grad_norm": 0.14901868999004364, "learning_rate": 0.0009483560620424391, "loss": 0.1247, "num_input_tokens_seen": 61236928, "step": 28365 }, { "epoch": 4.628058727569331, "grad_norm": 0.2141197770833969, "learning_rate": 0.0009483245522960909, "loss": 0.1105, "num_input_tokens_seen": 61247904, "step": 28370 }, { "epoch": 4.628874388254486, "grad_norm": 0.37782979011535645, "learning_rate": 0.0009482930334638766, "loss": 0.2187, "num_input_tokens_seen": 61257696, "step": 28375 }, { "epoch": 4.629690048939641, "grad_norm": 0.3024876117706299, "learning_rate": 0.0009482615055464354, "loss": 0.0889, "num_input_tokens_seen": 61267936, "step": 28380 }, { "epoch": 4.630505709624796, "grad_norm": 0.17452985048294067, "learning_rate": 0.0009482299685444062, "loss": 0.0819, "num_input_tokens_seen": 61277888, "step": 28385 }, { "epoch": 4.631321370309951, "grad_norm": 0.2274598926305771, "learning_rate": 0.0009481984224584279, "loss": 0.0447, "num_input_tokens_seen": 61289120, "step": 28390 }, { "epoch": 4.632137030995106, "grad_norm": 0.02358092926442623, "learning_rate": 0.0009481668672891401, "loss": 0.118, "num_input_tokens_seen": 61300064, "step": 28395 }, { "epoch": 4.632952691680261, "grad_norm": 0.018093647435307503, "learning_rate": 0.0009481353030371822, "loss": 0.2195, "num_input_tokens_seen": 61310496, "step": 28400 }, { "epoch": 4.633768352365416, "grad_norm": 0.04678453877568245, "learning_rate": 0.0009481037297031939, "loss": 0.0395, "num_input_tokens_seen": 61321312, "step": 28405 }, { "epoch": 4.634584013050571, "grad_norm": 0.027887552976608276, "learning_rate": 0.0009480721472878151, "loss": 0.0278, "num_input_tokens_seen": 61330848, "step": 28410 }, { "epoch": 4.635399673735726, "grad_norm": 0.010929257608950138, "learning_rate": 0.0009480405557916858, "loss": 0.1273, "num_input_tokens_seen": 61342048, "step": 28415 }, { "epoch": 4.636215334420881, "grad_norm": 0.03942070156335831, "learning_rate": 0.0009480089552154461, "loss": 0.1061, "num_input_tokens_seen": 61351040, "step": 28420 }, { "epoch": 4.637030995106036, "grad_norm": 0.02512892708182335, "learning_rate": 0.0009479773455597367, "loss": 0.067, "num_input_tokens_seen": 61361856, "step": 28425 }, { "epoch": 4.637846655791191, "grad_norm": 0.07515872269868851, "learning_rate": 0.0009479457268251981, "loss": 0.0572, "num_input_tokens_seen": 61372608, "step": 28430 }, { "epoch": 4.638662316476346, "grad_norm": 0.1327066272497177, "learning_rate": 0.0009479140990124711, "loss": 0.0589, "num_input_tokens_seen": 61382688, "step": 28435 }, { "epoch": 4.6394779771615005, "grad_norm": 0.2763438820838928, "learning_rate": 0.0009478824621221967, "loss": 0.2558, "num_input_tokens_seen": 61392672, "step": 28440 }, { "epoch": 4.640293637846656, "grad_norm": 0.04280819371342659, "learning_rate": 0.0009478508161550159, "loss": 0.0531, "num_input_tokens_seen": 61403936, "step": 28445 }, { "epoch": 4.641109298531811, "grad_norm": 0.26437467336654663, "learning_rate": 0.0009478191611115702, "loss": 0.059, "num_input_tokens_seen": 61415520, "step": 28450 }, { "epoch": 4.641924959216966, "grad_norm": 0.02384626492857933, "learning_rate": 0.0009477874969925011, "loss": 0.1528, "num_input_tokens_seen": 61425952, "step": 28455 }, { "epoch": 4.642740619902121, "grad_norm": 0.30584952235221863, "learning_rate": 0.0009477558237984503, "loss": 0.2125, "num_input_tokens_seen": 61435136, "step": 28460 }, { "epoch": 4.643556280587275, "grad_norm": 0.09020836651325226, "learning_rate": 0.0009477241415300599, "loss": 0.1508, "num_input_tokens_seen": 61446496, "step": 28465 }, { "epoch": 4.64437194127243, "grad_norm": 0.19583779573440552, "learning_rate": 0.0009476924501879715, "loss": 0.1607, "num_input_tokens_seen": 61456640, "step": 28470 }, { "epoch": 4.645187601957586, "grad_norm": 0.1582312434911728, "learning_rate": 0.0009476607497728279, "loss": 0.1313, "num_input_tokens_seen": 61466880, "step": 28475 }, { "epoch": 4.646003262642741, "grad_norm": 0.019827794283628464, "learning_rate": 0.0009476290402852712, "loss": 0.1118, "num_input_tokens_seen": 61477984, "step": 28480 }, { "epoch": 4.646818923327896, "grad_norm": 0.1482212096452713, "learning_rate": 0.0009475973217259442, "loss": 0.0515, "num_input_tokens_seen": 61487904, "step": 28485 }, { "epoch": 4.64763458401305, "grad_norm": 0.25197887420654297, "learning_rate": 0.0009475655940954896, "loss": 0.1329, "num_input_tokens_seen": 61499680, "step": 28490 }, { "epoch": 4.648450244698205, "grad_norm": 0.017045412212610245, "learning_rate": 0.0009475338573945504, "loss": 0.0544, "num_input_tokens_seen": 61511072, "step": 28495 }, { "epoch": 4.649265905383361, "grad_norm": 0.2887918949127197, "learning_rate": 0.0009475021116237699, "loss": 0.1577, "num_input_tokens_seen": 61520992, "step": 28500 }, { "epoch": 4.650081566068516, "grad_norm": 0.1841408908367157, "learning_rate": 0.0009474703567837915, "loss": 0.1086, "num_input_tokens_seen": 61531872, "step": 28505 }, { "epoch": 4.650897226753671, "grad_norm": 0.1422518640756607, "learning_rate": 0.0009474385928752585, "loss": 0.0806, "num_input_tokens_seen": 61542624, "step": 28510 }, { "epoch": 4.651712887438825, "grad_norm": 0.22157488763332367, "learning_rate": 0.0009474068198988151, "loss": 0.056, "num_input_tokens_seen": 61554368, "step": 28515 }, { "epoch": 4.65252854812398, "grad_norm": 0.1017087996006012, "learning_rate": 0.0009473750378551046, "loss": 0.0549, "num_input_tokens_seen": 61565792, "step": 28520 }, { "epoch": 4.653344208809135, "grad_norm": 0.014127549715340137, "learning_rate": 0.0009473432467447715, "loss": 0.023, "num_input_tokens_seen": 61576576, "step": 28525 }, { "epoch": 4.654159869494291, "grad_norm": 0.3358805477619171, "learning_rate": 0.00094731144656846, "loss": 0.0733, "num_input_tokens_seen": 61587680, "step": 28530 }, { "epoch": 4.6549755301794455, "grad_norm": 0.17817462980747223, "learning_rate": 0.0009472796373268147, "loss": 0.1124, "num_input_tokens_seen": 61596832, "step": 28535 }, { "epoch": 4.6557911908646, "grad_norm": 0.05175946652889252, "learning_rate": 0.00094724781902048, "loss": 0.0243, "num_input_tokens_seen": 61608384, "step": 28540 }, { "epoch": 4.656606851549755, "grad_norm": 0.36476796865463257, "learning_rate": 0.0009472159916501011, "loss": 0.2223, "num_input_tokens_seen": 61619776, "step": 28545 }, { "epoch": 4.65742251223491, "grad_norm": 0.030783280730247498, "learning_rate": 0.0009471841552163225, "loss": 0.1694, "num_input_tokens_seen": 61630048, "step": 28550 }, { "epoch": 4.658238172920065, "grad_norm": 0.15471051633358002, "learning_rate": 0.0009471523097197898, "loss": 0.1001, "num_input_tokens_seen": 61638528, "step": 28555 }, { "epoch": 4.6590538336052205, "grad_norm": 0.013789483346045017, "learning_rate": 0.0009471204551611483, "loss": 0.1014, "num_input_tokens_seen": 61650016, "step": 28560 }, { "epoch": 4.659869494290375, "grad_norm": 0.41173651814460754, "learning_rate": 0.0009470885915410437, "loss": 0.0858, "num_input_tokens_seen": 61661248, "step": 28565 }, { "epoch": 4.66068515497553, "grad_norm": 0.13466188311576843, "learning_rate": 0.0009470567188601214, "loss": 0.1178, "num_input_tokens_seen": 61671552, "step": 28570 }, { "epoch": 4.661500815660685, "grad_norm": 0.4683748185634613, "learning_rate": 0.0009470248371190277, "loss": 0.1633, "num_input_tokens_seen": 61682944, "step": 28575 }, { "epoch": 4.66231647634584, "grad_norm": 0.32457029819488525, "learning_rate": 0.0009469929463184086, "loss": 0.075, "num_input_tokens_seen": 61694176, "step": 28580 }, { "epoch": 4.6631321370309955, "grad_norm": 0.13208745419979095, "learning_rate": 0.0009469610464589104, "loss": 0.1095, "num_input_tokens_seen": 61704384, "step": 28585 }, { "epoch": 4.66394779771615, "grad_norm": 0.49576517939567566, "learning_rate": 0.0009469291375411795, "loss": 0.2386, "num_input_tokens_seen": 61715776, "step": 28590 }, { "epoch": 4.664763458401305, "grad_norm": 0.10463026165962219, "learning_rate": 0.0009468972195658626, "loss": 0.1067, "num_input_tokens_seen": 61726304, "step": 28595 }, { "epoch": 4.66557911908646, "grad_norm": 0.023099267855286598, "learning_rate": 0.0009468652925336068, "loss": 0.0676, "num_input_tokens_seen": 61738176, "step": 28600 }, { "epoch": 4.666394779771615, "grad_norm": 0.0174805149435997, "learning_rate": 0.0009468333564450587, "loss": 0.1445, "num_input_tokens_seen": 61748640, "step": 28605 }, { "epoch": 4.6672104404567705, "grad_norm": 0.06755024939775467, "learning_rate": 0.000946801411300866, "loss": 0.2073, "num_input_tokens_seen": 61759968, "step": 28610 }, { "epoch": 4.668026101141925, "grad_norm": 0.1285349428653717, "learning_rate": 0.0009467694571016758, "loss": 0.2167, "num_input_tokens_seen": 61770880, "step": 28615 }, { "epoch": 4.66884176182708, "grad_norm": 0.04046652466058731, "learning_rate": 0.0009467374938481359, "loss": 0.0538, "num_input_tokens_seen": 61781920, "step": 28620 }, { "epoch": 4.669657422512235, "grad_norm": 0.07488615065813065, "learning_rate": 0.0009467055215408939, "loss": 0.1093, "num_input_tokens_seen": 61792992, "step": 28625 }, { "epoch": 4.67047308319739, "grad_norm": 0.15380187332630157, "learning_rate": 0.0009466735401805977, "loss": 0.1343, "num_input_tokens_seen": 61804224, "step": 28630 }, { "epoch": 4.671288743882545, "grad_norm": 0.1613842397928238, "learning_rate": 0.0009466415497678957, "loss": 0.1174, "num_input_tokens_seen": 61814976, "step": 28635 }, { "epoch": 4.672104404567699, "grad_norm": 0.3689861595630646, "learning_rate": 0.000946609550303436, "loss": 0.2101, "num_input_tokens_seen": 61826048, "step": 28640 }, { "epoch": 4.672920065252855, "grad_norm": 0.010686925612390041, "learning_rate": 0.0009465775417878673, "loss": 0.0603, "num_input_tokens_seen": 61836352, "step": 28645 }, { "epoch": 4.67373572593801, "grad_norm": 0.05649590119719505, "learning_rate": 0.0009465455242218382, "loss": 0.054, "num_input_tokens_seen": 61847520, "step": 28650 }, { "epoch": 4.674551386623165, "grad_norm": 0.022635241970419884, "learning_rate": 0.0009465134976059975, "loss": 0.1273, "num_input_tokens_seen": 61859392, "step": 28655 }, { "epoch": 4.6753670473083195, "grad_norm": 0.03181033208966255, "learning_rate": 0.0009464814619409942, "loss": 0.0444, "num_input_tokens_seen": 61871040, "step": 28660 }, { "epoch": 4.676182707993474, "grad_norm": 0.017732923850417137, "learning_rate": 0.0009464494172274778, "loss": 0.1387, "num_input_tokens_seen": 61881632, "step": 28665 }, { "epoch": 4.67699836867863, "grad_norm": 0.10532383620738983, "learning_rate": 0.0009464173634660978, "loss": 0.2053, "num_input_tokens_seen": 61892640, "step": 28670 }, { "epoch": 4.677814029363785, "grad_norm": 0.09472110867500305, "learning_rate": 0.0009463853006575032, "loss": 0.1087, "num_input_tokens_seen": 61903040, "step": 28675 }, { "epoch": 4.67862969004894, "grad_norm": 0.05978702753782272, "learning_rate": 0.0009463532288023444, "loss": 0.2078, "num_input_tokens_seen": 61914080, "step": 28680 }, { "epoch": 4.6794453507340945, "grad_norm": 0.019631559029221535, "learning_rate": 0.0009463211479012712, "loss": 0.0497, "num_input_tokens_seen": 61924992, "step": 28685 }, { "epoch": 4.680261011419249, "grad_norm": 0.08632207661867142, "learning_rate": 0.0009462890579549338, "loss": 0.07, "num_input_tokens_seen": 61935936, "step": 28690 }, { "epoch": 4.681076672104405, "grad_norm": 0.05848938971757889, "learning_rate": 0.0009462569589639825, "loss": 0.0627, "num_input_tokens_seen": 61947264, "step": 28695 }, { "epoch": 4.68189233278956, "grad_norm": 0.050638485699892044, "learning_rate": 0.0009462248509290676, "loss": 0.0754, "num_input_tokens_seen": 61958240, "step": 28700 }, { "epoch": 4.682707993474715, "grad_norm": 0.008484597317874432, "learning_rate": 0.0009461927338508402, "loss": 0.0723, "num_input_tokens_seen": 61968032, "step": 28705 }, { "epoch": 4.6835236541598695, "grad_norm": 0.021634764969348907, "learning_rate": 0.0009461606077299509, "loss": 0.0638, "num_input_tokens_seen": 61978816, "step": 28710 }, { "epoch": 4.684339314845024, "grad_norm": 0.05825052037835121, "learning_rate": 0.000946128472567051, "loss": 0.0285, "num_input_tokens_seen": 61989952, "step": 28715 }, { "epoch": 4.685154975530179, "grad_norm": 0.3632923364639282, "learning_rate": 0.0009460963283627917, "loss": 0.1058, "num_input_tokens_seen": 62000064, "step": 28720 }, { "epoch": 4.685970636215334, "grad_norm": 0.03136396408081055, "learning_rate": 0.0009460641751178243, "loss": 0.0267, "num_input_tokens_seen": 62009536, "step": 28725 }, { "epoch": 4.68678629690049, "grad_norm": 0.24552157521247864, "learning_rate": 0.0009460320128328003, "loss": 0.0595, "num_input_tokens_seen": 62019392, "step": 28730 }, { "epoch": 4.6876019575856445, "grad_norm": 0.13903677463531494, "learning_rate": 0.0009459998415083721, "loss": 0.0555, "num_input_tokens_seen": 62029632, "step": 28735 }, { "epoch": 4.688417618270799, "grad_norm": 0.009625233709812164, "learning_rate": 0.000945967661145191, "loss": 0.0747, "num_input_tokens_seen": 62041856, "step": 28740 }, { "epoch": 4.689233278955954, "grad_norm": 0.33257022500038147, "learning_rate": 0.0009459354717439097, "loss": 0.0708, "num_input_tokens_seen": 62052384, "step": 28745 }, { "epoch": 4.690048939641109, "grad_norm": 0.012039701454341412, "learning_rate": 0.0009459032733051805, "loss": 0.2577, "num_input_tokens_seen": 62063520, "step": 28750 }, { "epoch": 4.690864600326265, "grad_norm": 0.016524773091077805, "learning_rate": 0.0009458710658296555, "loss": 0.1283, "num_input_tokens_seen": 62071712, "step": 28755 }, { "epoch": 4.691680261011419, "grad_norm": 0.17553286254405975, "learning_rate": 0.000945838849317988, "loss": 0.1407, "num_input_tokens_seen": 62083232, "step": 28760 }, { "epoch": 4.692495921696574, "grad_norm": 0.4376017153263092, "learning_rate": 0.0009458066237708302, "loss": 0.1293, "num_input_tokens_seen": 62094816, "step": 28765 }, { "epoch": 4.693311582381729, "grad_norm": 0.7487449049949646, "learning_rate": 0.0009457743891888359, "loss": 0.1069, "num_input_tokens_seen": 62105856, "step": 28770 }, { "epoch": 4.694127243066884, "grad_norm": 0.44276344776153564, "learning_rate": 0.0009457421455726582, "loss": 0.2166, "num_input_tokens_seen": 62116192, "step": 28775 }, { "epoch": 4.69494290375204, "grad_norm": 0.17641574144363403, "learning_rate": 0.0009457098929229503, "loss": 0.1854, "num_input_tokens_seen": 62125824, "step": 28780 }, { "epoch": 4.695758564437194, "grad_norm": 0.191221222281456, "learning_rate": 0.0009456776312403661, "loss": 0.1289, "num_input_tokens_seen": 62135744, "step": 28785 }, { "epoch": 4.696574225122349, "grad_norm": 0.15283788740634918, "learning_rate": 0.0009456453605255592, "loss": 0.2246, "num_input_tokens_seen": 62146400, "step": 28790 }, { "epoch": 4.697389885807504, "grad_norm": 0.08199690282344818, "learning_rate": 0.0009456130807791839, "loss": 0.0553, "num_input_tokens_seen": 62157568, "step": 28795 }, { "epoch": 4.698205546492659, "grad_norm": 0.01363020297139883, "learning_rate": 0.000945580792001894, "loss": 0.0398, "num_input_tokens_seen": 62167168, "step": 28800 }, { "epoch": 4.699021207177814, "grad_norm": 0.1189170628786087, "learning_rate": 0.0009455484941943442, "loss": 0.0908, "num_input_tokens_seen": 62176896, "step": 28805 }, { "epoch": 4.699836867862969, "grad_norm": 0.02594807930290699, "learning_rate": 0.0009455161873571889, "loss": 0.0665, "num_input_tokens_seen": 62186784, "step": 28810 }, { "epoch": 4.700652528548124, "grad_norm": 0.12495096772909164, "learning_rate": 0.000945483871491083, "loss": 0.0627, "num_input_tokens_seen": 62196224, "step": 28815 }, { "epoch": 4.701468189233279, "grad_norm": 0.12190598994493484, "learning_rate": 0.0009454515465966812, "loss": 0.0897, "num_input_tokens_seen": 62206944, "step": 28820 }, { "epoch": 4.702283849918434, "grad_norm": 0.12342122942209244, "learning_rate": 0.0009454192126746388, "loss": 0.0579, "num_input_tokens_seen": 62217728, "step": 28825 }, { "epoch": 4.703099510603589, "grad_norm": 0.01240597479045391, "learning_rate": 0.000945386869725611, "loss": 0.0614, "num_input_tokens_seen": 62228352, "step": 28830 }, { "epoch": 4.7039151712887435, "grad_norm": 0.1738576591014862, "learning_rate": 0.0009453545177502532, "loss": 0.054, "num_input_tokens_seen": 62239360, "step": 28835 }, { "epoch": 4.704730831973899, "grad_norm": 0.2731996774673462, "learning_rate": 0.0009453221567492211, "loss": 0.1596, "num_input_tokens_seen": 62249408, "step": 28840 }, { "epoch": 4.705546492659054, "grad_norm": 0.17217917740345, "learning_rate": 0.0009452897867231705, "loss": 0.197, "num_input_tokens_seen": 62261152, "step": 28845 }, { "epoch": 4.706362153344209, "grad_norm": 0.1391860395669937, "learning_rate": 0.0009452574076727576, "loss": 0.0932, "num_input_tokens_seen": 62271136, "step": 28850 }, { "epoch": 4.707177814029364, "grad_norm": 0.014520938508212566, "learning_rate": 0.0009452250195986385, "loss": 0.049, "num_input_tokens_seen": 62280672, "step": 28855 }, { "epoch": 4.7079934747145185, "grad_norm": 0.0473255030810833, "learning_rate": 0.0009451926225014695, "loss": 0.0292, "num_input_tokens_seen": 62291680, "step": 28860 }, { "epoch": 4.708809135399674, "grad_norm": 0.0565347820520401, "learning_rate": 0.0009451602163819073, "loss": 0.0395, "num_input_tokens_seen": 62302496, "step": 28865 }, { "epoch": 4.709624796084829, "grad_norm": 0.2138064205646515, "learning_rate": 0.0009451278012406086, "loss": 0.1508, "num_input_tokens_seen": 62313728, "step": 28870 }, { "epoch": 4.710440456769984, "grad_norm": 0.02583884820342064, "learning_rate": 0.0009450953770782304, "loss": 0.1423, "num_input_tokens_seen": 62325280, "step": 28875 }, { "epoch": 4.711256117455139, "grad_norm": 0.36336708068847656, "learning_rate": 0.0009450629438954296, "loss": 0.0801, "num_input_tokens_seen": 62335936, "step": 28880 }, { "epoch": 4.712071778140293, "grad_norm": 0.03173749893903732, "learning_rate": 0.0009450305016928636, "loss": 0.0158, "num_input_tokens_seen": 62346912, "step": 28885 }, { "epoch": 4.712887438825448, "grad_norm": 0.3685295581817627, "learning_rate": 0.00094499805047119, "loss": 0.092, "num_input_tokens_seen": 62357472, "step": 28890 }, { "epoch": 4.713703099510604, "grad_norm": 0.14548586308956146, "learning_rate": 0.0009449655902310665, "loss": 0.153, "num_input_tokens_seen": 62367296, "step": 28895 }, { "epoch": 4.714518760195759, "grad_norm": 0.0328923724591732, "learning_rate": 0.0009449331209731507, "loss": 0.0442, "num_input_tokens_seen": 62378592, "step": 28900 }, { "epoch": 4.715334420880914, "grad_norm": 0.14777664840221405, "learning_rate": 0.0009449006426981007, "loss": 0.1339, "num_input_tokens_seen": 62389888, "step": 28905 }, { "epoch": 4.716150081566068, "grad_norm": 0.009563075378537178, "learning_rate": 0.0009448681554065749, "loss": 0.0661, "num_input_tokens_seen": 62401792, "step": 28910 }, { "epoch": 4.716965742251223, "grad_norm": 0.4300788938999176, "learning_rate": 0.0009448356590992316, "loss": 0.1694, "num_input_tokens_seen": 62412128, "step": 28915 }, { "epoch": 4.717781402936378, "grad_norm": 0.031330499798059464, "learning_rate": 0.0009448031537767292, "loss": 0.0588, "num_input_tokens_seen": 62422528, "step": 28920 }, { "epoch": 4.718597063621534, "grad_norm": 0.04600265622138977, "learning_rate": 0.0009447706394397266, "loss": 0.0272, "num_input_tokens_seen": 62433888, "step": 28925 }, { "epoch": 4.719412724306689, "grad_norm": 0.001835047034546733, "learning_rate": 0.0009447381160888831, "loss": 0.0871, "num_input_tokens_seen": 62445504, "step": 28930 }, { "epoch": 4.720228384991843, "grad_norm": 0.09745094925165176, "learning_rate": 0.0009447055837248572, "loss": 0.1551, "num_input_tokens_seen": 62456288, "step": 28935 }, { "epoch": 4.721044045676998, "grad_norm": 0.3168526291847229, "learning_rate": 0.0009446730423483085, "loss": 0.2446, "num_input_tokens_seen": 62466240, "step": 28940 }, { "epoch": 4.721859706362153, "grad_norm": 0.3104914724826813, "learning_rate": 0.0009446404919598965, "loss": 0.0793, "num_input_tokens_seen": 62476064, "step": 28945 }, { "epoch": 4.722675367047309, "grad_norm": 0.06456045061349869, "learning_rate": 0.000944607932560281, "loss": 0.117, "num_input_tokens_seen": 62487680, "step": 28950 }, { "epoch": 4.7234910277324635, "grad_norm": 0.037263624370098114, "learning_rate": 0.0009445753641501215, "loss": 0.1009, "num_input_tokens_seen": 62498112, "step": 28955 }, { "epoch": 4.724306688417618, "grad_norm": 0.2770502269268036, "learning_rate": 0.0009445427867300785, "loss": 0.0601, "num_input_tokens_seen": 62509408, "step": 28960 }, { "epoch": 4.725122349102773, "grad_norm": 0.013540813699364662, "learning_rate": 0.0009445102003008119, "loss": 0.0641, "num_input_tokens_seen": 62520096, "step": 28965 }, { "epoch": 4.725938009787928, "grad_norm": 0.03198372945189476, "learning_rate": 0.0009444776048629822, "loss": 0.0626, "num_input_tokens_seen": 62531328, "step": 28970 }, { "epoch": 4.726753670473083, "grad_norm": 0.041561953723430634, "learning_rate": 0.0009444450004172498, "loss": 0.1203, "num_input_tokens_seen": 62540992, "step": 28975 }, { "epoch": 4.7275693311582385, "grad_norm": 0.391055703163147, "learning_rate": 0.0009444123869642758, "loss": 0.263, "num_input_tokens_seen": 62552192, "step": 28980 }, { "epoch": 4.728384991843393, "grad_norm": 0.009761184453964233, "learning_rate": 0.000944379764504721, "loss": 0.1412, "num_input_tokens_seen": 62562912, "step": 28985 }, { "epoch": 4.729200652528548, "grad_norm": 0.016184687614440918, "learning_rate": 0.0009443471330392466, "loss": 0.0807, "num_input_tokens_seen": 62571744, "step": 28990 }, { "epoch": 4.730016313213703, "grad_norm": 0.2903614640235901, "learning_rate": 0.0009443144925685137, "loss": 0.1308, "num_input_tokens_seen": 62582400, "step": 28995 }, { "epoch": 4.730831973898858, "grad_norm": 0.03474726900458336, "learning_rate": 0.0009442818430931841, "loss": 0.0571, "num_input_tokens_seen": 62593792, "step": 29000 }, { "epoch": 4.731647634584013, "grad_norm": 0.036861762404441833, "learning_rate": 0.0009442491846139192, "loss": 0.028, "num_input_tokens_seen": 62604800, "step": 29005 }, { "epoch": 4.732463295269168, "grad_norm": 0.1424628347158432, "learning_rate": 0.0009442165171313811, "loss": 0.1245, "num_input_tokens_seen": 62614848, "step": 29010 }, { "epoch": 4.733278955954323, "grad_norm": 0.27228426933288574, "learning_rate": 0.0009441838406462318, "loss": 0.0876, "num_input_tokens_seen": 62626016, "step": 29015 }, { "epoch": 4.734094616639478, "grad_norm": 0.4459521770477295, "learning_rate": 0.0009441511551591333, "loss": 0.1778, "num_input_tokens_seen": 62636672, "step": 29020 }, { "epoch": 4.734910277324633, "grad_norm": 0.0647108405828476, "learning_rate": 0.0009441184606707484, "loss": 0.172, "num_input_tokens_seen": 62648320, "step": 29025 }, { "epoch": 4.735725938009788, "grad_norm": 0.18711964786052704, "learning_rate": 0.0009440857571817394, "loss": 0.0937, "num_input_tokens_seen": 62657920, "step": 29030 }, { "epoch": 4.736541598694943, "grad_norm": 0.0313325896859169, "learning_rate": 0.000944053044692769, "loss": 0.0374, "num_input_tokens_seen": 62669792, "step": 29035 }, { "epoch": 4.737357259380098, "grad_norm": 0.04535924270749092, "learning_rate": 0.0009440203232045005, "loss": 0.0616, "num_input_tokens_seen": 62680832, "step": 29040 }, { "epoch": 4.738172920065253, "grad_norm": 0.5510385632514954, "learning_rate": 0.000943987592717597, "loss": 0.2089, "num_input_tokens_seen": 62691360, "step": 29045 }, { "epoch": 4.738988580750408, "grad_norm": 0.010988340713083744, "learning_rate": 0.0009439548532327216, "loss": 0.045, "num_input_tokens_seen": 62702592, "step": 29050 }, { "epoch": 4.739804241435563, "grad_norm": 0.04708869010210037, "learning_rate": 0.0009439221047505377, "loss": 0.1195, "num_input_tokens_seen": 62713568, "step": 29055 }, { "epoch": 4.740619902120718, "grad_norm": 0.020267263054847717, "learning_rate": 0.0009438893472717094, "loss": 0.0362, "num_input_tokens_seen": 62724064, "step": 29060 }, { "epoch": 4.741435562805873, "grad_norm": 0.18239261209964752, "learning_rate": 0.0009438565807969005, "loss": 0.0878, "num_input_tokens_seen": 62734368, "step": 29065 }, { "epoch": 4.742251223491028, "grad_norm": 0.030594784766435623, "learning_rate": 0.0009438238053267746, "loss": 0.0627, "num_input_tokens_seen": 62744192, "step": 29070 }, { "epoch": 4.743066884176183, "grad_norm": 0.05353270843625069, "learning_rate": 0.0009437910208619964, "loss": 0.1013, "num_input_tokens_seen": 62754560, "step": 29075 }, { "epoch": 4.7438825448613375, "grad_norm": 0.09487147629261017, "learning_rate": 0.0009437582274032301, "loss": 0.0242, "num_input_tokens_seen": 62766048, "step": 29080 }, { "epoch": 4.744698205546492, "grad_norm": 0.16494220495224, "learning_rate": 0.0009437254249511404, "loss": 0.1358, "num_input_tokens_seen": 62777088, "step": 29085 }, { "epoch": 4.745513866231647, "grad_norm": 0.01544949971139431, "learning_rate": 0.0009436926135063922, "loss": 0.0739, "num_input_tokens_seen": 62787840, "step": 29090 }, { "epoch": 4.746329526916803, "grad_norm": 0.7570450305938721, "learning_rate": 0.0009436597930696502, "loss": 0.112, "num_input_tokens_seen": 62799072, "step": 29095 }, { "epoch": 4.747145187601958, "grad_norm": 0.17976275086402893, "learning_rate": 0.0009436269636415798, "loss": 0.0597, "num_input_tokens_seen": 62809888, "step": 29100 }, { "epoch": 4.7479608482871125, "grad_norm": 0.023640431463718414, "learning_rate": 0.000943594125222846, "loss": 0.1304, "num_input_tokens_seen": 62821312, "step": 29105 }, { "epoch": 4.748776508972267, "grad_norm": 0.020307883620262146, "learning_rate": 0.0009435612778141146, "loss": 0.0381, "num_input_tokens_seen": 62830848, "step": 29110 }, { "epoch": 4.749592169657422, "grad_norm": 0.15791724622249603, "learning_rate": 0.0009435284214160513, "loss": 0.0675, "num_input_tokens_seen": 62842400, "step": 29115 }, { "epoch": 4.750407830342578, "grad_norm": 0.10407941788434982, "learning_rate": 0.0009434955560293217, "loss": 0.0373, "num_input_tokens_seen": 62853248, "step": 29120 }, { "epoch": 4.751223491027733, "grad_norm": 0.294047474861145, "learning_rate": 0.0009434626816545922, "loss": 0.2955, "num_input_tokens_seen": 62863872, "step": 29125 }, { "epoch": 4.7520391517128875, "grad_norm": 0.030389513820409775, "learning_rate": 0.0009434297982925288, "loss": 0.0301, "num_input_tokens_seen": 62875648, "step": 29130 }, { "epoch": 4.752854812398042, "grad_norm": 0.1789858043193817, "learning_rate": 0.000943396905943798, "loss": 0.1093, "num_input_tokens_seen": 62886912, "step": 29135 }, { "epoch": 4.753670473083197, "grad_norm": 0.11116369068622589, "learning_rate": 0.0009433640046090664, "loss": 0.148, "num_input_tokens_seen": 62897280, "step": 29140 }, { "epoch": 4.754486133768353, "grad_norm": 0.7634891867637634, "learning_rate": 0.0009433310942890009, "loss": 0.094, "num_input_tokens_seen": 62908000, "step": 29145 }, { "epoch": 4.755301794453508, "grad_norm": 0.0109855430200696, "learning_rate": 0.0009432981749842683, "loss": 0.0551, "num_input_tokens_seen": 62918496, "step": 29150 }, { "epoch": 4.7561174551386625, "grad_norm": 0.014307678677141666, "learning_rate": 0.0009432652466955358, "loss": 0.1236, "num_input_tokens_seen": 62930592, "step": 29155 }, { "epoch": 4.756933115823817, "grad_norm": 0.16116374731063843, "learning_rate": 0.0009432323094234708, "loss": 0.1738, "num_input_tokens_seen": 62941472, "step": 29160 }, { "epoch": 4.757748776508972, "grad_norm": 0.04070528224110603, "learning_rate": 0.0009431993631687408, "loss": 0.1143, "num_input_tokens_seen": 62953120, "step": 29165 }, { "epoch": 4.758564437194127, "grad_norm": 0.03441860154271126, "learning_rate": 0.0009431664079320134, "loss": 0.2014, "num_input_tokens_seen": 62963712, "step": 29170 }, { "epoch": 4.759380097879282, "grad_norm": 0.03653792291879654, "learning_rate": 0.0009431334437139565, "loss": 0.0327, "num_input_tokens_seen": 62974880, "step": 29175 }, { "epoch": 4.760195758564437, "grad_norm": 0.06766723841428757, "learning_rate": 0.0009431004705152384, "loss": 0.0736, "num_input_tokens_seen": 62985408, "step": 29180 }, { "epoch": 4.761011419249592, "grad_norm": 0.15782971680164337, "learning_rate": 0.0009430674883365269, "loss": 0.0912, "num_input_tokens_seen": 62996352, "step": 29185 }, { "epoch": 4.761827079934747, "grad_norm": 0.06059948354959488, "learning_rate": 0.0009430344971784909, "loss": 0.0889, "num_input_tokens_seen": 63006752, "step": 29190 }, { "epoch": 4.762642740619902, "grad_norm": 0.028196195140480995, "learning_rate": 0.0009430014970417986, "loss": 0.1004, "num_input_tokens_seen": 63017952, "step": 29195 }, { "epoch": 4.763458401305057, "grad_norm": 0.16038507223129272, "learning_rate": 0.0009429684879271191, "loss": 0.1485, "num_input_tokens_seen": 63028896, "step": 29200 }, { "epoch": 4.764274061990212, "grad_norm": 0.3968661427497864, "learning_rate": 0.0009429354698351212, "loss": 0.0683, "num_input_tokens_seen": 63038464, "step": 29205 }, { "epoch": 4.765089722675367, "grad_norm": 0.04497956484556198, "learning_rate": 0.0009429024427664741, "loss": 0.1453, "num_input_tokens_seen": 63049184, "step": 29210 }, { "epoch": 4.765905383360522, "grad_norm": 0.010740892961621284, "learning_rate": 0.0009428694067218473, "loss": 0.0513, "num_input_tokens_seen": 63061696, "step": 29215 }, { "epoch": 4.766721044045677, "grad_norm": 0.025746723636984825, "learning_rate": 0.0009428363617019099, "loss": 0.0332, "num_input_tokens_seen": 63072384, "step": 29220 }, { "epoch": 4.767536704730832, "grad_norm": 0.09944232553243637, "learning_rate": 0.0009428033077073319, "loss": 0.1922, "num_input_tokens_seen": 63083936, "step": 29225 }, { "epoch": 4.768352365415987, "grad_norm": 0.03159724920988083, "learning_rate": 0.0009427702447387833, "loss": 0.0346, "num_input_tokens_seen": 63095008, "step": 29230 }, { "epoch": 4.769168026101142, "grad_norm": 0.05562152713537216, "learning_rate": 0.0009427371727969338, "loss": 0.027, "num_input_tokens_seen": 63106208, "step": 29235 }, { "epoch": 4.769983686786297, "grad_norm": 0.04226912558078766, "learning_rate": 0.000942704091882454, "loss": 0.0645, "num_input_tokens_seen": 63117472, "step": 29240 }, { "epoch": 4.770799347471452, "grad_norm": 0.007211787160485983, "learning_rate": 0.0009426710019960141, "loss": 0.0395, "num_input_tokens_seen": 63128608, "step": 29245 }, { "epoch": 4.771615008156607, "grad_norm": 0.005389931611716747, "learning_rate": 0.0009426379031382848, "loss": 0.1973, "num_input_tokens_seen": 63138976, "step": 29250 }, { "epoch": 4.7724306688417615, "grad_norm": 0.07806431502103806, "learning_rate": 0.0009426047953099368, "loss": 0.0558, "num_input_tokens_seen": 63150624, "step": 29255 }, { "epoch": 4.773246329526917, "grad_norm": 0.11518237739801407, "learning_rate": 0.0009425716785116412, "loss": 0.0792, "num_input_tokens_seen": 63161568, "step": 29260 }, { "epoch": 4.774061990212072, "grad_norm": 0.09587440639734268, "learning_rate": 0.0009425385527440691, "loss": 0.0922, "num_input_tokens_seen": 63172960, "step": 29265 }, { "epoch": 4.774877650897227, "grad_norm": 0.0075339749455451965, "learning_rate": 0.0009425054180078917, "loss": 0.1079, "num_input_tokens_seen": 63183712, "step": 29270 }, { "epoch": 4.775693311582382, "grad_norm": 0.014327740296721458, "learning_rate": 0.0009424722743037808, "loss": 0.058, "num_input_tokens_seen": 63193088, "step": 29275 }, { "epoch": 4.7765089722675365, "grad_norm": 0.33011358976364136, "learning_rate": 0.0009424391216324078, "loss": 0.1102, "num_input_tokens_seen": 63203616, "step": 29280 }, { "epoch": 4.777324632952691, "grad_norm": 0.10421055555343628, "learning_rate": 0.0009424059599944449, "loss": 0.1348, "num_input_tokens_seen": 63214784, "step": 29285 }, { "epoch": 4.778140293637847, "grad_norm": 0.10818914324045181, "learning_rate": 0.0009423727893905638, "loss": 0.0584, "num_input_tokens_seen": 63226784, "step": 29290 }, { "epoch": 4.778955954323002, "grad_norm": 0.02502039261162281, "learning_rate": 0.0009423396098214372, "loss": 0.0255, "num_input_tokens_seen": 63236992, "step": 29295 }, { "epoch": 4.779771615008157, "grad_norm": 0.017431804910302162, "learning_rate": 0.0009423064212877371, "loss": 0.0206, "num_input_tokens_seen": 63245920, "step": 29300 }, { "epoch": 4.780587275693311, "grad_norm": 0.32392820715904236, "learning_rate": 0.0009422732237901361, "loss": 0.0597, "num_input_tokens_seen": 63256640, "step": 29305 }, { "epoch": 4.781402936378466, "grad_norm": 0.19300010800361633, "learning_rate": 0.0009422400173293073, "loss": 0.1005, "num_input_tokens_seen": 63267328, "step": 29310 }, { "epoch": 4.782218597063622, "grad_norm": 0.20187383890151978, "learning_rate": 0.0009422068019059235, "loss": 0.1017, "num_input_tokens_seen": 63278272, "step": 29315 }, { "epoch": 4.783034257748777, "grad_norm": 0.07142680883407593, "learning_rate": 0.0009421735775206582, "loss": 0.0497, "num_input_tokens_seen": 63289312, "step": 29320 }, { "epoch": 4.783849918433932, "grad_norm": 0.6443974375724792, "learning_rate": 0.000942140344174184, "loss": 0.1654, "num_input_tokens_seen": 63300032, "step": 29325 }, { "epoch": 4.784665579119086, "grad_norm": 0.012274186126887798, "learning_rate": 0.0009421071018671749, "loss": 0.322, "num_input_tokens_seen": 63311552, "step": 29330 }, { "epoch": 4.785481239804241, "grad_norm": 0.4331901967525482, "learning_rate": 0.0009420738506003047, "loss": 0.208, "num_input_tokens_seen": 63322560, "step": 29335 }, { "epoch": 4.786296900489396, "grad_norm": 0.13902045786380768, "learning_rate": 0.0009420405903742471, "loss": 0.1738, "num_input_tokens_seen": 63334528, "step": 29340 }, { "epoch": 4.787112561174552, "grad_norm": 0.10463149845600128, "learning_rate": 0.000942007321189676, "loss": 0.0561, "num_input_tokens_seen": 63346080, "step": 29345 }, { "epoch": 4.787928221859707, "grad_norm": 0.05299828574061394, "learning_rate": 0.0009419740430472659, "loss": 0.0908, "num_input_tokens_seen": 63357152, "step": 29350 }, { "epoch": 4.788743882544861, "grad_norm": 0.07318619638681412, "learning_rate": 0.0009419407559476911, "loss": 0.1546, "num_input_tokens_seen": 63368416, "step": 29355 }, { "epoch": 4.789559543230016, "grad_norm": 0.2341724932193756, "learning_rate": 0.0009419074598916262, "loss": 0.0895, "num_input_tokens_seen": 63379168, "step": 29360 }, { "epoch": 4.790375203915171, "grad_norm": 0.14661934971809387, "learning_rate": 0.0009418741548797462, "loss": 0.0951, "num_input_tokens_seen": 63389824, "step": 29365 }, { "epoch": 4.791190864600326, "grad_norm": 0.029867149889469147, "learning_rate": 0.0009418408409127257, "loss": 0.0475, "num_input_tokens_seen": 63401152, "step": 29370 }, { "epoch": 4.7920065252854815, "grad_norm": 0.5909934043884277, "learning_rate": 0.0009418075179912402, "loss": 0.2034, "num_input_tokens_seen": 63413152, "step": 29375 }, { "epoch": 4.792822185970636, "grad_norm": 0.0257155392318964, "learning_rate": 0.0009417741861159648, "loss": 0.0287, "num_input_tokens_seen": 63424192, "step": 29380 }, { "epoch": 4.793637846655791, "grad_norm": 0.24909964203834534, "learning_rate": 0.0009417408452875751, "loss": 0.125, "num_input_tokens_seen": 63434464, "step": 29385 }, { "epoch": 4.794453507340946, "grad_norm": 0.3398919105529785, "learning_rate": 0.0009417074955067467, "loss": 0.1109, "num_input_tokens_seen": 63444064, "step": 29390 }, { "epoch": 4.795269168026101, "grad_norm": 0.04319724068045616, "learning_rate": 0.0009416741367741557, "loss": 0.1873, "num_input_tokens_seen": 63455904, "step": 29395 }, { "epoch": 4.7960848287112565, "grad_norm": 0.12128908932209015, "learning_rate": 0.0009416407690904778, "loss": 0.3491, "num_input_tokens_seen": 63467136, "step": 29400 }, { "epoch": 4.796900489396411, "grad_norm": 0.021471992135047913, "learning_rate": 0.0009416073924563897, "loss": 0.255, "num_input_tokens_seen": 63478784, "step": 29405 }, { "epoch": 4.797716150081566, "grad_norm": 0.10503476113080978, "learning_rate": 0.0009415740068725674, "loss": 0.0767, "num_input_tokens_seen": 63490304, "step": 29410 }, { "epoch": 4.798531810766721, "grad_norm": 0.029040468856692314, "learning_rate": 0.0009415406123396878, "loss": 0.0615, "num_input_tokens_seen": 63501280, "step": 29415 }, { "epoch": 4.799347471451876, "grad_norm": 0.13593031466007233, "learning_rate": 0.0009415072088584275, "loss": 0.072, "num_input_tokens_seen": 63512384, "step": 29420 }, { "epoch": 4.800163132137031, "grad_norm": 0.16249193251132965, "learning_rate": 0.0009414737964294635, "loss": 0.0614, "num_input_tokens_seen": 63522656, "step": 29425 }, { "epoch": 4.800978792822186, "grad_norm": 0.1004888117313385, "learning_rate": 0.0009414403750534731, "loss": 0.0627, "num_input_tokens_seen": 63534048, "step": 29430 }, { "epoch": 4.801794453507341, "grad_norm": 0.14339496195316315, "learning_rate": 0.0009414069447311333, "loss": 0.0887, "num_input_tokens_seen": 63545792, "step": 29435 }, { "epoch": 4.802610114192496, "grad_norm": 0.08759405463933945, "learning_rate": 0.0009413735054631218, "loss": 0.1602, "num_input_tokens_seen": 63556416, "step": 29440 }, { "epoch": 4.803425774877651, "grad_norm": 0.020508920773863792, "learning_rate": 0.0009413400572501164, "loss": 0.0237, "num_input_tokens_seen": 63566880, "step": 29445 }, { "epoch": 4.804241435562806, "grad_norm": 0.009880347177386284, "learning_rate": 0.0009413066000927948, "loss": 0.1356, "num_input_tokens_seen": 63577376, "step": 29450 }, { "epoch": 4.80505709624796, "grad_norm": 0.06456217914819717, "learning_rate": 0.0009412731339918353, "loss": 0.1309, "num_input_tokens_seen": 63587808, "step": 29455 }, { "epoch": 4.805872756933116, "grad_norm": 0.034411221742630005, "learning_rate": 0.0009412396589479157, "loss": 0.0786, "num_input_tokens_seen": 63597440, "step": 29460 }, { "epoch": 4.806688417618271, "grad_norm": 0.1142154186964035, "learning_rate": 0.0009412061749617147, "loss": 0.0233, "num_input_tokens_seen": 63608512, "step": 29465 }, { "epoch": 4.807504078303426, "grad_norm": 0.17176075279712677, "learning_rate": 0.0009411726820339109, "loss": 0.1125, "num_input_tokens_seen": 63620416, "step": 29470 }, { "epoch": 4.808319738988581, "grad_norm": 0.029743075370788574, "learning_rate": 0.000941139180165183, "loss": 0.1608, "num_input_tokens_seen": 63631072, "step": 29475 }, { "epoch": 4.809135399673735, "grad_norm": 0.04053172096610069, "learning_rate": 0.0009411056693562101, "loss": 0.0691, "num_input_tokens_seen": 63642752, "step": 29480 }, { "epoch": 4.809951060358891, "grad_norm": 0.26282501220703125, "learning_rate": 0.000941072149607671, "loss": 0.111, "num_input_tokens_seen": 63653824, "step": 29485 }, { "epoch": 4.810766721044046, "grad_norm": 0.007690738886594772, "learning_rate": 0.0009410386209202455, "loss": 0.2216, "num_input_tokens_seen": 63664576, "step": 29490 }, { "epoch": 4.811582381729201, "grad_norm": 0.2790180444717407, "learning_rate": 0.0009410050832946127, "loss": 0.1174, "num_input_tokens_seen": 63675744, "step": 29495 }, { "epoch": 4.8123980424143555, "grad_norm": 0.04947797954082489, "learning_rate": 0.0009409715367314527, "loss": 0.0995, "num_input_tokens_seen": 63686592, "step": 29500 }, { "epoch": 4.81321370309951, "grad_norm": 0.017259635031223297, "learning_rate": 0.0009409379812314447, "loss": 0.0334, "num_input_tokens_seen": 63696896, "step": 29505 }, { "epoch": 4.814029363784666, "grad_norm": 0.017932157963514328, "learning_rate": 0.0009409044167952694, "loss": 0.147, "num_input_tokens_seen": 63705664, "step": 29510 }, { "epoch": 4.814845024469821, "grad_norm": 0.039764653891325, "learning_rate": 0.0009408708434236066, "loss": 0.0678, "num_input_tokens_seen": 63716128, "step": 29515 }, { "epoch": 4.815660685154976, "grad_norm": 0.024869758635759354, "learning_rate": 0.000940837261117137, "loss": 0.0587, "num_input_tokens_seen": 63727328, "step": 29520 }, { "epoch": 4.8164763458401305, "grad_norm": 0.10473395138978958, "learning_rate": 0.000940803669876541, "loss": 0.085, "num_input_tokens_seen": 63737376, "step": 29525 }, { "epoch": 4.817292006525285, "grad_norm": 0.21407146751880646, "learning_rate": 0.0009407700697024995, "loss": 0.0998, "num_input_tokens_seen": 63748000, "step": 29530 }, { "epoch": 4.81810766721044, "grad_norm": 0.06024529039859772, "learning_rate": 0.0009407364605956933, "loss": 0.1499, "num_input_tokens_seen": 63759776, "step": 29535 }, { "epoch": 4.818923327895595, "grad_norm": 0.4007602632045746, "learning_rate": 0.0009407028425568036, "loss": 0.0966, "num_input_tokens_seen": 63771680, "step": 29540 }, { "epoch": 4.819738988580751, "grad_norm": 0.3571130335330963, "learning_rate": 0.0009406692155865117, "loss": 0.2908, "num_input_tokens_seen": 63783168, "step": 29545 }, { "epoch": 4.8205546492659055, "grad_norm": 0.054712578654289246, "learning_rate": 0.0009406355796854993, "loss": 0.176, "num_input_tokens_seen": 63793664, "step": 29550 }, { "epoch": 4.82137030995106, "grad_norm": 0.15671412646770477, "learning_rate": 0.0009406019348544478, "loss": 0.0858, "num_input_tokens_seen": 63804992, "step": 29555 }, { "epoch": 4.822185970636215, "grad_norm": 0.2422739565372467, "learning_rate": 0.000940568281094039, "loss": 0.084, "num_input_tokens_seen": 63815840, "step": 29560 }, { "epoch": 4.82300163132137, "grad_norm": 0.31544601917266846, "learning_rate": 0.0009405346184049552, "loss": 0.173, "num_input_tokens_seen": 63825664, "step": 29565 }, { "epoch": 4.823817292006526, "grad_norm": 0.013141264207661152, "learning_rate": 0.0009405009467878787, "loss": 0.0504, "num_input_tokens_seen": 63835392, "step": 29570 }, { "epoch": 4.8246329526916805, "grad_norm": 0.13666287064552307, "learning_rate": 0.0009404672662434914, "loss": 0.0563, "num_input_tokens_seen": 63846176, "step": 29575 }, { "epoch": 4.825448613376835, "grad_norm": 0.022302627563476562, "learning_rate": 0.0009404335767724763, "loss": 0.074, "num_input_tokens_seen": 63855744, "step": 29580 }, { "epoch": 4.82626427406199, "grad_norm": 0.0221390537917614, "learning_rate": 0.000940399878375516, "loss": 0.2367, "num_input_tokens_seen": 63866016, "step": 29585 }, { "epoch": 4.827079934747145, "grad_norm": 0.023553216829895973, "learning_rate": 0.0009403661710532936, "loss": 0.1651, "num_input_tokens_seen": 63877216, "step": 29590 }, { "epoch": 4.827895595432301, "grad_norm": 0.12567038834095, "learning_rate": 0.0009403324548064919, "loss": 0.087, "num_input_tokens_seen": 63887776, "step": 29595 }, { "epoch": 4.828711256117455, "grad_norm": 0.03263112157583237, "learning_rate": 0.0009402987296357946, "loss": 0.0394, "num_input_tokens_seen": 63897696, "step": 29600 }, { "epoch": 4.82952691680261, "grad_norm": 0.10870974510908127, "learning_rate": 0.0009402649955418848, "loss": 0.0375, "num_input_tokens_seen": 63908864, "step": 29605 }, { "epoch": 4.830342577487765, "grad_norm": 0.22527195513248444, "learning_rate": 0.0009402312525254464, "loss": 0.0351, "num_input_tokens_seen": 63919584, "step": 29610 }, { "epoch": 4.83115823817292, "grad_norm": 0.03808329626917839, "learning_rate": 0.0009401975005871632, "loss": 0.1348, "num_input_tokens_seen": 63929920, "step": 29615 }, { "epoch": 4.831973898858075, "grad_norm": 0.020397508516907692, "learning_rate": 0.0009401637397277193, "loss": 0.0166, "num_input_tokens_seen": 63939840, "step": 29620 }, { "epoch": 4.8327895595432295, "grad_norm": 0.18699325621128082, "learning_rate": 0.0009401299699477988, "loss": 0.2343, "num_input_tokens_seen": 63951456, "step": 29625 }, { "epoch": 4.833605220228385, "grad_norm": 0.247244730591774, "learning_rate": 0.0009400961912480861, "loss": 0.172, "num_input_tokens_seen": 63963072, "step": 29630 }, { "epoch": 4.83442088091354, "grad_norm": 0.151368647813797, "learning_rate": 0.0009400624036292657, "loss": 0.3132, "num_input_tokens_seen": 63974048, "step": 29635 }, { "epoch": 4.835236541598695, "grad_norm": 0.1525419056415558, "learning_rate": 0.0009400286070920226, "loss": 0.0805, "num_input_tokens_seen": 63983744, "step": 29640 }, { "epoch": 4.83605220228385, "grad_norm": 0.11460611969232559, "learning_rate": 0.0009399948016370415, "loss": 0.0613, "num_input_tokens_seen": 63993344, "step": 29645 }, { "epoch": 4.8368678629690045, "grad_norm": 0.02772326022386551, "learning_rate": 0.0009399609872650075, "loss": 0.0985, "num_input_tokens_seen": 64002752, "step": 29650 }, { "epoch": 4.83768352365416, "grad_norm": 0.09539767354726791, "learning_rate": 0.000939927163976606, "loss": 0.1117, "num_input_tokens_seen": 64014592, "step": 29655 }, { "epoch": 4.838499184339315, "grad_norm": 0.24687863886356354, "learning_rate": 0.0009398933317725225, "loss": 0.0804, "num_input_tokens_seen": 64025888, "step": 29660 }, { "epoch": 4.83931484502447, "grad_norm": 0.05439388379454613, "learning_rate": 0.0009398594906534424, "loss": 0.0417, "num_input_tokens_seen": 64037120, "step": 29665 }, { "epoch": 4.840130505709625, "grad_norm": 0.3626890778541565, "learning_rate": 0.0009398256406200518, "loss": 0.2545, "num_input_tokens_seen": 64047360, "step": 29670 }, { "epoch": 4.8409461663947795, "grad_norm": 0.112861767411232, "learning_rate": 0.0009397917816730368, "loss": 0.1243, "num_input_tokens_seen": 64058624, "step": 29675 }, { "epoch": 4.841761827079935, "grad_norm": 0.11057104170322418, "learning_rate": 0.0009397579138130832, "loss": 0.0968, "num_input_tokens_seen": 64069440, "step": 29680 }, { "epoch": 4.84257748776509, "grad_norm": 0.11971098184585571, "learning_rate": 0.0009397240370408777, "loss": 0.1761, "num_input_tokens_seen": 64079104, "step": 29685 }, { "epoch": 4.843393148450245, "grad_norm": 0.18452569842338562, "learning_rate": 0.0009396901513571068, "loss": 0.1048, "num_input_tokens_seen": 64088064, "step": 29690 }, { "epoch": 4.8442088091354, "grad_norm": 0.03464442119002342, "learning_rate": 0.0009396562567624572, "loss": 0.0337, "num_input_tokens_seen": 64098976, "step": 29695 }, { "epoch": 4.8450244698205545, "grad_norm": 0.19903625547885895, "learning_rate": 0.0009396223532576159, "loss": 0.1559, "num_input_tokens_seen": 64108992, "step": 29700 }, { "epoch": 4.845840130505709, "grad_norm": 0.49042823910713196, "learning_rate": 0.0009395884408432696, "loss": 0.118, "num_input_tokens_seen": 64118624, "step": 29705 }, { "epoch": 4.846655791190865, "grad_norm": 0.36629512906074524, "learning_rate": 0.0009395545195201062, "loss": 0.278, "num_input_tokens_seen": 64127776, "step": 29710 }, { "epoch": 4.84747145187602, "grad_norm": 0.2922069728374481, "learning_rate": 0.0009395205892888126, "loss": 0.17, "num_input_tokens_seen": 64138560, "step": 29715 }, { "epoch": 4.848287112561175, "grad_norm": 0.06980336457490921, "learning_rate": 0.0009394866501500769, "loss": 0.0758, "num_input_tokens_seen": 64149408, "step": 29720 }, { "epoch": 4.849102773246329, "grad_norm": 0.04333982244133949, "learning_rate": 0.0009394527021045866, "loss": 0.0881, "num_input_tokens_seen": 64159104, "step": 29725 }, { "epoch": 4.849918433931484, "grad_norm": 0.04412687197327614, "learning_rate": 0.0009394187451530298, "loss": 0.1598, "num_input_tokens_seen": 64170048, "step": 29730 }, { "epoch": 4.850734094616639, "grad_norm": 0.04543422907590866, "learning_rate": 0.0009393847792960948, "loss": 0.0837, "num_input_tokens_seen": 64181024, "step": 29735 }, { "epoch": 4.851549755301795, "grad_norm": 0.06632301956415176, "learning_rate": 0.0009393508045344697, "loss": 0.0704, "num_input_tokens_seen": 64192768, "step": 29740 }, { "epoch": 4.85236541598695, "grad_norm": 0.10186554491519928, "learning_rate": 0.0009393168208688432, "loss": 0.1283, "num_input_tokens_seen": 64203872, "step": 29745 }, { "epoch": 4.853181076672104, "grad_norm": 0.2383325695991516, "learning_rate": 0.0009392828282999042, "loss": 0.1097, "num_input_tokens_seen": 64215488, "step": 29750 }, { "epoch": 4.853996737357259, "grad_norm": 0.0762874111533165, "learning_rate": 0.0009392488268283412, "loss": 0.062, "num_input_tokens_seen": 64226944, "step": 29755 }, { "epoch": 4.854812398042414, "grad_norm": 0.10847117751836777, "learning_rate": 0.0009392148164548436, "loss": 0.1095, "num_input_tokens_seen": 64236416, "step": 29760 }, { "epoch": 4.85562805872757, "grad_norm": 0.00667162612080574, "learning_rate": 0.0009391807971801005, "loss": 0.0292, "num_input_tokens_seen": 64246464, "step": 29765 }, { "epoch": 4.856443719412725, "grad_norm": 0.051318198442459106, "learning_rate": 0.0009391467690048014, "loss": 0.0763, "num_input_tokens_seen": 64257568, "step": 29770 }, { "epoch": 4.857259380097879, "grad_norm": 0.016104619950056076, "learning_rate": 0.000939112731929636, "loss": 0.0408, "num_input_tokens_seen": 64267136, "step": 29775 }, { "epoch": 4.858075040783034, "grad_norm": 0.058087367564439774, "learning_rate": 0.000939078685955294, "loss": 0.1462, "num_input_tokens_seen": 64278080, "step": 29780 }, { "epoch": 4.858890701468189, "grad_norm": 0.14451642334461212, "learning_rate": 0.0009390446310824654, "loss": 0.1651, "num_input_tokens_seen": 64288064, "step": 29785 }, { "epoch": 4.859706362153344, "grad_norm": 0.17152109742164612, "learning_rate": 0.0009390105673118405, "loss": 0.1358, "num_input_tokens_seen": 64300032, "step": 29790 }, { "epoch": 4.8605220228384995, "grad_norm": 0.01143634133040905, "learning_rate": 0.0009389764946441094, "loss": 0.0747, "num_input_tokens_seen": 64311040, "step": 29795 }, { "epoch": 4.861337683523654, "grad_norm": 0.20366577804088593, "learning_rate": 0.0009389424130799628, "loss": 0.1981, "num_input_tokens_seen": 64322912, "step": 29800 }, { "epoch": 4.862153344208809, "grad_norm": 0.21836383640766144, "learning_rate": 0.0009389083226200914, "loss": 0.122, "num_input_tokens_seen": 64333792, "step": 29805 }, { "epoch": 4.862969004893964, "grad_norm": 0.0703054666519165, "learning_rate": 0.0009388742232651859, "loss": 0.0761, "num_input_tokens_seen": 64344448, "step": 29810 }, { "epoch": 4.863784665579119, "grad_norm": 0.30703631043434143, "learning_rate": 0.0009388401150159377, "loss": 0.1806, "num_input_tokens_seen": 64355360, "step": 29815 }, { "epoch": 4.864600326264274, "grad_norm": 0.06548678874969482, "learning_rate": 0.0009388059978730377, "loss": 0.1619, "num_input_tokens_seen": 64367136, "step": 29820 }, { "epoch": 4.865415986949429, "grad_norm": 0.2728617191314697, "learning_rate": 0.0009387718718371776, "loss": 0.1363, "num_input_tokens_seen": 64377760, "step": 29825 }, { "epoch": 4.866231647634584, "grad_norm": 0.1754475086927414, "learning_rate": 0.0009387377369090489, "loss": 0.0744, "num_input_tokens_seen": 64388288, "step": 29830 }, { "epoch": 4.867047308319739, "grad_norm": 0.051699407398700714, "learning_rate": 0.0009387035930893433, "loss": 0.0904, "num_input_tokens_seen": 64399552, "step": 29835 }, { "epoch": 4.867862969004894, "grad_norm": 0.04078228399157524, "learning_rate": 0.0009386694403787529, "loss": 0.099, "num_input_tokens_seen": 64410048, "step": 29840 }, { "epoch": 4.868678629690049, "grad_norm": 0.07273681461811066, "learning_rate": 0.0009386352787779697, "loss": 0.033, "num_input_tokens_seen": 64420096, "step": 29845 }, { "epoch": 4.869494290375204, "grad_norm": 0.3901907503604889, "learning_rate": 0.0009386011082876863, "loss": 0.1145, "num_input_tokens_seen": 64431680, "step": 29850 }, { "epoch": 4.870309951060359, "grad_norm": 0.13881205022335052, "learning_rate": 0.000938566928908595, "loss": 0.0858, "num_input_tokens_seen": 64443488, "step": 29855 }, { "epoch": 4.871125611745514, "grad_norm": 0.11534479260444641, "learning_rate": 0.0009385327406413883, "loss": 0.0529, "num_input_tokens_seen": 64453408, "step": 29860 }, { "epoch": 4.871941272430669, "grad_norm": 0.13814304769039154, "learning_rate": 0.0009384985434867597, "loss": 0.0582, "num_input_tokens_seen": 64463104, "step": 29865 }, { "epoch": 4.872756933115824, "grad_norm": 0.34494641423225403, "learning_rate": 0.0009384643374454014, "loss": 0.0534, "num_input_tokens_seen": 64473760, "step": 29870 }, { "epoch": 4.873572593800979, "grad_norm": 0.13519392907619476, "learning_rate": 0.0009384301225180074, "loss": 0.06, "num_input_tokens_seen": 64485376, "step": 29875 }, { "epoch": 4.874388254486134, "grad_norm": 0.010102201253175735, "learning_rate": 0.0009383958987052706, "loss": 0.1586, "num_input_tokens_seen": 64496672, "step": 29880 }, { "epoch": 4.875203915171289, "grad_norm": 1.6514872312545776, "learning_rate": 0.0009383616660078849, "loss": 0.1679, "num_input_tokens_seen": 64507584, "step": 29885 }, { "epoch": 4.876019575856444, "grad_norm": 0.11608785390853882, "learning_rate": 0.0009383274244265438, "loss": 0.1559, "num_input_tokens_seen": 64517760, "step": 29890 }, { "epoch": 4.876835236541599, "grad_norm": 0.15965931117534637, "learning_rate": 0.0009382931739619416, "loss": 0.1601, "num_input_tokens_seen": 64529248, "step": 29895 }, { "epoch": 4.877650897226753, "grad_norm": 0.10025332123041153, "learning_rate": 0.000938258914614772, "loss": 0.0283, "num_input_tokens_seen": 64538656, "step": 29900 }, { "epoch": 4.878466557911908, "grad_norm": 0.18756964802742004, "learning_rate": 0.0009382246463857295, "loss": 0.0507, "num_input_tokens_seen": 64549568, "step": 29905 }, { "epoch": 4.879282218597064, "grad_norm": 0.12705452740192413, "learning_rate": 0.0009381903692755087, "loss": 0.0769, "num_input_tokens_seen": 64560736, "step": 29910 }, { "epoch": 4.880097879282219, "grad_norm": 0.008374611847102642, "learning_rate": 0.0009381560832848043, "loss": 0.0326, "num_input_tokens_seen": 64572704, "step": 29915 }, { "epoch": 4.8809135399673735, "grad_norm": 0.011017858982086182, "learning_rate": 0.0009381217884143109, "loss": 0.141, "num_input_tokens_seen": 64584160, "step": 29920 }, { "epoch": 4.881729200652528, "grad_norm": 0.2521616220474243, "learning_rate": 0.0009380874846647236, "loss": 0.1532, "num_input_tokens_seen": 64594496, "step": 29925 }, { "epoch": 4.882544861337683, "grad_norm": 0.15471269190311432, "learning_rate": 0.0009380531720367378, "loss": 0.0255, "num_input_tokens_seen": 64604832, "step": 29930 }, { "epoch": 4.883360522022839, "grad_norm": 0.1828734129667282, "learning_rate": 0.0009380188505310488, "loss": 0.1386, "num_input_tokens_seen": 64615072, "step": 29935 }, { "epoch": 4.884176182707994, "grad_norm": 0.015691427513957024, "learning_rate": 0.0009379845201483519, "loss": 0.0502, "num_input_tokens_seen": 64624736, "step": 29940 }, { "epoch": 4.8849918433931485, "grad_norm": 0.048035524785518646, "learning_rate": 0.0009379501808893433, "loss": 0.0335, "num_input_tokens_seen": 64636256, "step": 29945 }, { "epoch": 4.885807504078303, "grad_norm": 0.22111110389232635, "learning_rate": 0.0009379158327547186, "loss": 0.0445, "num_input_tokens_seen": 64646112, "step": 29950 }, { "epoch": 4.886623164763458, "grad_norm": 0.06628531962633133, "learning_rate": 0.000937881475745174, "loss": 0.0636, "num_input_tokens_seen": 64655584, "step": 29955 }, { "epoch": 4.887438825448614, "grad_norm": 0.06996522843837738, "learning_rate": 0.0009378471098614059, "loss": 0.0872, "num_input_tokens_seen": 64665952, "step": 29960 }, { "epoch": 4.888254486133769, "grad_norm": 0.020639104768633842, "learning_rate": 0.0009378127351041106, "loss": 0.0538, "num_input_tokens_seen": 64677312, "step": 29965 }, { "epoch": 4.8890701468189235, "grad_norm": 0.05463831126689911, "learning_rate": 0.0009377783514739848, "loss": 0.093, "num_input_tokens_seen": 64687744, "step": 29970 }, { "epoch": 4.889885807504078, "grad_norm": 0.3268308639526367, "learning_rate": 0.0009377439589717254, "loss": 0.0685, "num_input_tokens_seen": 64696096, "step": 29975 }, { "epoch": 4.890701468189233, "grad_norm": 0.09988824278116226, "learning_rate": 0.0009377095575980293, "loss": 0.0652, "num_input_tokens_seen": 64707104, "step": 29980 }, { "epoch": 4.891517128874388, "grad_norm": 0.5178554058074951, "learning_rate": 0.0009376751473535939, "loss": 0.1403, "num_input_tokens_seen": 64717376, "step": 29985 }, { "epoch": 4.892332789559543, "grad_norm": 0.5496912598609924, "learning_rate": 0.0009376407282391161, "loss": 0.2846, "num_input_tokens_seen": 64728096, "step": 29990 }, { "epoch": 4.8931484502446985, "grad_norm": 0.012093810364603996, "learning_rate": 0.0009376063002552939, "loss": 0.0584, "num_input_tokens_seen": 64738784, "step": 29995 }, { "epoch": 4.893964110929853, "grad_norm": 0.4196426272392273, "learning_rate": 0.0009375718634028249, "loss": 0.131, "num_input_tokens_seen": 64750048, "step": 30000 }, { "epoch": 4.894779771615008, "grad_norm": 0.18824540078639984, "learning_rate": 0.0009375374176824071, "loss": 0.0457, "num_input_tokens_seen": 64758752, "step": 30005 }, { "epoch": 4.895595432300163, "grad_norm": 0.11146194487810135, "learning_rate": 0.0009375029630947384, "loss": 0.1676, "num_input_tokens_seen": 64770464, "step": 30010 }, { "epoch": 4.896411092985318, "grad_norm": 0.628357470035553, "learning_rate": 0.000937468499640517, "loss": 0.1396, "num_input_tokens_seen": 64781504, "step": 30015 }, { "epoch": 4.897226753670473, "grad_norm": 0.06570843607187271, "learning_rate": 0.0009374340273204416, "loss": 0.0842, "num_input_tokens_seen": 64792288, "step": 30020 }, { "epoch": 4.898042414355628, "grad_norm": 0.38559427857398987, "learning_rate": 0.0009373995461352107, "loss": 0.1111, "num_input_tokens_seen": 64803200, "step": 30025 }, { "epoch": 4.898858075040783, "grad_norm": 0.49251124262809753, "learning_rate": 0.0009373650560855232, "loss": 0.081, "num_input_tokens_seen": 64813216, "step": 30030 }, { "epoch": 4.899673735725938, "grad_norm": 0.2723062336444855, "learning_rate": 0.0009373305571720779, "loss": 0.0586, "num_input_tokens_seen": 64824672, "step": 30035 }, { "epoch": 4.900489396411093, "grad_norm": 0.012264762073755264, "learning_rate": 0.0009372960493955741, "loss": 0.0716, "num_input_tokens_seen": 64833888, "step": 30040 }, { "epoch": 4.901305057096248, "grad_norm": 0.06054016947746277, "learning_rate": 0.0009372615327567111, "loss": 0.0736, "num_input_tokens_seen": 64845376, "step": 30045 }, { "epoch": 4.902120717781403, "grad_norm": 0.04564357548952103, "learning_rate": 0.0009372270072561885, "loss": 0.0798, "num_input_tokens_seen": 64855616, "step": 30050 }, { "epoch": 4.902936378466558, "grad_norm": 0.6209138631820679, "learning_rate": 0.0009371924728947059, "loss": 0.165, "num_input_tokens_seen": 64866400, "step": 30055 }, { "epoch": 4.903752039151713, "grad_norm": 0.028712505474686623, "learning_rate": 0.0009371579296729631, "loss": 0.0468, "num_input_tokens_seen": 64876480, "step": 30060 }, { "epoch": 4.904567699836868, "grad_norm": 0.05397950857877731, "learning_rate": 0.0009371233775916604, "loss": 0.0634, "num_input_tokens_seen": 64887104, "step": 30065 }, { "epoch": 4.9053833605220225, "grad_norm": 0.21328820288181305, "learning_rate": 0.0009370888166514979, "loss": 0.0406, "num_input_tokens_seen": 64897152, "step": 30070 }, { "epoch": 4.906199021207177, "grad_norm": 0.4408571422100067, "learning_rate": 0.0009370542468531761, "loss": 0.2691, "num_input_tokens_seen": 64908384, "step": 30075 }, { "epoch": 4.907014681892333, "grad_norm": 0.1310759335756302, "learning_rate": 0.0009370196681973955, "loss": 0.0849, "num_input_tokens_seen": 64918720, "step": 30080 }, { "epoch": 4.907830342577488, "grad_norm": 0.07348483800888062, "learning_rate": 0.0009369850806848569, "loss": 0.2141, "num_input_tokens_seen": 64930816, "step": 30085 }, { "epoch": 4.908646003262643, "grad_norm": 0.03151915967464447, "learning_rate": 0.0009369504843162613, "loss": 0.1985, "num_input_tokens_seen": 64942880, "step": 30090 }, { "epoch": 4.9094616639477975, "grad_norm": 0.022546501830220222, "learning_rate": 0.0009369158790923098, "loss": 0.0388, "num_input_tokens_seen": 64953344, "step": 30095 }, { "epoch": 4.910277324632952, "grad_norm": 0.2897094190120697, "learning_rate": 0.0009368812650137038, "loss": 0.0755, "num_input_tokens_seen": 64964608, "step": 30100 }, { "epoch": 4.911092985318108, "grad_norm": 0.08280867338180542, "learning_rate": 0.0009368466420811446, "loss": 0.033, "num_input_tokens_seen": 64975648, "step": 30105 }, { "epoch": 4.911908646003263, "grad_norm": 0.19784578680992126, "learning_rate": 0.0009368120102953341, "loss": 0.0593, "num_input_tokens_seen": 64985216, "step": 30110 }, { "epoch": 4.912724306688418, "grad_norm": 0.2454591989517212, "learning_rate": 0.0009367773696569742, "loss": 0.1424, "num_input_tokens_seen": 64997248, "step": 30115 }, { "epoch": 4.9135399673735725, "grad_norm": 0.29510167241096497, "learning_rate": 0.0009367427201667667, "loss": 0.0383, "num_input_tokens_seen": 65008384, "step": 30120 }, { "epoch": 4.914355628058727, "grad_norm": 0.027076195925474167, "learning_rate": 0.000936708061825414, "loss": 0.0274, "num_input_tokens_seen": 65018496, "step": 30125 }, { "epoch": 4.915171288743883, "grad_norm": 0.03842652961611748, "learning_rate": 0.0009366733946336184, "loss": 0.0291, "num_input_tokens_seen": 65028864, "step": 30130 }, { "epoch": 4.915986949429038, "grad_norm": 0.028925683349370956, "learning_rate": 0.0009366387185920824, "loss": 0.1313, "num_input_tokens_seen": 65040032, "step": 30135 }, { "epoch": 4.916802610114193, "grad_norm": 0.07092883437871933, "learning_rate": 0.0009366040337015089, "loss": 0.0924, "num_input_tokens_seen": 65051584, "step": 30140 }, { "epoch": 4.917618270799347, "grad_norm": 0.12043336033821106, "learning_rate": 0.0009365693399626009, "loss": 0.0425, "num_input_tokens_seen": 65061920, "step": 30145 }, { "epoch": 4.918433931484502, "grad_norm": 0.17061249911785126, "learning_rate": 0.0009365346373760613, "loss": 0.1847, "num_input_tokens_seen": 65072576, "step": 30150 }, { "epoch": 4.919249592169657, "grad_norm": 0.023153536021709442, "learning_rate": 0.0009364999259425935, "loss": 0.1012, "num_input_tokens_seen": 65082816, "step": 30155 }, { "epoch": 4.920065252854813, "grad_norm": 0.12975503504276276, "learning_rate": 0.0009364652056629008, "loss": 0.0644, "num_input_tokens_seen": 65092864, "step": 30160 }, { "epoch": 4.920880913539968, "grad_norm": 0.43951335549354553, "learning_rate": 0.0009364304765376872, "loss": 0.307, "num_input_tokens_seen": 65103776, "step": 30165 }, { "epoch": 4.921696574225122, "grad_norm": 0.03118697740137577, "learning_rate": 0.0009363957385676563, "loss": 0.1467, "num_input_tokens_seen": 65115936, "step": 30170 }, { "epoch": 4.922512234910277, "grad_norm": 0.06966501474380493, "learning_rate": 0.0009363609917535122, "loss": 0.061, "num_input_tokens_seen": 65125536, "step": 30175 }, { "epoch": 4.923327895595432, "grad_norm": 0.025214113295078278, "learning_rate": 0.000936326236095959, "loss": 0.1919, "num_input_tokens_seen": 65136160, "step": 30180 }, { "epoch": 4.924143556280587, "grad_norm": 0.2285212278366089, "learning_rate": 0.0009362914715957011, "loss": 0.0646, "num_input_tokens_seen": 65146688, "step": 30185 }, { "epoch": 4.924959216965743, "grad_norm": 0.040142208337783813, "learning_rate": 0.000936256698253443, "loss": 0.0932, "num_input_tokens_seen": 65157024, "step": 30190 }, { "epoch": 4.925774877650897, "grad_norm": 0.07100296765565872, "learning_rate": 0.0009362219160698895, "loss": 0.1319, "num_input_tokens_seen": 65167360, "step": 30195 }, { "epoch": 4.926590538336052, "grad_norm": 0.2810527980327606, "learning_rate": 0.0009361871250457457, "loss": 0.1097, "num_input_tokens_seen": 65178080, "step": 30200 }, { "epoch": 4.927406199021207, "grad_norm": 0.28353655338287354, "learning_rate": 0.0009361523251817161, "loss": 0.1216, "num_input_tokens_seen": 65189824, "step": 30205 }, { "epoch": 4.928221859706362, "grad_norm": 0.018714554607868195, "learning_rate": 0.0009361175164785065, "loss": 0.0863, "num_input_tokens_seen": 65200448, "step": 30210 }, { "epoch": 4.9290375203915175, "grad_norm": 0.1125372052192688, "learning_rate": 0.0009360826989368223, "loss": 0.0545, "num_input_tokens_seen": 65212896, "step": 30215 }, { "epoch": 4.929853181076672, "grad_norm": 0.03366836905479431, "learning_rate": 0.0009360478725573689, "loss": 0.1093, "num_input_tokens_seen": 65224096, "step": 30220 }, { "epoch": 4.930668841761827, "grad_norm": 0.300168514251709, "learning_rate": 0.0009360130373408522, "loss": 0.0719, "num_input_tokens_seen": 65233472, "step": 30225 }, { "epoch": 4.931484502446982, "grad_norm": 0.10281326621770859, "learning_rate": 0.000935978193287978, "loss": 0.1318, "num_input_tokens_seen": 65244320, "step": 30230 }, { "epoch": 4.932300163132137, "grad_norm": 0.021494394168257713, "learning_rate": 0.0009359433403994529, "loss": 0.0781, "num_input_tokens_seen": 65255872, "step": 30235 }, { "epoch": 4.933115823817292, "grad_norm": 0.07253220677375793, "learning_rate": 0.0009359084786759828, "loss": 0.1711, "num_input_tokens_seen": 65265856, "step": 30240 }, { "epoch": 4.933931484502447, "grad_norm": 0.029242802411317825, "learning_rate": 0.0009358736081182746, "loss": 0.1843, "num_input_tokens_seen": 65275712, "step": 30245 }, { "epoch": 4.934747145187602, "grad_norm": 0.044183261692523956, "learning_rate": 0.0009358387287270346, "loss": 0.0587, "num_input_tokens_seen": 65285056, "step": 30250 }, { "epoch": 4.935562805872757, "grad_norm": 0.0510975643992424, "learning_rate": 0.0009358038405029699, "loss": 0.054, "num_input_tokens_seen": 65296096, "step": 30255 }, { "epoch": 4.936378466557912, "grad_norm": 0.0451134592294693, "learning_rate": 0.0009357689434467875, "loss": 0.1473, "num_input_tokens_seen": 65307136, "step": 30260 }, { "epoch": 4.937194127243067, "grad_norm": 0.013538416475057602, "learning_rate": 0.0009357340375591947, "loss": 0.1251, "num_input_tokens_seen": 65316832, "step": 30265 }, { "epoch": 4.938009787928221, "grad_norm": 0.2710576057434082, "learning_rate": 0.0009356991228408988, "loss": 0.2022, "num_input_tokens_seen": 65327072, "step": 30270 }, { "epoch": 4.938825448613377, "grad_norm": 0.21041344106197357, "learning_rate": 0.0009356641992926075, "loss": 0.1011, "num_input_tokens_seen": 65336864, "step": 30275 }, { "epoch": 4.939641109298532, "grad_norm": 0.03190971910953522, "learning_rate": 0.0009356292669150286, "loss": 0.1918, "num_input_tokens_seen": 65347904, "step": 30280 }, { "epoch": 4.940456769983687, "grad_norm": 0.4128536283969879, "learning_rate": 0.0009355943257088698, "loss": 0.2331, "num_input_tokens_seen": 65358880, "step": 30285 }, { "epoch": 4.941272430668842, "grad_norm": 0.0862661674618721, "learning_rate": 0.0009355593756748395, "loss": 0.0553, "num_input_tokens_seen": 65369856, "step": 30290 }, { "epoch": 4.942088091353996, "grad_norm": 0.0422455258667469, "learning_rate": 0.0009355244168136459, "loss": 0.0915, "num_input_tokens_seen": 65381056, "step": 30295 }, { "epoch": 4.942903752039152, "grad_norm": 0.48945385217666626, "learning_rate": 0.0009354894491259975, "loss": 0.2201, "num_input_tokens_seen": 65391680, "step": 30300 }, { "epoch": 4.943719412724307, "grad_norm": 0.016438154503703117, "learning_rate": 0.0009354544726126029, "loss": 0.0407, "num_input_tokens_seen": 65403648, "step": 30305 }, { "epoch": 4.944535073409462, "grad_norm": 0.4240013062953949, "learning_rate": 0.000935419487274171, "loss": 0.0429, "num_input_tokens_seen": 65415232, "step": 30310 }, { "epoch": 4.945350734094617, "grad_norm": 0.016234492883086205, "learning_rate": 0.0009353844931114108, "loss": 0.0618, "num_input_tokens_seen": 65425088, "step": 30315 }, { "epoch": 4.946166394779771, "grad_norm": 0.007609333377331495, "learning_rate": 0.0009353494901250316, "loss": 0.044, "num_input_tokens_seen": 65436096, "step": 30320 }, { "epoch": 4.946982055464927, "grad_norm": 0.023671409115195274, "learning_rate": 0.0009353144783157428, "loss": 0.0796, "num_input_tokens_seen": 65448096, "step": 30325 }, { "epoch": 4.947797716150082, "grad_norm": 0.024401243776082993, "learning_rate": 0.0009352794576842536, "loss": 0.0438, "num_input_tokens_seen": 65459744, "step": 30330 }, { "epoch": 4.948613376835237, "grad_norm": 0.16251614689826965, "learning_rate": 0.0009352444282312742, "loss": 0.0833, "num_input_tokens_seen": 65469536, "step": 30335 }, { "epoch": 4.9494290375203915, "grad_norm": 0.031157614663243294, "learning_rate": 0.0009352093899575143, "loss": 0.0311, "num_input_tokens_seen": 65480032, "step": 30340 }, { "epoch": 4.950244698205546, "grad_norm": 0.3116149306297302, "learning_rate": 0.0009351743428636838, "loss": 0.1266, "num_input_tokens_seen": 65490656, "step": 30345 }, { "epoch": 4.951060358890701, "grad_norm": 0.2033126950263977, "learning_rate": 0.0009351392869504934, "loss": 0.0796, "num_input_tokens_seen": 65502656, "step": 30350 }, { "epoch": 4.951876019575856, "grad_norm": 0.06770803779363632, "learning_rate": 0.0009351042222186533, "loss": 0.1136, "num_input_tokens_seen": 65513472, "step": 30355 }, { "epoch": 4.952691680261012, "grad_norm": 0.10633420944213867, "learning_rate": 0.0009350691486688743, "loss": 0.0715, "num_input_tokens_seen": 65522656, "step": 30360 }, { "epoch": 4.9535073409461665, "grad_norm": 0.4701528549194336, "learning_rate": 0.0009350340663018668, "loss": 0.1155, "num_input_tokens_seen": 65533824, "step": 30365 }, { "epoch": 4.954323001631321, "grad_norm": 0.47463637590408325, "learning_rate": 0.0009349989751183422, "loss": 0.1674, "num_input_tokens_seen": 65545440, "step": 30370 }, { "epoch": 4.955138662316476, "grad_norm": 0.012722990475594997, "learning_rate": 0.0009349638751190115, "loss": 0.0198, "num_input_tokens_seen": 65555936, "step": 30375 }, { "epoch": 4.955954323001631, "grad_norm": 0.23726746439933777, "learning_rate": 0.0009349287663045862, "loss": 0.0361, "num_input_tokens_seen": 65566784, "step": 30380 }, { "epoch": 4.956769983686787, "grad_norm": 0.006022026762366295, "learning_rate": 0.0009348936486757775, "loss": 0.0325, "num_input_tokens_seen": 65577344, "step": 30385 }, { "epoch": 4.9575856443719415, "grad_norm": 0.012197334319353104, "learning_rate": 0.0009348585222332975, "loss": 0.0129, "num_input_tokens_seen": 65588256, "step": 30390 }, { "epoch": 4.958401305057096, "grad_norm": 0.3923030197620392, "learning_rate": 0.0009348233869778577, "loss": 0.2384, "num_input_tokens_seen": 65600064, "step": 30395 }, { "epoch": 4.959216965742251, "grad_norm": 0.09563448280096054, "learning_rate": 0.0009347882429101706, "loss": 0.0978, "num_input_tokens_seen": 65611552, "step": 30400 }, { "epoch": 4.960032626427406, "grad_norm": 0.031905822455883026, "learning_rate": 0.000934753090030948, "loss": 0.0503, "num_input_tokens_seen": 65622208, "step": 30405 }, { "epoch": 4.960848287112562, "grad_norm": 0.1544463038444519, "learning_rate": 0.0009347179283409027, "loss": 0.1834, "num_input_tokens_seen": 65633280, "step": 30410 }, { "epoch": 4.9616639477977165, "grad_norm": 0.07640384882688522, "learning_rate": 0.0009346827578407468, "loss": 0.1682, "num_input_tokens_seen": 65643424, "step": 30415 }, { "epoch": 4.962479608482871, "grad_norm": 0.09786983579397202, "learning_rate": 0.0009346475785311936, "loss": 0.1323, "num_input_tokens_seen": 65654400, "step": 30420 }, { "epoch": 4.963295269168026, "grad_norm": 0.1392572969198227, "learning_rate": 0.0009346123904129558, "loss": 0.0979, "num_input_tokens_seen": 65665920, "step": 30425 }, { "epoch": 4.964110929853181, "grad_norm": 0.2265726923942566, "learning_rate": 0.0009345771934867464, "loss": 0.1892, "num_input_tokens_seen": 65676128, "step": 30430 }, { "epoch": 4.964926590538336, "grad_norm": 0.042617909610271454, "learning_rate": 0.000934541987753279, "loss": 0.106, "num_input_tokens_seen": 65687072, "step": 30435 }, { "epoch": 4.9657422512234906, "grad_norm": 0.32296258211135864, "learning_rate": 0.0009345067732132671, "loss": 0.2053, "num_input_tokens_seen": 65698464, "step": 30440 }, { "epoch": 4.966557911908646, "grad_norm": 0.2745981812477112, "learning_rate": 0.0009344715498674241, "loss": 0.1528, "num_input_tokens_seen": 65708736, "step": 30445 }, { "epoch": 4.967373572593801, "grad_norm": 0.07116451859474182, "learning_rate": 0.0009344363177164639, "loss": 0.2705, "num_input_tokens_seen": 65720224, "step": 30450 }, { "epoch": 4.968189233278956, "grad_norm": 0.059191279113292694, "learning_rate": 0.0009344010767611007, "loss": 0.032, "num_input_tokens_seen": 65731872, "step": 30455 }, { "epoch": 4.969004893964111, "grad_norm": 0.03668803721666336, "learning_rate": 0.0009343658270020485, "loss": 0.2502, "num_input_tokens_seen": 65742816, "step": 30460 }, { "epoch": 4.9698205546492655, "grad_norm": 0.048953376710414886, "learning_rate": 0.000934330568440022, "loss": 0.099, "num_input_tokens_seen": 65754976, "step": 30465 }, { "epoch": 4.970636215334421, "grad_norm": 0.16218747198581696, "learning_rate": 0.0009342953010757353, "loss": 0.1382, "num_input_tokens_seen": 65765184, "step": 30470 }, { "epoch": 4.971451876019576, "grad_norm": 0.09414005279541016, "learning_rate": 0.0009342600249099036, "loss": 0.0389, "num_input_tokens_seen": 65775488, "step": 30475 }, { "epoch": 4.972267536704731, "grad_norm": 0.290388822555542, "learning_rate": 0.0009342247399432414, "loss": 0.2852, "num_input_tokens_seen": 65786176, "step": 30480 }, { "epoch": 4.973083197389886, "grad_norm": 0.1155647560954094, "learning_rate": 0.0009341894461764641, "loss": 0.0434, "num_input_tokens_seen": 65796928, "step": 30485 }, { "epoch": 4.9738988580750405, "grad_norm": 0.19735847413539886, "learning_rate": 0.0009341541436102868, "loss": 0.094, "num_input_tokens_seen": 65805920, "step": 30490 }, { "epoch": 4.974714518760196, "grad_norm": 0.14072448015213013, "learning_rate": 0.0009341188322454251, "loss": 0.0864, "num_input_tokens_seen": 65817376, "step": 30495 }, { "epoch": 4.975530179445351, "grad_norm": 0.17053815722465515, "learning_rate": 0.0009340835120825946, "loss": 0.1606, "num_input_tokens_seen": 65828224, "step": 30500 }, { "epoch": 4.976345840130506, "grad_norm": 0.08141333609819412, "learning_rate": 0.0009340481831225109, "loss": 0.105, "num_input_tokens_seen": 65837408, "step": 30505 }, { "epoch": 4.977161500815661, "grad_norm": 0.07430624216794968, "learning_rate": 0.0009340128453658902, "loss": 0.0405, "num_input_tokens_seen": 65847968, "step": 30510 }, { "epoch": 4.9779771615008155, "grad_norm": 0.3916773796081543, "learning_rate": 0.0009339774988134487, "loss": 0.1222, "num_input_tokens_seen": 65858080, "step": 30515 }, { "epoch": 4.97879282218597, "grad_norm": 0.06253232061862946, "learning_rate": 0.0009339421434659025, "loss": 0.0639, "num_input_tokens_seen": 65868800, "step": 30520 }, { "epoch": 4.979608482871125, "grad_norm": 0.054424576461315155, "learning_rate": 0.0009339067793239682, "loss": 0.1295, "num_input_tokens_seen": 65879168, "step": 30525 }, { "epoch": 4.980424143556281, "grad_norm": 0.11795315146446228, "learning_rate": 0.0009338714063883627, "loss": 0.1647, "num_input_tokens_seen": 65890336, "step": 30530 }, { "epoch": 4.981239804241436, "grad_norm": 0.5576001405715942, "learning_rate": 0.0009338360246598028, "loss": 0.0856, "num_input_tokens_seen": 65900608, "step": 30535 }, { "epoch": 4.9820554649265905, "grad_norm": 0.2980618476867676, "learning_rate": 0.0009338006341390053, "loss": 0.1715, "num_input_tokens_seen": 65911520, "step": 30540 }, { "epoch": 4.982871125611745, "grad_norm": 0.2847082316875458, "learning_rate": 0.0009337652348266879, "loss": 0.0927, "num_input_tokens_seen": 65921824, "step": 30545 }, { "epoch": 4.9836867862969, "grad_norm": 0.04898188263177872, "learning_rate": 0.0009337298267235675, "loss": 0.0898, "num_input_tokens_seen": 65932992, "step": 30550 }, { "epoch": 4.984502446982056, "grad_norm": 0.04886677488684654, "learning_rate": 0.0009336944098303621, "loss": 0.0963, "num_input_tokens_seen": 65944832, "step": 30555 }, { "epoch": 4.985318107667211, "grad_norm": 0.04521568492054939, "learning_rate": 0.0009336589841477893, "loss": 0.1691, "num_input_tokens_seen": 65955424, "step": 30560 }, { "epoch": 4.986133768352365, "grad_norm": 0.10076390951871872, "learning_rate": 0.0009336235496765669, "loss": 0.0651, "num_input_tokens_seen": 65967104, "step": 30565 }, { "epoch": 4.98694942903752, "grad_norm": 0.414104700088501, "learning_rate": 0.0009335881064174134, "loss": 0.1101, "num_input_tokens_seen": 65978688, "step": 30570 }, { "epoch": 4.987765089722675, "grad_norm": 0.3290456235408783, "learning_rate": 0.0009335526543710466, "loss": 0.2528, "num_input_tokens_seen": 65989184, "step": 30575 }, { "epoch": 4.988580750407831, "grad_norm": 0.15977032482624054, "learning_rate": 0.0009335171935381854, "loss": 0.1516, "num_input_tokens_seen": 65999424, "step": 30580 }, { "epoch": 4.989396411092986, "grad_norm": 0.003968545701354742, "learning_rate": 0.0009334817239195483, "loss": 0.1027, "num_input_tokens_seen": 66010944, "step": 30585 }, { "epoch": 4.99021207177814, "grad_norm": 0.01838322915136814, "learning_rate": 0.0009334462455158543, "loss": 0.0335, "num_input_tokens_seen": 66021184, "step": 30590 }, { "epoch": 4.991027732463295, "grad_norm": 0.004844896961003542, "learning_rate": 0.0009334107583278222, "loss": 0.04, "num_input_tokens_seen": 66030368, "step": 30595 }, { "epoch": 4.99184339314845, "grad_norm": 0.04051712527871132, "learning_rate": 0.0009333752623561711, "loss": 0.1671, "num_input_tokens_seen": 66040192, "step": 30600 }, { "epoch": 4.992659053833605, "grad_norm": 0.021550197154283524, "learning_rate": 0.0009333397576016207, "loss": 0.0284, "num_input_tokens_seen": 66052256, "step": 30605 }, { "epoch": 4.993474714518761, "grad_norm": 0.26868751645088196, "learning_rate": 0.0009333042440648903, "loss": 0.1617, "num_input_tokens_seen": 66061344, "step": 30610 }, { "epoch": 4.994290375203915, "grad_norm": 0.09994775801897049, "learning_rate": 0.0009332687217466997, "loss": 0.1212, "num_input_tokens_seen": 66072832, "step": 30615 }, { "epoch": 4.99510603588907, "grad_norm": 0.03586184233427048, "learning_rate": 0.000933233190647769, "loss": 0.0139, "num_input_tokens_seen": 66082528, "step": 30620 }, { "epoch": 4.995921696574225, "grad_norm": 0.03174156695604324, "learning_rate": 0.0009331976507688178, "loss": 0.0962, "num_input_tokens_seen": 66093184, "step": 30625 }, { "epoch": 4.99673735725938, "grad_norm": 0.03697315976023674, "learning_rate": 0.0009331621021105668, "loss": 0.0444, "num_input_tokens_seen": 66104384, "step": 30630 }, { "epoch": 4.997553017944535, "grad_norm": 0.186958909034729, "learning_rate": 0.0009331265446737364, "loss": 0.116, "num_input_tokens_seen": 66115872, "step": 30635 }, { "epoch": 4.99836867862969, "grad_norm": 0.25306788086891174, "learning_rate": 0.0009330909784590469, "loss": 0.0752, "num_input_tokens_seen": 66127200, "step": 30640 }, { "epoch": 4.999184339314845, "grad_norm": 0.23427020013332367, "learning_rate": 0.0009330554034672194, "loss": 0.0766, "num_input_tokens_seen": 66137440, "step": 30645 }, { "epoch": 5.0, "grad_norm": 0.042081333696842194, "learning_rate": 0.0009330198196989749, "loss": 0.0214, "num_input_tokens_seen": 66146528, "step": 30650 }, { "epoch": 5.0, "eval_loss": 0.13101254403591156, "eval_runtime": 104.2871, "eval_samples_per_second": 26.13, "eval_steps_per_second": 6.54, "num_input_tokens_seen": 66146528, "step": 30650 }, { "epoch": 5.000815660685155, "grad_norm": 0.13222938776016235, "learning_rate": 0.0009329842271550342, "loss": 0.0678, "num_input_tokens_seen": 66157952, "step": 30655 }, { "epoch": 5.00163132137031, "grad_norm": 0.026135673746466637, "learning_rate": 0.0009329486258361191, "loss": 0.0916, "num_input_tokens_seen": 66169856, "step": 30660 }, { "epoch": 5.002446982055465, "grad_norm": 0.11097142845392227, "learning_rate": 0.0009329130157429507, "loss": 0.1393, "num_input_tokens_seen": 66179904, "step": 30665 }, { "epoch": 5.00326264274062, "grad_norm": 0.07252591848373413, "learning_rate": 0.000932877396876251, "loss": 0.025, "num_input_tokens_seen": 66191424, "step": 30670 }, { "epoch": 5.004078303425775, "grad_norm": 0.6022559404373169, "learning_rate": 0.0009328417692367415, "loss": 0.0774, "num_input_tokens_seen": 66201664, "step": 30675 }, { "epoch": 5.00489396411093, "grad_norm": 0.2203999161720276, "learning_rate": 0.0009328061328251445, "loss": 0.0397, "num_input_tokens_seen": 66212736, "step": 30680 }, { "epoch": 5.005709624796085, "grad_norm": 0.04577041044831276, "learning_rate": 0.0009327704876421824, "loss": 0.0922, "num_input_tokens_seen": 66224576, "step": 30685 }, { "epoch": 5.006525285481239, "grad_norm": 0.07327186316251755, "learning_rate": 0.000932734833688577, "loss": 0.041, "num_input_tokens_seen": 66235296, "step": 30690 }, { "epoch": 5.007340946166395, "grad_norm": 0.07028944790363312, "learning_rate": 0.0009326991709650514, "loss": 0.0599, "num_input_tokens_seen": 66247392, "step": 30695 }, { "epoch": 5.00815660685155, "grad_norm": 0.2089536339044571, "learning_rate": 0.0009326634994723282, "loss": 0.1903, "num_input_tokens_seen": 66259072, "step": 30700 }, { "epoch": 5.008972267536705, "grad_norm": 0.009966757148504257, "learning_rate": 0.0009326278192111304, "loss": 0.0567, "num_input_tokens_seen": 66268960, "step": 30705 }, { "epoch": 5.00978792822186, "grad_norm": 0.01493874192237854, "learning_rate": 0.0009325921301821809, "loss": 0.0951, "num_input_tokens_seen": 66279392, "step": 30710 }, { "epoch": 5.010603588907014, "grad_norm": 0.3276427686214447, "learning_rate": 0.000932556432386203, "loss": 0.1119, "num_input_tokens_seen": 66290368, "step": 30715 }, { "epoch": 5.011419249592169, "grad_norm": 0.006174455862492323, "learning_rate": 0.0009325207258239204, "loss": 0.0805, "num_input_tokens_seen": 66301536, "step": 30720 }, { "epoch": 5.012234910277325, "grad_norm": 0.26343920826911926, "learning_rate": 0.0009324850104960566, "loss": 0.218, "num_input_tokens_seen": 66311712, "step": 30725 }, { "epoch": 5.01305057096248, "grad_norm": 0.04809245839715004, "learning_rate": 0.0009324492864033354, "loss": 0.1018, "num_input_tokens_seen": 66321952, "step": 30730 }, { "epoch": 5.013866231647635, "grad_norm": 0.3119910955429077, "learning_rate": 0.0009324135535464808, "loss": 0.1669, "num_input_tokens_seen": 66332512, "step": 30735 }, { "epoch": 5.014681892332789, "grad_norm": 0.08749029040336609, "learning_rate": 0.000932377811926217, "loss": 0.0605, "num_input_tokens_seen": 66344064, "step": 30740 }, { "epoch": 5.015497553017944, "grad_norm": 0.24919606745243073, "learning_rate": 0.0009323420615432683, "loss": 0.1552, "num_input_tokens_seen": 66354880, "step": 30745 }, { "epoch": 5.0163132137031, "grad_norm": 0.2540583312511444, "learning_rate": 0.0009323063023983593, "loss": 0.066, "num_input_tokens_seen": 66365696, "step": 30750 }, { "epoch": 5.017128874388255, "grad_norm": 0.46120768785476685, "learning_rate": 0.0009322705344922146, "loss": 0.1084, "num_input_tokens_seen": 66376416, "step": 30755 }, { "epoch": 5.0179445350734095, "grad_norm": 0.11143125593662262, "learning_rate": 0.0009322347578255592, "loss": 0.0595, "num_input_tokens_seen": 66386816, "step": 30760 }, { "epoch": 5.018760195758564, "grad_norm": 0.00618914607912302, "learning_rate": 0.0009321989723991181, "loss": 0.0516, "num_input_tokens_seen": 66397792, "step": 30765 }, { "epoch": 5.019575856443719, "grad_norm": 0.3944168984889984, "learning_rate": 0.0009321631782136166, "loss": 0.1502, "num_input_tokens_seen": 66409312, "step": 30770 }, { "epoch": 5.020391517128874, "grad_norm": 0.07844163477420807, "learning_rate": 0.0009321273752697798, "loss": 0.0742, "num_input_tokens_seen": 66419552, "step": 30775 }, { "epoch": 5.02120717781403, "grad_norm": 0.2896440923213959, "learning_rate": 0.0009320915635683338, "loss": 0.0545, "num_input_tokens_seen": 66429888, "step": 30780 }, { "epoch": 5.0220228384991845, "grad_norm": 0.008451300673186779, "learning_rate": 0.0009320557431100041, "loss": 0.0377, "num_input_tokens_seen": 66440608, "step": 30785 }, { "epoch": 5.022838499184339, "grad_norm": 0.04283365607261658, "learning_rate": 0.0009320199138955165, "loss": 0.1041, "num_input_tokens_seen": 66452352, "step": 30790 }, { "epoch": 5.023654159869494, "grad_norm": 0.01760544441640377, "learning_rate": 0.0009319840759255976, "loss": 0.0836, "num_input_tokens_seen": 66463296, "step": 30795 }, { "epoch": 5.024469820554649, "grad_norm": 0.004050792194902897, "learning_rate": 0.0009319482292009731, "loss": 0.1768, "num_input_tokens_seen": 66473824, "step": 30800 }, { "epoch": 5.025285481239805, "grad_norm": 0.047440044581890106, "learning_rate": 0.0009319123737223698, "loss": 0.046, "num_input_tokens_seen": 66480928, "step": 30805 }, { "epoch": 5.0261011419249595, "grad_norm": 0.22247743606567383, "learning_rate": 0.0009318765094905144, "loss": 0.0644, "num_input_tokens_seen": 66490688, "step": 30810 }, { "epoch": 5.026916802610114, "grad_norm": 0.15218918025493622, "learning_rate": 0.0009318406365061336, "loss": 0.0601, "num_input_tokens_seen": 66502912, "step": 30815 }, { "epoch": 5.027732463295269, "grad_norm": 0.042794618755578995, "learning_rate": 0.0009318047547699546, "loss": 0.0611, "num_input_tokens_seen": 66513856, "step": 30820 }, { "epoch": 5.028548123980424, "grad_norm": 0.30290141701698303, "learning_rate": 0.0009317688642827044, "loss": 0.3072, "num_input_tokens_seen": 66525248, "step": 30825 }, { "epoch": 5.029363784665579, "grad_norm": 0.27225708961486816, "learning_rate": 0.0009317329650451103, "loss": 0.18, "num_input_tokens_seen": 66537120, "step": 30830 }, { "epoch": 5.0301794453507345, "grad_norm": 0.107964426279068, "learning_rate": 0.0009316970570579002, "loss": 0.2255, "num_input_tokens_seen": 66546912, "step": 30835 }, { "epoch": 5.030995106035889, "grad_norm": 0.09319799393415451, "learning_rate": 0.0009316611403218013, "loss": 0.0822, "num_input_tokens_seen": 66557824, "step": 30840 }, { "epoch": 5.031810766721044, "grad_norm": 0.16843600571155548, "learning_rate": 0.000931625214837542, "loss": 0.1343, "num_input_tokens_seen": 66569120, "step": 30845 }, { "epoch": 5.032626427406199, "grad_norm": 0.04245203360915184, "learning_rate": 0.0009315892806058501, "loss": 0.1307, "num_input_tokens_seen": 66579072, "step": 30850 }, { "epoch": 5.033442088091354, "grad_norm": 0.02443167380988598, "learning_rate": 0.0009315533376274541, "loss": 0.0381, "num_input_tokens_seen": 66589344, "step": 30855 }, { "epoch": 5.034257748776509, "grad_norm": 0.0931512787938118, "learning_rate": 0.0009315173859030821, "loss": 0.0467, "num_input_tokens_seen": 66599616, "step": 30860 }, { "epoch": 5.035073409461664, "grad_norm": 0.045971162617206573, "learning_rate": 0.0009314814254334627, "loss": 0.1343, "num_input_tokens_seen": 66609952, "step": 30865 }, { "epoch": 5.035889070146819, "grad_norm": 0.02318623661994934, "learning_rate": 0.000931445456219325, "loss": 0.0464, "num_input_tokens_seen": 66621344, "step": 30870 }, { "epoch": 5.036704730831974, "grad_norm": 0.0823347270488739, "learning_rate": 0.0009314094782613977, "loss": 0.1354, "num_input_tokens_seen": 66632864, "step": 30875 }, { "epoch": 5.037520391517129, "grad_norm": 0.037048403173685074, "learning_rate": 0.0009313734915604103, "loss": 0.1191, "num_input_tokens_seen": 66644352, "step": 30880 }, { "epoch": 5.0383360522022835, "grad_norm": 0.20066314935684204, "learning_rate": 0.0009313374961170917, "loss": 0.1622, "num_input_tokens_seen": 66655968, "step": 30885 }, { "epoch": 5.039151712887439, "grad_norm": 0.0077499705366790295, "learning_rate": 0.0009313014919321715, "loss": 0.038, "num_input_tokens_seen": 66668608, "step": 30890 }, { "epoch": 5.039967373572594, "grad_norm": 0.06268175691366196, "learning_rate": 0.0009312654790063795, "loss": 0.0542, "num_input_tokens_seen": 66678784, "step": 30895 }, { "epoch": 5.040783034257749, "grad_norm": 0.1645749807357788, "learning_rate": 0.0009312294573404454, "loss": 0.064, "num_input_tokens_seen": 66690048, "step": 30900 }, { "epoch": 5.041598694942904, "grad_norm": 0.011139431037008762, "learning_rate": 0.0009311934269350993, "loss": 0.0134, "num_input_tokens_seen": 66700768, "step": 30905 }, { "epoch": 5.0424143556280585, "grad_norm": 0.05394626408815384, "learning_rate": 0.0009311573877910716, "loss": 0.0587, "num_input_tokens_seen": 66711552, "step": 30910 }, { "epoch": 5.043230016313213, "grad_norm": 0.008870845660567284, "learning_rate": 0.0009311213399090921, "loss": 0.0988, "num_input_tokens_seen": 66721664, "step": 30915 }, { "epoch": 5.044045676998369, "grad_norm": 0.06445521116256714, "learning_rate": 0.000931085283289892, "loss": 0.0882, "num_input_tokens_seen": 66731904, "step": 30920 }, { "epoch": 5.044861337683524, "grad_norm": 0.014135945588350296, "learning_rate": 0.0009310492179342016, "loss": 0.0634, "num_input_tokens_seen": 66742912, "step": 30925 }, { "epoch": 5.045676998368679, "grad_norm": 0.05112599581480026, "learning_rate": 0.0009310131438427521, "loss": 0.0728, "num_input_tokens_seen": 66753728, "step": 30930 }, { "epoch": 5.0464926590538335, "grad_norm": 0.09556989371776581, "learning_rate": 0.0009309770610162744, "loss": 0.0875, "num_input_tokens_seen": 66764320, "step": 30935 }, { "epoch": 5.047308319738988, "grad_norm": 0.0715535432100296, "learning_rate": 0.0009309409694555, "loss": 0.2179, "num_input_tokens_seen": 66775904, "step": 30940 }, { "epoch": 5.048123980424143, "grad_norm": 0.27583763003349304, "learning_rate": 0.0009309048691611599, "loss": 0.0467, "num_input_tokens_seen": 66787136, "step": 30945 }, { "epoch": 5.048939641109299, "grad_norm": 0.2681281566619873, "learning_rate": 0.0009308687601339861, "loss": 0.1339, "num_input_tokens_seen": 66797184, "step": 30950 }, { "epoch": 5.049755301794454, "grad_norm": 0.04223368316888809, "learning_rate": 0.0009308326423747103, "loss": 0.2441, "num_input_tokens_seen": 66807520, "step": 30955 }, { "epoch": 5.0505709624796085, "grad_norm": 0.04481996223330498, "learning_rate": 0.0009307965158840644, "loss": 0.075, "num_input_tokens_seen": 66819072, "step": 30960 }, { "epoch": 5.051386623164763, "grad_norm": 0.05280722305178642, "learning_rate": 0.0009307603806627807, "loss": 0.1029, "num_input_tokens_seen": 66830016, "step": 30965 }, { "epoch": 5.052202283849918, "grad_norm": 0.13725540041923523, "learning_rate": 0.0009307242367115914, "loss": 0.0625, "num_input_tokens_seen": 66841440, "step": 30970 }, { "epoch": 5.053017944535074, "grad_norm": 0.07019257545471191, "learning_rate": 0.000930688084031229, "loss": 0.101, "num_input_tokens_seen": 66852064, "step": 30975 }, { "epoch": 5.053833605220229, "grad_norm": 0.12166785448789597, "learning_rate": 0.0009306519226224262, "loss": 0.0415, "num_input_tokens_seen": 66861792, "step": 30980 }, { "epoch": 5.054649265905383, "grad_norm": 0.058914583176374435, "learning_rate": 0.0009306157524859158, "loss": 0.0451, "num_input_tokens_seen": 66871808, "step": 30985 }, { "epoch": 5.055464926590538, "grad_norm": 0.018524901941418648, "learning_rate": 0.000930579573622431, "loss": 0.0296, "num_input_tokens_seen": 66883264, "step": 30990 }, { "epoch": 5.056280587275693, "grad_norm": 0.1477639526128769, "learning_rate": 0.0009305433860327049, "loss": 0.1472, "num_input_tokens_seen": 66892864, "step": 30995 }, { "epoch": 5.057096247960848, "grad_norm": 0.033064793795347214, "learning_rate": 0.0009305071897174708, "loss": 0.1906, "num_input_tokens_seen": 66903296, "step": 31000 }, { "epoch": 5.057911908646004, "grad_norm": 0.1508786827325821, "learning_rate": 0.0009304709846774625, "loss": 0.1035, "num_input_tokens_seen": 66914336, "step": 31005 }, { "epoch": 5.058727569331158, "grad_norm": 0.0729747787117958, "learning_rate": 0.0009304347709134136, "loss": 0.1138, "num_input_tokens_seen": 66926080, "step": 31010 }, { "epoch": 5.059543230016313, "grad_norm": 0.06870915740728378, "learning_rate": 0.000930398548426058, "loss": 0.0378, "num_input_tokens_seen": 66937920, "step": 31015 }, { "epoch": 5.060358890701468, "grad_norm": 0.06295592337846756, "learning_rate": 0.0009303623172161298, "loss": 0.0136, "num_input_tokens_seen": 66948064, "step": 31020 }, { "epoch": 5.061174551386623, "grad_norm": 0.16718323528766632, "learning_rate": 0.0009303260772843632, "loss": 0.0653, "num_input_tokens_seen": 66957760, "step": 31025 }, { "epoch": 5.061990212071779, "grad_norm": 0.04914763197302818, "learning_rate": 0.0009302898286314929, "loss": 0.053, "num_input_tokens_seen": 66968032, "step": 31030 }, { "epoch": 5.062805872756933, "grad_norm": 0.3244478404521942, "learning_rate": 0.0009302535712582532, "loss": 0.0849, "num_input_tokens_seen": 66978240, "step": 31035 }, { "epoch": 5.063621533442088, "grad_norm": 0.03669968247413635, "learning_rate": 0.0009302173051653792, "loss": 0.1129, "num_input_tokens_seen": 66988320, "step": 31040 }, { "epoch": 5.064437194127243, "grad_norm": 0.009116867557168007, "learning_rate": 0.0009301810303536056, "loss": 0.0153, "num_input_tokens_seen": 66999712, "step": 31045 }, { "epoch": 5.065252854812398, "grad_norm": 0.014079558663070202, "learning_rate": 0.0009301447468236678, "loss": 0.1364, "num_input_tokens_seen": 67011200, "step": 31050 }, { "epoch": 5.066068515497553, "grad_norm": 0.504715085029602, "learning_rate": 0.000930108454576301, "loss": 0.1044, "num_input_tokens_seen": 67021728, "step": 31055 }, { "epoch": 5.066884176182708, "grad_norm": 0.439147412776947, "learning_rate": 0.0009300721536122408, "loss": 0.1424, "num_input_tokens_seen": 67031584, "step": 31060 }, { "epoch": 5.067699836867863, "grad_norm": 0.16566607356071472, "learning_rate": 0.0009300358439322228, "loss": 0.1038, "num_input_tokens_seen": 67043488, "step": 31065 }, { "epoch": 5.068515497553018, "grad_norm": 0.0737314224243164, "learning_rate": 0.0009299995255369828, "loss": 0.0457, "num_input_tokens_seen": 67054496, "step": 31070 }, { "epoch": 5.069331158238173, "grad_norm": 0.042671337723731995, "learning_rate": 0.000929963198427257, "loss": 0.1187, "num_input_tokens_seen": 67065632, "step": 31075 }, { "epoch": 5.070146818923328, "grad_norm": 0.2767927944660187, "learning_rate": 0.0009299268626037815, "loss": 0.0794, "num_input_tokens_seen": 67076480, "step": 31080 }, { "epoch": 5.0709624796084825, "grad_norm": 0.25071287155151367, "learning_rate": 0.0009298905180672928, "loss": 0.1542, "num_input_tokens_seen": 67087328, "step": 31085 }, { "epoch": 5.071778140293638, "grad_norm": 0.07459647953510284, "learning_rate": 0.0009298541648185272, "loss": 0.0418, "num_input_tokens_seen": 67098912, "step": 31090 }, { "epoch": 5.072593800978793, "grad_norm": 0.5487614274024963, "learning_rate": 0.0009298178028582218, "loss": 0.1567, "num_input_tokens_seen": 67109312, "step": 31095 }, { "epoch": 5.073409461663948, "grad_norm": 0.5368494391441345, "learning_rate": 0.0009297814321871133, "loss": 0.0983, "num_input_tokens_seen": 67120128, "step": 31100 }, { "epoch": 5.074225122349103, "grad_norm": 0.16034793853759766, "learning_rate": 0.0009297450528059389, "loss": 0.0941, "num_input_tokens_seen": 67131040, "step": 31105 }, { "epoch": 5.075040783034257, "grad_norm": 0.32631683349609375, "learning_rate": 0.0009297086647154358, "loss": 0.1746, "num_input_tokens_seen": 67140448, "step": 31110 }, { "epoch": 5.075856443719413, "grad_norm": 0.1041693165898323, "learning_rate": 0.0009296722679163417, "loss": 0.1098, "num_input_tokens_seen": 67150592, "step": 31115 }, { "epoch": 5.076672104404568, "grad_norm": 0.06718619912862778, "learning_rate": 0.0009296358624093937, "loss": 0.0189, "num_input_tokens_seen": 67160416, "step": 31120 }, { "epoch": 5.077487765089723, "grad_norm": 0.1081736758351326, "learning_rate": 0.00092959944819533, "loss": 0.0446, "num_input_tokens_seen": 67171840, "step": 31125 }, { "epoch": 5.078303425774878, "grad_norm": 0.08862931281328201, "learning_rate": 0.0009295630252748885, "loss": 0.0238, "num_input_tokens_seen": 67183552, "step": 31130 }, { "epoch": 5.079119086460032, "grad_norm": 0.06089364364743233, "learning_rate": 0.0009295265936488076, "loss": 0.2143, "num_input_tokens_seen": 67194496, "step": 31135 }, { "epoch": 5.079934747145187, "grad_norm": 0.2689344882965088, "learning_rate": 0.0009294901533178251, "loss": 0.1596, "num_input_tokens_seen": 67204480, "step": 31140 }, { "epoch": 5.080750407830343, "grad_norm": 0.09414388239383698, "learning_rate": 0.0009294537042826798, "loss": 0.051, "num_input_tokens_seen": 67213280, "step": 31145 }, { "epoch": 5.081566068515498, "grad_norm": 0.15910618007183075, "learning_rate": 0.0009294172465441104, "loss": 0.1702, "num_input_tokens_seen": 67224416, "step": 31150 }, { "epoch": 5.082381729200653, "grad_norm": 0.42917969822883606, "learning_rate": 0.0009293807801028558, "loss": 0.1444, "num_input_tokens_seen": 67235296, "step": 31155 }, { "epoch": 5.083197389885807, "grad_norm": 0.009622442536056042, "learning_rate": 0.0009293443049596551, "loss": 0.0395, "num_input_tokens_seen": 67247040, "step": 31160 }, { "epoch": 5.084013050570962, "grad_norm": 0.03539435192942619, "learning_rate": 0.0009293078211152473, "loss": 0.0254, "num_input_tokens_seen": 67258048, "step": 31165 }, { "epoch": 5.084828711256117, "grad_norm": 0.016162622720003128, "learning_rate": 0.0009292713285703718, "loss": 0.0426, "num_input_tokens_seen": 67270496, "step": 31170 }, { "epoch": 5.085644371941273, "grad_norm": 0.34116441011428833, "learning_rate": 0.0009292348273257684, "loss": 0.0842, "num_input_tokens_seen": 67280672, "step": 31175 }, { "epoch": 5.0864600326264275, "grad_norm": 0.055893730372190475, "learning_rate": 0.0009291983173821765, "loss": 0.1891, "num_input_tokens_seen": 67290816, "step": 31180 }, { "epoch": 5.087275693311582, "grad_norm": 0.03510690852999687, "learning_rate": 0.0009291617987403364, "loss": 0.0816, "num_input_tokens_seen": 67301600, "step": 31185 }, { "epoch": 5.088091353996737, "grad_norm": 0.3024904727935791, "learning_rate": 0.000929125271400988, "loss": 0.0779, "num_input_tokens_seen": 67312608, "step": 31190 }, { "epoch": 5.088907014681892, "grad_norm": 0.05228859931230545, "learning_rate": 0.0009290887353648716, "loss": 0.0651, "num_input_tokens_seen": 67323040, "step": 31195 }, { "epoch": 5.089722675367048, "grad_norm": 0.07689611613750458, "learning_rate": 0.0009290521906327276, "loss": 0.1817, "num_input_tokens_seen": 67332576, "step": 31200 }, { "epoch": 5.0905383360522025, "grad_norm": 0.47072499990463257, "learning_rate": 0.0009290156372052967, "loss": 0.1537, "num_input_tokens_seen": 67343648, "step": 31205 }, { "epoch": 5.091353996737357, "grad_norm": 0.04127902537584305, "learning_rate": 0.0009289790750833196, "loss": 0.107, "num_input_tokens_seen": 67356448, "step": 31210 }, { "epoch": 5.092169657422512, "grad_norm": 0.034504398703575134, "learning_rate": 0.0009289425042675373, "loss": 0.0452, "num_input_tokens_seen": 67367520, "step": 31215 }, { "epoch": 5.092985318107667, "grad_norm": 0.43933600187301636, "learning_rate": 0.0009289059247586911, "loss": 0.3239, "num_input_tokens_seen": 67378752, "step": 31220 }, { "epoch": 5.093800978792822, "grad_norm": 0.26779478788375854, "learning_rate": 0.0009288693365575222, "loss": 0.1346, "num_input_tokens_seen": 67389824, "step": 31225 }, { "epoch": 5.0946166394779775, "grad_norm": 0.08533018082380295, "learning_rate": 0.0009288327396647722, "loss": 0.1229, "num_input_tokens_seen": 67401856, "step": 31230 }, { "epoch": 5.095432300163132, "grad_norm": 0.06029679998755455, "learning_rate": 0.0009287961340811826, "loss": 0.1721, "num_input_tokens_seen": 67411520, "step": 31235 }, { "epoch": 5.096247960848287, "grad_norm": 0.2775535583496094, "learning_rate": 0.0009287595198074955, "loss": 0.0806, "num_input_tokens_seen": 67421824, "step": 31240 }, { "epoch": 5.097063621533442, "grad_norm": 0.1342535763978958, "learning_rate": 0.0009287228968444527, "loss": 0.0807, "num_input_tokens_seen": 67432352, "step": 31245 }, { "epoch": 5.097879282218597, "grad_norm": 0.037833478301763535, "learning_rate": 0.0009286862651927966, "loss": 0.0546, "num_input_tokens_seen": 67443040, "step": 31250 }, { "epoch": 5.0986949429037525, "grad_norm": 0.22050011157989502, "learning_rate": 0.0009286496248532695, "loss": 0.0966, "num_input_tokens_seen": 67454560, "step": 31255 }, { "epoch": 5.099510603588907, "grad_norm": 0.04893016442656517, "learning_rate": 0.000928612975826614, "loss": 0.0377, "num_input_tokens_seen": 67464320, "step": 31260 }, { "epoch": 5.100326264274062, "grad_norm": 0.027219804003834724, "learning_rate": 0.0009285763181135727, "loss": 0.0819, "num_input_tokens_seen": 67473472, "step": 31265 }, { "epoch": 5.101141924959217, "grad_norm": 0.23508071899414062, "learning_rate": 0.0009285396517148888, "loss": 0.182, "num_input_tokens_seen": 67483584, "step": 31270 }, { "epoch": 5.101957585644372, "grad_norm": 0.2253478318452835, "learning_rate": 0.000928502976631305, "loss": 0.0728, "num_input_tokens_seen": 67494208, "step": 31275 }, { "epoch": 5.102773246329527, "grad_norm": 0.024605626240372658, "learning_rate": 0.0009284662928635649, "loss": 0.1157, "num_input_tokens_seen": 67503648, "step": 31280 }, { "epoch": 5.103588907014682, "grad_norm": 0.07976054400205612, "learning_rate": 0.0009284296004124118, "loss": 0.0528, "num_input_tokens_seen": 67513760, "step": 31285 }, { "epoch": 5.104404567699837, "grad_norm": 0.06092284247279167, "learning_rate": 0.0009283928992785894, "loss": 0.1538, "num_input_tokens_seen": 67524640, "step": 31290 }, { "epoch": 5.105220228384992, "grad_norm": 0.13910898566246033, "learning_rate": 0.0009283561894628414, "loss": 0.1144, "num_input_tokens_seen": 67534848, "step": 31295 }, { "epoch": 5.106035889070147, "grad_norm": 0.10759285092353821, "learning_rate": 0.0009283194709659117, "loss": 0.0624, "num_input_tokens_seen": 67546112, "step": 31300 }, { "epoch": 5.1068515497553015, "grad_norm": 0.32415008544921875, "learning_rate": 0.0009282827437885449, "loss": 0.0862, "num_input_tokens_seen": 67556544, "step": 31305 }, { "epoch": 5.107667210440456, "grad_norm": 0.13106942176818848, "learning_rate": 0.0009282460079314848, "loss": 0.0984, "num_input_tokens_seen": 67566880, "step": 31310 }, { "epoch": 5.108482871125612, "grad_norm": 0.17778359353542328, "learning_rate": 0.0009282092633954759, "loss": 0.1122, "num_input_tokens_seen": 67577760, "step": 31315 }, { "epoch": 5.109298531810767, "grad_norm": 0.46935024857521057, "learning_rate": 0.0009281725101812632, "loss": 0.2355, "num_input_tokens_seen": 67588672, "step": 31320 }, { "epoch": 5.110114192495922, "grad_norm": 0.027112260460853577, "learning_rate": 0.0009281357482895914, "loss": 0.033, "num_input_tokens_seen": 67599520, "step": 31325 }, { "epoch": 5.1109298531810765, "grad_norm": 0.32209512591362, "learning_rate": 0.0009280989777212055, "loss": 0.0413, "num_input_tokens_seen": 67610944, "step": 31330 }, { "epoch": 5.111745513866231, "grad_norm": 0.039760638028383255, "learning_rate": 0.0009280621984768507, "loss": 0.156, "num_input_tokens_seen": 67621696, "step": 31335 }, { "epoch": 5.112561174551387, "grad_norm": 0.14099331200122833, "learning_rate": 0.0009280254105572725, "loss": 0.0472, "num_input_tokens_seen": 67631136, "step": 31340 }, { "epoch": 5.113376835236542, "grad_norm": 0.2812722623348236, "learning_rate": 0.0009279886139632163, "loss": 0.1906, "num_input_tokens_seen": 67640288, "step": 31345 }, { "epoch": 5.114192495921697, "grad_norm": 0.009321368299424648, "learning_rate": 0.000927951808695428, "loss": 0.1025, "num_input_tokens_seen": 67651264, "step": 31350 }, { "epoch": 5.1150081566068515, "grad_norm": 0.0421270914375782, "learning_rate": 0.0009279149947546534, "loss": 0.1119, "num_input_tokens_seen": 67662880, "step": 31355 }, { "epoch": 5.115823817292006, "grad_norm": 0.5065827369689941, "learning_rate": 0.0009278781721416385, "loss": 0.1099, "num_input_tokens_seen": 67674560, "step": 31360 }, { "epoch": 5.116639477977161, "grad_norm": 0.3041188418865204, "learning_rate": 0.0009278413408571295, "loss": 0.0707, "num_input_tokens_seen": 67685440, "step": 31365 }, { "epoch": 5.117455138662317, "grad_norm": 0.3401942849159241, "learning_rate": 0.0009278045009018733, "loss": 0.205, "num_input_tokens_seen": 67696800, "step": 31370 }, { "epoch": 5.118270799347472, "grad_norm": 0.01374131627380848, "learning_rate": 0.000927767652276616, "loss": 0.0472, "num_input_tokens_seen": 67708128, "step": 31375 }, { "epoch": 5.1190864600326265, "grad_norm": 0.2611791491508484, "learning_rate": 0.0009277307949821045, "loss": 0.1759, "num_input_tokens_seen": 67719552, "step": 31380 }, { "epoch": 5.119902120717781, "grad_norm": 0.10769540816545486, "learning_rate": 0.000927693929019086, "loss": 0.182, "num_input_tokens_seen": 67729920, "step": 31385 }, { "epoch": 5.120717781402936, "grad_norm": 0.24421781301498413, "learning_rate": 0.0009276570543883074, "loss": 0.1504, "num_input_tokens_seen": 67743168, "step": 31390 }, { "epoch": 5.121533442088092, "grad_norm": 0.0941624641418457, "learning_rate": 0.000927620171090516, "loss": 0.0421, "num_input_tokens_seen": 67755104, "step": 31395 }, { "epoch": 5.122349102773247, "grad_norm": 0.09300605952739716, "learning_rate": 0.0009275832791264593, "loss": 0.0897, "num_input_tokens_seen": 67765600, "step": 31400 }, { "epoch": 5.123164763458401, "grad_norm": 0.08147823810577393, "learning_rate": 0.0009275463784968852, "loss": 0.1433, "num_input_tokens_seen": 67776032, "step": 31405 }, { "epoch": 5.123980424143556, "grad_norm": 0.04086275026202202, "learning_rate": 0.0009275094692025413, "loss": 0.0883, "num_input_tokens_seen": 67787264, "step": 31410 }, { "epoch": 5.124796084828711, "grad_norm": 0.10172951966524124, "learning_rate": 0.0009274725512441757, "loss": 0.0689, "num_input_tokens_seen": 67798624, "step": 31415 }, { "epoch": 5.125611745513866, "grad_norm": 0.12474427372217178, "learning_rate": 0.0009274356246225364, "loss": 0.0509, "num_input_tokens_seen": 67809856, "step": 31420 }, { "epoch": 5.126427406199022, "grad_norm": 0.052951257675886154, "learning_rate": 0.0009273986893383722, "loss": 0.0864, "num_input_tokens_seen": 67820576, "step": 31425 }, { "epoch": 5.127243066884176, "grad_norm": 0.2526801526546478, "learning_rate": 0.000927361745392431, "loss": 0.2413, "num_input_tokens_seen": 67832064, "step": 31430 }, { "epoch": 5.128058727569331, "grad_norm": 0.1386530101299286, "learning_rate": 0.0009273247927854622, "loss": 0.0271, "num_input_tokens_seen": 67842336, "step": 31435 }, { "epoch": 5.128874388254486, "grad_norm": 0.020157769322395325, "learning_rate": 0.0009272878315182141, "loss": 0.0519, "num_input_tokens_seen": 67854176, "step": 31440 }, { "epoch": 5.129690048939641, "grad_norm": 0.02612481452524662, "learning_rate": 0.0009272508615914363, "loss": 0.066, "num_input_tokens_seen": 67865120, "step": 31445 }, { "epoch": 5.130505709624796, "grad_norm": 0.30126649141311646, "learning_rate": 0.0009272138830058776, "loss": 0.0543, "num_input_tokens_seen": 67877056, "step": 31450 }, { "epoch": 5.131321370309951, "grad_norm": 0.07979356497526169, "learning_rate": 0.0009271768957622877, "loss": 0.1728, "num_input_tokens_seen": 67886592, "step": 31455 }, { "epoch": 5.132137030995106, "grad_norm": 0.028291519731283188, "learning_rate": 0.0009271398998614162, "loss": 0.0572, "num_input_tokens_seen": 67897792, "step": 31460 }, { "epoch": 5.132952691680261, "grad_norm": 0.22100113332271576, "learning_rate": 0.0009271028953040126, "loss": 0.0634, "num_input_tokens_seen": 67909600, "step": 31465 }, { "epoch": 5.133768352365416, "grad_norm": 0.435812771320343, "learning_rate": 0.0009270658820908271, "loss": 0.1216, "num_input_tokens_seen": 67920928, "step": 31470 }, { "epoch": 5.134584013050571, "grad_norm": 0.12650412321090698, "learning_rate": 0.0009270288602226096, "loss": 0.0694, "num_input_tokens_seen": 67932512, "step": 31475 }, { "epoch": 5.135399673735726, "grad_norm": 0.019517632201313972, "learning_rate": 0.0009269918297001106, "loss": 0.1054, "num_input_tokens_seen": 67943072, "step": 31480 }, { "epoch": 5.136215334420881, "grad_norm": 0.3243931829929352, "learning_rate": 0.0009269547905240805, "loss": 0.2145, "num_input_tokens_seen": 67954016, "step": 31485 }, { "epoch": 5.137030995106036, "grad_norm": 0.16320684552192688, "learning_rate": 0.00092691774269527, "loss": 0.0817, "num_input_tokens_seen": 67964864, "step": 31490 }, { "epoch": 5.137846655791191, "grad_norm": 0.0846920907497406, "learning_rate": 0.0009268806862144298, "loss": 0.0687, "num_input_tokens_seen": 67975904, "step": 31495 }, { "epoch": 5.138662316476346, "grad_norm": 0.04326556250452995, "learning_rate": 0.0009268436210823109, "loss": 0.1381, "num_input_tokens_seen": 67984512, "step": 31500 }, { "epoch": 5.1394779771615005, "grad_norm": 0.07311880588531494, "learning_rate": 0.0009268065472996645, "loss": 0.0208, "num_input_tokens_seen": 67995392, "step": 31505 }, { "epoch": 5.140293637846656, "grad_norm": 0.28384509682655334, "learning_rate": 0.0009267694648672423, "loss": 0.0677, "num_input_tokens_seen": 68006016, "step": 31510 }, { "epoch": 5.141109298531811, "grad_norm": 0.058670852333307266, "learning_rate": 0.0009267323737857952, "loss": 0.0382, "num_input_tokens_seen": 68016512, "step": 31515 }, { "epoch": 5.141924959216966, "grad_norm": 0.05646360665559769, "learning_rate": 0.0009266952740560752, "loss": 0.0186, "num_input_tokens_seen": 68028160, "step": 31520 }, { "epoch": 5.142740619902121, "grad_norm": 0.034488026052713394, "learning_rate": 0.0009266581656788342, "loss": 0.1105, "num_input_tokens_seen": 68039264, "step": 31525 }, { "epoch": 5.143556280587275, "grad_norm": 0.4161039888858795, "learning_rate": 0.0009266210486548243, "loss": 0.1821, "num_input_tokens_seen": 68050048, "step": 31530 }, { "epoch": 5.14437194127243, "grad_norm": 0.21656359732151031, "learning_rate": 0.0009265839229847975, "loss": 0.0753, "num_input_tokens_seen": 68061376, "step": 31535 }, { "epoch": 5.145187601957586, "grad_norm": 0.012548860162496567, "learning_rate": 0.0009265467886695064, "loss": 0.0235, "num_input_tokens_seen": 68072800, "step": 31540 }, { "epoch": 5.146003262642741, "grad_norm": 0.5382648706436157, "learning_rate": 0.0009265096457097035, "loss": 0.0928, "num_input_tokens_seen": 68084832, "step": 31545 }, { "epoch": 5.146818923327896, "grad_norm": 0.07256249338388443, "learning_rate": 0.0009264724941061418, "loss": 0.0699, "num_input_tokens_seen": 68096544, "step": 31550 }, { "epoch": 5.14763458401305, "grad_norm": 0.41795727610588074, "learning_rate": 0.0009264353338595736, "loss": 0.3499, "num_input_tokens_seen": 68105696, "step": 31555 }, { "epoch": 5.148450244698205, "grad_norm": 0.007594582159072161, "learning_rate": 0.0009263981649707527, "loss": 0.1525, "num_input_tokens_seen": 68117984, "step": 31560 }, { "epoch": 5.149265905383361, "grad_norm": 0.13215170800685883, "learning_rate": 0.0009263609874404319, "loss": 0.048, "num_input_tokens_seen": 68129376, "step": 31565 }, { "epoch": 5.150081566068516, "grad_norm": 0.07239355146884918, "learning_rate": 0.0009263238012693649, "loss": 0.1714, "num_input_tokens_seen": 68140224, "step": 31570 }, { "epoch": 5.150897226753671, "grad_norm": 0.3432246446609497, "learning_rate": 0.0009262866064583051, "loss": 0.1193, "num_input_tokens_seen": 68150912, "step": 31575 }, { "epoch": 5.151712887438825, "grad_norm": 0.0953001156449318, "learning_rate": 0.0009262494030080066, "loss": 0.0372, "num_input_tokens_seen": 68161248, "step": 31580 }, { "epoch": 5.15252854812398, "grad_norm": 0.19217583537101746, "learning_rate": 0.0009262121909192232, "loss": 0.0409, "num_input_tokens_seen": 68172288, "step": 31585 }, { "epoch": 5.153344208809135, "grad_norm": 0.6149972081184387, "learning_rate": 0.0009261749701927089, "loss": 0.0998, "num_input_tokens_seen": 68184000, "step": 31590 }, { "epoch": 5.154159869494291, "grad_norm": 0.18030406534671783, "learning_rate": 0.0009261377408292183, "loss": 0.3248, "num_input_tokens_seen": 68194208, "step": 31595 }, { "epoch": 5.1549755301794455, "grad_norm": 0.06821926683187485, "learning_rate": 0.0009261005028295058, "loss": 0.0587, "num_input_tokens_seen": 68205152, "step": 31600 }, { "epoch": 5.1557911908646, "grad_norm": 0.18538668751716614, "learning_rate": 0.000926063256194326, "loss": 0.3302, "num_input_tokens_seen": 68215968, "step": 31605 }, { "epoch": 5.156606851549755, "grad_norm": 0.1213301420211792, "learning_rate": 0.0009260260009244339, "loss": 0.06, "num_input_tokens_seen": 68228096, "step": 31610 }, { "epoch": 5.15742251223491, "grad_norm": 0.03839406743645668, "learning_rate": 0.0009259887370205844, "loss": 0.0775, "num_input_tokens_seen": 68238624, "step": 31615 }, { "epoch": 5.158238172920065, "grad_norm": 0.13685280084609985, "learning_rate": 0.0009259514644835327, "loss": 0.0749, "num_input_tokens_seen": 68249056, "step": 31620 }, { "epoch": 5.1590538336052205, "grad_norm": 0.19226589798927307, "learning_rate": 0.0009259141833140343, "loss": 0.0735, "num_input_tokens_seen": 68260064, "step": 31625 }, { "epoch": 5.159869494290375, "grad_norm": 0.3378715515136719, "learning_rate": 0.0009258768935128445, "loss": 0.076, "num_input_tokens_seen": 68269600, "step": 31630 }, { "epoch": 5.16068515497553, "grad_norm": 0.14282535016536713, "learning_rate": 0.0009258395950807194, "loss": 0.068, "num_input_tokens_seen": 68280160, "step": 31635 }, { "epoch": 5.161500815660685, "grad_norm": 0.014881022274494171, "learning_rate": 0.0009258022880184145, "loss": 0.0569, "num_input_tokens_seen": 68291616, "step": 31640 }, { "epoch": 5.16231647634584, "grad_norm": 0.7654488682746887, "learning_rate": 0.0009257649723266863, "loss": 0.0573, "num_input_tokens_seen": 68301952, "step": 31645 }, { "epoch": 5.1631321370309955, "grad_norm": 0.055387232452631, "learning_rate": 0.0009257276480062907, "loss": 0.1237, "num_input_tokens_seen": 68312256, "step": 31650 }, { "epoch": 5.16394779771615, "grad_norm": 0.25216802954673767, "learning_rate": 0.0009256903150579842, "loss": 0.0508, "num_input_tokens_seen": 68322784, "step": 31655 }, { "epoch": 5.164763458401305, "grad_norm": 0.13880954682826996, "learning_rate": 0.0009256529734825234, "loss": 0.0948, "num_input_tokens_seen": 68333504, "step": 31660 }, { "epoch": 5.16557911908646, "grad_norm": 0.018470045179128647, "learning_rate": 0.0009256156232806652, "loss": 0.0659, "num_input_tokens_seen": 68344480, "step": 31665 }, { "epoch": 5.166394779771615, "grad_norm": 0.26932740211486816, "learning_rate": 0.0009255782644531664, "loss": 0.0561, "num_input_tokens_seen": 68354976, "step": 31670 }, { "epoch": 5.16721044045677, "grad_norm": 0.05955954268574715, "learning_rate": 0.0009255408970007842, "loss": 0.1709, "num_input_tokens_seen": 68366176, "step": 31675 }, { "epoch": 5.168026101141925, "grad_norm": 0.0030427956953644753, "learning_rate": 0.0009255035209242759, "loss": 0.0757, "num_input_tokens_seen": 68375968, "step": 31680 }, { "epoch": 5.16884176182708, "grad_norm": 0.2134924829006195, "learning_rate": 0.0009254661362243991, "loss": 0.1036, "num_input_tokens_seen": 68388992, "step": 31685 }, { "epoch": 5.169657422512235, "grad_norm": 0.023975355550646782, "learning_rate": 0.000925428742901911, "loss": 0.1077, "num_input_tokens_seen": 68400352, "step": 31690 }, { "epoch": 5.17047308319739, "grad_norm": 0.044763561338186264, "learning_rate": 0.0009253913409575698, "loss": 0.1515, "num_input_tokens_seen": 68411072, "step": 31695 }, { "epoch": 5.171288743882545, "grad_norm": 0.01610422320663929, "learning_rate": 0.0009253539303921336, "loss": 0.0275, "num_input_tokens_seen": 68421856, "step": 31700 }, { "epoch": 5.1721044045677, "grad_norm": 0.10432357341051102, "learning_rate": 0.0009253165112063604, "loss": 0.08, "num_input_tokens_seen": 68431200, "step": 31705 }, { "epoch": 5.172920065252855, "grad_norm": 0.010319419205188751, "learning_rate": 0.0009252790834010085, "loss": 0.0554, "num_input_tokens_seen": 68441728, "step": 31710 }, { "epoch": 5.17373572593801, "grad_norm": 0.36150652170181274, "learning_rate": 0.0009252416469768363, "loss": 0.1983, "num_input_tokens_seen": 68452320, "step": 31715 }, { "epoch": 5.174551386623165, "grad_norm": 0.027730131521821022, "learning_rate": 0.0009252042019346029, "loss": 0.0636, "num_input_tokens_seen": 68461984, "step": 31720 }, { "epoch": 5.1753670473083195, "grad_norm": 0.014984754845499992, "learning_rate": 0.0009251667482750669, "loss": 0.0315, "num_input_tokens_seen": 68472928, "step": 31725 }, { "epoch": 5.176182707993474, "grad_norm": 0.11348821967840195, "learning_rate": 0.0009251292859989873, "loss": 0.0886, "num_input_tokens_seen": 68483072, "step": 31730 }, { "epoch": 5.17699836867863, "grad_norm": 0.028797926381230354, "learning_rate": 0.0009250918151071235, "loss": 0.059, "num_input_tokens_seen": 68493312, "step": 31735 }, { "epoch": 5.177814029363785, "grad_norm": 0.047608647495508194, "learning_rate": 0.0009250543356002347, "loss": 0.0445, "num_input_tokens_seen": 68504032, "step": 31740 }, { "epoch": 5.17862969004894, "grad_norm": 0.025980748236179352, "learning_rate": 0.0009250168474790806, "loss": 0.0177, "num_input_tokens_seen": 68515296, "step": 31745 }, { "epoch": 5.1794453507340945, "grad_norm": 0.16550405323505402, "learning_rate": 0.0009249793507444208, "loss": 0.1305, "num_input_tokens_seen": 68526112, "step": 31750 }, { "epoch": 5.180261011419249, "grad_norm": 0.43852806091308594, "learning_rate": 0.0009249418453970155, "loss": 0.1965, "num_input_tokens_seen": 68537600, "step": 31755 }, { "epoch": 5.181076672104404, "grad_norm": 0.4160293638706207, "learning_rate": 0.0009249043314376247, "loss": 0.2345, "num_input_tokens_seen": 68548640, "step": 31760 }, { "epoch": 5.18189233278956, "grad_norm": 0.010889717377722263, "learning_rate": 0.0009248668088670084, "loss": 0.0698, "num_input_tokens_seen": 68559712, "step": 31765 }, { "epoch": 5.182707993474715, "grad_norm": 0.10754194110631943, "learning_rate": 0.0009248292776859273, "loss": 0.0208, "num_input_tokens_seen": 68569920, "step": 31770 }, { "epoch": 5.1835236541598695, "grad_norm": 0.03462247550487518, "learning_rate": 0.0009247917378951419, "loss": 0.0866, "num_input_tokens_seen": 68580416, "step": 31775 }, { "epoch": 5.184339314845024, "grad_norm": 0.004287704825401306, "learning_rate": 0.0009247541894954132, "loss": 0.0407, "num_input_tokens_seen": 68590016, "step": 31780 }, { "epoch": 5.185154975530179, "grad_norm": 0.020558413118124008, "learning_rate": 0.0009247166324875018, "loss": 0.0309, "num_input_tokens_seen": 68600896, "step": 31785 }, { "epoch": 5.185970636215335, "grad_norm": 0.18890945613384247, "learning_rate": 0.0009246790668721692, "loss": 0.0803, "num_input_tokens_seen": 68611584, "step": 31790 }, { "epoch": 5.18678629690049, "grad_norm": 0.014520949684083462, "learning_rate": 0.0009246414926501766, "loss": 0.0279, "num_input_tokens_seen": 68622432, "step": 31795 }, { "epoch": 5.1876019575856445, "grad_norm": 0.009679626673460007, "learning_rate": 0.0009246039098222854, "loss": 0.1422, "num_input_tokens_seen": 68633024, "step": 31800 }, { "epoch": 5.188417618270799, "grad_norm": 0.5596218109130859, "learning_rate": 0.0009245663183892572, "loss": 0.0816, "num_input_tokens_seen": 68644160, "step": 31805 }, { "epoch": 5.189233278955954, "grad_norm": 0.8993096351623535, "learning_rate": 0.0009245287183518541, "loss": 0.1044, "num_input_tokens_seen": 68654432, "step": 31810 }, { "epoch": 5.190048939641109, "grad_norm": 0.3230532705783844, "learning_rate": 0.0009244911097108379, "loss": 0.3156, "num_input_tokens_seen": 68664928, "step": 31815 }, { "epoch": 5.190864600326265, "grad_norm": 0.01960061304271221, "learning_rate": 0.000924453492466971, "loss": 0.0352, "num_input_tokens_seen": 68676256, "step": 31820 }, { "epoch": 5.191680261011419, "grad_norm": 0.0067152297124266624, "learning_rate": 0.0009244158666210154, "loss": 0.0743, "num_input_tokens_seen": 68686976, "step": 31825 }, { "epoch": 5.192495921696574, "grad_norm": 0.06851846724748611, "learning_rate": 0.0009243782321737339, "loss": 0.112, "num_input_tokens_seen": 68698432, "step": 31830 }, { "epoch": 5.193311582381729, "grad_norm": 0.015204858034849167, "learning_rate": 0.0009243405891258894, "loss": 0.0266, "num_input_tokens_seen": 68709248, "step": 31835 }, { "epoch": 5.194127243066884, "grad_norm": 0.1737021505832672, "learning_rate": 0.0009243029374782443, "loss": 0.0622, "num_input_tokens_seen": 68720672, "step": 31840 }, { "epoch": 5.19494290375204, "grad_norm": 0.028907503932714462, "learning_rate": 0.0009242652772315621, "loss": 0.1325, "num_input_tokens_seen": 68732384, "step": 31845 }, { "epoch": 5.195758564437194, "grad_norm": 0.08393022418022156, "learning_rate": 0.0009242276083866056, "loss": 0.1833, "num_input_tokens_seen": 68743040, "step": 31850 }, { "epoch": 5.196574225122349, "grad_norm": 0.007252842653542757, "learning_rate": 0.0009241899309441386, "loss": 0.1219, "num_input_tokens_seen": 68752192, "step": 31855 }, { "epoch": 5.197389885807504, "grad_norm": 0.026566535234451294, "learning_rate": 0.0009241522449049245, "loss": 0.1687, "num_input_tokens_seen": 68763808, "step": 31860 }, { "epoch": 5.198205546492659, "grad_norm": 0.016730941832065582, "learning_rate": 0.000924114550269727, "loss": 0.0327, "num_input_tokens_seen": 68774784, "step": 31865 }, { "epoch": 5.199021207177814, "grad_norm": 0.03941414877772331, "learning_rate": 0.0009240768470393101, "loss": 0.018, "num_input_tokens_seen": 68785408, "step": 31870 }, { "epoch": 5.199836867862969, "grad_norm": 0.22222483158111572, "learning_rate": 0.0009240391352144382, "loss": 0.0655, "num_input_tokens_seen": 68796960, "step": 31875 }, { "epoch": 5.200652528548124, "grad_norm": 0.21988743543624878, "learning_rate": 0.0009240014147958751, "loss": 0.0615, "num_input_tokens_seen": 68808544, "step": 31880 }, { "epoch": 5.201468189233279, "grad_norm": 0.014653995633125305, "learning_rate": 0.0009239636857843854, "loss": 0.0601, "num_input_tokens_seen": 68820480, "step": 31885 }, { "epoch": 5.202283849918434, "grad_norm": 0.05298502743244171, "learning_rate": 0.0009239259481807338, "loss": 0.0584, "num_input_tokens_seen": 68831040, "step": 31890 }, { "epoch": 5.203099510603589, "grad_norm": 0.35446247458457947, "learning_rate": 0.0009238882019856851, "loss": 0.0791, "num_input_tokens_seen": 68842112, "step": 31895 }, { "epoch": 5.2039151712887435, "grad_norm": 0.19607031345367432, "learning_rate": 0.0009238504472000042, "loss": 0.143, "num_input_tokens_seen": 68853952, "step": 31900 }, { "epoch": 5.204730831973899, "grad_norm": 0.01760163903236389, "learning_rate": 0.0009238126838244562, "loss": 0.1293, "num_input_tokens_seen": 68865504, "step": 31905 }, { "epoch": 5.205546492659054, "grad_norm": 0.0035541425459086895, "learning_rate": 0.0009237749118598067, "loss": 0.1221, "num_input_tokens_seen": 68875712, "step": 31910 }, { "epoch": 5.206362153344209, "grad_norm": 0.2665620446205139, "learning_rate": 0.000923737131306821, "loss": 0.0819, "num_input_tokens_seen": 68887200, "step": 31915 }, { "epoch": 5.207177814029364, "grad_norm": 0.1335567981004715, "learning_rate": 0.0009236993421662648, "loss": 0.0672, "num_input_tokens_seen": 68898720, "step": 31920 }, { "epoch": 5.2079934747145185, "grad_norm": 0.09202000498771667, "learning_rate": 0.0009236615444389038, "loss": 0.239, "num_input_tokens_seen": 68907968, "step": 31925 }, { "epoch": 5.208809135399674, "grad_norm": 0.021996986120939255, "learning_rate": 0.0009236237381255041, "loss": 0.0468, "num_input_tokens_seen": 68919136, "step": 31930 }, { "epoch": 5.209624796084829, "grad_norm": 0.31675806641578674, "learning_rate": 0.0009235859232268322, "loss": 0.0514, "num_input_tokens_seen": 68929888, "step": 31935 }, { "epoch": 5.210440456769984, "grad_norm": 0.07840342819690704, "learning_rate": 0.000923548099743654, "loss": 0.0229, "num_input_tokens_seen": 68940992, "step": 31940 }, { "epoch": 5.211256117455139, "grad_norm": 0.009411263279616833, "learning_rate": 0.0009235102676767364, "loss": 0.1226, "num_input_tokens_seen": 68952160, "step": 31945 }, { "epoch": 5.212071778140293, "grad_norm": 0.3270871639251709, "learning_rate": 0.0009234724270268459, "loss": 0.1333, "num_input_tokens_seen": 68962752, "step": 31950 }, { "epoch": 5.212887438825448, "grad_norm": 0.08211880177259445, "learning_rate": 0.0009234345777947493, "loss": 0.072, "num_input_tokens_seen": 68973984, "step": 31955 }, { "epoch": 5.213703099510604, "grad_norm": 0.39796698093414307, "learning_rate": 0.0009233967199812141, "loss": 0.1575, "num_input_tokens_seen": 68984864, "step": 31960 }, { "epoch": 5.214518760195759, "grad_norm": 0.5391538739204407, "learning_rate": 0.000923358853587007, "loss": 0.1198, "num_input_tokens_seen": 68996384, "step": 31965 }, { "epoch": 5.215334420880914, "grad_norm": 0.29873961210250854, "learning_rate": 0.0009233209786128957, "loss": 0.2221, "num_input_tokens_seen": 69007488, "step": 31970 }, { "epoch": 5.216150081566068, "grad_norm": 0.10935372859239578, "learning_rate": 0.0009232830950596479, "loss": 0.0178, "num_input_tokens_seen": 69020032, "step": 31975 }, { "epoch": 5.216965742251223, "grad_norm": 0.3985706865787506, "learning_rate": 0.0009232452029280312, "loss": 0.08, "num_input_tokens_seen": 69030432, "step": 31980 }, { "epoch": 5.217781402936378, "grad_norm": 0.043537456542253494, "learning_rate": 0.0009232073022188135, "loss": 0.0415, "num_input_tokens_seen": 69042176, "step": 31985 }, { "epoch": 5.218597063621534, "grad_norm": 0.025628121569752693, "learning_rate": 0.0009231693929327628, "loss": 0.0604, "num_input_tokens_seen": 69053984, "step": 31990 }, { "epoch": 5.219412724306689, "grad_norm": 0.42975082993507385, "learning_rate": 0.0009231314750706476, "loss": 0.0908, "num_input_tokens_seen": 69064064, "step": 31995 }, { "epoch": 5.220228384991843, "grad_norm": 0.03866426274180412, "learning_rate": 0.0009230935486332363, "loss": 0.0365, "num_input_tokens_seen": 69075200, "step": 32000 }, { "epoch": 5.221044045676998, "grad_norm": 0.06650548428297043, "learning_rate": 0.0009230556136212975, "loss": 0.0537, "num_input_tokens_seen": 69086528, "step": 32005 }, { "epoch": 5.221859706362153, "grad_norm": 0.059267230331897736, "learning_rate": 0.0009230176700356001, "loss": 0.0573, "num_input_tokens_seen": 69097792, "step": 32010 }, { "epoch": 5.222675367047309, "grad_norm": 0.3453933298587799, "learning_rate": 0.0009229797178769128, "loss": 0.091, "num_input_tokens_seen": 69108416, "step": 32015 }, { "epoch": 5.2234910277324635, "grad_norm": 0.07621794193983078, "learning_rate": 0.000922941757146005, "loss": 0.0459, "num_input_tokens_seen": 69120256, "step": 32020 }, { "epoch": 5.224306688417618, "grad_norm": 0.004987028427422047, "learning_rate": 0.000922903787843646, "loss": 0.2532, "num_input_tokens_seen": 69131136, "step": 32025 }, { "epoch": 5.225122349102773, "grad_norm": 0.46153607964515686, "learning_rate": 0.0009228658099706053, "loss": 0.148, "num_input_tokens_seen": 69141216, "step": 32030 }, { "epoch": 5.225938009787928, "grad_norm": 0.1316855102777481, "learning_rate": 0.0009228278235276524, "loss": 0.2052, "num_input_tokens_seen": 69152160, "step": 32035 }, { "epoch": 5.226753670473083, "grad_norm": 0.28339651226997375, "learning_rate": 0.0009227898285155574, "loss": 0.1246, "num_input_tokens_seen": 69162624, "step": 32040 }, { "epoch": 5.2275693311582385, "grad_norm": 0.03376791998744011, "learning_rate": 0.00092275182493509, "loss": 0.103, "num_input_tokens_seen": 69173664, "step": 32045 }, { "epoch": 5.228384991843393, "grad_norm": 0.3722885251045227, "learning_rate": 0.0009227138127870208, "loss": 0.1149, "num_input_tokens_seen": 69182976, "step": 32050 }, { "epoch": 5.229200652528548, "grad_norm": 0.08567280322313309, "learning_rate": 0.0009226757920721196, "loss": 0.0362, "num_input_tokens_seen": 69192576, "step": 32055 }, { "epoch": 5.230016313213703, "grad_norm": 0.36891838908195496, "learning_rate": 0.0009226377627911575, "loss": 0.0757, "num_input_tokens_seen": 69203552, "step": 32060 }, { "epoch": 5.230831973898858, "grad_norm": 0.012629147619009018, "learning_rate": 0.000922599724944905, "loss": 0.1484, "num_input_tokens_seen": 69214432, "step": 32065 }, { "epoch": 5.231647634584013, "grad_norm": 0.1295524686574936, "learning_rate": 0.0009225616785341329, "loss": 0.0868, "num_input_tokens_seen": 69225632, "step": 32070 }, { "epoch": 5.232463295269168, "grad_norm": 0.010368788614869118, "learning_rate": 0.0009225236235596123, "loss": 0.1134, "num_input_tokens_seen": 69236224, "step": 32075 }, { "epoch": 5.233278955954323, "grad_norm": 0.28468385338783264, "learning_rate": 0.0009224855600221145, "loss": 0.0609, "num_input_tokens_seen": 69247200, "step": 32080 }, { "epoch": 5.234094616639478, "grad_norm": 0.039518047124147415, "learning_rate": 0.0009224474879224109, "loss": 0.0553, "num_input_tokens_seen": 69257216, "step": 32085 }, { "epoch": 5.234910277324633, "grad_norm": 0.34224626421928406, "learning_rate": 0.000922409407261273, "loss": 0.0686, "num_input_tokens_seen": 69268192, "step": 32090 }, { "epoch": 5.235725938009788, "grad_norm": 0.07368524372577667, "learning_rate": 0.0009223713180394726, "loss": 0.1467, "num_input_tokens_seen": 69278912, "step": 32095 }, { "epoch": 5.236541598694943, "grad_norm": 0.030505536124110222, "learning_rate": 0.0009223332202577815, "loss": 0.0195, "num_input_tokens_seen": 69289152, "step": 32100 }, { "epoch": 5.237357259380098, "grad_norm": 0.03637324646115303, "learning_rate": 0.0009222951139169722, "loss": 0.1244, "num_input_tokens_seen": 69300704, "step": 32105 }, { "epoch": 5.238172920065253, "grad_norm": 0.3361864387989044, "learning_rate": 0.0009222569990178165, "loss": 0.1545, "num_input_tokens_seen": 69309568, "step": 32110 }, { "epoch": 5.238988580750408, "grad_norm": 0.011383959092199802, "learning_rate": 0.0009222188755610871, "loss": 0.0779, "num_input_tokens_seen": 69321056, "step": 32115 }, { "epoch": 5.239804241435563, "grad_norm": 0.09642324596643448, "learning_rate": 0.0009221807435475564, "loss": 0.1572, "num_input_tokens_seen": 69331200, "step": 32120 }, { "epoch": 5.240619902120717, "grad_norm": 0.09677105396986008, "learning_rate": 0.0009221426029779975, "loss": 0.1539, "num_input_tokens_seen": 69341760, "step": 32125 }, { "epoch": 5.241435562805873, "grad_norm": 0.15398910641670227, "learning_rate": 0.0009221044538531833, "loss": 0.0912, "num_input_tokens_seen": 69352512, "step": 32130 }, { "epoch": 5.242251223491028, "grad_norm": 0.09538555145263672, "learning_rate": 0.0009220662961738868, "loss": 0.0617, "num_input_tokens_seen": 69361728, "step": 32135 }, { "epoch": 5.243066884176183, "grad_norm": 0.01968984119594097, "learning_rate": 0.0009220281299408815, "loss": 0.1174, "num_input_tokens_seen": 69372416, "step": 32140 }, { "epoch": 5.2438825448613375, "grad_norm": 0.29630982875823975, "learning_rate": 0.0009219899551549405, "loss": 0.1093, "num_input_tokens_seen": 69382336, "step": 32145 }, { "epoch": 5.244698205546492, "grad_norm": 0.017732080072164536, "learning_rate": 0.0009219517718168379, "loss": 0.0327, "num_input_tokens_seen": 69392960, "step": 32150 }, { "epoch": 5.245513866231648, "grad_norm": 0.07561706006526947, "learning_rate": 0.0009219135799273474, "loss": 0.168, "num_input_tokens_seen": 69403616, "step": 32155 }, { "epoch": 5.246329526916803, "grad_norm": 0.049572765827178955, "learning_rate": 0.0009218753794872429, "loss": 0.1803, "num_input_tokens_seen": 69414464, "step": 32160 }, { "epoch": 5.247145187601958, "grad_norm": 0.31382349133491516, "learning_rate": 0.0009218371704972987, "loss": 0.0998, "num_input_tokens_seen": 69426400, "step": 32165 }, { "epoch": 5.2479608482871125, "grad_norm": 0.6625356078147888, "learning_rate": 0.0009217989529582889, "loss": 0.109, "num_input_tokens_seen": 69437184, "step": 32170 }, { "epoch": 5.248776508972267, "grad_norm": 0.23642398416996002, "learning_rate": 0.0009217607268709884, "loss": 0.1353, "num_input_tokens_seen": 69448416, "step": 32175 }, { "epoch": 5.249592169657422, "grad_norm": 0.09059550613164902, "learning_rate": 0.0009217224922361718, "loss": 0.1781, "num_input_tokens_seen": 69459424, "step": 32180 }, { "epoch": 5.250407830342578, "grad_norm": 0.27348828315734863, "learning_rate": 0.0009216842490546138, "loss": 0.0839, "num_input_tokens_seen": 69470112, "step": 32185 }, { "epoch": 5.251223491027733, "grad_norm": 0.1343534290790558, "learning_rate": 0.0009216459973270895, "loss": 0.1586, "num_input_tokens_seen": 69481760, "step": 32190 }, { "epoch": 5.2520391517128875, "grad_norm": 0.01578347384929657, "learning_rate": 0.0009216077370543743, "loss": 0.0811, "num_input_tokens_seen": 69493408, "step": 32195 }, { "epoch": 5.252854812398042, "grad_norm": 0.22201500833034515, "learning_rate": 0.0009215694682372433, "loss": 0.0411, "num_input_tokens_seen": 69504000, "step": 32200 }, { "epoch": 5.253670473083197, "grad_norm": 0.11477544158697128, "learning_rate": 0.0009215311908764724, "loss": 0.0977, "num_input_tokens_seen": 69514880, "step": 32205 }, { "epoch": 5.254486133768353, "grad_norm": 0.31708627939224243, "learning_rate": 0.000921492904972837, "loss": 0.0764, "num_input_tokens_seen": 69525408, "step": 32210 }, { "epoch": 5.255301794453508, "grad_norm": 0.244248628616333, "learning_rate": 0.0009214546105271133, "loss": 0.1152, "num_input_tokens_seen": 69534976, "step": 32215 }, { "epoch": 5.2561174551386625, "grad_norm": 0.3195655941963196, "learning_rate": 0.0009214163075400772, "loss": 0.0587, "num_input_tokens_seen": 69544992, "step": 32220 }, { "epoch": 5.256933115823817, "grad_norm": 0.24756282567977905, "learning_rate": 0.000921377996012505, "loss": 0.1262, "num_input_tokens_seen": 69556704, "step": 32225 }, { "epoch": 5.257748776508972, "grad_norm": 0.10196772962808609, "learning_rate": 0.0009213396759451732, "loss": 0.0889, "num_input_tokens_seen": 69567520, "step": 32230 }, { "epoch": 5.258564437194127, "grad_norm": 0.062439385801553726, "learning_rate": 0.0009213013473388584, "loss": 0.1046, "num_input_tokens_seen": 69577472, "step": 32235 }, { "epoch": 5.259380097879283, "grad_norm": 0.11655324697494507, "learning_rate": 0.0009212630101943373, "loss": 0.2032, "num_input_tokens_seen": 69589024, "step": 32240 }, { "epoch": 5.260195758564437, "grad_norm": 0.08118362724781036, "learning_rate": 0.000921224664512387, "loss": 0.0222, "num_input_tokens_seen": 69599840, "step": 32245 }, { "epoch": 5.261011419249592, "grad_norm": 0.026308543980121613, "learning_rate": 0.0009211863102937843, "loss": 0.1076, "num_input_tokens_seen": 69609120, "step": 32250 }, { "epoch": 5.261827079934747, "grad_norm": 0.18624624609947205, "learning_rate": 0.0009211479475393068, "loss": 0.1014, "num_input_tokens_seen": 69619808, "step": 32255 }, { "epoch": 5.262642740619902, "grad_norm": 0.019620131701231003, "learning_rate": 0.0009211095762497319, "loss": 0.0321, "num_input_tokens_seen": 69630080, "step": 32260 }, { "epoch": 5.263458401305057, "grad_norm": 0.3600849211215973, "learning_rate": 0.0009210711964258372, "loss": 0.0992, "num_input_tokens_seen": 69639488, "step": 32265 }, { "epoch": 5.264274061990212, "grad_norm": 0.17657999694347382, "learning_rate": 0.0009210328080684005, "loss": 0.0529, "num_input_tokens_seen": 69650784, "step": 32270 }, { "epoch": 5.265089722675367, "grad_norm": 0.045330073684453964, "learning_rate": 0.0009209944111782, "loss": 0.1267, "num_input_tokens_seen": 69661312, "step": 32275 }, { "epoch": 5.265905383360522, "grad_norm": 0.03289695829153061, "learning_rate": 0.0009209560057560134, "loss": 0.1699, "num_input_tokens_seen": 69671840, "step": 32280 }, { "epoch": 5.266721044045677, "grad_norm": 0.13221107423305511, "learning_rate": 0.0009209175918026195, "loss": 0.1105, "num_input_tokens_seen": 69682336, "step": 32285 }, { "epoch": 5.267536704730832, "grad_norm": 0.013435963541269302, "learning_rate": 0.0009208791693187967, "loss": 0.0852, "num_input_tokens_seen": 69692608, "step": 32290 }, { "epoch": 5.268352365415987, "grad_norm": 0.2551325559616089, "learning_rate": 0.0009208407383053235, "loss": 0.0507, "num_input_tokens_seen": 69702400, "step": 32295 }, { "epoch": 5.269168026101142, "grad_norm": 0.008623032830655575, "learning_rate": 0.000920802298762979, "loss": 0.0351, "num_input_tokens_seen": 69713824, "step": 32300 }, { "epoch": 5.269983686786297, "grad_norm": 0.014695529825985432, "learning_rate": 0.0009207638506925419, "loss": 0.1366, "num_input_tokens_seen": 69725216, "step": 32305 }, { "epoch": 5.270799347471452, "grad_norm": 0.3066323399543762, "learning_rate": 0.0009207253940947916, "loss": 0.1395, "num_input_tokens_seen": 69736320, "step": 32310 }, { "epoch": 5.271615008156607, "grad_norm": 0.02507980726659298, "learning_rate": 0.0009206869289705075, "loss": 0.0558, "num_input_tokens_seen": 69746688, "step": 32315 }, { "epoch": 5.2724306688417615, "grad_norm": 0.28601711988449097, "learning_rate": 0.0009206484553204693, "loss": 0.1409, "num_input_tokens_seen": 69757408, "step": 32320 }, { "epoch": 5.273246329526917, "grad_norm": 0.06800656020641327, "learning_rate": 0.0009206099731454562, "loss": 0.0406, "num_input_tokens_seen": 69768960, "step": 32325 }, { "epoch": 5.274061990212072, "grad_norm": 0.19018906354904175, "learning_rate": 0.0009205714824462487, "loss": 0.073, "num_input_tokens_seen": 69779456, "step": 32330 }, { "epoch": 5.274877650897227, "grad_norm": 0.023114677518606186, "learning_rate": 0.0009205329832236265, "loss": 0.0848, "num_input_tokens_seen": 69790048, "step": 32335 }, { "epoch": 5.275693311582382, "grad_norm": 0.006871652789413929, "learning_rate": 0.0009204944754783698, "loss": 0.1075, "num_input_tokens_seen": 69801280, "step": 32340 }, { "epoch": 5.2765089722675365, "grad_norm": 0.3016926944255829, "learning_rate": 0.0009204559592112592, "loss": 0.066, "num_input_tokens_seen": 69811520, "step": 32345 }, { "epoch": 5.277324632952691, "grad_norm": 0.038791198283433914, "learning_rate": 0.0009204174344230751, "loss": 0.1418, "num_input_tokens_seen": 69823456, "step": 32350 }, { "epoch": 5.278140293637847, "grad_norm": 0.05556711181998253, "learning_rate": 0.0009203789011145984, "loss": 0.2575, "num_input_tokens_seen": 69834816, "step": 32355 }, { "epoch": 5.278955954323002, "grad_norm": 0.049236562103033066, "learning_rate": 0.00092034035928661, "loss": 0.1355, "num_input_tokens_seen": 69844928, "step": 32360 }, { "epoch": 5.279771615008157, "grad_norm": 0.11340753734111786, "learning_rate": 0.000920301808939891, "loss": 0.0842, "num_input_tokens_seen": 69856256, "step": 32365 }, { "epoch": 5.280587275693311, "grad_norm": 0.08792033046483994, "learning_rate": 0.0009202632500752226, "loss": 0.0409, "num_input_tokens_seen": 69866208, "step": 32370 }, { "epoch": 5.281402936378466, "grad_norm": 0.10982216149568558, "learning_rate": 0.0009202246826933864, "loss": 0.1524, "num_input_tokens_seen": 69875328, "step": 32375 }, { "epoch": 5.282218597063622, "grad_norm": 0.15313772857189178, "learning_rate": 0.0009201861067951638, "loss": 0.1006, "num_input_tokens_seen": 69885504, "step": 32380 }, { "epoch": 5.283034257748777, "grad_norm": 0.7371540069580078, "learning_rate": 0.0009201475223813368, "loss": 0.2293, "num_input_tokens_seen": 69896128, "step": 32385 }, { "epoch": 5.283849918433932, "grad_norm": 0.07774913311004639, "learning_rate": 0.0009201089294526872, "loss": 0.0827, "num_input_tokens_seen": 69906208, "step": 32390 }, { "epoch": 5.284665579119086, "grad_norm": 0.16968795657157898, "learning_rate": 0.0009200703280099971, "loss": 0.0626, "num_input_tokens_seen": 69916928, "step": 32395 }, { "epoch": 5.285481239804241, "grad_norm": 0.04233980178833008, "learning_rate": 0.0009200317180540491, "loss": 0.0675, "num_input_tokens_seen": 69927872, "step": 32400 }, { "epoch": 5.286296900489396, "grad_norm": 0.03283955901861191, "learning_rate": 0.0009199930995856254, "loss": 0.1035, "num_input_tokens_seen": 69939840, "step": 32405 }, { "epoch": 5.287112561174552, "grad_norm": 0.01955365389585495, "learning_rate": 0.0009199544726055087, "loss": 0.0838, "num_input_tokens_seen": 69949824, "step": 32410 }, { "epoch": 5.287928221859707, "grad_norm": 0.22192370891571045, "learning_rate": 0.000919915837114482, "loss": 0.0499, "num_input_tokens_seen": 69960672, "step": 32415 }, { "epoch": 5.288743882544861, "grad_norm": 0.23097379505634308, "learning_rate": 0.0009198771931133281, "loss": 0.1444, "num_input_tokens_seen": 69973024, "step": 32420 }, { "epoch": 5.289559543230016, "grad_norm": 0.5525568127632141, "learning_rate": 0.0009198385406028302, "loss": 0.1719, "num_input_tokens_seen": 69984096, "step": 32425 }, { "epoch": 5.290375203915171, "grad_norm": 0.0077023254707455635, "learning_rate": 0.0009197998795837716, "loss": 0.0858, "num_input_tokens_seen": 69993888, "step": 32430 }, { "epoch": 5.291190864600326, "grad_norm": 0.01506341714411974, "learning_rate": 0.0009197612100569359, "loss": 0.0613, "num_input_tokens_seen": 70002912, "step": 32435 }, { "epoch": 5.2920065252854815, "grad_norm": 0.22217650711536407, "learning_rate": 0.0009197225320231069, "loss": 0.0741, "num_input_tokens_seen": 70014720, "step": 32440 }, { "epoch": 5.292822185970636, "grad_norm": 0.04445306584239006, "learning_rate": 0.0009196838454830682, "loss": 0.1215, "num_input_tokens_seen": 70023904, "step": 32445 }, { "epoch": 5.293637846655791, "grad_norm": 0.21980933845043182, "learning_rate": 0.000919645150437604, "loss": 0.1191, "num_input_tokens_seen": 70034944, "step": 32450 }, { "epoch": 5.294453507340946, "grad_norm": 0.09222866594791412, "learning_rate": 0.0009196064468874985, "loss": 0.0289, "num_input_tokens_seen": 70046112, "step": 32455 }, { "epoch": 5.295269168026101, "grad_norm": 0.028388027101755142, "learning_rate": 0.0009195677348335361, "loss": 0.0433, "num_input_tokens_seen": 70057376, "step": 32460 }, { "epoch": 5.2960848287112565, "grad_norm": 0.09829483926296234, "learning_rate": 0.0009195290142765012, "loss": 0.0645, "num_input_tokens_seen": 70067712, "step": 32465 }, { "epoch": 5.296900489396411, "grad_norm": 0.16900232434272766, "learning_rate": 0.0009194902852171787, "loss": 0.1645, "num_input_tokens_seen": 70078656, "step": 32470 }, { "epoch": 5.297716150081566, "grad_norm": 0.34144875407218933, "learning_rate": 0.0009194515476563533, "loss": 0.1061, "num_input_tokens_seen": 70089984, "step": 32475 }, { "epoch": 5.298531810766721, "grad_norm": 0.19346509873867035, "learning_rate": 0.0009194128015948103, "loss": 0.1102, "num_input_tokens_seen": 70099136, "step": 32480 }, { "epoch": 5.299347471451876, "grad_norm": 0.021596305072307587, "learning_rate": 0.0009193740470333347, "loss": 0.0609, "num_input_tokens_seen": 70110016, "step": 32485 }, { "epoch": 5.300163132137031, "grad_norm": 0.23590485751628876, "learning_rate": 0.0009193352839727121, "loss": 0.0583, "num_input_tokens_seen": 70121824, "step": 32490 }, { "epoch": 5.300978792822186, "grad_norm": 0.5183067321777344, "learning_rate": 0.0009192965124137279, "loss": 0.0937, "num_input_tokens_seen": 70132672, "step": 32495 }, { "epoch": 5.301794453507341, "grad_norm": 0.20867738127708435, "learning_rate": 0.000919257732357168, "loss": 0.0859, "num_input_tokens_seen": 70142976, "step": 32500 }, { "epoch": 5.302610114192496, "grad_norm": 0.3076983094215393, "learning_rate": 0.0009192189438038183, "loss": 0.1052, "num_input_tokens_seen": 70152992, "step": 32505 }, { "epoch": 5.303425774877651, "grad_norm": 0.016605082899332047, "learning_rate": 0.0009191801467544649, "loss": 0.1924, "num_input_tokens_seen": 70164224, "step": 32510 }, { "epoch": 5.304241435562806, "grad_norm": 0.11719417572021484, "learning_rate": 0.0009191413412098942, "loss": 0.0625, "num_input_tokens_seen": 70174912, "step": 32515 }, { "epoch": 5.30505709624796, "grad_norm": 0.16518744826316833, "learning_rate": 0.0009191025271708923, "loss": 0.1705, "num_input_tokens_seen": 70185536, "step": 32520 }, { "epoch": 5.305872756933116, "grad_norm": 0.08779332786798477, "learning_rate": 0.0009190637046382461, "loss": 0.0879, "num_input_tokens_seen": 70195808, "step": 32525 }, { "epoch": 5.306688417618271, "grad_norm": 0.059284575283527374, "learning_rate": 0.0009190248736127422, "loss": 0.053, "num_input_tokens_seen": 70206336, "step": 32530 }, { "epoch": 5.307504078303426, "grad_norm": 0.08679941296577454, "learning_rate": 0.0009189860340951679, "loss": 0.0352, "num_input_tokens_seen": 70217120, "step": 32535 }, { "epoch": 5.308319738988581, "grad_norm": 0.33877840638160706, "learning_rate": 0.0009189471860863099, "loss": 0.1158, "num_input_tokens_seen": 70228352, "step": 32540 }, { "epoch": 5.309135399673735, "grad_norm": 0.017490115016698837, "learning_rate": 0.0009189083295869558, "loss": 0.0316, "num_input_tokens_seen": 70238304, "step": 32545 }, { "epoch": 5.309951060358891, "grad_norm": 0.26520296931266785, "learning_rate": 0.0009188694645978928, "loss": 0.198, "num_input_tokens_seen": 70250848, "step": 32550 }, { "epoch": 5.310766721044046, "grad_norm": 0.05721283704042435, "learning_rate": 0.0009188305911199088, "loss": 0.092, "num_input_tokens_seen": 70261440, "step": 32555 }, { "epoch": 5.311582381729201, "grad_norm": 0.015442118979990482, "learning_rate": 0.0009187917091537918, "loss": 0.0866, "num_input_tokens_seen": 70272768, "step": 32560 }, { "epoch": 5.3123980424143555, "grad_norm": 0.0715777724981308, "learning_rate": 0.0009187528187003293, "loss": 0.0591, "num_input_tokens_seen": 70284000, "step": 32565 }, { "epoch": 5.31321370309951, "grad_norm": 0.020449407398700714, "learning_rate": 0.0009187139197603097, "loss": 0.0661, "num_input_tokens_seen": 70295168, "step": 32570 }, { "epoch": 5.314029363784665, "grad_norm": 0.016960782930254936, "learning_rate": 0.0009186750123345214, "loss": 0.0855, "num_input_tokens_seen": 70305632, "step": 32575 }, { "epoch": 5.314845024469821, "grad_norm": 0.18175779283046722, "learning_rate": 0.0009186360964237528, "loss": 0.0526, "num_input_tokens_seen": 70316416, "step": 32580 }, { "epoch": 5.315660685154976, "grad_norm": 0.30781203508377075, "learning_rate": 0.0009185971720287926, "loss": 0.1116, "num_input_tokens_seen": 70326976, "step": 32585 }, { "epoch": 5.3164763458401305, "grad_norm": 0.3953135311603546, "learning_rate": 0.0009185582391504299, "loss": 0.1166, "num_input_tokens_seen": 70334816, "step": 32590 }, { "epoch": 5.317292006525285, "grad_norm": 0.032423511147499084, "learning_rate": 0.0009185192977894533, "loss": 0.1751, "num_input_tokens_seen": 70345664, "step": 32595 }, { "epoch": 5.31810766721044, "grad_norm": 0.23598891496658325, "learning_rate": 0.0009184803479466521, "loss": 0.2265, "num_input_tokens_seen": 70357184, "step": 32600 }, { "epoch": 5.318923327895595, "grad_norm": 0.04018597677350044, "learning_rate": 0.0009184413896228161, "loss": 0.0721, "num_input_tokens_seen": 70368832, "step": 32605 }, { "epoch": 5.319738988580751, "grad_norm": 0.3094189167022705, "learning_rate": 0.0009184024228187343, "loss": 0.2841, "num_input_tokens_seen": 70380160, "step": 32610 }, { "epoch": 5.3205546492659055, "grad_norm": 0.015309697017073631, "learning_rate": 0.0009183634475351967, "loss": 0.0287, "num_input_tokens_seen": 70391552, "step": 32615 }, { "epoch": 5.32137030995106, "grad_norm": 0.014306425116956234, "learning_rate": 0.0009183244637729931, "loss": 0.0449, "num_input_tokens_seen": 70403520, "step": 32620 }, { "epoch": 5.322185970636215, "grad_norm": 0.006644033826887608, "learning_rate": 0.0009182854715329134, "loss": 0.0272, "num_input_tokens_seen": 70414176, "step": 32625 }, { "epoch": 5.32300163132137, "grad_norm": 0.013273539021611214, "learning_rate": 0.0009182464708157481, "loss": 0.0544, "num_input_tokens_seen": 70425600, "step": 32630 }, { "epoch": 5.323817292006526, "grad_norm": 0.10662904381752014, "learning_rate": 0.0009182074616222875, "loss": 0.039, "num_input_tokens_seen": 70435968, "step": 32635 }, { "epoch": 5.3246329526916805, "grad_norm": 0.06408187001943588, "learning_rate": 0.0009181684439533223, "loss": 0.0498, "num_input_tokens_seen": 70447200, "step": 32640 }, { "epoch": 5.325448613376835, "grad_norm": 0.004773424938321114, "learning_rate": 0.0009181294178096427, "loss": 0.111, "num_input_tokens_seen": 70457568, "step": 32645 }, { "epoch": 5.32626427406199, "grad_norm": 0.12504421174526215, "learning_rate": 0.0009180903831920404, "loss": 0.0227, "num_input_tokens_seen": 70469504, "step": 32650 }, { "epoch": 5.327079934747145, "grad_norm": 0.06146138161420822, "learning_rate": 0.0009180513401013059, "loss": 0.0863, "num_input_tokens_seen": 70480576, "step": 32655 }, { "epoch": 5.327895595432301, "grad_norm": 0.013475736603140831, "learning_rate": 0.0009180122885382307, "loss": 0.0604, "num_input_tokens_seen": 70491296, "step": 32660 }, { "epoch": 5.328711256117455, "grad_norm": 0.03132254630327225, "learning_rate": 0.0009179732285036062, "loss": 0.0265, "num_input_tokens_seen": 70502688, "step": 32665 }, { "epoch": 5.32952691680261, "grad_norm": 0.33490869402885437, "learning_rate": 0.0009179341599982239, "loss": 0.1301, "num_input_tokens_seen": 70513376, "step": 32670 }, { "epoch": 5.330342577487765, "grad_norm": 0.008075200021266937, "learning_rate": 0.0009178950830228759, "loss": 0.013, "num_input_tokens_seen": 70524128, "step": 32675 }, { "epoch": 5.33115823817292, "grad_norm": 0.007414266467094421, "learning_rate": 0.0009178559975783536, "loss": 0.0444, "num_input_tokens_seen": 70535520, "step": 32680 }, { "epoch": 5.331973898858075, "grad_norm": 0.24761831760406494, "learning_rate": 0.0009178169036654496, "loss": 0.1502, "num_input_tokens_seen": 70546624, "step": 32685 }, { "epoch": 5.33278955954323, "grad_norm": 0.31343305110931396, "learning_rate": 0.0009177778012849561, "loss": 0.0717, "num_input_tokens_seen": 70557408, "step": 32690 }, { "epoch": 5.333605220228385, "grad_norm": 0.11185169965028763, "learning_rate": 0.0009177386904376652, "loss": 0.0395, "num_input_tokens_seen": 70568288, "step": 32695 }, { "epoch": 5.33442088091354, "grad_norm": 0.7584981322288513, "learning_rate": 0.0009176995711243699, "loss": 0.13, "num_input_tokens_seen": 70579040, "step": 32700 }, { "epoch": 5.335236541598695, "grad_norm": 0.01849561743438244, "learning_rate": 0.0009176604433458631, "loss": 0.0166, "num_input_tokens_seen": 70589696, "step": 32705 }, { "epoch": 5.33605220228385, "grad_norm": 0.016311844810843468, "learning_rate": 0.0009176213071029373, "loss": 0.0513, "num_input_tokens_seen": 70601888, "step": 32710 }, { "epoch": 5.3368678629690045, "grad_norm": 0.010315134190022945, "learning_rate": 0.0009175821623963861, "loss": 0.0417, "num_input_tokens_seen": 70612448, "step": 32715 }, { "epoch": 5.33768352365416, "grad_norm": 0.3202737867832184, "learning_rate": 0.0009175430092270026, "loss": 0.0699, "num_input_tokens_seen": 70623936, "step": 32720 }, { "epoch": 5.338499184339315, "grad_norm": 0.4267595112323761, "learning_rate": 0.0009175038475955804, "loss": 0.1721, "num_input_tokens_seen": 70634208, "step": 32725 }, { "epoch": 5.33931484502447, "grad_norm": 0.11981532722711563, "learning_rate": 0.0009174646775029129, "loss": 0.1129, "num_input_tokens_seen": 70645056, "step": 32730 }, { "epoch": 5.340130505709625, "grad_norm": 0.0671236515045166, "learning_rate": 0.0009174254989497942, "loss": 0.1892, "num_input_tokens_seen": 70656768, "step": 32735 }, { "epoch": 5.3409461663947795, "grad_norm": 0.1355196237564087, "learning_rate": 0.0009173863119370183, "loss": 0.0642, "num_input_tokens_seen": 70667840, "step": 32740 }, { "epoch": 5.341761827079935, "grad_norm": 0.09968720376491547, "learning_rate": 0.0009173471164653791, "loss": 0.0705, "num_input_tokens_seen": 70677728, "step": 32745 }, { "epoch": 5.34257748776509, "grad_norm": 0.015359531156718731, "learning_rate": 0.0009173079125356714, "loss": 0.2844, "num_input_tokens_seen": 70688128, "step": 32750 }, { "epoch": 5.343393148450245, "grad_norm": 0.02851163037121296, "learning_rate": 0.0009172687001486892, "loss": 0.0571, "num_input_tokens_seen": 70698912, "step": 32755 }, { "epoch": 5.3442088091354, "grad_norm": 0.06295658648014069, "learning_rate": 0.0009172294793052277, "loss": 0.0132, "num_input_tokens_seen": 70709568, "step": 32760 }, { "epoch": 5.3450244698205545, "grad_norm": 0.3002905249595642, "learning_rate": 0.0009171902500060814, "loss": 0.0725, "num_input_tokens_seen": 70720096, "step": 32765 }, { "epoch": 5.345840130505709, "grad_norm": 0.22536338865756989, "learning_rate": 0.0009171510122520455, "loss": 0.036, "num_input_tokens_seen": 70731200, "step": 32770 }, { "epoch": 5.346655791190865, "grad_norm": 0.08819224685430527, "learning_rate": 0.000917111766043915, "loss": 0.0936, "num_input_tokens_seen": 70740448, "step": 32775 }, { "epoch": 5.34747145187602, "grad_norm": 0.016420092433691025, "learning_rate": 0.0009170725113824855, "loss": 0.0904, "num_input_tokens_seen": 70750752, "step": 32780 }, { "epoch": 5.348287112561175, "grad_norm": 0.16604654490947723, "learning_rate": 0.0009170332482685524, "loss": 0.0482, "num_input_tokens_seen": 70761920, "step": 32785 }, { "epoch": 5.349102773246329, "grad_norm": 0.2976060211658478, "learning_rate": 0.0009169939767029116, "loss": 0.0899, "num_input_tokens_seen": 70773664, "step": 32790 }, { "epoch": 5.349918433931484, "grad_norm": 0.20711469650268555, "learning_rate": 0.0009169546966863588, "loss": 0.0852, "num_input_tokens_seen": 70784640, "step": 32795 }, { "epoch": 5.350734094616639, "grad_norm": 0.10269863158464432, "learning_rate": 0.0009169154082196901, "loss": 0.083, "num_input_tokens_seen": 70796288, "step": 32800 }, { "epoch": 5.351549755301795, "grad_norm": 0.022813158109784126, "learning_rate": 0.0009168761113037019, "loss": 0.075, "num_input_tokens_seen": 70806752, "step": 32805 }, { "epoch": 5.35236541598695, "grad_norm": 0.15759333968162537, "learning_rate": 0.0009168368059391903, "loss": 0.1424, "num_input_tokens_seen": 70818048, "step": 32810 }, { "epoch": 5.353181076672104, "grad_norm": 0.24115115404129028, "learning_rate": 0.0009167974921269519, "loss": 0.1861, "num_input_tokens_seen": 70827072, "step": 32815 }, { "epoch": 5.353996737357259, "grad_norm": 0.13524767756462097, "learning_rate": 0.0009167581698677838, "loss": 0.287, "num_input_tokens_seen": 70837312, "step": 32820 }, { "epoch": 5.354812398042414, "grad_norm": 0.08909308165311813, "learning_rate": 0.0009167188391624827, "loss": 0.0765, "num_input_tokens_seen": 70847200, "step": 32825 }, { "epoch": 5.35562805872757, "grad_norm": 0.15210150182247162, "learning_rate": 0.0009166795000118456, "loss": 0.2391, "num_input_tokens_seen": 70858112, "step": 32830 }, { "epoch": 5.356443719412725, "grad_norm": 0.11820219457149506, "learning_rate": 0.0009166401524166699, "loss": 0.1244, "num_input_tokens_seen": 70866848, "step": 32835 }, { "epoch": 5.357259380097879, "grad_norm": 0.021107325330376625, "learning_rate": 0.000916600796377753, "loss": 0.0864, "num_input_tokens_seen": 70877440, "step": 32840 }, { "epoch": 5.358075040783034, "grad_norm": 0.07454139739274979, "learning_rate": 0.0009165614318958924, "loss": 0.1565, "num_input_tokens_seen": 70887456, "step": 32845 }, { "epoch": 5.358890701468189, "grad_norm": 0.15558652579784393, "learning_rate": 0.0009165220589718859, "loss": 0.1329, "num_input_tokens_seen": 70898336, "step": 32850 }, { "epoch": 5.359706362153344, "grad_norm": 0.5014665722846985, "learning_rate": 0.0009164826776065316, "loss": 0.1743, "num_input_tokens_seen": 70907552, "step": 32855 }, { "epoch": 5.3605220228384995, "grad_norm": 0.18721306324005127, "learning_rate": 0.0009164432878006274, "loss": 0.1222, "num_input_tokens_seen": 70918400, "step": 32860 }, { "epoch": 5.361337683523654, "grad_norm": 0.3221430480480194, "learning_rate": 0.0009164038895549716, "loss": 0.0528, "num_input_tokens_seen": 70929760, "step": 32865 }, { "epoch": 5.362153344208809, "grad_norm": 0.12424920499324799, "learning_rate": 0.0009163644828703628, "loss": 0.028, "num_input_tokens_seen": 70940384, "step": 32870 }, { "epoch": 5.362969004893964, "grad_norm": 0.14955317974090576, "learning_rate": 0.0009163250677475996, "loss": 0.1631, "num_input_tokens_seen": 70951552, "step": 32875 }, { "epoch": 5.363784665579119, "grad_norm": 0.24267444014549255, "learning_rate": 0.0009162856441874807, "loss": 0.1863, "num_input_tokens_seen": 70962016, "step": 32880 }, { "epoch": 5.364600326264274, "grad_norm": 0.21469701826572418, "learning_rate": 0.0009162462121908052, "loss": 0.1446, "num_input_tokens_seen": 70972320, "step": 32885 }, { "epoch": 5.365415986949429, "grad_norm": 0.06439352035522461, "learning_rate": 0.0009162067717583722, "loss": 0.0656, "num_input_tokens_seen": 70983296, "step": 32890 }, { "epoch": 5.366231647634584, "grad_norm": 0.03191965073347092, "learning_rate": 0.0009161673228909808, "loss": 0.0576, "num_input_tokens_seen": 70995776, "step": 32895 }, { "epoch": 5.367047308319739, "grad_norm": 0.0767841637134552, "learning_rate": 0.0009161278655894307, "loss": 0.0508, "num_input_tokens_seen": 71005984, "step": 32900 }, { "epoch": 5.367862969004894, "grad_norm": 0.027977503836154938, "learning_rate": 0.0009160883998545216, "loss": 0.0346, "num_input_tokens_seen": 71016032, "step": 32905 }, { "epoch": 5.368678629690049, "grad_norm": 0.03358437865972519, "learning_rate": 0.0009160489256870532, "loss": 0.0931, "num_input_tokens_seen": 71025728, "step": 32910 }, { "epoch": 5.369494290375204, "grad_norm": 0.2287474274635315, "learning_rate": 0.0009160094430878255, "loss": 0.0542, "num_input_tokens_seen": 71036736, "step": 32915 }, { "epoch": 5.370309951060359, "grad_norm": 0.04842691496014595, "learning_rate": 0.0009159699520576388, "loss": 0.0292, "num_input_tokens_seen": 71048384, "step": 32920 }, { "epoch": 5.371125611745514, "grad_norm": 0.01275418046861887, "learning_rate": 0.0009159304525972931, "loss": 0.0477, "num_input_tokens_seen": 71059392, "step": 32925 }, { "epoch": 5.371941272430669, "grad_norm": 0.4336622953414917, "learning_rate": 0.0009158909447075894, "loss": 0.1431, "num_input_tokens_seen": 71069344, "step": 32930 }, { "epoch": 5.372756933115824, "grad_norm": 0.06846290826797485, "learning_rate": 0.0009158514283893279, "loss": 0.181, "num_input_tokens_seen": 71078496, "step": 32935 }, { "epoch": 5.373572593800978, "grad_norm": 0.0872889906167984, "learning_rate": 0.0009158119036433097, "loss": 0.0857, "num_input_tokens_seen": 71089056, "step": 32940 }, { "epoch": 5.374388254486134, "grad_norm": 0.025929905474185944, "learning_rate": 0.0009157723704703358, "loss": 0.1545, "num_input_tokens_seen": 71098432, "step": 32945 }, { "epoch": 5.375203915171289, "grad_norm": 0.05180001258850098, "learning_rate": 0.0009157328288712075, "loss": 0.1481, "num_input_tokens_seen": 71109472, "step": 32950 }, { "epoch": 5.376019575856444, "grad_norm": 0.18282483518123627, "learning_rate": 0.0009156932788467259, "loss": 0.056, "num_input_tokens_seen": 71119648, "step": 32955 }, { "epoch": 5.376835236541599, "grad_norm": 0.088663250207901, "learning_rate": 0.0009156537203976927, "loss": 0.0337, "num_input_tokens_seen": 71130688, "step": 32960 }, { "epoch": 5.377650897226753, "grad_norm": 0.011948135681450367, "learning_rate": 0.0009156141535249094, "loss": 0.0263, "num_input_tokens_seen": 71140736, "step": 32965 }, { "epoch": 5.378466557911908, "grad_norm": 0.0585586279630661, "learning_rate": 0.0009155745782291782, "loss": 0.0584, "num_input_tokens_seen": 71151488, "step": 32970 }, { "epoch": 5.379282218597064, "grad_norm": 0.6780929565429688, "learning_rate": 0.000915534994511301, "loss": 0.1512, "num_input_tokens_seen": 71162240, "step": 32975 }, { "epoch": 5.380097879282219, "grad_norm": 0.047391053289175034, "learning_rate": 0.0009154954023720799, "loss": 0.0523, "num_input_tokens_seen": 71171776, "step": 32980 }, { "epoch": 5.3809135399673735, "grad_norm": 0.09053643047809601, "learning_rate": 0.0009154558018123174, "loss": 0.1196, "num_input_tokens_seen": 71182496, "step": 32985 }, { "epoch": 5.381729200652528, "grad_norm": 0.16162870824337006, "learning_rate": 0.000915416192832816, "loss": 0.2026, "num_input_tokens_seen": 71194912, "step": 32990 }, { "epoch": 5.382544861337683, "grad_norm": 0.019207848235964775, "learning_rate": 0.0009153765754343786, "loss": 0.1132, "num_input_tokens_seen": 71205696, "step": 32995 }, { "epoch": 5.383360522022839, "grad_norm": 0.03291647881269455, "learning_rate": 0.0009153369496178078, "loss": 0.0317, "num_input_tokens_seen": 71216672, "step": 33000 }, { "epoch": 5.384176182707994, "grad_norm": 0.1418045461177826, "learning_rate": 0.0009152973153839068, "loss": 0.0734, "num_input_tokens_seen": 71226656, "step": 33005 }, { "epoch": 5.3849918433931485, "grad_norm": 0.02089807018637657, "learning_rate": 0.000915257672733479, "loss": 0.0191, "num_input_tokens_seen": 71237056, "step": 33010 }, { "epoch": 5.385807504078303, "grad_norm": 0.016831260174512863, "learning_rate": 0.0009152180216673276, "loss": 0.0733, "num_input_tokens_seen": 71248320, "step": 33015 }, { "epoch": 5.386623164763458, "grad_norm": 0.1020837128162384, "learning_rate": 0.0009151783621862564, "loss": 0.0724, "num_input_tokens_seen": 71257632, "step": 33020 }, { "epoch": 5.387438825448613, "grad_norm": 0.21184784173965454, "learning_rate": 0.0009151386942910688, "loss": 0.1175, "num_input_tokens_seen": 71267776, "step": 33025 }, { "epoch": 5.388254486133769, "grad_norm": 0.012271968647837639, "learning_rate": 0.0009150990179825689, "loss": 0.0395, "num_input_tokens_seen": 71278496, "step": 33030 }, { "epoch": 5.3890701468189235, "grad_norm": 0.2225111722946167, "learning_rate": 0.000915059333261561, "loss": 0.0483, "num_input_tokens_seen": 71290368, "step": 33035 }, { "epoch": 5.389885807504078, "grad_norm": 0.008628821931779385, "learning_rate": 0.0009150196401288491, "loss": 0.0493, "num_input_tokens_seen": 71300384, "step": 33040 }, { "epoch": 5.390701468189233, "grad_norm": 0.27652034163475037, "learning_rate": 0.0009149799385852375, "loss": 0.196, "num_input_tokens_seen": 71310464, "step": 33045 }, { "epoch": 5.391517128874388, "grad_norm": 0.13413751125335693, "learning_rate": 0.0009149402286315314, "loss": 0.0578, "num_input_tokens_seen": 71321248, "step": 33050 }, { "epoch": 5.392332789559543, "grad_norm": 0.06022435054183006, "learning_rate": 0.0009149005102685348, "loss": 0.0738, "num_input_tokens_seen": 71331744, "step": 33055 }, { "epoch": 5.3931484502446985, "grad_norm": 0.26010018587112427, "learning_rate": 0.0009148607834970532, "loss": 0.1997, "num_input_tokens_seen": 71342240, "step": 33060 }, { "epoch": 5.393964110929853, "grad_norm": 0.0971352830529213, "learning_rate": 0.0009148210483178916, "loss": 0.2019, "num_input_tokens_seen": 71354016, "step": 33065 }, { "epoch": 5.394779771615008, "grad_norm": 0.27673134207725525, "learning_rate": 0.000914781304731855, "loss": 0.1359, "num_input_tokens_seen": 71364416, "step": 33070 }, { "epoch": 5.395595432300163, "grad_norm": 0.08628147840499878, "learning_rate": 0.0009147415527397492, "loss": 0.1079, "num_input_tokens_seen": 71375328, "step": 33075 }, { "epoch": 5.396411092985318, "grad_norm": 0.021312475204467773, "learning_rate": 0.0009147017923423797, "loss": 0.034, "num_input_tokens_seen": 71385984, "step": 33080 }, { "epoch": 5.397226753670473, "grad_norm": 0.018863115459680557, "learning_rate": 0.0009146620235405523, "loss": 0.0901, "num_input_tokens_seen": 71396704, "step": 33085 }, { "epoch": 5.398042414355628, "grad_norm": 0.24924832582473755, "learning_rate": 0.0009146222463350729, "loss": 0.1815, "num_input_tokens_seen": 71406752, "step": 33090 }, { "epoch": 5.398858075040783, "grad_norm": 0.07007253170013428, "learning_rate": 0.0009145824607267478, "loss": 0.0722, "num_input_tokens_seen": 71417664, "step": 33095 }, { "epoch": 5.399673735725938, "grad_norm": 0.012764201499521732, "learning_rate": 0.0009145426667163832, "loss": 0.0617, "num_input_tokens_seen": 71429664, "step": 33100 }, { "epoch": 5.400489396411093, "grad_norm": 0.2257203459739685, "learning_rate": 0.0009145028643047855, "loss": 0.1548, "num_input_tokens_seen": 71441408, "step": 33105 }, { "epoch": 5.401305057096248, "grad_norm": 0.01852389983832836, "learning_rate": 0.0009144630534927613, "loss": 0.1108, "num_input_tokens_seen": 71451136, "step": 33110 }, { "epoch": 5.402120717781403, "grad_norm": 0.056180689483881, "learning_rate": 0.0009144232342811179, "loss": 0.0302, "num_input_tokens_seen": 71462880, "step": 33115 }, { "epoch": 5.402936378466558, "grad_norm": 0.020584911108016968, "learning_rate": 0.0009143834066706615, "loss": 0.1158, "num_input_tokens_seen": 71473120, "step": 33120 }, { "epoch": 5.403752039151713, "grad_norm": 0.23340380191802979, "learning_rate": 0.0009143435706621999, "loss": 0.2091, "num_input_tokens_seen": 71485024, "step": 33125 }, { "epoch": 5.404567699836868, "grad_norm": 0.022555751726031303, "learning_rate": 0.0009143037262565401, "loss": 0.2071, "num_input_tokens_seen": 71494944, "step": 33130 }, { "epoch": 5.4053833605220225, "grad_norm": 0.05650365725159645, "learning_rate": 0.00091426387345449, "loss": 0.1057, "num_input_tokens_seen": 71505120, "step": 33135 }, { "epoch": 5.406199021207178, "grad_norm": 0.09056977182626724, "learning_rate": 0.0009142240122568566, "loss": 0.0956, "num_input_tokens_seen": 71517056, "step": 33140 }, { "epoch": 5.407014681892333, "grad_norm": 0.07559631764888763, "learning_rate": 0.0009141841426644482, "loss": 0.1127, "num_input_tokens_seen": 71528576, "step": 33145 }, { "epoch": 5.407830342577488, "grad_norm": 0.10208159685134888, "learning_rate": 0.0009141442646780728, "loss": 0.0468, "num_input_tokens_seen": 71539264, "step": 33150 }, { "epoch": 5.408646003262643, "grad_norm": 0.04610144719481468, "learning_rate": 0.0009141043782985385, "loss": 0.144, "num_input_tokens_seen": 71549696, "step": 33155 }, { "epoch": 5.4094616639477975, "grad_norm": 0.18817569315433502, "learning_rate": 0.0009140644835266537, "loss": 0.063, "num_input_tokens_seen": 71560480, "step": 33160 }, { "epoch": 5.410277324632952, "grad_norm": 0.02124689146876335, "learning_rate": 0.0009140245803632268, "loss": 0.1456, "num_input_tokens_seen": 71571904, "step": 33165 }, { "epoch": 5.411092985318108, "grad_norm": 0.011705000884830952, "learning_rate": 0.0009139846688090665, "loss": 0.1007, "num_input_tokens_seen": 71582208, "step": 33170 }, { "epoch": 5.411908646003263, "grad_norm": 0.07694895565509796, "learning_rate": 0.0009139447488649818, "loss": 0.2328, "num_input_tokens_seen": 71592736, "step": 33175 }, { "epoch": 5.412724306688418, "grad_norm": 0.1447536051273346, "learning_rate": 0.0009139048205317817, "loss": 0.143, "num_input_tokens_seen": 71602720, "step": 33180 }, { "epoch": 5.4135399673735725, "grad_norm": 0.0459216833114624, "learning_rate": 0.0009138648838102751, "loss": 0.0917, "num_input_tokens_seen": 71614048, "step": 33185 }, { "epoch": 5.414355628058727, "grad_norm": 0.17608995735645294, "learning_rate": 0.0009138249387012718, "loss": 0.1044, "num_input_tokens_seen": 71623296, "step": 33190 }, { "epoch": 5.415171288743883, "grad_norm": 0.029491329565644264, "learning_rate": 0.000913784985205581, "loss": 0.1341, "num_input_tokens_seen": 71633280, "step": 33195 }, { "epoch": 5.415986949429038, "grad_norm": 0.09146325290203094, "learning_rate": 0.0009137450233240127, "loss": 0.1531, "num_input_tokens_seen": 71643776, "step": 33200 }, { "epoch": 5.416802610114193, "grad_norm": 0.4748654365539551, "learning_rate": 0.0009137050530573765, "loss": 0.1062, "num_input_tokens_seen": 71654784, "step": 33205 }, { "epoch": 5.417618270799347, "grad_norm": 0.05935012176632881, "learning_rate": 0.0009136650744064827, "loss": 0.1268, "num_input_tokens_seen": 71665696, "step": 33210 }, { "epoch": 5.418433931484502, "grad_norm": 0.08716800808906555, "learning_rate": 0.0009136250873721413, "loss": 0.1123, "num_input_tokens_seen": 71676416, "step": 33215 }, { "epoch": 5.419249592169657, "grad_norm": 0.04298451170325279, "learning_rate": 0.0009135850919551628, "loss": 0.0744, "num_input_tokens_seen": 71687456, "step": 33220 }, { "epoch": 5.420065252854813, "grad_norm": 0.1644071489572525, "learning_rate": 0.0009135450881563578, "loss": 0.0421, "num_input_tokens_seen": 71697856, "step": 33225 }, { "epoch": 5.420880913539968, "grad_norm": 0.17143607139587402, "learning_rate": 0.0009135050759765369, "loss": 0.0614, "num_input_tokens_seen": 71708032, "step": 33230 }, { "epoch": 5.421696574225122, "grad_norm": 0.2774263620376587, "learning_rate": 0.0009134650554165111, "loss": 0.1739, "num_input_tokens_seen": 71718944, "step": 33235 }, { "epoch": 5.422512234910277, "grad_norm": 0.06561808288097382, "learning_rate": 0.0009134250264770914, "loss": 0.1868, "num_input_tokens_seen": 71730368, "step": 33240 }, { "epoch": 5.423327895595432, "grad_norm": 0.09156402200460434, "learning_rate": 0.0009133849891590891, "loss": 0.1313, "num_input_tokens_seen": 71741088, "step": 33245 }, { "epoch": 5.424143556280587, "grad_norm": 0.16547074913978577, "learning_rate": 0.0009133449434633157, "loss": 0.1184, "num_input_tokens_seen": 71751808, "step": 33250 }, { "epoch": 5.424959216965743, "grad_norm": 0.0974913239479065, "learning_rate": 0.0009133048893905824, "loss": 0.0393, "num_input_tokens_seen": 71762720, "step": 33255 }, { "epoch": 5.425774877650897, "grad_norm": 0.07357635349035263, "learning_rate": 0.0009132648269417014, "loss": 0.0428, "num_input_tokens_seen": 71772992, "step": 33260 }, { "epoch": 5.426590538336052, "grad_norm": 0.0067160590551793575, "learning_rate": 0.0009132247561174843, "loss": 0.0202, "num_input_tokens_seen": 71784192, "step": 33265 }, { "epoch": 5.427406199021207, "grad_norm": 0.08158013969659805, "learning_rate": 0.0009131846769187434, "loss": 0.0574, "num_input_tokens_seen": 71795200, "step": 33270 }, { "epoch": 5.428221859706362, "grad_norm": 0.060711588710546494, "learning_rate": 0.0009131445893462908, "loss": 0.0846, "num_input_tokens_seen": 71805728, "step": 33275 }, { "epoch": 5.4290375203915175, "grad_norm": 0.22940810024738312, "learning_rate": 0.000913104493400939, "loss": 0.11, "num_input_tokens_seen": 71816352, "step": 33280 }, { "epoch": 5.429853181076672, "grad_norm": 0.10907072573900223, "learning_rate": 0.0009130643890835007, "loss": 0.0697, "num_input_tokens_seen": 71826752, "step": 33285 }, { "epoch": 5.430668841761827, "grad_norm": 0.03653138130903244, "learning_rate": 0.0009130242763947884, "loss": 0.0262, "num_input_tokens_seen": 71837856, "step": 33290 }, { "epoch": 5.431484502446982, "grad_norm": 0.006531296297907829, "learning_rate": 0.0009129841553356152, "loss": 0.1161, "num_input_tokens_seen": 71849248, "step": 33295 }, { "epoch": 5.432300163132137, "grad_norm": 0.21725022792816162, "learning_rate": 0.0009129440259067941, "loss": 0.1478, "num_input_tokens_seen": 71861024, "step": 33300 }, { "epoch": 5.433115823817292, "grad_norm": 0.10832436382770538, "learning_rate": 0.0009129038881091386, "loss": 0.1324, "num_input_tokens_seen": 71872128, "step": 33305 }, { "epoch": 5.433931484502447, "grad_norm": 0.24074572324752808, "learning_rate": 0.000912863741943462, "loss": 0.0644, "num_input_tokens_seen": 71882848, "step": 33310 }, { "epoch": 5.434747145187602, "grad_norm": 0.05547598376870155, "learning_rate": 0.000912823587410578, "loss": 0.0316, "num_input_tokens_seen": 71894272, "step": 33315 }, { "epoch": 5.435562805872757, "grad_norm": 0.7397826910018921, "learning_rate": 0.0009127834245113, "loss": 0.1284, "num_input_tokens_seen": 71904928, "step": 33320 }, { "epoch": 5.436378466557912, "grad_norm": 0.6665012836456299, "learning_rate": 0.0009127432532464424, "loss": 0.1159, "num_input_tokens_seen": 71916224, "step": 33325 }, { "epoch": 5.437194127243067, "grad_norm": 0.009233569726347923, "learning_rate": 0.0009127030736168192, "loss": 0.1416, "num_input_tokens_seen": 71926432, "step": 33330 }, { "epoch": 5.438009787928221, "grad_norm": 0.22085018455982208, "learning_rate": 0.0009126628856232446, "loss": 0.0501, "num_input_tokens_seen": 71938080, "step": 33335 }, { "epoch": 5.438825448613377, "grad_norm": 0.07378071546554565, "learning_rate": 0.0009126226892665333, "loss": 0.1174, "num_input_tokens_seen": 71949056, "step": 33340 }, { "epoch": 5.439641109298532, "grad_norm": 0.0521637424826622, "learning_rate": 0.0009125824845474996, "loss": 0.2178, "num_input_tokens_seen": 71959392, "step": 33345 }, { "epoch": 5.440456769983687, "grad_norm": 0.18902598321437836, "learning_rate": 0.0009125422714669584, "loss": 0.0754, "num_input_tokens_seen": 71970912, "step": 33350 }, { "epoch": 5.441272430668842, "grad_norm": 0.028784997761249542, "learning_rate": 0.0009125020500257248, "loss": 0.0441, "num_input_tokens_seen": 71980768, "step": 33355 }, { "epoch": 5.442088091353996, "grad_norm": 0.01244835089892149, "learning_rate": 0.000912461820224614, "loss": 0.0703, "num_input_tokens_seen": 71991584, "step": 33360 }, { "epoch": 5.442903752039152, "grad_norm": 0.17799144983291626, "learning_rate": 0.000912421582064441, "loss": 0.0827, "num_input_tokens_seen": 72002016, "step": 33365 }, { "epoch": 5.443719412724307, "grad_norm": 0.03637881577014923, "learning_rate": 0.0009123813355460214, "loss": 0.0603, "num_input_tokens_seen": 72012160, "step": 33370 }, { "epoch": 5.444535073409462, "grad_norm": 0.013502347283065319, "learning_rate": 0.000912341080670171, "loss": 0.0647, "num_input_tokens_seen": 72023136, "step": 33375 }, { "epoch": 5.445350734094617, "grad_norm": 0.13770249485969543, "learning_rate": 0.0009123008174377054, "loss": 0.0307, "num_input_tokens_seen": 72034112, "step": 33380 }, { "epoch": 5.446166394779771, "grad_norm": 0.06592484563589096, "learning_rate": 0.0009122605458494409, "loss": 0.0817, "num_input_tokens_seen": 72044128, "step": 33385 }, { "epoch": 5.446982055464926, "grad_norm": 0.0350588858127594, "learning_rate": 0.0009122202659061934, "loss": 0.0408, "num_input_tokens_seen": 72055104, "step": 33390 }, { "epoch": 5.447797716150082, "grad_norm": 0.10394226014614105, "learning_rate": 0.0009121799776087791, "loss": 0.1577, "num_input_tokens_seen": 72065760, "step": 33395 }, { "epoch": 5.448613376835237, "grad_norm": 0.009646871127188206, "learning_rate": 0.0009121396809580147, "loss": 0.0267, "num_input_tokens_seen": 72075808, "step": 33400 }, { "epoch": 5.4494290375203915, "grad_norm": 0.3740110993385315, "learning_rate": 0.0009120993759547169, "loss": 0.2119, "num_input_tokens_seen": 72087360, "step": 33405 }, { "epoch": 5.450244698205546, "grad_norm": 0.04822655767202377, "learning_rate": 0.0009120590625997026, "loss": 0.0558, "num_input_tokens_seen": 72098464, "step": 33410 }, { "epoch": 5.451060358890701, "grad_norm": 0.04984582960605621, "learning_rate": 0.0009120187408937884, "loss": 0.1361, "num_input_tokens_seen": 72110272, "step": 33415 }, { "epoch": 5.451876019575856, "grad_norm": 0.43698954582214355, "learning_rate": 0.0009119784108377918, "loss": 0.0841, "num_input_tokens_seen": 72121792, "step": 33420 }, { "epoch": 5.452691680261012, "grad_norm": 0.017899371683597565, "learning_rate": 0.0009119380724325302, "loss": 0.1177, "num_input_tokens_seen": 72132000, "step": 33425 }, { "epoch": 5.4535073409461665, "grad_norm": 0.005906949285417795, "learning_rate": 0.0009118977256788208, "loss": 0.0173, "num_input_tokens_seen": 72141408, "step": 33430 }, { "epoch": 5.454323001631321, "grad_norm": 0.07196404784917831, "learning_rate": 0.0009118573705774815, "loss": 0.0313, "num_input_tokens_seen": 72153088, "step": 33435 }, { "epoch": 5.455138662316476, "grad_norm": 0.09553544968366623, "learning_rate": 0.0009118170071293302, "loss": 0.029, "num_input_tokens_seen": 72163456, "step": 33440 }, { "epoch": 5.455954323001631, "grad_norm": 0.24880199134349823, "learning_rate": 0.0009117766353351848, "loss": 0.3415, "num_input_tokens_seen": 72174528, "step": 33445 }, { "epoch": 5.456769983686787, "grad_norm": 0.02181357331573963, "learning_rate": 0.0009117362551958635, "loss": 0.016, "num_input_tokens_seen": 72184672, "step": 33450 }, { "epoch": 5.4575856443719415, "grad_norm": 0.3363330066204071, "learning_rate": 0.0009116958667121847, "loss": 0.2458, "num_input_tokens_seen": 72196384, "step": 33455 }, { "epoch": 5.458401305057096, "grad_norm": 0.06619381904602051, "learning_rate": 0.0009116554698849668, "loss": 0.0328, "num_input_tokens_seen": 72206912, "step": 33460 }, { "epoch": 5.459216965742251, "grad_norm": 0.03706890344619751, "learning_rate": 0.0009116150647150286, "loss": 0.0515, "num_input_tokens_seen": 72216896, "step": 33465 }, { "epoch": 5.460032626427406, "grad_norm": 0.135879784822464, "learning_rate": 0.0009115746512031891, "loss": 0.1527, "num_input_tokens_seen": 72227552, "step": 33470 }, { "epoch": 5.460848287112561, "grad_norm": 0.8552200794219971, "learning_rate": 0.0009115342293502669, "loss": 0.152, "num_input_tokens_seen": 72237632, "step": 33475 }, { "epoch": 5.4616639477977165, "grad_norm": 0.013805222697556019, "learning_rate": 0.0009114937991570817, "loss": 0.0635, "num_input_tokens_seen": 72249216, "step": 33480 }, { "epoch": 5.462479608482871, "grad_norm": 0.04382677003741264, "learning_rate": 0.0009114533606244526, "loss": 0.0416, "num_input_tokens_seen": 72258912, "step": 33485 }, { "epoch": 5.463295269168026, "grad_norm": 0.31470417976379395, "learning_rate": 0.0009114129137531991, "loss": 0.0456, "num_input_tokens_seen": 72270144, "step": 33490 }, { "epoch": 5.464110929853181, "grad_norm": 0.2806030213832855, "learning_rate": 0.000911372458544141, "loss": 0.0719, "num_input_tokens_seen": 72279872, "step": 33495 }, { "epoch": 5.464926590538336, "grad_norm": 0.15725630521774292, "learning_rate": 0.0009113319949980983, "loss": 0.0584, "num_input_tokens_seen": 72290112, "step": 33500 }, { "epoch": 5.465742251223491, "grad_norm": 0.04170875623822212, "learning_rate": 0.0009112915231158907, "loss": 0.113, "num_input_tokens_seen": 72300256, "step": 33505 }, { "epoch": 5.466557911908646, "grad_norm": 0.33874374628067017, "learning_rate": 0.0009112510428983387, "loss": 0.1592, "num_input_tokens_seen": 72311072, "step": 33510 }, { "epoch": 5.467373572593801, "grad_norm": 0.10050855576992035, "learning_rate": 0.0009112105543462628, "loss": 0.1357, "num_input_tokens_seen": 72320832, "step": 33515 }, { "epoch": 5.468189233278956, "grad_norm": 0.3247714936733246, "learning_rate": 0.0009111700574604831, "loss": 0.1783, "num_input_tokens_seen": 72331104, "step": 33520 }, { "epoch": 5.469004893964111, "grad_norm": 0.14813661575317383, "learning_rate": 0.0009111295522418207, "loss": 0.061, "num_input_tokens_seen": 72341792, "step": 33525 }, { "epoch": 5.4698205546492655, "grad_norm": 0.07143258303403854, "learning_rate": 0.0009110890386910964, "loss": 0.1736, "num_input_tokens_seen": 72352544, "step": 33530 }, { "epoch": 5.470636215334421, "grad_norm": 0.06433774530887604, "learning_rate": 0.0009110485168091311, "loss": 0.0882, "num_input_tokens_seen": 72363072, "step": 33535 }, { "epoch": 5.471451876019576, "grad_norm": 0.03924332559108734, "learning_rate": 0.0009110079865967462, "loss": 0.0444, "num_input_tokens_seen": 72374688, "step": 33540 }, { "epoch": 5.472267536704731, "grad_norm": 0.03980143740773201, "learning_rate": 0.0009109674480547632, "loss": 0.1353, "num_input_tokens_seen": 72384384, "step": 33545 }, { "epoch": 5.473083197389886, "grad_norm": 0.4753454327583313, "learning_rate": 0.0009109269011840033, "loss": 0.1622, "num_input_tokens_seen": 72395680, "step": 33550 }, { "epoch": 5.4738988580750405, "grad_norm": 0.06528880447149277, "learning_rate": 0.0009108863459852886, "loss": 0.0359, "num_input_tokens_seen": 72406208, "step": 33555 }, { "epoch": 5.474714518760196, "grad_norm": 0.16494019329547882, "learning_rate": 0.0009108457824594407, "loss": 0.0845, "num_input_tokens_seen": 72417184, "step": 33560 }, { "epoch": 5.475530179445351, "grad_norm": 0.37512052059173584, "learning_rate": 0.0009108052106072819, "loss": 0.1141, "num_input_tokens_seen": 72427904, "step": 33565 }, { "epoch": 5.476345840130506, "grad_norm": 0.004489774815738201, "learning_rate": 0.0009107646304296344, "loss": 0.1083, "num_input_tokens_seen": 72438752, "step": 33570 }, { "epoch": 5.477161500815661, "grad_norm": 0.16451720893383026, "learning_rate": 0.0009107240419273206, "loss": 0.0967, "num_input_tokens_seen": 72450496, "step": 33575 }, { "epoch": 5.4779771615008155, "grad_norm": 0.010010715574026108, "learning_rate": 0.000910683445101163, "loss": 0.1226, "num_input_tokens_seen": 72462304, "step": 33580 }, { "epoch": 5.47879282218597, "grad_norm": 0.23389068245887756, "learning_rate": 0.0009106428399519844, "loss": 0.2303, "num_input_tokens_seen": 72473792, "step": 33585 }, { "epoch": 5.479608482871126, "grad_norm": 0.013406556099653244, "learning_rate": 0.0009106022264806078, "loss": 0.1026, "num_input_tokens_seen": 72483936, "step": 33590 }, { "epoch": 5.480424143556281, "grad_norm": 0.047236163169145584, "learning_rate": 0.000910561604687856, "loss": 0.0757, "num_input_tokens_seen": 72493568, "step": 33595 }, { "epoch": 5.481239804241436, "grad_norm": 0.22084510326385498, "learning_rate": 0.0009105209745745526, "loss": 0.0704, "num_input_tokens_seen": 72504800, "step": 33600 }, { "epoch": 5.4820554649265905, "grad_norm": 0.13281327486038208, "learning_rate": 0.0009104803361415208, "loss": 0.0971, "num_input_tokens_seen": 72516512, "step": 33605 }, { "epoch": 5.482871125611745, "grad_norm": 0.2645023763179779, "learning_rate": 0.0009104396893895843, "loss": 0.1354, "num_input_tokens_seen": 72528064, "step": 33610 }, { "epoch": 5.4836867862969, "grad_norm": 0.23055286705493927, "learning_rate": 0.0009103990343195667, "loss": 0.0895, "num_input_tokens_seen": 72538944, "step": 33615 }, { "epoch": 5.484502446982056, "grad_norm": 0.014704127795994282, "learning_rate": 0.0009103583709322923, "loss": 0.122, "num_input_tokens_seen": 72548416, "step": 33620 }, { "epoch": 5.485318107667211, "grad_norm": 0.023578355088829994, "learning_rate": 0.0009103176992285847, "loss": 0.03, "num_input_tokens_seen": 72559840, "step": 33625 }, { "epoch": 5.486133768352365, "grad_norm": 0.1732192039489746, "learning_rate": 0.0009102770192092684, "loss": 0.115, "num_input_tokens_seen": 72570720, "step": 33630 }, { "epoch": 5.48694942903752, "grad_norm": 0.1337272822856903, "learning_rate": 0.000910236330875168, "loss": 0.2457, "num_input_tokens_seen": 72580672, "step": 33635 }, { "epoch": 5.487765089722675, "grad_norm": 0.12379706650972366, "learning_rate": 0.0009101956342271078, "loss": 0.0549, "num_input_tokens_seen": 72591968, "step": 33640 }, { "epoch": 5.488580750407831, "grad_norm": 0.32962992787361145, "learning_rate": 0.0009101549292659128, "loss": 0.1602, "num_input_tokens_seen": 72603584, "step": 33645 }, { "epoch": 5.489396411092986, "grad_norm": 0.13441351056098938, "learning_rate": 0.0009101142159924077, "loss": 0.1115, "num_input_tokens_seen": 72614080, "step": 33650 }, { "epoch": 5.49021207177814, "grad_norm": 0.05461417883634567, "learning_rate": 0.0009100734944074179, "loss": 0.0963, "num_input_tokens_seen": 72624608, "step": 33655 }, { "epoch": 5.491027732463295, "grad_norm": 0.37821197509765625, "learning_rate": 0.0009100327645117684, "loss": 0.2191, "num_input_tokens_seen": 72634784, "step": 33660 }, { "epoch": 5.49184339314845, "grad_norm": 0.026111720129847527, "learning_rate": 0.0009099920263062848, "loss": 0.2053, "num_input_tokens_seen": 72646720, "step": 33665 }, { "epoch": 5.492659053833605, "grad_norm": 0.050946373492479324, "learning_rate": 0.0009099512797917927, "loss": 0.0723, "num_input_tokens_seen": 72657984, "step": 33670 }, { "epoch": 5.493474714518761, "grad_norm": 0.20536427199840546, "learning_rate": 0.0009099105249691179, "loss": 0.0751, "num_input_tokens_seen": 72669376, "step": 33675 }, { "epoch": 5.494290375203915, "grad_norm": 0.07914465665817261, "learning_rate": 0.0009098697618390862, "loss": 0.027, "num_input_tokens_seen": 72679008, "step": 33680 }, { "epoch": 5.49510603588907, "grad_norm": 0.0224993247538805, "learning_rate": 0.0009098289904025239, "loss": 0.1015, "num_input_tokens_seen": 72690528, "step": 33685 }, { "epoch": 5.495921696574225, "grad_norm": 0.13876989483833313, "learning_rate": 0.0009097882106602571, "loss": 0.0686, "num_input_tokens_seen": 72702112, "step": 33690 }, { "epoch": 5.49673735725938, "grad_norm": 0.10930801928043365, "learning_rate": 0.0009097474226131124, "loss": 0.1243, "num_input_tokens_seen": 72713824, "step": 33695 }, { "epoch": 5.497553017944535, "grad_norm": 0.00551770580932498, "learning_rate": 0.0009097066262619165, "loss": 0.141, "num_input_tokens_seen": 72723712, "step": 33700 }, { "epoch": 5.49836867862969, "grad_norm": 0.3949025273323059, "learning_rate": 0.000909665821607496, "loss": 0.1214, "num_input_tokens_seen": 72734144, "step": 33705 }, { "epoch": 5.499184339314845, "grad_norm": 0.21785476803779602, "learning_rate": 0.0009096250086506779, "loss": 0.1371, "num_input_tokens_seen": 72745024, "step": 33710 }, { "epoch": 5.5, "grad_norm": 0.23071850836277008, "learning_rate": 0.0009095841873922894, "loss": 0.0708, "num_input_tokens_seen": 72756576, "step": 33715 }, { "epoch": 5.500815660685155, "grad_norm": 0.01105006318539381, "learning_rate": 0.0009095433578331576, "loss": 0.0272, "num_input_tokens_seen": 72767680, "step": 33720 }, { "epoch": 5.50163132137031, "grad_norm": 0.3398113250732422, "learning_rate": 0.0009095025199741103, "loss": 0.2005, "num_input_tokens_seen": 72777632, "step": 33725 }, { "epoch": 5.502446982055465, "grad_norm": 0.02042406238615513, "learning_rate": 0.0009094616738159748, "loss": 0.0181, "num_input_tokens_seen": 72789312, "step": 33730 }, { "epoch": 5.50326264274062, "grad_norm": 0.06718539446592331, "learning_rate": 0.000909420819359579, "loss": 0.2193, "num_input_tokens_seen": 72799488, "step": 33735 }, { "epoch": 5.504078303425775, "grad_norm": 0.01698293536901474, "learning_rate": 0.000909379956605751, "loss": 0.0906, "num_input_tokens_seen": 72811424, "step": 33740 }, { "epoch": 5.50489396411093, "grad_norm": 0.021430758759379387, "learning_rate": 0.000909339085555319, "loss": 0.1657, "num_input_tokens_seen": 72821728, "step": 33745 }, { "epoch": 5.505709624796085, "grad_norm": 0.020780151709914207, "learning_rate": 0.0009092982062091109, "loss": 0.1749, "num_input_tokens_seen": 72832000, "step": 33750 }, { "epoch": 5.506525285481239, "grad_norm": 0.024232158437371254, "learning_rate": 0.0009092573185679556, "loss": 0.0678, "num_input_tokens_seen": 72842944, "step": 33755 }, { "epoch": 5.507340946166395, "grad_norm": 0.03573385998606682, "learning_rate": 0.0009092164226326814, "loss": 0.1031, "num_input_tokens_seen": 72855168, "step": 33760 }, { "epoch": 5.50815660685155, "grad_norm": 0.4746679961681366, "learning_rate": 0.0009091755184041173, "loss": 0.0799, "num_input_tokens_seen": 72866208, "step": 33765 }, { "epoch": 5.508972267536705, "grad_norm": 0.00861032772809267, "learning_rate": 0.0009091346058830923, "loss": 0.0331, "num_input_tokens_seen": 72878400, "step": 33770 }, { "epoch": 5.50978792822186, "grad_norm": 0.4031074345111847, "learning_rate": 0.0009090936850704354, "loss": 0.2052, "num_input_tokens_seen": 72889472, "step": 33775 }, { "epoch": 5.510603588907014, "grad_norm": 0.25700539350509644, "learning_rate": 0.0009090527559669761, "loss": 0.0908, "num_input_tokens_seen": 72900320, "step": 33780 }, { "epoch": 5.511419249592169, "grad_norm": 0.29348260164260864, "learning_rate": 0.0009090118185735438, "loss": 0.1556, "num_input_tokens_seen": 72911008, "step": 33785 }, { "epoch": 5.512234910277325, "grad_norm": 0.12544786930084229, "learning_rate": 0.000908970872890968, "loss": 0.0918, "num_input_tokens_seen": 72922592, "step": 33790 }, { "epoch": 5.51305057096248, "grad_norm": 0.01846909523010254, "learning_rate": 0.0009089299189200789, "loss": 0.1301, "num_input_tokens_seen": 72932640, "step": 33795 }, { "epoch": 5.513866231647635, "grad_norm": 0.27973416447639465, "learning_rate": 0.000908888956661706, "loss": 0.0806, "num_input_tokens_seen": 72944288, "step": 33800 }, { "epoch": 5.514681892332789, "grad_norm": 0.216182678937912, "learning_rate": 0.0009088479861166797, "loss": 0.0927, "num_input_tokens_seen": 72954048, "step": 33805 }, { "epoch": 5.515497553017944, "grad_norm": 0.29406675696372986, "learning_rate": 0.0009088070072858303, "loss": 0.0953, "num_input_tokens_seen": 72963360, "step": 33810 }, { "epoch": 5.5163132137031, "grad_norm": 0.06551232188940048, "learning_rate": 0.0009087660201699884, "loss": 0.0494, "num_input_tokens_seen": 72973888, "step": 33815 }, { "epoch": 5.517128874388255, "grad_norm": 0.03660907596349716, "learning_rate": 0.0009087250247699846, "loss": 0.1777, "num_input_tokens_seen": 72984032, "step": 33820 }, { "epoch": 5.5179445350734095, "grad_norm": 0.09545240551233292, "learning_rate": 0.0009086840210866493, "loss": 0.1464, "num_input_tokens_seen": 72994368, "step": 33825 }, { "epoch": 5.518760195758564, "grad_norm": 0.13930189609527588, "learning_rate": 0.0009086430091208142, "loss": 0.1389, "num_input_tokens_seen": 73004640, "step": 33830 }, { "epoch": 5.519575856443719, "grad_norm": 0.005968734622001648, "learning_rate": 0.00090860198887331, "loss": 0.074, "num_input_tokens_seen": 73014336, "step": 33835 }, { "epoch": 5.520391517128875, "grad_norm": 0.020325252786278725, "learning_rate": 0.0009085609603449683, "loss": 0.0415, "num_input_tokens_seen": 73024736, "step": 33840 }, { "epoch": 5.52120717781403, "grad_norm": 0.17212851345539093, "learning_rate": 0.0009085199235366201, "loss": 0.0656, "num_input_tokens_seen": 73036032, "step": 33845 }, { "epoch": 5.5220228384991845, "grad_norm": 0.06091204285621643, "learning_rate": 0.0009084788784490977, "loss": 0.053, "num_input_tokens_seen": 73048384, "step": 33850 }, { "epoch": 5.522838499184339, "grad_norm": 0.19289155304431915, "learning_rate": 0.0009084378250832325, "loss": 0.0949, "num_input_tokens_seen": 73058848, "step": 33855 }, { "epoch": 5.523654159869494, "grad_norm": 0.0691351592540741, "learning_rate": 0.0009083967634398567, "loss": 0.0512, "num_input_tokens_seen": 73069728, "step": 33860 }, { "epoch": 5.524469820554649, "grad_norm": 0.08239395171403885, "learning_rate": 0.0009083556935198024, "loss": 0.0901, "num_input_tokens_seen": 73080768, "step": 33865 }, { "epoch": 5.525285481239804, "grad_norm": 0.22937366366386414, "learning_rate": 0.0009083146153239019, "loss": 0.1334, "num_input_tokens_seen": 73091424, "step": 33870 }, { "epoch": 5.5261011419249595, "grad_norm": 0.047064874321222305, "learning_rate": 0.0009082735288529878, "loss": 0.0838, "num_input_tokens_seen": 73100640, "step": 33875 }, { "epoch": 5.526916802610114, "grad_norm": 0.011595834977924824, "learning_rate": 0.0009082324341078927, "loss": 0.1497, "num_input_tokens_seen": 73112640, "step": 33880 }, { "epoch": 5.527732463295269, "grad_norm": 0.060405977070331573, "learning_rate": 0.0009081913310894494, "loss": 0.2886, "num_input_tokens_seen": 73122272, "step": 33885 }, { "epoch": 5.528548123980424, "grad_norm": 0.04885214567184448, "learning_rate": 0.000908150219798491, "loss": 0.045, "num_input_tokens_seen": 73133024, "step": 33890 }, { "epoch": 5.529363784665579, "grad_norm": 0.4130043387413025, "learning_rate": 0.0009081091002358506, "loss": 0.2055, "num_input_tokens_seen": 73142496, "step": 33895 }, { "epoch": 5.5301794453507345, "grad_norm": 0.047620970755815506, "learning_rate": 0.0009080679724023615, "loss": 0.042, "num_input_tokens_seen": 73153248, "step": 33900 }, { "epoch": 5.530995106035889, "grad_norm": 0.07925587892532349, "learning_rate": 0.0009080268362988572, "loss": 0.087, "num_input_tokens_seen": 73164704, "step": 33905 }, { "epoch": 5.531810766721044, "grad_norm": 0.05550973862409592, "learning_rate": 0.0009079856919261716, "loss": 0.0669, "num_input_tokens_seen": 73174336, "step": 33910 }, { "epoch": 5.532626427406199, "grad_norm": 0.03632967546582222, "learning_rate": 0.0009079445392851383, "loss": 0.0837, "num_input_tokens_seen": 73185056, "step": 33915 }, { "epoch": 5.533442088091354, "grad_norm": 0.12514789402484894, "learning_rate": 0.0009079033783765914, "loss": 0.0454, "num_input_tokens_seen": 73196096, "step": 33920 }, { "epoch": 5.5342577487765094, "grad_norm": 0.0763651579618454, "learning_rate": 0.0009078622092013651, "loss": 0.0781, "num_input_tokens_seen": 73206656, "step": 33925 }, { "epoch": 5.535073409461664, "grad_norm": 0.05287047475576401, "learning_rate": 0.0009078210317602938, "loss": 0.1107, "num_input_tokens_seen": 73215584, "step": 33930 }, { "epoch": 5.535889070146819, "grad_norm": 0.036722272634506226, "learning_rate": 0.0009077798460542119, "loss": 0.0364, "num_input_tokens_seen": 73225856, "step": 33935 }, { "epoch": 5.536704730831974, "grad_norm": 0.23173309862613678, "learning_rate": 0.0009077386520839541, "loss": 0.1158, "num_input_tokens_seen": 73236608, "step": 33940 }, { "epoch": 5.537520391517129, "grad_norm": 0.011476065032184124, "learning_rate": 0.0009076974498503552, "loss": 0.0499, "num_input_tokens_seen": 73246944, "step": 33945 }, { "epoch": 5.5383360522022835, "grad_norm": 0.036247797310352325, "learning_rate": 0.0009076562393542502, "loss": 0.2025, "num_input_tokens_seen": 73258016, "step": 33950 }, { "epoch": 5.539151712887438, "grad_norm": 0.18980836868286133, "learning_rate": 0.0009076150205964746, "loss": 0.2154, "num_input_tokens_seen": 73267936, "step": 33955 }, { "epoch": 5.539967373572594, "grad_norm": 0.06631734222173691, "learning_rate": 0.0009075737935778634, "loss": 0.1712, "num_input_tokens_seen": 73278816, "step": 33960 }, { "epoch": 5.540783034257749, "grad_norm": 0.19360850751399994, "learning_rate": 0.0009075325582992522, "loss": 0.0989, "num_input_tokens_seen": 73290368, "step": 33965 }, { "epoch": 5.541598694942904, "grad_norm": 0.18678073585033417, "learning_rate": 0.0009074913147614767, "loss": 0.0999, "num_input_tokens_seen": 73302080, "step": 33970 }, { "epoch": 5.5424143556280585, "grad_norm": 0.029551517218351364, "learning_rate": 0.0009074500629653728, "loss": 0.0714, "num_input_tokens_seen": 73311456, "step": 33975 }, { "epoch": 5.543230016313213, "grad_norm": 0.04303313419222832, "learning_rate": 0.0009074088029117764, "loss": 0.1131, "num_input_tokens_seen": 73320992, "step": 33980 }, { "epoch": 5.544045676998369, "grad_norm": 0.15677279233932495, "learning_rate": 0.0009073675346015239, "loss": 0.0439, "num_input_tokens_seen": 73330592, "step": 33985 }, { "epoch": 5.544861337683524, "grad_norm": 0.059072427451610565, "learning_rate": 0.0009073262580354516, "loss": 0.1143, "num_input_tokens_seen": 73340992, "step": 33990 }, { "epoch": 5.545676998368679, "grad_norm": 0.24197915196418762, "learning_rate": 0.0009072849732143957, "loss": 0.1155, "num_input_tokens_seen": 73353440, "step": 33995 }, { "epoch": 5.5464926590538335, "grad_norm": 0.037562817335128784, "learning_rate": 0.0009072436801391932, "loss": 0.1566, "num_input_tokens_seen": 73364448, "step": 34000 }, { "epoch": 5.547308319738988, "grad_norm": 0.09774079918861389, "learning_rate": 0.0009072023788106811, "loss": 0.19, "num_input_tokens_seen": 73374496, "step": 34005 }, { "epoch": 5.548123980424144, "grad_norm": 0.1483016461133957, "learning_rate": 0.0009071610692296961, "loss": 0.0512, "num_input_tokens_seen": 73385472, "step": 34010 }, { "epoch": 5.548939641109299, "grad_norm": 0.10106521844863892, "learning_rate": 0.0009071197513970755, "loss": 0.0318, "num_input_tokens_seen": 73396960, "step": 34015 }, { "epoch": 5.549755301794454, "grad_norm": 0.13982902467250824, "learning_rate": 0.0009070784253136565, "loss": 0.2652, "num_input_tokens_seen": 73408032, "step": 34020 }, { "epoch": 5.5505709624796085, "grad_norm": 0.07747935503721237, "learning_rate": 0.0009070370909802772, "loss": 0.1446, "num_input_tokens_seen": 73418752, "step": 34025 }, { "epoch": 5.551386623164763, "grad_norm": 0.04437597841024399, "learning_rate": 0.0009069957483977747, "loss": 0.0314, "num_input_tokens_seen": 73430656, "step": 34030 }, { "epoch": 5.552202283849918, "grad_norm": 0.09726716578006744, "learning_rate": 0.0009069543975669869, "loss": 0.0293, "num_input_tokens_seen": 73441952, "step": 34035 }, { "epoch": 5.553017944535073, "grad_norm": 0.33005276322364807, "learning_rate": 0.0009069130384887521, "loss": 0.2334, "num_input_tokens_seen": 73453472, "step": 34040 }, { "epoch": 5.553833605220229, "grad_norm": 0.029982885345816612, "learning_rate": 0.0009068716711639084, "loss": 0.0254, "num_input_tokens_seen": 73463904, "step": 34045 }, { "epoch": 5.554649265905383, "grad_norm": 0.45158693194389343, "learning_rate": 0.0009068302955932939, "loss": 0.2258, "num_input_tokens_seen": 73474176, "step": 34050 }, { "epoch": 5.555464926590538, "grad_norm": 0.10603254288434982, "learning_rate": 0.0009067889117777477, "loss": 0.051, "num_input_tokens_seen": 73483904, "step": 34055 }, { "epoch": 5.556280587275693, "grad_norm": 0.02714691311120987, "learning_rate": 0.000906747519718108, "loss": 0.0664, "num_input_tokens_seen": 73495360, "step": 34060 }, { "epoch": 5.557096247960848, "grad_norm": 0.019381796941161156, "learning_rate": 0.0009067061194152138, "loss": 0.1517, "num_input_tokens_seen": 73507328, "step": 34065 }, { "epoch": 5.557911908646004, "grad_norm": 0.027508718892931938, "learning_rate": 0.0009066647108699041, "loss": 0.0481, "num_input_tokens_seen": 73518272, "step": 34070 }, { "epoch": 5.558727569331158, "grad_norm": 0.08095196634531021, "learning_rate": 0.0009066232940830182, "loss": 0.0755, "num_input_tokens_seen": 73528608, "step": 34075 }, { "epoch": 5.559543230016313, "grad_norm": 0.07537607103586197, "learning_rate": 0.0009065818690553955, "loss": 0.1149, "num_input_tokens_seen": 73540224, "step": 34080 }, { "epoch": 5.560358890701468, "grad_norm": 0.040021467953920364, "learning_rate": 0.0009065404357878752, "loss": 0.1655, "num_input_tokens_seen": 73550976, "step": 34085 }, { "epoch": 5.561174551386623, "grad_norm": 0.1030910462141037, "learning_rate": 0.0009064989942812974, "loss": 0.0439, "num_input_tokens_seen": 73563488, "step": 34090 }, { "epoch": 5.561990212071779, "grad_norm": 0.3230529725551605, "learning_rate": 0.0009064575445365019, "loss": 0.1774, "num_input_tokens_seen": 73573696, "step": 34095 }, { "epoch": 5.562805872756933, "grad_norm": 0.06685519218444824, "learning_rate": 0.0009064160865543285, "loss": 0.1093, "num_input_tokens_seen": 73584384, "step": 34100 }, { "epoch": 5.563621533442088, "grad_norm": 0.22881431877613068, "learning_rate": 0.0009063746203356176, "loss": 0.12, "num_input_tokens_seen": 73596480, "step": 34105 }, { "epoch": 5.564437194127243, "grad_norm": 0.3218865692615509, "learning_rate": 0.0009063331458812094, "loss": 0.0934, "num_input_tokens_seen": 73606208, "step": 34110 }, { "epoch": 5.565252854812398, "grad_norm": 0.07445463538169861, "learning_rate": 0.0009062916631919445, "loss": 0.04, "num_input_tokens_seen": 73617184, "step": 34115 }, { "epoch": 5.566068515497553, "grad_norm": 0.22873570024967194, "learning_rate": 0.0009062501722686638, "loss": 0.1851, "num_input_tokens_seen": 73628608, "step": 34120 }, { "epoch": 5.566884176182708, "grad_norm": 0.05482787266373634, "learning_rate": 0.0009062086731122079, "loss": 0.0322, "num_input_tokens_seen": 73639808, "step": 34125 }, { "epoch": 5.567699836867863, "grad_norm": 0.20735277235507965, "learning_rate": 0.0009061671657234179, "loss": 0.1828, "num_input_tokens_seen": 73649952, "step": 34130 }, { "epoch": 5.568515497553018, "grad_norm": 0.0738314688205719, "learning_rate": 0.000906125650103135, "loss": 0.0412, "num_input_tokens_seen": 73659808, "step": 34135 }, { "epoch": 5.569331158238173, "grad_norm": 0.03241332247853279, "learning_rate": 0.0009060841262522006, "loss": 0.1029, "num_input_tokens_seen": 73670752, "step": 34140 }, { "epoch": 5.570146818923328, "grad_norm": 0.02908007800579071, "learning_rate": 0.0009060425941714563, "loss": 0.082, "num_input_tokens_seen": 73681440, "step": 34145 }, { "epoch": 5.5709624796084825, "grad_norm": 0.07430959492921829, "learning_rate": 0.0009060010538617437, "loss": 0.0569, "num_input_tokens_seen": 73691168, "step": 34150 }, { "epoch": 5.571778140293638, "grad_norm": 0.12032222747802734, "learning_rate": 0.0009059595053239047, "loss": 0.0855, "num_input_tokens_seen": 73701312, "step": 34155 }, { "epoch": 5.572593800978793, "grad_norm": 0.10388089716434479, "learning_rate": 0.0009059179485587813, "loss": 0.2733, "num_input_tokens_seen": 73713088, "step": 34160 }, { "epoch": 5.573409461663948, "grad_norm": 0.204657182097435, "learning_rate": 0.0009058763835672157, "loss": 0.0515, "num_input_tokens_seen": 73722976, "step": 34165 }, { "epoch": 5.574225122349103, "grad_norm": 0.020359275862574577, "learning_rate": 0.0009058348103500504, "loss": 0.0841, "num_input_tokens_seen": 73734880, "step": 34170 }, { "epoch": 5.575040783034257, "grad_norm": 0.14025022089481354, "learning_rate": 0.0009057932289081278, "loss": 0.0571, "num_input_tokens_seen": 73746528, "step": 34175 }, { "epoch": 5.575856443719413, "grad_norm": 0.10773877054452896, "learning_rate": 0.0009057516392422906, "loss": 0.022, "num_input_tokens_seen": 73757280, "step": 34180 }, { "epoch": 5.576672104404568, "grad_norm": 0.02311548963189125, "learning_rate": 0.0009057100413533817, "loss": 0.0557, "num_input_tokens_seen": 73767456, "step": 34185 }, { "epoch": 5.577487765089723, "grad_norm": 0.1422637701034546, "learning_rate": 0.0009056684352422441, "loss": 0.1058, "num_input_tokens_seen": 73778080, "step": 34190 }, { "epoch": 5.578303425774878, "grad_norm": 0.0218193456530571, "learning_rate": 0.0009056268209097211, "loss": 0.1884, "num_input_tokens_seen": 73788544, "step": 34195 }, { "epoch": 5.579119086460032, "grad_norm": 0.022400854155421257, "learning_rate": 0.000905585198356656, "loss": 0.0199, "num_input_tokens_seen": 73801536, "step": 34200 }, { "epoch": 5.579934747145187, "grad_norm": 0.043671272695064545, "learning_rate": 0.0009055435675838923, "loss": 0.0261, "num_input_tokens_seen": 73814016, "step": 34205 }, { "epoch": 5.580750407830343, "grad_norm": 0.26837876439094543, "learning_rate": 0.0009055019285922737, "loss": 0.1321, "num_input_tokens_seen": 73825920, "step": 34210 }, { "epoch": 5.581566068515498, "grad_norm": 0.3287936747074127, "learning_rate": 0.0009054602813826441, "loss": 0.1132, "num_input_tokens_seen": 73837600, "step": 34215 }, { "epoch": 5.582381729200653, "grad_norm": 0.30297088623046875, "learning_rate": 0.0009054186259558477, "loss": 0.0893, "num_input_tokens_seen": 73848064, "step": 34220 }, { "epoch": 5.583197389885807, "grad_norm": 0.038943033665418625, "learning_rate": 0.0009053769623127284, "loss": 0.0668, "num_input_tokens_seen": 73858720, "step": 34225 }, { "epoch": 5.584013050570962, "grad_norm": 0.08361177891492844, "learning_rate": 0.0009053352904541306, "loss": 0.1178, "num_input_tokens_seen": 73871264, "step": 34230 }, { "epoch": 5.584828711256117, "grad_norm": 0.3237897455692291, "learning_rate": 0.0009052936103808991, "loss": 0.0707, "num_input_tokens_seen": 73881696, "step": 34235 }, { "epoch": 5.585644371941273, "grad_norm": 0.013652279041707516, "learning_rate": 0.0009052519220938784, "loss": 0.0409, "num_input_tokens_seen": 73892544, "step": 34240 }, { "epoch": 5.5864600326264275, "grad_norm": 0.14428219199180603, "learning_rate": 0.0009052102255939134, "loss": 0.1227, "num_input_tokens_seen": 73902848, "step": 34245 }, { "epoch": 5.587275693311582, "grad_norm": 0.6132535338401794, "learning_rate": 0.000905168520881849, "loss": 0.1041, "num_input_tokens_seen": 73914080, "step": 34250 }, { "epoch": 5.588091353996737, "grad_norm": 0.134591743350029, "learning_rate": 0.0009051268079585306, "loss": 0.1665, "num_input_tokens_seen": 73925536, "step": 34255 }, { "epoch": 5.588907014681892, "grad_norm": 0.010655044578015804, "learning_rate": 0.0009050850868248037, "loss": 0.073, "num_input_tokens_seen": 73936224, "step": 34260 }, { "epoch": 5.589722675367048, "grad_norm": 0.33637312054634094, "learning_rate": 0.0009050433574815134, "loss": 0.2458, "num_input_tokens_seen": 73947520, "step": 34265 }, { "epoch": 5.5905383360522025, "grad_norm": 0.014963323250412941, "learning_rate": 0.0009050016199295057, "loss": 0.0553, "num_input_tokens_seen": 73958048, "step": 34270 }, { "epoch": 5.591353996737357, "grad_norm": 0.2244342416524887, "learning_rate": 0.0009049598741696263, "loss": 0.0542, "num_input_tokens_seen": 73969376, "step": 34275 }, { "epoch": 5.592169657422512, "grad_norm": 0.03150659427046776, "learning_rate": 0.0009049181202027215, "loss": 0.175, "num_input_tokens_seen": 73979232, "step": 34280 }, { "epoch": 5.592985318107667, "grad_norm": 0.04426252841949463, "learning_rate": 0.0009048763580296373, "loss": 0.0365, "num_input_tokens_seen": 73990112, "step": 34285 }, { "epoch": 5.593800978792823, "grad_norm": 0.10377488285303116, "learning_rate": 0.00090483458765122, "loss": 0.1266, "num_input_tokens_seen": 74001344, "step": 34290 }, { "epoch": 5.5946166394779775, "grad_norm": 0.33538010716438293, "learning_rate": 0.0009047928090683162, "loss": 0.121, "num_input_tokens_seen": 74011168, "step": 34295 }, { "epoch": 5.595432300163132, "grad_norm": 0.17077447474002838, "learning_rate": 0.0009047510222817725, "loss": 0.065, "num_input_tokens_seen": 74022720, "step": 34300 }, { "epoch": 5.596247960848287, "grad_norm": 0.24240583181381226, "learning_rate": 0.0009047092272924361, "loss": 0.0317, "num_input_tokens_seen": 74034144, "step": 34305 }, { "epoch": 5.597063621533442, "grad_norm": 0.31515541672706604, "learning_rate": 0.0009046674241011537, "loss": 0.0346, "num_input_tokens_seen": 74044288, "step": 34310 }, { "epoch": 5.597879282218597, "grad_norm": 0.01569436304271221, "learning_rate": 0.0009046256127087727, "loss": 0.0576, "num_input_tokens_seen": 74052896, "step": 34315 }, { "epoch": 5.598694942903752, "grad_norm": 0.026618527248501778, "learning_rate": 0.0009045837931161402, "loss": 0.0655, "num_input_tokens_seen": 74063840, "step": 34320 }, { "epoch": 5.599510603588907, "grad_norm": 0.04020289331674576, "learning_rate": 0.0009045419653241038, "loss": 0.1489, "num_input_tokens_seen": 74075264, "step": 34325 }, { "epoch": 5.600326264274062, "grad_norm": 0.019134551286697388, "learning_rate": 0.0009045001293335115, "loss": 0.0129, "num_input_tokens_seen": 74085216, "step": 34330 }, { "epoch": 5.601141924959217, "grad_norm": 0.0995677188038826, "learning_rate": 0.0009044582851452107, "loss": 0.0241, "num_input_tokens_seen": 74095616, "step": 34335 }, { "epoch": 5.601957585644372, "grad_norm": 0.018183544278144836, "learning_rate": 0.0009044164327600499, "loss": 0.1038, "num_input_tokens_seen": 74105472, "step": 34340 }, { "epoch": 5.602773246329527, "grad_norm": 0.005952127743512392, "learning_rate": 0.000904374572178877, "loss": 0.0343, "num_input_tokens_seen": 74117280, "step": 34345 }, { "epoch": 5.603588907014682, "grad_norm": 0.04874003678560257, "learning_rate": 0.0009043327034025404, "loss": 0.0326, "num_input_tokens_seen": 74128032, "step": 34350 }, { "epoch": 5.604404567699837, "grad_norm": 0.15768302977085114, "learning_rate": 0.0009042908264318885, "loss": 0.0801, "num_input_tokens_seen": 74140416, "step": 34355 }, { "epoch": 5.605220228384992, "grad_norm": 0.03794685751199722, "learning_rate": 0.0009042489412677702, "loss": 0.0878, "num_input_tokens_seen": 74152032, "step": 34360 }, { "epoch": 5.606035889070147, "grad_norm": 0.039189379662275314, "learning_rate": 0.0009042070479110343, "loss": 0.1459, "num_input_tokens_seen": 74162912, "step": 34365 }, { "epoch": 5.6068515497553015, "grad_norm": 0.015013376250863075, "learning_rate": 0.0009041651463625298, "loss": 0.162, "num_input_tokens_seen": 74173440, "step": 34370 }, { "epoch": 5.607667210440457, "grad_norm": 0.09556364268064499, "learning_rate": 0.0009041232366231059, "loss": 0.0385, "num_input_tokens_seen": 74184064, "step": 34375 }, { "epoch": 5.608482871125612, "grad_norm": 0.18006251752376556, "learning_rate": 0.0009040813186936119, "loss": 0.048, "num_input_tokens_seen": 74194784, "step": 34380 }, { "epoch": 5.609298531810767, "grad_norm": 0.018620967864990234, "learning_rate": 0.0009040393925748973, "loss": 0.109, "num_input_tokens_seen": 74205344, "step": 34385 }, { "epoch": 5.610114192495922, "grad_norm": 0.08701225370168686, "learning_rate": 0.0009039974582678121, "loss": 0.0496, "num_input_tokens_seen": 74216096, "step": 34390 }, { "epoch": 5.6109298531810765, "grad_norm": 0.017861537635326385, "learning_rate": 0.0009039555157732056, "loss": 0.1324, "num_input_tokens_seen": 74227904, "step": 34395 }, { "epoch": 5.611745513866231, "grad_norm": 0.032413117587566376, "learning_rate": 0.0009039135650919283, "loss": 0.0483, "num_input_tokens_seen": 74239264, "step": 34400 }, { "epoch": 5.612561174551386, "grad_norm": 0.04601699858903885, "learning_rate": 0.0009038716062248302, "loss": 0.0694, "num_input_tokens_seen": 74250464, "step": 34405 }, { "epoch": 5.613376835236542, "grad_norm": 0.20806185901165009, "learning_rate": 0.0009038296391727616, "loss": 0.0399, "num_input_tokens_seen": 74261024, "step": 34410 }, { "epoch": 5.614192495921697, "grad_norm": 0.043033551424741745, "learning_rate": 0.0009037876639365731, "loss": 0.0208, "num_input_tokens_seen": 74272160, "step": 34415 }, { "epoch": 5.6150081566068515, "grad_norm": 0.012338409200310707, "learning_rate": 0.0009037456805171154, "loss": 0.0652, "num_input_tokens_seen": 74282752, "step": 34420 }, { "epoch": 5.615823817292006, "grad_norm": 0.11541248857975006, "learning_rate": 0.0009037036889152391, "loss": 0.1015, "num_input_tokens_seen": 74293664, "step": 34425 }, { "epoch": 5.616639477977161, "grad_norm": 0.05983193963766098, "learning_rate": 0.0009036616891317956, "loss": 0.0309, "num_input_tokens_seen": 74304896, "step": 34430 }, { "epoch": 5.617455138662317, "grad_norm": 0.05885162204504013, "learning_rate": 0.0009036196811676358, "loss": 0.027, "num_input_tokens_seen": 74315168, "step": 34435 }, { "epoch": 5.618270799347472, "grad_norm": 0.0991673693060875, "learning_rate": 0.0009035776650236112, "loss": 0.0328, "num_input_tokens_seen": 74325376, "step": 34440 }, { "epoch": 5.6190864600326265, "grad_norm": 0.08424853533506393, "learning_rate": 0.0009035356407005732, "loss": 0.0554, "num_input_tokens_seen": 74334880, "step": 34445 }, { "epoch": 5.619902120717781, "grad_norm": 0.12316300719976425, "learning_rate": 0.0009034936081993736, "loss": 0.0456, "num_input_tokens_seen": 74345344, "step": 34450 }, { "epoch": 5.620717781402936, "grad_norm": 0.36097198724746704, "learning_rate": 0.0009034515675208641, "loss": 0.047, "num_input_tokens_seen": 74355584, "step": 34455 }, { "epoch": 5.621533442088092, "grad_norm": 0.05849973112344742, "learning_rate": 0.0009034095186658966, "loss": 0.0403, "num_input_tokens_seen": 74366496, "step": 34460 }, { "epoch": 5.622349102773247, "grad_norm": 0.3942221701145172, "learning_rate": 0.0009033674616353236, "loss": 0.2591, "num_input_tokens_seen": 74377600, "step": 34465 }, { "epoch": 5.623164763458401, "grad_norm": 0.3130427300930023, "learning_rate": 0.0009033253964299972, "loss": 0.1431, "num_input_tokens_seen": 74388160, "step": 34470 }, { "epoch": 5.623980424143556, "grad_norm": 0.004189645871520042, "learning_rate": 0.0009032833230507702, "loss": 0.3711, "num_input_tokens_seen": 74398912, "step": 34475 }, { "epoch": 5.624796084828711, "grad_norm": 0.7448554635047913, "learning_rate": 0.000903241241498495, "loss": 0.0801, "num_input_tokens_seen": 74410528, "step": 34480 }, { "epoch": 5.625611745513866, "grad_norm": 0.4256080090999603, "learning_rate": 0.0009031991517740244, "loss": 0.0922, "num_input_tokens_seen": 74422272, "step": 34485 }, { "epoch": 5.626427406199021, "grad_norm": 0.13796821236610413, "learning_rate": 0.0009031570538782115, "loss": 0.2524, "num_input_tokens_seen": 74432480, "step": 34490 }, { "epoch": 5.627243066884176, "grad_norm": 0.02983507141470909, "learning_rate": 0.0009031149478119094, "loss": 0.0787, "num_input_tokens_seen": 74443008, "step": 34495 }, { "epoch": 5.628058727569331, "grad_norm": 0.3010656535625458, "learning_rate": 0.0009030728335759716, "loss": 0.1419, "num_input_tokens_seen": 74454784, "step": 34500 }, { "epoch": 5.628874388254486, "grad_norm": 0.40009579062461853, "learning_rate": 0.0009030307111712514, "loss": 0.1985, "num_input_tokens_seen": 74466016, "step": 34505 }, { "epoch": 5.629690048939641, "grad_norm": 0.15258464217185974, "learning_rate": 0.0009029885805986027, "loss": 0.0233, "num_input_tokens_seen": 74477600, "step": 34510 }, { "epoch": 5.630505709624796, "grad_norm": 0.09673656523227692, "learning_rate": 0.0009029464418588791, "loss": 0.0673, "num_input_tokens_seen": 74489024, "step": 34515 }, { "epoch": 5.631321370309951, "grad_norm": 0.06399132311344147, "learning_rate": 0.0009029042949529347, "loss": 0.2176, "num_input_tokens_seen": 74499776, "step": 34520 }, { "epoch": 5.632137030995106, "grad_norm": 0.5344573259353638, "learning_rate": 0.0009028621398816236, "loss": 0.18, "num_input_tokens_seen": 74509824, "step": 34525 }, { "epoch": 5.632952691680261, "grad_norm": 0.4363504648208618, "learning_rate": 0.0009028199766458002, "loss": 0.1894, "num_input_tokens_seen": 74520896, "step": 34530 }, { "epoch": 5.633768352365416, "grad_norm": 0.12759283185005188, "learning_rate": 0.000902777805246319, "loss": 0.0348, "num_input_tokens_seen": 74531456, "step": 34535 }, { "epoch": 5.634584013050571, "grad_norm": 0.02577286772429943, "learning_rate": 0.0009027356256840345, "loss": 0.0562, "num_input_tokens_seen": 74542592, "step": 34540 }, { "epoch": 5.635399673735726, "grad_norm": 0.026240799576044083, "learning_rate": 0.0009026934379598018, "loss": 0.0489, "num_input_tokens_seen": 74552800, "step": 34545 }, { "epoch": 5.636215334420881, "grad_norm": 0.42971017956733704, "learning_rate": 0.0009026512420744756, "loss": 0.1097, "num_input_tokens_seen": 74564000, "step": 34550 }, { "epoch": 5.637030995106036, "grad_norm": 0.016909627243876457, "learning_rate": 0.0009026090380289111, "loss": 0.1458, "num_input_tokens_seen": 74575584, "step": 34555 }, { "epoch": 5.637846655791191, "grad_norm": 0.025410939007997513, "learning_rate": 0.0009025668258239638, "loss": 0.1284, "num_input_tokens_seen": 74585600, "step": 34560 }, { "epoch": 5.638662316476346, "grad_norm": 0.24586129188537598, "learning_rate": 0.0009025246054604892, "loss": 0.0771, "num_input_tokens_seen": 74596352, "step": 34565 }, { "epoch": 5.6394779771615005, "grad_norm": 0.07094182819128036, "learning_rate": 0.0009024823769393427, "loss": 0.1418, "num_input_tokens_seen": 74607232, "step": 34570 }, { "epoch": 5.640293637846656, "grad_norm": 0.15324319899082184, "learning_rate": 0.0009024401402613803, "loss": 0.077, "num_input_tokens_seen": 74618464, "step": 34575 }, { "epoch": 5.641109298531811, "grad_norm": 0.10697678476572037, "learning_rate": 0.0009023978954274579, "loss": 0.1385, "num_input_tokens_seen": 74627776, "step": 34580 }, { "epoch": 5.641924959216966, "grad_norm": 0.18614965677261353, "learning_rate": 0.0009023556424384317, "loss": 0.0467, "num_input_tokens_seen": 74639136, "step": 34585 }, { "epoch": 5.642740619902121, "grad_norm": 0.04832128807902336, "learning_rate": 0.0009023133812951581, "loss": 0.0203, "num_input_tokens_seen": 74648736, "step": 34590 }, { "epoch": 5.643556280587275, "grad_norm": 0.018466826528310776, "learning_rate": 0.0009022711119984932, "loss": 0.013, "num_input_tokens_seen": 74659712, "step": 34595 }, { "epoch": 5.64437194127243, "grad_norm": 0.01913890615105629, "learning_rate": 0.0009022288345492941, "loss": 0.0727, "num_input_tokens_seen": 74669952, "step": 34600 }, { "epoch": 5.645187601957586, "grad_norm": 0.15149618685245514, "learning_rate": 0.0009021865489484173, "loss": 0.0948, "num_input_tokens_seen": 74680320, "step": 34605 }, { "epoch": 5.646003262642741, "grad_norm": 0.08799257129430771, "learning_rate": 0.0009021442551967198, "loss": 0.0697, "num_input_tokens_seen": 74691936, "step": 34610 }, { "epoch": 5.646818923327896, "grad_norm": 0.40812253952026367, "learning_rate": 0.000902101953295059, "loss": 0.0766, "num_input_tokens_seen": 74702208, "step": 34615 }, { "epoch": 5.64763458401305, "grad_norm": 0.1235685646533966, "learning_rate": 0.0009020596432442918, "loss": 0.1913, "num_input_tokens_seen": 74713760, "step": 34620 }, { "epoch": 5.648450244698205, "grad_norm": 0.05971542373299599, "learning_rate": 0.0009020173250452761, "loss": 0.0627, "num_input_tokens_seen": 74724000, "step": 34625 }, { "epoch": 5.649265905383361, "grad_norm": 0.07850366830825806, "learning_rate": 0.0009019749986988692, "loss": 0.1047, "num_input_tokens_seen": 74733888, "step": 34630 }, { "epoch": 5.650081566068516, "grad_norm": 0.3104951083660126, "learning_rate": 0.000901932664205929, "loss": 0.1343, "num_input_tokens_seen": 74745184, "step": 34635 }, { "epoch": 5.650897226753671, "grad_norm": 0.17923834919929504, "learning_rate": 0.0009018903215673135, "loss": 0.1993, "num_input_tokens_seen": 74755392, "step": 34640 }, { "epoch": 5.651712887438825, "grad_norm": 0.06313243508338928, "learning_rate": 0.0009018479707838808, "loss": 0.0259, "num_input_tokens_seen": 74767776, "step": 34645 }, { "epoch": 5.65252854812398, "grad_norm": 0.3423994183540344, "learning_rate": 0.0009018056118564893, "loss": 0.0444, "num_input_tokens_seen": 74777664, "step": 34650 }, { "epoch": 5.653344208809135, "grad_norm": 0.05090714246034622, "learning_rate": 0.0009017632447859971, "loss": 0.0795, "num_input_tokens_seen": 74788352, "step": 34655 }, { "epoch": 5.654159869494291, "grad_norm": 0.2204114943742752, "learning_rate": 0.0009017208695732633, "loss": 0.1692, "num_input_tokens_seen": 74799392, "step": 34660 }, { "epoch": 5.6549755301794455, "grad_norm": 0.0872822031378746, "learning_rate": 0.0009016784862191463, "loss": 0.1549, "num_input_tokens_seen": 74810912, "step": 34665 }, { "epoch": 5.6557911908646, "grad_norm": 0.03856171667575836, "learning_rate": 0.0009016360947245053, "loss": 0.038, "num_input_tokens_seen": 74820064, "step": 34670 }, { "epoch": 5.656606851549755, "grad_norm": 0.00477330107241869, "learning_rate": 0.0009015936950901993, "loss": 0.0088, "num_input_tokens_seen": 74832128, "step": 34675 }, { "epoch": 5.65742251223491, "grad_norm": 0.06736885756254196, "learning_rate": 0.0009015512873170877, "loss": 0.153, "num_input_tokens_seen": 74843040, "step": 34680 }, { "epoch": 5.658238172920065, "grad_norm": 0.10295030474662781, "learning_rate": 0.0009015088714060297, "loss": 0.1257, "num_input_tokens_seen": 74853760, "step": 34685 }, { "epoch": 5.6590538336052205, "grad_norm": 0.16876518726348877, "learning_rate": 0.0009014664473578851, "loss": 0.0942, "num_input_tokens_seen": 74864000, "step": 34690 }, { "epoch": 5.659869494290375, "grad_norm": 0.4086992144584656, "learning_rate": 0.0009014240151735138, "loss": 0.0717, "num_input_tokens_seen": 74874144, "step": 34695 }, { "epoch": 5.66068515497553, "grad_norm": 0.20435844361782074, "learning_rate": 0.0009013815748537755, "loss": 0.1527, "num_input_tokens_seen": 74884608, "step": 34700 }, { "epoch": 5.661500815660685, "grad_norm": 0.196432426571846, "learning_rate": 0.0009013391263995303, "loss": 0.0933, "num_input_tokens_seen": 74894880, "step": 34705 }, { "epoch": 5.66231647634584, "grad_norm": 0.12401189655065536, "learning_rate": 0.0009012966698116387, "loss": 0.0592, "num_input_tokens_seen": 74906432, "step": 34710 }, { "epoch": 5.6631321370309955, "grad_norm": 0.044955942779779434, "learning_rate": 0.0009012542050909609, "loss": 0.1592, "num_input_tokens_seen": 74916992, "step": 34715 }, { "epoch": 5.66394779771615, "grad_norm": 0.9044135808944702, "learning_rate": 0.0009012117322383577, "loss": 0.1481, "num_input_tokens_seen": 74927936, "step": 34720 }, { "epoch": 5.664763458401305, "grad_norm": 0.035306416451931, "learning_rate": 0.0009011692512546897, "loss": 0.0322, "num_input_tokens_seen": 74939008, "step": 34725 }, { "epoch": 5.66557911908646, "grad_norm": 0.08044596016407013, "learning_rate": 0.0009011267621408179, "loss": 0.0756, "num_input_tokens_seen": 74949920, "step": 34730 }, { "epoch": 5.666394779771615, "grad_norm": 0.050850771367549896, "learning_rate": 0.0009010842648976034, "loss": 0.2003, "num_input_tokens_seen": 74961440, "step": 34735 }, { "epoch": 5.6672104404567705, "grad_norm": 0.017895543947815895, "learning_rate": 0.0009010417595259077, "loss": 0.1182, "num_input_tokens_seen": 74972672, "step": 34740 }, { "epoch": 5.668026101141925, "grad_norm": 0.4449547231197357, "learning_rate": 0.0009009992460265917, "loss": 0.0941, "num_input_tokens_seen": 74984032, "step": 34745 }, { "epoch": 5.66884176182708, "grad_norm": 0.1064477264881134, "learning_rate": 0.0009009567244005174, "loss": 0.0825, "num_input_tokens_seen": 74994112, "step": 34750 }, { "epoch": 5.669657422512235, "grad_norm": 0.035620711743831635, "learning_rate": 0.0009009141946485464, "loss": 0.0534, "num_input_tokens_seen": 75004960, "step": 34755 }, { "epoch": 5.67047308319739, "grad_norm": 0.3777397871017456, "learning_rate": 0.0009008716567715406, "loss": 0.052, "num_input_tokens_seen": 75014336, "step": 34760 }, { "epoch": 5.671288743882545, "grad_norm": 0.0141084473580122, "learning_rate": 0.0009008291107703621, "loss": 0.0446, "num_input_tokens_seen": 75024032, "step": 34765 }, { "epoch": 5.672104404567699, "grad_norm": 0.08631782233715057, "learning_rate": 0.0009007865566458733, "loss": 0.0176, "num_input_tokens_seen": 75034080, "step": 34770 }, { "epoch": 5.672920065252855, "grad_norm": 0.042694441974163055, "learning_rate": 0.0009007439943989364, "loss": 0.0372, "num_input_tokens_seen": 75045376, "step": 34775 }, { "epoch": 5.67373572593801, "grad_norm": 0.014830412343144417, "learning_rate": 0.0009007014240304143, "loss": 0.0439, "num_input_tokens_seen": 75054976, "step": 34780 }, { "epoch": 5.674551386623165, "grad_norm": 0.05688071623444557, "learning_rate": 0.0009006588455411692, "loss": 0.0736, "num_input_tokens_seen": 75066656, "step": 34785 }, { "epoch": 5.6753670473083195, "grad_norm": 0.028390178456902504, "learning_rate": 0.0009006162589320645, "loss": 0.0231, "num_input_tokens_seen": 75077216, "step": 34790 }, { "epoch": 5.676182707993474, "grad_norm": 0.015469072386622429, "learning_rate": 0.000900573664203963, "loss": 0.0205, "num_input_tokens_seen": 75088800, "step": 34795 }, { "epoch": 5.67699836867863, "grad_norm": 0.034379467368125916, "learning_rate": 0.0009005310613577282, "loss": 0.1312, "num_input_tokens_seen": 75100096, "step": 34800 }, { "epoch": 5.677814029363785, "grad_norm": 0.01801988296210766, "learning_rate": 0.0009004884503942232, "loss": 0.0774, "num_input_tokens_seen": 75110752, "step": 34805 }, { "epoch": 5.67862969004894, "grad_norm": 0.0075623285956680775, "learning_rate": 0.0009004458313143118, "loss": 0.0258, "num_input_tokens_seen": 75121792, "step": 34810 }, { "epoch": 5.6794453507340945, "grad_norm": 0.008005345240235329, "learning_rate": 0.0009004032041188575, "loss": 0.1853, "num_input_tokens_seen": 75132096, "step": 34815 }, { "epoch": 5.680261011419249, "grad_norm": 0.057614244520664215, "learning_rate": 0.0009003605688087244, "loss": 0.1245, "num_input_tokens_seen": 75143392, "step": 34820 }, { "epoch": 5.681076672104405, "grad_norm": 0.20683561265468597, "learning_rate": 0.0009003179253847764, "loss": 0.0345, "num_input_tokens_seen": 75154752, "step": 34825 }, { "epoch": 5.68189233278956, "grad_norm": 0.029672974720597267, "learning_rate": 0.0009002752738478779, "loss": 0.0894, "num_input_tokens_seen": 75164832, "step": 34830 }, { "epoch": 5.682707993474715, "grad_norm": 0.015130017884075642, "learning_rate": 0.000900232614198893, "loss": 0.1556, "num_input_tokens_seen": 75175936, "step": 34835 }, { "epoch": 5.6835236541598695, "grad_norm": 0.12673024833202362, "learning_rate": 0.0009001899464386867, "loss": 0.0622, "num_input_tokens_seen": 75185920, "step": 34840 }, { "epoch": 5.684339314845024, "grad_norm": 0.01434240210801363, "learning_rate": 0.0009001472705681233, "loss": 0.0288, "num_input_tokens_seen": 75196576, "step": 34845 }, { "epoch": 5.685154975530179, "grad_norm": 0.012928368523716927, "learning_rate": 0.0009001045865880679, "loss": 0.0508, "num_input_tokens_seen": 75206752, "step": 34850 }, { "epoch": 5.685970636215334, "grad_norm": 0.43933171033859253, "learning_rate": 0.0009000618944993854, "loss": 0.1256, "num_input_tokens_seen": 75219104, "step": 34855 }, { "epoch": 5.68678629690049, "grad_norm": 0.006042810156941414, "learning_rate": 0.0009000191943029412, "loss": 0.0799, "num_input_tokens_seen": 75230016, "step": 34860 }, { "epoch": 5.6876019575856445, "grad_norm": 0.11846655607223511, "learning_rate": 0.0008999764859996005, "loss": 0.1055, "num_input_tokens_seen": 75241504, "step": 34865 }, { "epoch": 5.688417618270799, "grad_norm": 0.15159554779529572, "learning_rate": 0.000899933769590229, "loss": 0.1355, "num_input_tokens_seen": 75252960, "step": 34870 }, { "epoch": 5.689233278955954, "grad_norm": 0.09048362821340561, "learning_rate": 0.0008998910450756923, "loss": 0.1678, "num_input_tokens_seen": 75263616, "step": 34875 }, { "epoch": 5.690048939641109, "grad_norm": 0.01475920807570219, "learning_rate": 0.0008998483124568561, "loss": 0.0403, "num_input_tokens_seen": 75274208, "step": 34880 }, { "epoch": 5.690864600326265, "grad_norm": 0.02499830536544323, "learning_rate": 0.0008998055717345868, "loss": 0.2082, "num_input_tokens_seen": 75285600, "step": 34885 }, { "epoch": 5.691680261011419, "grad_norm": 0.10734041780233383, "learning_rate": 0.0008997628229097503, "loss": 0.0347, "num_input_tokens_seen": 75296544, "step": 34890 }, { "epoch": 5.692495921696574, "grad_norm": 0.031412553042173386, "learning_rate": 0.0008997200659832129, "loss": 0.1912, "num_input_tokens_seen": 75308384, "step": 34895 }, { "epoch": 5.693311582381729, "grad_norm": 0.19736769795417786, "learning_rate": 0.0008996773009558416, "loss": 0.0943, "num_input_tokens_seen": 75318656, "step": 34900 }, { "epoch": 5.694127243066884, "grad_norm": 0.175284743309021, "learning_rate": 0.0008996345278285027, "loss": 0.095, "num_input_tokens_seen": 75329152, "step": 34905 }, { "epoch": 5.69494290375204, "grad_norm": 0.3828403651714325, "learning_rate": 0.000899591746602063, "loss": 0.0942, "num_input_tokens_seen": 75339328, "step": 34910 }, { "epoch": 5.695758564437194, "grad_norm": 0.015262450091540813, "learning_rate": 0.0008995489572773896, "loss": 0.0522, "num_input_tokens_seen": 75351104, "step": 34915 }, { "epoch": 5.696574225122349, "grad_norm": 0.30807459354400635, "learning_rate": 0.0008995061598553499, "loss": 0.2721, "num_input_tokens_seen": 75362176, "step": 34920 }, { "epoch": 5.697389885807504, "grad_norm": 0.6110740900039673, "learning_rate": 0.000899463354336811, "loss": 0.1667, "num_input_tokens_seen": 75372768, "step": 34925 }, { "epoch": 5.698205546492659, "grad_norm": 0.018610049039125443, "learning_rate": 0.0008994205407226403, "loss": 0.0166, "num_input_tokens_seen": 75383808, "step": 34930 }, { "epoch": 5.699021207177814, "grad_norm": 0.01315385103225708, "learning_rate": 0.0008993777190137058, "loss": 0.1008, "num_input_tokens_seen": 75394240, "step": 34935 }, { "epoch": 5.699836867862969, "grad_norm": 0.4213053584098816, "learning_rate": 0.0008993348892108753, "loss": 0.1875, "num_input_tokens_seen": 75404416, "step": 34940 }, { "epoch": 5.700652528548124, "grad_norm": 0.028114255517721176, "learning_rate": 0.0008992920513150165, "loss": 0.1464, "num_input_tokens_seen": 75414272, "step": 34945 }, { "epoch": 5.701468189233279, "grad_norm": 0.6140248775482178, "learning_rate": 0.0008992492053269976, "loss": 0.142, "num_input_tokens_seen": 75424096, "step": 34950 }, { "epoch": 5.702283849918434, "grad_norm": 0.33115965127944946, "learning_rate": 0.0008992063512476873, "loss": 0.1389, "num_input_tokens_seen": 75434208, "step": 34955 }, { "epoch": 5.703099510603589, "grad_norm": 0.09612563997507095, "learning_rate": 0.0008991634890779538, "loss": 0.0669, "num_input_tokens_seen": 75444640, "step": 34960 }, { "epoch": 5.7039151712887435, "grad_norm": 0.07617972791194916, "learning_rate": 0.0008991206188186658, "loss": 0.0497, "num_input_tokens_seen": 75455680, "step": 34965 }, { "epoch": 5.704730831973899, "grad_norm": 0.17514978349208832, "learning_rate": 0.0008990777404706922, "loss": 0.1451, "num_input_tokens_seen": 75466208, "step": 34970 }, { "epoch": 5.705546492659054, "grad_norm": 0.08824466913938522, "learning_rate": 0.0008990348540349019, "loss": 0.0678, "num_input_tokens_seen": 75475680, "step": 34975 }, { "epoch": 5.706362153344209, "grad_norm": 0.23644739389419556, "learning_rate": 0.0008989919595121641, "loss": 0.0833, "num_input_tokens_seen": 75486368, "step": 34980 }, { "epoch": 5.707177814029364, "grad_norm": 0.45226654410362244, "learning_rate": 0.000898949056903348, "loss": 0.1562, "num_input_tokens_seen": 75496864, "step": 34985 }, { "epoch": 5.7079934747145185, "grad_norm": 0.008867254480719566, "learning_rate": 0.0008989061462093233, "loss": 0.0191, "num_input_tokens_seen": 75508320, "step": 34990 }, { "epoch": 5.708809135399674, "grad_norm": 0.03352213650941849, "learning_rate": 0.0008988632274309593, "loss": 0.0748, "num_input_tokens_seen": 75520640, "step": 34995 }, { "epoch": 5.709624796084829, "grad_norm": 0.10128029435873032, "learning_rate": 0.0008988203005691262, "loss": 0.0428, "num_input_tokens_seen": 75530560, "step": 35000 }, { "epoch": 5.710440456769984, "grad_norm": 0.7100696563720703, "learning_rate": 0.0008987773656246936, "loss": 0.0809, "num_input_tokens_seen": 75540448, "step": 35005 }, { "epoch": 5.711256117455139, "grad_norm": 0.3765551447868347, "learning_rate": 0.0008987344225985319, "loss": 0.1071, "num_input_tokens_seen": 75550272, "step": 35010 }, { "epoch": 5.712071778140293, "grad_norm": 0.20009589195251465, "learning_rate": 0.0008986914714915112, "loss": 0.0844, "num_input_tokens_seen": 75562016, "step": 35015 }, { "epoch": 5.712887438825448, "grad_norm": 0.04967062547802925, "learning_rate": 0.000898648512304502, "loss": 0.0463, "num_input_tokens_seen": 75573184, "step": 35020 }, { "epoch": 5.713703099510604, "grad_norm": 0.1394423544406891, "learning_rate": 0.0008986055450383752, "loss": 0.0474, "num_input_tokens_seen": 75583200, "step": 35025 }, { "epoch": 5.714518760195759, "grad_norm": 0.3808037340641022, "learning_rate": 0.0008985625696940013, "loss": 0.217, "num_input_tokens_seen": 75594240, "step": 35030 }, { "epoch": 5.715334420880914, "grad_norm": 0.23324237763881683, "learning_rate": 0.0008985195862722513, "loss": 0.029, "num_input_tokens_seen": 75604608, "step": 35035 }, { "epoch": 5.716150081566068, "grad_norm": 0.14053428173065186, "learning_rate": 0.0008984765947739964, "loss": 0.1524, "num_input_tokens_seen": 75615744, "step": 35040 }, { "epoch": 5.716965742251223, "grad_norm": 0.057728011161088943, "learning_rate": 0.0008984335952001075, "loss": 0.2114, "num_input_tokens_seen": 75626496, "step": 35045 }, { "epoch": 5.717781402936378, "grad_norm": 0.011718401685357094, "learning_rate": 0.0008983905875514566, "loss": 0.0159, "num_input_tokens_seen": 75637760, "step": 35050 }, { "epoch": 5.718597063621534, "grad_norm": 0.07139599323272705, "learning_rate": 0.000898347571828915, "loss": 0.0968, "num_input_tokens_seen": 75646624, "step": 35055 }, { "epoch": 5.719412724306689, "grad_norm": 0.4526294469833374, "learning_rate": 0.0008983045480333545, "loss": 0.0807, "num_input_tokens_seen": 75657696, "step": 35060 }, { "epoch": 5.720228384991843, "grad_norm": 0.491219162940979, "learning_rate": 0.0008982615161656471, "loss": 0.1084, "num_input_tokens_seen": 75668128, "step": 35065 }, { "epoch": 5.721044045676998, "grad_norm": 0.08383429050445557, "learning_rate": 0.0008982184762266648, "loss": 0.0165, "num_input_tokens_seen": 75679072, "step": 35070 }, { "epoch": 5.721859706362153, "grad_norm": 0.018759015947580338, "learning_rate": 0.00089817542821728, "loss": 0.1407, "num_input_tokens_seen": 75690432, "step": 35075 }, { "epoch": 5.722675367047309, "grad_norm": 0.4715929627418518, "learning_rate": 0.0008981323721383649, "loss": 0.1315, "num_input_tokens_seen": 75700064, "step": 35080 }, { "epoch": 5.7234910277324635, "grad_norm": 0.45476603507995605, "learning_rate": 0.0008980893079907922, "loss": 0.2947, "num_input_tokens_seen": 75710560, "step": 35085 }, { "epoch": 5.724306688417618, "grad_norm": 0.1755806803703308, "learning_rate": 0.0008980462357754347, "loss": 0.0316, "num_input_tokens_seen": 75720256, "step": 35090 }, { "epoch": 5.725122349102773, "grad_norm": 0.35163751244544983, "learning_rate": 0.0008980031554931654, "loss": 0.212, "num_input_tokens_seen": 75730816, "step": 35095 }, { "epoch": 5.725938009787928, "grad_norm": 0.145817369222641, "learning_rate": 0.0008979600671448571, "loss": 0.0783, "num_input_tokens_seen": 75741568, "step": 35100 }, { "epoch": 5.726753670473083, "grad_norm": 0.31082308292388916, "learning_rate": 0.0008979169707313831, "loss": 0.1183, "num_input_tokens_seen": 75750368, "step": 35105 }, { "epoch": 5.7275693311582385, "grad_norm": 0.45988333225250244, "learning_rate": 0.000897873866253617, "loss": 0.1917, "num_input_tokens_seen": 75760832, "step": 35110 }, { "epoch": 5.728384991843393, "grad_norm": 0.31690049171447754, "learning_rate": 0.0008978307537124324, "loss": 0.1662, "num_input_tokens_seen": 75771904, "step": 35115 }, { "epoch": 5.729200652528548, "grad_norm": 0.10007751733064651, "learning_rate": 0.0008977876331087027, "loss": 0.1104, "num_input_tokens_seen": 75781920, "step": 35120 }, { "epoch": 5.730016313213703, "grad_norm": 0.2881515324115753, "learning_rate": 0.0008977445044433021, "loss": 0.0935, "num_input_tokens_seen": 75793600, "step": 35125 }, { "epoch": 5.730831973898858, "grad_norm": 0.02075897715985775, "learning_rate": 0.0008977013677171045, "loss": 0.2536, "num_input_tokens_seen": 75804064, "step": 35130 }, { "epoch": 5.731647634584013, "grad_norm": 0.2971900701522827, "learning_rate": 0.0008976582229309842, "loss": 0.1208, "num_input_tokens_seen": 75813760, "step": 35135 }, { "epoch": 5.732463295269168, "grad_norm": 0.023297790437936783, "learning_rate": 0.0008976150700858155, "loss": 0.0618, "num_input_tokens_seen": 75825248, "step": 35140 }, { "epoch": 5.733278955954323, "grad_norm": 0.09069034457206726, "learning_rate": 0.000897571909182473, "loss": 0.0883, "num_input_tokens_seen": 75836288, "step": 35145 }, { "epoch": 5.734094616639478, "grad_norm": 0.13035504519939423, "learning_rate": 0.0008975287402218314, "loss": 0.1772, "num_input_tokens_seen": 75847104, "step": 35150 }, { "epoch": 5.734910277324633, "grad_norm": 0.09928813576698303, "learning_rate": 0.0008974855632047657, "loss": 0.1265, "num_input_tokens_seen": 75858816, "step": 35155 }, { "epoch": 5.735725938009788, "grad_norm": 0.23770451545715332, "learning_rate": 0.0008974423781321506, "loss": 0.1183, "num_input_tokens_seen": 75868800, "step": 35160 }, { "epoch": 5.736541598694943, "grad_norm": 0.3532763421535492, "learning_rate": 0.0008973991850048616, "loss": 0.08, "num_input_tokens_seen": 75879904, "step": 35165 }, { "epoch": 5.737357259380098, "grad_norm": 0.1615249663591385, "learning_rate": 0.0008973559838237739, "loss": 0.068, "num_input_tokens_seen": 75890208, "step": 35170 }, { "epoch": 5.738172920065253, "grad_norm": 0.19247469305992126, "learning_rate": 0.0008973127745897634, "loss": 0.0627, "num_input_tokens_seen": 75901600, "step": 35175 }, { "epoch": 5.738988580750408, "grad_norm": 0.017135970294475555, "learning_rate": 0.0008972695573037052, "loss": 0.0609, "num_input_tokens_seen": 75912256, "step": 35180 }, { "epoch": 5.739804241435563, "grad_norm": 0.02943228930234909, "learning_rate": 0.0008972263319664756, "loss": 0.0479, "num_input_tokens_seen": 75923808, "step": 35185 }, { "epoch": 5.740619902120718, "grad_norm": 0.07456370443105698, "learning_rate": 0.0008971830985789504, "loss": 0.0962, "num_input_tokens_seen": 75934016, "step": 35190 }, { "epoch": 5.741435562805873, "grad_norm": 0.1820264607667923, "learning_rate": 0.0008971398571420058, "loss": 0.033, "num_input_tokens_seen": 75944288, "step": 35195 }, { "epoch": 5.742251223491028, "grad_norm": 0.1755121946334839, "learning_rate": 0.0008970966076565183, "loss": 0.2468, "num_input_tokens_seen": 75954976, "step": 35200 }, { "epoch": 5.743066884176183, "grad_norm": 0.12547104060649872, "learning_rate": 0.0008970533501233642, "loss": 0.0646, "num_input_tokens_seen": 75965568, "step": 35205 }, { "epoch": 5.7438825448613375, "grad_norm": 0.03255412355065346, "learning_rate": 0.0008970100845434204, "loss": 0.0361, "num_input_tokens_seen": 75977152, "step": 35210 }, { "epoch": 5.744698205546492, "grad_norm": 0.5355380773544312, "learning_rate": 0.0008969668109175635, "loss": 0.0838, "num_input_tokens_seen": 75988768, "step": 35215 }, { "epoch": 5.745513866231647, "grad_norm": 0.34910765290260315, "learning_rate": 0.0008969235292466706, "loss": 0.1314, "num_input_tokens_seen": 75998016, "step": 35220 }, { "epoch": 5.746329526916803, "grad_norm": 0.37777137756347656, "learning_rate": 0.0008968802395316187, "loss": 0.1321, "num_input_tokens_seen": 76008640, "step": 35225 }, { "epoch": 5.747145187601958, "grad_norm": 0.09451743960380554, "learning_rate": 0.0008968369417732855, "loss": 0.0564, "num_input_tokens_seen": 76019904, "step": 35230 }, { "epoch": 5.7479608482871125, "grad_norm": 0.29713886976242065, "learning_rate": 0.0008967936359725482, "loss": 0.1076, "num_input_tokens_seen": 76031488, "step": 35235 }, { "epoch": 5.748776508972267, "grad_norm": 0.09916041046380997, "learning_rate": 0.0008967503221302844, "loss": 0.1917, "num_input_tokens_seen": 76043232, "step": 35240 }, { "epoch": 5.749592169657422, "grad_norm": 0.34659889340400696, "learning_rate": 0.0008967070002473721, "loss": 0.0495, "num_input_tokens_seen": 76053696, "step": 35245 }, { "epoch": 5.750407830342578, "grad_norm": 0.18091651797294617, "learning_rate": 0.0008966636703246891, "loss": 0.1811, "num_input_tokens_seen": 76064768, "step": 35250 }, { "epoch": 5.751223491027733, "grad_norm": 0.05636643245816231, "learning_rate": 0.0008966203323631137, "loss": 0.0816, "num_input_tokens_seen": 76075264, "step": 35255 }, { "epoch": 5.7520391517128875, "grad_norm": 0.08892639726400375, "learning_rate": 0.000896576986363524, "loss": 0.1118, "num_input_tokens_seen": 76086368, "step": 35260 }, { "epoch": 5.752854812398042, "grad_norm": 0.18423070013523102, "learning_rate": 0.0008965336323267986, "loss": 0.1382, "num_input_tokens_seen": 76095968, "step": 35265 }, { "epoch": 5.753670473083197, "grad_norm": 0.27871909737586975, "learning_rate": 0.0008964902702538163, "loss": 0.0682, "num_input_tokens_seen": 76106752, "step": 35270 }, { "epoch": 5.754486133768353, "grad_norm": 0.14170120656490326, "learning_rate": 0.0008964469001454554, "loss": 0.047, "num_input_tokens_seen": 76118048, "step": 35275 }, { "epoch": 5.755301794453508, "grad_norm": 0.09062470495700836, "learning_rate": 0.0008964035220025953, "loss": 0.0222, "num_input_tokens_seen": 76128864, "step": 35280 }, { "epoch": 5.7561174551386625, "grad_norm": 0.25201520323753357, "learning_rate": 0.000896360135826115, "loss": 0.0828, "num_input_tokens_seen": 76139168, "step": 35285 }, { "epoch": 5.756933115823817, "grad_norm": 0.13299931585788727, "learning_rate": 0.0008963167416168936, "loss": 0.2028, "num_input_tokens_seen": 76149856, "step": 35290 }, { "epoch": 5.757748776508972, "grad_norm": 0.01444562990218401, "learning_rate": 0.0008962733393758107, "loss": 0.0443, "num_input_tokens_seen": 76160960, "step": 35295 }, { "epoch": 5.758564437194127, "grad_norm": 0.13611751794815063, "learning_rate": 0.0008962299291037459, "loss": 0.0253, "num_input_tokens_seen": 76170336, "step": 35300 }, { "epoch": 5.759380097879282, "grad_norm": 0.0755523294210434, "learning_rate": 0.000896186510801579, "loss": 0.0582, "num_input_tokens_seen": 76181088, "step": 35305 }, { "epoch": 5.760195758564437, "grad_norm": 0.04345029592514038, "learning_rate": 0.0008961430844701899, "loss": 0.178, "num_input_tokens_seen": 76190656, "step": 35310 }, { "epoch": 5.761011419249592, "grad_norm": 0.2502068281173706, "learning_rate": 0.0008960996501104583, "loss": 0.102, "num_input_tokens_seen": 76203072, "step": 35315 }, { "epoch": 5.761827079934747, "grad_norm": 0.0854695737361908, "learning_rate": 0.0008960562077232652, "loss": 0.0882, "num_input_tokens_seen": 76213632, "step": 35320 }, { "epoch": 5.762642740619902, "grad_norm": 0.10365451127290726, "learning_rate": 0.0008960127573094904, "loss": 0.152, "num_input_tokens_seen": 76225184, "step": 35325 }, { "epoch": 5.763458401305057, "grad_norm": 0.08228952437639236, "learning_rate": 0.0008959692988700148, "loss": 0.0878, "num_input_tokens_seen": 76236000, "step": 35330 }, { "epoch": 5.764274061990212, "grad_norm": 0.009751678444445133, "learning_rate": 0.000895925832405719, "loss": 0.0774, "num_input_tokens_seen": 76246560, "step": 35335 }, { "epoch": 5.765089722675367, "grad_norm": 0.09585826843976974, "learning_rate": 0.0008958823579174839, "loss": 0.0406, "num_input_tokens_seen": 76257344, "step": 35340 }, { "epoch": 5.765905383360522, "grad_norm": 0.07184731960296631, "learning_rate": 0.0008958388754061907, "loss": 0.0341, "num_input_tokens_seen": 76268576, "step": 35345 }, { "epoch": 5.766721044045677, "grad_norm": 0.2015448361635208, "learning_rate": 0.0008957953848727205, "loss": 0.0612, "num_input_tokens_seen": 76278912, "step": 35350 }, { "epoch": 5.767536704730832, "grad_norm": 0.38755977153778076, "learning_rate": 0.0008957518863179545, "loss": 0.0816, "num_input_tokens_seen": 76289568, "step": 35355 }, { "epoch": 5.768352365415987, "grad_norm": 0.2927498519420624, "learning_rate": 0.0008957083797427747, "loss": 0.0455, "num_input_tokens_seen": 76300576, "step": 35360 }, { "epoch": 5.769168026101142, "grad_norm": 0.009031339548528194, "learning_rate": 0.0008956648651480627, "loss": 0.0058, "num_input_tokens_seen": 76311808, "step": 35365 }, { "epoch": 5.769983686786297, "grad_norm": 0.317665159702301, "learning_rate": 0.0008956213425347001, "loss": 0.058, "num_input_tokens_seen": 76322784, "step": 35370 }, { "epoch": 5.770799347471452, "grad_norm": 0.05084143579006195, "learning_rate": 0.0008955778119035692, "loss": 0.1742, "num_input_tokens_seen": 76333280, "step": 35375 }, { "epoch": 5.771615008156607, "grad_norm": 0.2825391888618469, "learning_rate": 0.000895534273255552, "loss": 0.0788, "num_input_tokens_seen": 76345280, "step": 35380 }, { "epoch": 5.7724306688417615, "grad_norm": 0.4164521098136902, "learning_rate": 0.0008954907265915311, "loss": 0.129, "num_input_tokens_seen": 76356544, "step": 35385 }, { "epoch": 5.773246329526917, "grad_norm": 0.215864896774292, "learning_rate": 0.0008954471719123889, "loss": 0.3236, "num_input_tokens_seen": 76367456, "step": 35390 }, { "epoch": 5.774061990212072, "grad_norm": 0.025849949568510056, "learning_rate": 0.0008954036092190079, "loss": 0.0558, "num_input_tokens_seen": 76379328, "step": 35395 }, { "epoch": 5.774877650897227, "grad_norm": 0.005236461292952299, "learning_rate": 0.0008953600385122713, "loss": 0.0722, "num_input_tokens_seen": 76390592, "step": 35400 }, { "epoch": 5.775693311582382, "grad_norm": 0.19621627032756805, "learning_rate": 0.0008953164597930621, "loss": 0.0311, "num_input_tokens_seen": 76400640, "step": 35405 }, { "epoch": 5.7765089722675365, "grad_norm": 0.11065861582756042, "learning_rate": 0.0008952728730622632, "loss": 0.0979, "num_input_tokens_seen": 76410144, "step": 35410 }, { "epoch": 5.777324632952691, "grad_norm": 0.2194560468196869, "learning_rate": 0.000895229278320758, "loss": 0.0851, "num_input_tokens_seen": 76420704, "step": 35415 }, { "epoch": 5.778140293637847, "grad_norm": 0.04678243026137352, "learning_rate": 0.0008951856755694303, "loss": 0.1172, "num_input_tokens_seen": 76431872, "step": 35420 }, { "epoch": 5.778955954323002, "grad_norm": 0.011566748842597008, "learning_rate": 0.0008951420648091635, "loss": 0.0478, "num_input_tokens_seen": 76443936, "step": 35425 }, { "epoch": 5.779771615008157, "grad_norm": 0.0972641259431839, "learning_rate": 0.0008950984460408414, "loss": 0.0609, "num_input_tokens_seen": 76455904, "step": 35430 }, { "epoch": 5.780587275693311, "grad_norm": 0.025492532178759575, "learning_rate": 0.0008950548192653481, "loss": 0.0943, "num_input_tokens_seen": 76467168, "step": 35435 }, { "epoch": 5.781402936378466, "grad_norm": 0.1884034276008606, "learning_rate": 0.0008950111844835678, "loss": 0.08, "num_input_tokens_seen": 76477184, "step": 35440 }, { "epoch": 5.782218597063622, "grad_norm": 0.7385169863700867, "learning_rate": 0.0008949675416963847, "loss": 0.4548, "num_input_tokens_seen": 76487520, "step": 35445 }, { "epoch": 5.783034257748777, "grad_norm": 0.11542234569787979, "learning_rate": 0.0008949238909046833, "loss": 0.1836, "num_input_tokens_seen": 76498752, "step": 35450 }, { "epoch": 5.783849918433932, "grad_norm": 0.4526139497756958, "learning_rate": 0.0008948802321093484, "loss": 0.0959, "num_input_tokens_seen": 76509344, "step": 35455 }, { "epoch": 5.784665579119086, "grad_norm": 0.0629848837852478, "learning_rate": 0.0008948365653112645, "loss": 0.0458, "num_input_tokens_seen": 76520512, "step": 35460 }, { "epoch": 5.785481239804241, "grad_norm": 0.8237877488136292, "learning_rate": 0.0008947928905113166, "loss": 0.166, "num_input_tokens_seen": 76530048, "step": 35465 }, { "epoch": 5.786296900489396, "grad_norm": 0.3765025734901428, "learning_rate": 0.00089474920771039, "loss": 0.1463, "num_input_tokens_seen": 76541760, "step": 35470 }, { "epoch": 5.787112561174552, "grad_norm": 0.2595158815383911, "learning_rate": 0.0008947055169093701, "loss": 0.178, "num_input_tokens_seen": 76552288, "step": 35475 }, { "epoch": 5.787928221859707, "grad_norm": 0.14233607053756714, "learning_rate": 0.000894661818109142, "loss": 0.0777, "num_input_tokens_seen": 76562912, "step": 35480 }, { "epoch": 5.788743882544861, "grad_norm": 0.3600342273712158, "learning_rate": 0.0008946181113105915, "loss": 0.2549, "num_input_tokens_seen": 76573760, "step": 35485 }, { "epoch": 5.789559543230016, "grad_norm": 0.03818240761756897, "learning_rate": 0.0008945743965146044, "loss": 0.1483, "num_input_tokens_seen": 76583712, "step": 35490 }, { "epoch": 5.790375203915171, "grad_norm": 0.24550725519657135, "learning_rate": 0.0008945306737220669, "loss": 0.1739, "num_input_tokens_seen": 76594848, "step": 35495 }, { "epoch": 5.791190864600326, "grad_norm": 0.016272088512778282, "learning_rate": 0.0008944869429338645, "loss": 0.0818, "num_input_tokens_seen": 76606944, "step": 35500 }, { "epoch": 5.7920065252854815, "grad_norm": 0.37452420592308044, "learning_rate": 0.0008944432041508838, "loss": 0.1, "num_input_tokens_seen": 76615680, "step": 35505 }, { "epoch": 5.792822185970636, "grad_norm": 0.08965803682804108, "learning_rate": 0.0008943994573740111, "loss": 0.2718, "num_input_tokens_seen": 76627744, "step": 35510 }, { "epoch": 5.793637846655791, "grad_norm": 0.20133842527866364, "learning_rate": 0.0008943557026041331, "loss": 0.0491, "num_input_tokens_seen": 76638912, "step": 35515 }, { "epoch": 5.794453507340946, "grad_norm": 0.46865710616111755, "learning_rate": 0.0008943119398421367, "loss": 0.1109, "num_input_tokens_seen": 76649312, "step": 35520 }, { "epoch": 5.795269168026101, "grad_norm": 0.07669750601053238, "learning_rate": 0.0008942681690889084, "loss": 0.0304, "num_input_tokens_seen": 76659936, "step": 35525 }, { "epoch": 5.7960848287112565, "grad_norm": 0.04282965138554573, "learning_rate": 0.0008942243903453356, "loss": 0.1046, "num_input_tokens_seen": 76670432, "step": 35530 }, { "epoch": 5.796900489396411, "grad_norm": 0.02896581031382084, "learning_rate": 0.0008941806036123054, "loss": 0.0466, "num_input_tokens_seen": 76681248, "step": 35535 }, { "epoch": 5.797716150081566, "grad_norm": 0.008679798804223537, "learning_rate": 0.0008941368088907052, "loss": 0.0421, "num_input_tokens_seen": 76691424, "step": 35540 }, { "epoch": 5.798531810766721, "grad_norm": 0.013509760610759258, "learning_rate": 0.0008940930061814226, "loss": 0.0925, "num_input_tokens_seen": 76701984, "step": 35545 }, { "epoch": 5.799347471451876, "grad_norm": 0.03300989046692848, "learning_rate": 0.0008940491954853451, "loss": 0.0446, "num_input_tokens_seen": 76712672, "step": 35550 }, { "epoch": 5.800163132137031, "grad_norm": 0.03703325241804123, "learning_rate": 0.0008940053768033609, "loss": 0.0346, "num_input_tokens_seen": 76722912, "step": 35555 }, { "epoch": 5.800978792822186, "grad_norm": 0.05283704772591591, "learning_rate": 0.0008939615501363581, "loss": 0.0244, "num_input_tokens_seen": 76734560, "step": 35560 }, { "epoch": 5.801794453507341, "grad_norm": 0.4448443651199341, "learning_rate": 0.0008939177154852245, "loss": 0.1331, "num_input_tokens_seen": 76745856, "step": 35565 }, { "epoch": 5.802610114192496, "grad_norm": 0.2907335162162781, "learning_rate": 0.0008938738728508487, "loss": 0.0654, "num_input_tokens_seen": 76757152, "step": 35570 }, { "epoch": 5.803425774877651, "grad_norm": 0.03172137960791588, "learning_rate": 0.0008938300222341192, "loss": 0.2265, "num_input_tokens_seen": 76767776, "step": 35575 }, { "epoch": 5.804241435562806, "grad_norm": 0.27944836020469666, "learning_rate": 0.0008937861636359248, "loss": 0.107, "num_input_tokens_seen": 76778368, "step": 35580 }, { "epoch": 5.80505709624796, "grad_norm": 0.40030089020729065, "learning_rate": 0.000893742297057154, "loss": 0.1052, "num_input_tokens_seen": 76789216, "step": 35585 }, { "epoch": 5.805872756933116, "grad_norm": 0.040430907160043716, "learning_rate": 0.0008936984224986962, "loss": 0.0779, "num_input_tokens_seen": 76801248, "step": 35590 }, { "epoch": 5.806688417618271, "grad_norm": 0.10204604268074036, "learning_rate": 0.0008936545399614405, "loss": 0.0573, "num_input_tokens_seen": 76810464, "step": 35595 }, { "epoch": 5.807504078303426, "grad_norm": 0.15727996826171875, "learning_rate": 0.0008936106494462761, "loss": 0.1553, "num_input_tokens_seen": 76822016, "step": 35600 }, { "epoch": 5.808319738988581, "grad_norm": 0.1952189952135086, "learning_rate": 0.0008935667509540926, "loss": 0.0558, "num_input_tokens_seen": 76832512, "step": 35605 }, { "epoch": 5.809135399673735, "grad_norm": 0.28809651732444763, "learning_rate": 0.0008935228444857795, "loss": 0.1226, "num_input_tokens_seen": 76843328, "step": 35610 }, { "epoch": 5.809951060358891, "grad_norm": 0.03265548124909401, "learning_rate": 0.0008934789300422268, "loss": 0.1274, "num_input_tokens_seen": 76853760, "step": 35615 }, { "epoch": 5.810766721044046, "grad_norm": 0.321913480758667, "learning_rate": 0.0008934350076243245, "loss": 0.193, "num_input_tokens_seen": 76863776, "step": 35620 }, { "epoch": 5.811582381729201, "grad_norm": 0.10693345963954926, "learning_rate": 0.0008933910772329625, "loss": 0.0824, "num_input_tokens_seen": 76875328, "step": 35625 }, { "epoch": 5.8123980424143555, "grad_norm": 0.021103518083691597, "learning_rate": 0.0008933471388690314, "loss": 0.0374, "num_input_tokens_seen": 76886432, "step": 35630 }, { "epoch": 5.81321370309951, "grad_norm": 0.11103524267673492, "learning_rate": 0.0008933031925334214, "loss": 0.0642, "num_input_tokens_seen": 76896448, "step": 35635 }, { "epoch": 5.814029363784666, "grad_norm": 0.026865128427743912, "learning_rate": 0.0008932592382270235, "loss": 0.0625, "num_input_tokens_seen": 76907072, "step": 35640 }, { "epoch": 5.814845024469821, "grad_norm": 0.07422339171171188, "learning_rate": 0.0008932152759507279, "loss": 0.2169, "num_input_tokens_seen": 76917312, "step": 35645 }, { "epoch": 5.815660685154976, "grad_norm": 0.03850390389561653, "learning_rate": 0.0008931713057054263, "loss": 0.0398, "num_input_tokens_seen": 76928832, "step": 35650 }, { "epoch": 5.8164763458401305, "grad_norm": 0.15367527306079865, "learning_rate": 0.0008931273274920091, "loss": 0.0692, "num_input_tokens_seen": 76939488, "step": 35655 }, { "epoch": 5.817292006525285, "grad_norm": 0.2739315330982208, "learning_rate": 0.0008930833413113682, "loss": 0.0812, "num_input_tokens_seen": 76950080, "step": 35660 }, { "epoch": 5.81810766721044, "grad_norm": 0.28793448209762573, "learning_rate": 0.0008930393471643945, "loss": 0.0708, "num_input_tokens_seen": 76960608, "step": 35665 }, { "epoch": 5.818923327895595, "grad_norm": 0.028073977679014206, "learning_rate": 0.0008929953450519799, "loss": 0.1352, "num_input_tokens_seen": 76971680, "step": 35670 }, { "epoch": 5.819738988580751, "grad_norm": 0.0377231240272522, "learning_rate": 0.000892951334975016, "loss": 0.1365, "num_input_tokens_seen": 76983680, "step": 35675 }, { "epoch": 5.8205546492659055, "grad_norm": 0.4496561586856842, "learning_rate": 0.0008929073169343948, "loss": 0.2585, "num_input_tokens_seen": 76993888, "step": 35680 }, { "epoch": 5.82137030995106, "grad_norm": 0.14706535637378693, "learning_rate": 0.0008928632909310084, "loss": 0.0305, "num_input_tokens_seen": 77004800, "step": 35685 }, { "epoch": 5.822185970636215, "grad_norm": 0.12223663926124573, "learning_rate": 0.000892819256965749, "loss": 0.1338, "num_input_tokens_seen": 77015616, "step": 35690 }, { "epoch": 5.82300163132137, "grad_norm": 0.17912597954273224, "learning_rate": 0.0008927752150395092, "loss": 0.0469, "num_input_tokens_seen": 77027040, "step": 35695 }, { "epoch": 5.823817292006526, "grad_norm": 0.18874692916870117, "learning_rate": 0.0008927311651531813, "loss": 0.0418, "num_input_tokens_seen": 77038912, "step": 35700 }, { "epoch": 5.8246329526916805, "grad_norm": 0.34856441617012024, "learning_rate": 0.0008926871073076581, "loss": 0.0514, "num_input_tokens_seen": 77049728, "step": 35705 }, { "epoch": 5.825448613376835, "grad_norm": 0.014552679844200611, "learning_rate": 0.0008926430415038324, "loss": 0.1226, "num_input_tokens_seen": 77060480, "step": 35710 }, { "epoch": 5.82626427406199, "grad_norm": 0.34308135509490967, "learning_rate": 0.0008925989677425976, "loss": 0.2628, "num_input_tokens_seen": 77070656, "step": 35715 }, { "epoch": 5.827079934747145, "grad_norm": 0.03816450759768486, "learning_rate": 0.0008925548860248464, "loss": 0.0604, "num_input_tokens_seen": 77080320, "step": 35720 }, { "epoch": 5.827895595432301, "grad_norm": 0.11364386975765228, "learning_rate": 0.0008925107963514727, "loss": 0.0481, "num_input_tokens_seen": 77091168, "step": 35725 }, { "epoch": 5.828711256117455, "grad_norm": 0.314354807138443, "learning_rate": 0.0008924666987233697, "loss": 0.1305, "num_input_tokens_seen": 77102304, "step": 35730 }, { "epoch": 5.82952691680261, "grad_norm": 0.34631896018981934, "learning_rate": 0.0008924225931414312, "loss": 0.1009, "num_input_tokens_seen": 77113504, "step": 35735 }, { "epoch": 5.830342577487765, "grad_norm": 0.602199912071228, "learning_rate": 0.000892378479606551, "loss": 0.2013, "num_input_tokens_seen": 77125856, "step": 35740 }, { "epoch": 5.83115823817292, "grad_norm": 0.21205942332744598, "learning_rate": 0.0008923343581196231, "loss": 0.115, "num_input_tokens_seen": 77135616, "step": 35745 }, { "epoch": 5.831973898858075, "grad_norm": 0.06693276762962341, "learning_rate": 0.0008922902286815417, "loss": 0.0366, "num_input_tokens_seen": 77146688, "step": 35750 }, { "epoch": 5.8327895595432295, "grad_norm": 0.024583742022514343, "learning_rate": 0.0008922460912932013, "loss": 0.2042, "num_input_tokens_seen": 77157696, "step": 35755 }, { "epoch": 5.833605220228385, "grad_norm": 0.08708050847053528, "learning_rate": 0.0008922019459554961, "loss": 0.083, "num_input_tokens_seen": 77169504, "step": 35760 }, { "epoch": 5.83442088091354, "grad_norm": 0.4164409637451172, "learning_rate": 0.000892157792669321, "loss": 0.0876, "num_input_tokens_seen": 77179424, "step": 35765 }, { "epoch": 5.835236541598695, "grad_norm": 0.04075586050748825, "learning_rate": 0.0008921136314355706, "loss": 0.0597, "num_input_tokens_seen": 77190528, "step": 35770 }, { "epoch": 5.83605220228385, "grad_norm": 0.23782560229301453, "learning_rate": 0.0008920694622551402, "loss": 0.1233, "num_input_tokens_seen": 77200800, "step": 35775 }, { "epoch": 5.8368678629690045, "grad_norm": 0.36072486639022827, "learning_rate": 0.0008920252851289248, "loss": 0.3809, "num_input_tokens_seen": 77212064, "step": 35780 }, { "epoch": 5.83768352365416, "grad_norm": 0.028735985979437828, "learning_rate": 0.0008919811000578195, "loss": 0.0243, "num_input_tokens_seen": 77223040, "step": 35785 }, { "epoch": 5.838499184339315, "grad_norm": 0.3051793575286865, "learning_rate": 0.0008919369070427201, "loss": 0.0698, "num_input_tokens_seen": 77233920, "step": 35790 }, { "epoch": 5.83931484502447, "grad_norm": 0.05971081927418709, "learning_rate": 0.000891892706084522, "loss": 0.0713, "num_input_tokens_seen": 77245312, "step": 35795 }, { "epoch": 5.840130505709625, "grad_norm": 0.050233446061611176, "learning_rate": 0.0008918484971841211, "loss": 0.0713, "num_input_tokens_seen": 77255552, "step": 35800 }, { "epoch": 5.8409461663947795, "grad_norm": 0.25608137249946594, "learning_rate": 0.0008918042803424133, "loss": 0.2545, "num_input_tokens_seen": 77266816, "step": 35805 }, { "epoch": 5.841761827079935, "grad_norm": 0.16953977942466736, "learning_rate": 0.0008917600555602947, "loss": 0.0384, "num_input_tokens_seen": 77277888, "step": 35810 }, { "epoch": 5.84257748776509, "grad_norm": 0.033759839832782745, "learning_rate": 0.0008917158228386616, "loss": 0.0315, "num_input_tokens_seen": 77289728, "step": 35815 }, { "epoch": 5.843393148450245, "grad_norm": 0.02717246487736702, "learning_rate": 0.0008916715821784105, "loss": 0.0194, "num_input_tokens_seen": 77301344, "step": 35820 }, { "epoch": 5.8442088091354, "grad_norm": 0.012489907443523407, "learning_rate": 0.0008916273335804377, "loss": 0.1244, "num_input_tokens_seen": 77312672, "step": 35825 }, { "epoch": 5.8450244698205545, "grad_norm": 0.029561089351773262, "learning_rate": 0.0008915830770456403, "loss": 0.021, "num_input_tokens_seen": 77322560, "step": 35830 }, { "epoch": 5.845840130505709, "grad_norm": 0.39458951354026794, "learning_rate": 0.0008915388125749152, "loss": 0.1226, "num_input_tokens_seen": 77333408, "step": 35835 }, { "epoch": 5.846655791190865, "grad_norm": 0.018306110054254532, "learning_rate": 0.0008914945401691592, "loss": 0.0348, "num_input_tokens_seen": 77344096, "step": 35840 }, { "epoch": 5.84747145187602, "grad_norm": 0.21841028332710266, "learning_rate": 0.0008914502598292698, "loss": 0.2353, "num_input_tokens_seen": 77355200, "step": 35845 }, { "epoch": 5.848287112561175, "grad_norm": 0.19094513356685638, "learning_rate": 0.0008914059715561442, "loss": 0.0517, "num_input_tokens_seen": 77366144, "step": 35850 }, { "epoch": 5.849102773246329, "grad_norm": 0.022495143115520477, "learning_rate": 0.0008913616753506801, "loss": 0.1897, "num_input_tokens_seen": 77376704, "step": 35855 }, { "epoch": 5.849918433931484, "grad_norm": 0.04379771649837494, "learning_rate": 0.0008913173712137752, "loss": 0.1108, "num_input_tokens_seen": 77387296, "step": 35860 }, { "epoch": 5.850734094616639, "grad_norm": 0.41782650351524353, "learning_rate": 0.0008912730591463274, "loss": 0.0688, "num_input_tokens_seen": 77399296, "step": 35865 }, { "epoch": 5.851549755301795, "grad_norm": 0.19718439877033234, "learning_rate": 0.0008912287391492345, "loss": 0.0809, "num_input_tokens_seen": 77410176, "step": 35870 }, { "epoch": 5.85236541598695, "grad_norm": 0.08135117590427399, "learning_rate": 0.0008911844112233951, "loss": 0.0862, "num_input_tokens_seen": 77419840, "step": 35875 }, { "epoch": 5.853181076672104, "grad_norm": 0.2629607021808624, "learning_rate": 0.0008911400753697072, "loss": 0.0476, "num_input_tokens_seen": 77429760, "step": 35880 }, { "epoch": 5.853996737357259, "grad_norm": 0.0741540864109993, "learning_rate": 0.0008910957315890695, "loss": 0.1002, "num_input_tokens_seen": 77440096, "step": 35885 }, { "epoch": 5.854812398042414, "grad_norm": 0.024497751146554947, "learning_rate": 0.0008910513798823807, "loss": 0.0858, "num_input_tokens_seen": 77451040, "step": 35890 }, { "epoch": 5.85562805872757, "grad_norm": 0.01297854445874691, "learning_rate": 0.0008910070202505396, "loss": 0.0215, "num_input_tokens_seen": 77461632, "step": 35895 }, { "epoch": 5.856443719412725, "grad_norm": 0.012460933066904545, "learning_rate": 0.0008909626526944452, "loss": 0.0401, "num_input_tokens_seen": 77472864, "step": 35900 }, { "epoch": 5.857259380097879, "grad_norm": 0.0581069178879261, "learning_rate": 0.0008909182772149966, "loss": 0.0269, "num_input_tokens_seen": 77483776, "step": 35905 }, { "epoch": 5.858075040783034, "grad_norm": 0.03670242056250572, "learning_rate": 0.0008908738938130933, "loss": 0.0493, "num_input_tokens_seen": 77494496, "step": 35910 }, { "epoch": 5.858890701468189, "grad_norm": 0.5860180854797363, "learning_rate": 0.0008908295024896346, "loss": 0.1048, "num_input_tokens_seen": 77505184, "step": 35915 }, { "epoch": 5.859706362153344, "grad_norm": 0.22770962119102478, "learning_rate": 0.0008907851032455204, "loss": 0.1189, "num_input_tokens_seen": 77517056, "step": 35920 }, { "epoch": 5.8605220228384995, "grad_norm": 0.08153259009122849, "learning_rate": 0.0008907406960816502, "loss": 0.067, "num_input_tokens_seen": 77527840, "step": 35925 }, { "epoch": 5.861337683523654, "grad_norm": 0.06236327439546585, "learning_rate": 0.0008906962809989242, "loss": 0.0317, "num_input_tokens_seen": 77538368, "step": 35930 }, { "epoch": 5.862153344208809, "grad_norm": 0.011000140570104122, "learning_rate": 0.0008906518579982423, "loss": 0.069, "num_input_tokens_seen": 77549056, "step": 35935 }, { "epoch": 5.862969004893964, "grad_norm": 0.10868638008832932, "learning_rate": 0.000890607427080505, "loss": 0.1208, "num_input_tokens_seen": 77558848, "step": 35940 }, { "epoch": 5.863784665579119, "grad_norm": 0.21896480023860931, "learning_rate": 0.0008905629882466126, "loss": 0.0815, "num_input_tokens_seen": 77570720, "step": 35945 }, { "epoch": 5.864600326264274, "grad_norm": 0.007620544172823429, "learning_rate": 0.0008905185414974659, "loss": 0.1975, "num_input_tokens_seen": 77582720, "step": 35950 }, { "epoch": 5.865415986949429, "grad_norm": 0.008391725830733776, "learning_rate": 0.0008904740868339655, "loss": 0.1207, "num_input_tokens_seen": 77593728, "step": 35955 }, { "epoch": 5.866231647634584, "grad_norm": 0.08590381592512131, "learning_rate": 0.0008904296242570123, "loss": 0.1747, "num_input_tokens_seen": 77605792, "step": 35960 }, { "epoch": 5.867047308319739, "grad_norm": 0.03389144316315651, "learning_rate": 0.0008903851537675076, "loss": 0.1059, "num_input_tokens_seen": 77617024, "step": 35965 }, { "epoch": 5.867862969004894, "grad_norm": 0.055074598640203476, "learning_rate": 0.0008903406753663524, "loss": 0.0965, "num_input_tokens_seen": 77627136, "step": 35970 }, { "epoch": 5.868678629690049, "grad_norm": 0.11963813006877899, "learning_rate": 0.0008902961890544483, "loss": 0.14, "num_input_tokens_seen": 77638080, "step": 35975 }, { "epoch": 5.869494290375204, "grad_norm": 0.19323231279850006, "learning_rate": 0.0008902516948326967, "loss": 0.1134, "num_input_tokens_seen": 77648736, "step": 35980 }, { "epoch": 5.870309951060359, "grad_norm": 0.11363516747951508, "learning_rate": 0.0008902071927019996, "loss": 0.0875, "num_input_tokens_seen": 77660160, "step": 35985 }, { "epoch": 5.871125611745514, "grad_norm": 0.409029096364975, "learning_rate": 0.0008901626826632586, "loss": 0.1079, "num_input_tokens_seen": 77671072, "step": 35990 }, { "epoch": 5.871941272430669, "grad_norm": 0.6548612117767334, "learning_rate": 0.000890118164717376, "loss": 0.1684, "num_input_tokens_seen": 77682208, "step": 35995 }, { "epoch": 5.872756933115824, "grad_norm": 0.21544083952903748, "learning_rate": 0.0008900736388652537, "loss": 0.1887, "num_input_tokens_seen": 77692768, "step": 36000 }, { "epoch": 5.873572593800979, "grad_norm": 0.04487856104969978, "learning_rate": 0.0008900291051077944, "loss": 0.0267, "num_input_tokens_seen": 77704256, "step": 36005 }, { "epoch": 5.874388254486134, "grad_norm": 0.23634777963161469, "learning_rate": 0.0008899845634459005, "loss": 0.0581, "num_input_tokens_seen": 77716320, "step": 36010 }, { "epoch": 5.875203915171289, "grad_norm": 0.16273118555545807, "learning_rate": 0.0008899400138804748, "loss": 0.1202, "num_input_tokens_seen": 77726464, "step": 36015 }, { "epoch": 5.876019575856444, "grad_norm": 0.011688041500747204, "learning_rate": 0.0008898954564124197, "loss": 0.0175, "num_input_tokens_seen": 77736480, "step": 36020 }, { "epoch": 5.876835236541599, "grad_norm": 0.054035451263189316, "learning_rate": 0.0008898508910426388, "loss": 0.0498, "num_input_tokens_seen": 77746592, "step": 36025 }, { "epoch": 5.877650897226753, "grad_norm": 0.4161895513534546, "learning_rate": 0.0008898063177720351, "loss": 0.1098, "num_input_tokens_seen": 77757664, "step": 36030 }, { "epoch": 5.878466557911908, "grad_norm": 0.08277707546949387, "learning_rate": 0.0008897617366015118, "loss": 0.0481, "num_input_tokens_seen": 77769696, "step": 36035 }, { "epoch": 5.879282218597064, "grad_norm": 0.10127153247594833, "learning_rate": 0.0008897171475319723, "loss": 0.0518, "num_input_tokens_seen": 77780224, "step": 36040 }, { "epoch": 5.880097879282219, "grad_norm": 0.15637008845806122, "learning_rate": 0.0008896725505643206, "loss": 0.1078, "num_input_tokens_seen": 77790528, "step": 36045 }, { "epoch": 5.8809135399673735, "grad_norm": 0.03767627477645874, "learning_rate": 0.0008896279456994603, "loss": 0.1868, "num_input_tokens_seen": 77801536, "step": 36050 }, { "epoch": 5.881729200652528, "grad_norm": 0.3619842231273651, "learning_rate": 0.0008895833329382954, "loss": 0.1214, "num_input_tokens_seen": 77811808, "step": 36055 }, { "epoch": 5.882544861337683, "grad_norm": 0.3798603415489197, "learning_rate": 0.00088953871228173, "loss": 0.1087, "num_input_tokens_seen": 77822368, "step": 36060 }, { "epoch": 5.883360522022839, "grad_norm": 0.02396499738097191, "learning_rate": 0.0008894940837306685, "loss": 0.0244, "num_input_tokens_seen": 77834304, "step": 36065 }, { "epoch": 5.884176182707994, "grad_norm": 0.2254335731267929, "learning_rate": 0.000889449447286015, "loss": 0.0696, "num_input_tokens_seen": 77845696, "step": 36070 }, { "epoch": 5.8849918433931485, "grad_norm": 0.13282710313796997, "learning_rate": 0.0008894048029486748, "loss": 0.0763, "num_input_tokens_seen": 77854976, "step": 36075 }, { "epoch": 5.885807504078303, "grad_norm": 0.008264929056167603, "learning_rate": 0.0008893601507195521, "loss": 0.0147, "num_input_tokens_seen": 77865216, "step": 36080 }, { "epoch": 5.886623164763458, "grad_norm": 0.18794430792331696, "learning_rate": 0.000889315490599552, "loss": 0.2341, "num_input_tokens_seen": 77876352, "step": 36085 }, { "epoch": 5.887438825448614, "grad_norm": 0.21469272673130035, "learning_rate": 0.0008892708225895796, "loss": 0.054, "num_input_tokens_seen": 77887264, "step": 36090 }, { "epoch": 5.888254486133769, "grad_norm": 0.04470356926321983, "learning_rate": 0.0008892261466905402, "loss": 0.1782, "num_input_tokens_seen": 77897920, "step": 36095 }, { "epoch": 5.8890701468189235, "grad_norm": 0.23760774731636047, "learning_rate": 0.000889181462903339, "loss": 0.218, "num_input_tokens_seen": 77909600, "step": 36100 }, { "epoch": 5.889885807504078, "grad_norm": 0.010729491710662842, "learning_rate": 0.0008891367712288819, "loss": 0.1702, "num_input_tokens_seen": 77919840, "step": 36105 }, { "epoch": 5.890701468189233, "grad_norm": 0.2396470308303833, "learning_rate": 0.0008890920716680744, "loss": 0.1634, "num_input_tokens_seen": 77931104, "step": 36110 }, { "epoch": 5.891517128874388, "grad_norm": 0.18414612114429474, "learning_rate": 0.0008890473642218226, "loss": 0.0793, "num_input_tokens_seen": 77942240, "step": 36115 }, { "epoch": 5.892332789559543, "grad_norm": 0.18837104737758636, "learning_rate": 0.0008890026488910323, "loss": 0.0457, "num_input_tokens_seen": 77952448, "step": 36120 }, { "epoch": 5.8931484502446985, "grad_norm": 0.020916037261486053, "learning_rate": 0.0008889579256766098, "loss": 0.0765, "num_input_tokens_seen": 77963680, "step": 36125 }, { "epoch": 5.893964110929853, "grad_norm": 0.021567506715655327, "learning_rate": 0.0008889131945794618, "loss": 0.1257, "num_input_tokens_seen": 77974752, "step": 36130 }, { "epoch": 5.894779771615008, "grad_norm": 0.1524185836315155, "learning_rate": 0.0008888684556004942, "loss": 0.0777, "num_input_tokens_seen": 77984896, "step": 36135 }, { "epoch": 5.895595432300163, "grad_norm": 0.1279253512620926, "learning_rate": 0.0008888237087406141, "loss": 0.1406, "num_input_tokens_seen": 77994464, "step": 36140 }, { "epoch": 5.896411092985318, "grad_norm": 0.03425915911793709, "learning_rate": 0.0008887789540007285, "loss": 0.1018, "num_input_tokens_seen": 78005632, "step": 36145 }, { "epoch": 5.897226753670473, "grad_norm": 0.045344095677137375, "learning_rate": 0.000888734191381744, "loss": 0.0259, "num_input_tokens_seen": 78015680, "step": 36150 }, { "epoch": 5.898042414355628, "grad_norm": 0.07926657050848007, "learning_rate": 0.000888689420884568, "loss": 0.0776, "num_input_tokens_seen": 78026848, "step": 36155 }, { "epoch": 5.898858075040783, "grad_norm": 0.3574562966823578, "learning_rate": 0.0008886446425101078, "loss": 0.0966, "num_input_tokens_seen": 78038336, "step": 36160 }, { "epoch": 5.899673735725938, "grad_norm": 0.01838219352066517, "learning_rate": 0.0008885998562592709, "loss": 0.0941, "num_input_tokens_seen": 78049504, "step": 36165 }, { "epoch": 5.900489396411093, "grad_norm": 0.06573229283094406, "learning_rate": 0.0008885550621329649, "loss": 0.1173, "num_input_tokens_seen": 78060352, "step": 36170 }, { "epoch": 5.901305057096248, "grad_norm": 0.050442252308130264, "learning_rate": 0.0008885102601320976, "loss": 0.0945, "num_input_tokens_seen": 78070944, "step": 36175 }, { "epoch": 5.902120717781403, "grad_norm": 0.2188454419374466, "learning_rate": 0.0008884654502575771, "loss": 0.0384, "num_input_tokens_seen": 78082176, "step": 36180 }, { "epoch": 5.902936378466558, "grad_norm": 0.020083604380488396, "learning_rate": 0.0008884206325103115, "loss": 0.1171, "num_input_tokens_seen": 78091904, "step": 36185 }, { "epoch": 5.903752039151713, "grad_norm": 0.22508877515792847, "learning_rate": 0.000888375806891209, "loss": 0.0914, "num_input_tokens_seen": 78102432, "step": 36190 }, { "epoch": 5.904567699836868, "grad_norm": 0.12901021540164948, "learning_rate": 0.0008883309734011779, "loss": 0.1683, "num_input_tokens_seen": 78113248, "step": 36195 }, { "epoch": 5.9053833605220225, "grad_norm": 0.0814613327383995, "learning_rate": 0.0008882861320411273, "loss": 0.0931, "num_input_tokens_seen": 78123904, "step": 36200 }, { "epoch": 5.906199021207177, "grad_norm": 0.17218764126300812, "learning_rate": 0.0008882412828119655, "loss": 0.161, "num_input_tokens_seen": 78134304, "step": 36205 }, { "epoch": 5.907014681892333, "grad_norm": 0.024561315774917603, "learning_rate": 0.0008881964257146015, "loss": 0.1242, "num_input_tokens_seen": 78144992, "step": 36210 }, { "epoch": 5.907830342577488, "grad_norm": 0.010131190530955791, "learning_rate": 0.0008881515607499446, "loss": 0.045, "num_input_tokens_seen": 78155552, "step": 36215 }, { "epoch": 5.908646003262643, "grad_norm": 0.03763355314731598, "learning_rate": 0.000888106687918904, "loss": 0.017, "num_input_tokens_seen": 78166816, "step": 36220 }, { "epoch": 5.9094616639477975, "grad_norm": 0.5506182909011841, "learning_rate": 0.000888061807222389, "loss": 0.2033, "num_input_tokens_seen": 78177088, "step": 36225 }, { "epoch": 5.910277324632952, "grad_norm": 0.015695173293352127, "learning_rate": 0.000888016918661309, "loss": 0.0142, "num_input_tokens_seen": 78188576, "step": 36230 }, { "epoch": 5.911092985318108, "grad_norm": 0.020118433982133865, "learning_rate": 0.0008879720222365739, "loss": 0.1358, "num_input_tokens_seen": 78200000, "step": 36235 }, { "epoch": 5.911908646003263, "grad_norm": 0.053898733109235764, "learning_rate": 0.0008879271179490938, "loss": 0.0643, "num_input_tokens_seen": 78212288, "step": 36240 }, { "epoch": 5.912724306688418, "grad_norm": 0.025138895958662033, "learning_rate": 0.0008878822057997784, "loss": 0.127, "num_input_tokens_seen": 78222048, "step": 36245 }, { "epoch": 5.9135399673735725, "grad_norm": 0.3389056324958801, "learning_rate": 0.000887837285789538, "loss": 0.1031, "num_input_tokens_seen": 78231040, "step": 36250 }, { "epoch": 5.914355628058727, "grad_norm": 0.04555288702249527, "learning_rate": 0.0008877923579192831, "loss": 0.157, "num_input_tokens_seen": 78240928, "step": 36255 }, { "epoch": 5.915171288743883, "grad_norm": 0.010890844278037548, "learning_rate": 0.0008877474221899241, "loss": 0.1419, "num_input_tokens_seen": 78250240, "step": 36260 }, { "epoch": 5.915986949429038, "grad_norm": 0.05956060439348221, "learning_rate": 0.0008877024786023718, "loss": 0.0827, "num_input_tokens_seen": 78261760, "step": 36265 }, { "epoch": 5.916802610114193, "grad_norm": 0.03534997999668121, "learning_rate": 0.0008876575271575366, "loss": 0.1573, "num_input_tokens_seen": 78272640, "step": 36270 }, { "epoch": 5.917618270799347, "grad_norm": 0.1497540920972824, "learning_rate": 0.0008876125678563301, "loss": 0.1618, "num_input_tokens_seen": 78283328, "step": 36275 }, { "epoch": 5.918433931484502, "grad_norm": 0.10943331569433212, "learning_rate": 0.0008875676006996631, "loss": 0.0418, "num_input_tokens_seen": 78292320, "step": 36280 }, { "epoch": 5.919249592169657, "grad_norm": 0.11919853836297989, "learning_rate": 0.0008875226256884471, "loss": 0.0903, "num_input_tokens_seen": 78302688, "step": 36285 }, { "epoch": 5.920065252854813, "grad_norm": 0.28448113799095154, "learning_rate": 0.0008874776428235933, "loss": 0.0443, "num_input_tokens_seen": 78313216, "step": 36290 }, { "epoch": 5.920880913539968, "grad_norm": 0.10071628540754318, "learning_rate": 0.0008874326521060138, "loss": 0.0614, "num_input_tokens_seen": 78324128, "step": 36295 }, { "epoch": 5.921696574225122, "grad_norm": 0.08436565101146698, "learning_rate": 0.0008873876535366199, "loss": 0.1527, "num_input_tokens_seen": 78334336, "step": 36300 }, { "epoch": 5.922512234910277, "grad_norm": 0.2097802460193634, "learning_rate": 0.0008873426471163238, "loss": 0.135, "num_input_tokens_seen": 78346176, "step": 36305 }, { "epoch": 5.923327895595432, "grad_norm": 0.026297233998775482, "learning_rate": 0.0008872976328460376, "loss": 0.0546, "num_input_tokens_seen": 78356704, "step": 36310 }, { "epoch": 5.924143556280587, "grad_norm": 0.10168523341417313, "learning_rate": 0.0008872526107266736, "loss": 0.033, "num_input_tokens_seen": 78367680, "step": 36315 }, { "epoch": 5.924959216965743, "grad_norm": 0.08757545053958893, "learning_rate": 0.0008872075807591442, "loss": 0.3276, "num_input_tokens_seen": 78377920, "step": 36320 }, { "epoch": 5.925774877650897, "grad_norm": 0.16277466714382172, "learning_rate": 0.0008871625429443617, "loss": 0.0264, "num_input_tokens_seen": 78388512, "step": 36325 }, { "epoch": 5.926590538336052, "grad_norm": 0.03490889444947243, "learning_rate": 0.0008871174972832394, "loss": 0.1745, "num_input_tokens_seen": 78399712, "step": 36330 }, { "epoch": 5.927406199021207, "grad_norm": 0.026111353188753128, "learning_rate": 0.0008870724437766898, "loss": 0.016, "num_input_tokens_seen": 78409280, "step": 36335 }, { "epoch": 5.928221859706362, "grad_norm": 0.02281114272773266, "learning_rate": 0.0008870273824256261, "loss": 0.1145, "num_input_tokens_seen": 78419456, "step": 36340 }, { "epoch": 5.9290375203915175, "grad_norm": 0.04518705978989601, "learning_rate": 0.0008869823132309616, "loss": 0.1605, "num_input_tokens_seen": 78430624, "step": 36345 }, { "epoch": 5.929853181076672, "grad_norm": 0.09140415489673615, "learning_rate": 0.0008869372361936096, "loss": 0.0358, "num_input_tokens_seen": 78440864, "step": 36350 }, { "epoch": 5.930668841761827, "grad_norm": 0.02230074815452099, "learning_rate": 0.0008868921513144835, "loss": 0.0297, "num_input_tokens_seen": 78451360, "step": 36355 }, { "epoch": 5.931484502446982, "grad_norm": 0.024204522371292114, "learning_rate": 0.0008868470585944972, "loss": 0.1266, "num_input_tokens_seen": 78463296, "step": 36360 }, { "epoch": 5.932300163132137, "grad_norm": 0.3010368347167969, "learning_rate": 0.0008868019580345645, "loss": 0.1374, "num_input_tokens_seen": 78474272, "step": 36365 }, { "epoch": 5.933115823817292, "grad_norm": 0.12085027247667313, "learning_rate": 0.0008867568496355996, "loss": 0.0553, "num_input_tokens_seen": 78485408, "step": 36370 }, { "epoch": 5.933931484502447, "grad_norm": 0.010076077654957771, "learning_rate": 0.0008867117333985164, "loss": 0.1819, "num_input_tokens_seen": 78495488, "step": 36375 }, { "epoch": 5.934747145187602, "grad_norm": 0.17460212111473083, "learning_rate": 0.0008866666093242292, "loss": 0.0887, "num_input_tokens_seen": 78506176, "step": 36380 }, { "epoch": 5.935562805872757, "grad_norm": 0.08236898481845856, "learning_rate": 0.0008866214774136528, "loss": 0.0602, "num_input_tokens_seen": 78517024, "step": 36385 }, { "epoch": 5.936378466557912, "grad_norm": 0.019518408924341202, "learning_rate": 0.0008865763376677017, "loss": 0.1587, "num_input_tokens_seen": 78527168, "step": 36390 }, { "epoch": 5.937194127243067, "grad_norm": 0.06480106711387634, "learning_rate": 0.0008865311900872905, "loss": 0.1132, "num_input_tokens_seen": 78537824, "step": 36395 }, { "epoch": 5.938009787928221, "grad_norm": 0.16219420731067657, "learning_rate": 0.0008864860346733346, "loss": 0.0306, "num_input_tokens_seen": 78548192, "step": 36400 }, { "epoch": 5.938825448613377, "grad_norm": 0.24130693078041077, "learning_rate": 0.0008864408714267489, "loss": 0.1181, "num_input_tokens_seen": 78558240, "step": 36405 }, { "epoch": 5.939641109298532, "grad_norm": 0.2774330675601959, "learning_rate": 0.0008863957003484486, "loss": 0.2401, "num_input_tokens_seen": 78569408, "step": 36410 }, { "epoch": 5.940456769983687, "grad_norm": 0.10567296296358109, "learning_rate": 0.0008863505214393494, "loss": 0.0487, "num_input_tokens_seen": 78580480, "step": 36415 }, { "epoch": 5.941272430668842, "grad_norm": 0.026514258235692978, "learning_rate": 0.0008863053347003667, "loss": 0.0496, "num_input_tokens_seen": 78590720, "step": 36420 }, { "epoch": 5.942088091353996, "grad_norm": 0.022216591984033585, "learning_rate": 0.0008862601401324162, "loss": 0.1193, "num_input_tokens_seen": 78601312, "step": 36425 }, { "epoch": 5.942903752039152, "grad_norm": 0.1317899376153946, "learning_rate": 0.0008862149377364142, "loss": 0.0662, "num_input_tokens_seen": 78612928, "step": 36430 }, { "epoch": 5.943719412724307, "grad_norm": 0.06863513588905334, "learning_rate": 0.0008861697275132763, "loss": 0.0436, "num_input_tokens_seen": 78624384, "step": 36435 }, { "epoch": 5.944535073409462, "grad_norm": 0.28613516688346863, "learning_rate": 0.0008861245094639193, "loss": 0.1399, "num_input_tokens_seen": 78635392, "step": 36440 }, { "epoch": 5.945350734094617, "grad_norm": 0.0075575015507638454, "learning_rate": 0.000886079283589259, "loss": 0.0275, "num_input_tokens_seen": 78644800, "step": 36445 }, { "epoch": 5.946166394779771, "grad_norm": 0.13025657832622528, "learning_rate": 0.0008860340498902121, "loss": 0.1351, "num_input_tokens_seen": 78656288, "step": 36450 }, { "epoch": 5.946982055464927, "grad_norm": 0.013715657405555248, "learning_rate": 0.0008859888083676958, "loss": 0.0379, "num_input_tokens_seen": 78667840, "step": 36455 }, { "epoch": 5.947797716150082, "grad_norm": 0.02856520190834999, "learning_rate": 0.0008859435590226266, "loss": 0.0195, "num_input_tokens_seen": 78678176, "step": 36460 }, { "epoch": 5.948613376835237, "grad_norm": 0.00763371167704463, "learning_rate": 0.0008858983018559214, "loss": 0.1175, "num_input_tokens_seen": 78689984, "step": 36465 }, { "epoch": 5.9494290375203915, "grad_norm": 0.06063228100538254, "learning_rate": 0.0008858530368684977, "loss": 0.0443, "num_input_tokens_seen": 78700128, "step": 36470 }, { "epoch": 5.950244698205546, "grad_norm": 0.07167887687683105, "learning_rate": 0.0008858077640612727, "loss": 0.0621, "num_input_tokens_seen": 78710272, "step": 36475 }, { "epoch": 5.951060358890701, "grad_norm": 0.1954011470079422, "learning_rate": 0.0008857624834351639, "loss": 0.07, "num_input_tokens_seen": 78720736, "step": 36480 }, { "epoch": 5.951876019575856, "grad_norm": 0.2489968091249466, "learning_rate": 0.000885717194991089, "loss": 0.0427, "num_input_tokens_seen": 78732032, "step": 36485 }, { "epoch": 5.952691680261012, "grad_norm": 0.020654816180467606, "learning_rate": 0.0008856718987299656, "loss": 0.2218, "num_input_tokens_seen": 78743520, "step": 36490 }, { "epoch": 5.9535073409461665, "grad_norm": 0.009793770499527454, "learning_rate": 0.0008856265946527122, "loss": 0.1135, "num_input_tokens_seen": 78754368, "step": 36495 }, { "epoch": 5.954323001631321, "grad_norm": 0.027436191216111183, "learning_rate": 0.0008855812827602465, "loss": 0.1718, "num_input_tokens_seen": 78765888, "step": 36500 }, { "epoch": 5.955138662316476, "grad_norm": 0.34989839792251587, "learning_rate": 0.0008855359630534871, "loss": 0.1535, "num_input_tokens_seen": 78777472, "step": 36505 }, { "epoch": 5.955954323001631, "grad_norm": 0.19187739491462708, "learning_rate": 0.0008854906355333522, "loss": 0.0795, "num_input_tokens_seen": 78787680, "step": 36510 }, { "epoch": 5.956769983686787, "grad_norm": 0.016511663794517517, "learning_rate": 0.0008854453002007607, "loss": 0.022, "num_input_tokens_seen": 78799552, "step": 36515 }, { "epoch": 5.9575856443719415, "grad_norm": 0.01984327659010887, "learning_rate": 0.0008853999570566311, "loss": 0.0309, "num_input_tokens_seen": 78809408, "step": 36520 }, { "epoch": 5.958401305057096, "grad_norm": 0.21259474754333496, "learning_rate": 0.0008853546061018825, "loss": 0.0842, "num_input_tokens_seen": 78820960, "step": 36525 }, { "epoch": 5.959216965742251, "grad_norm": 0.2555185854434967, "learning_rate": 0.000885309247337434, "loss": 0.1311, "num_input_tokens_seen": 78831872, "step": 36530 }, { "epoch": 5.960032626427406, "grad_norm": 0.03929561749100685, "learning_rate": 0.0008852638807642048, "loss": 0.0164, "num_input_tokens_seen": 78843552, "step": 36535 }, { "epoch": 5.960848287112562, "grad_norm": 0.019497189670801163, "learning_rate": 0.0008852185063831142, "loss": 0.0733, "num_input_tokens_seen": 78854080, "step": 36540 }, { "epoch": 5.9616639477977165, "grad_norm": 0.42725521326065063, "learning_rate": 0.000885173124195082, "loss": 0.2263, "num_input_tokens_seen": 78864544, "step": 36545 }, { "epoch": 5.962479608482871, "grad_norm": 0.3349764049053192, "learning_rate": 0.0008851277342010278, "loss": 0.0864, "num_input_tokens_seen": 78875232, "step": 36550 }, { "epoch": 5.963295269168026, "grad_norm": 0.408931702375412, "learning_rate": 0.0008850823364018715, "loss": 0.0762, "num_input_tokens_seen": 78886752, "step": 36555 }, { "epoch": 5.964110929853181, "grad_norm": 0.11438414454460144, "learning_rate": 0.0008850369307985328, "loss": 0.05, "num_input_tokens_seen": 78896160, "step": 36560 }, { "epoch": 5.964926590538336, "grad_norm": 0.07769472151994705, "learning_rate": 0.0008849915173919327, "loss": 0.0806, "num_input_tokens_seen": 78907744, "step": 36565 }, { "epoch": 5.9657422512234906, "grad_norm": 0.017839863896369934, "learning_rate": 0.0008849460961829909, "loss": 0.0257, "num_input_tokens_seen": 78919456, "step": 36570 }, { "epoch": 5.966557911908646, "grad_norm": 0.1394793689250946, "learning_rate": 0.0008849006671726281, "loss": 0.0478, "num_input_tokens_seen": 78930112, "step": 36575 }, { "epoch": 5.967373572593801, "grad_norm": 0.31096503138542175, "learning_rate": 0.0008848552303617651, "loss": 0.1099, "num_input_tokens_seen": 78940640, "step": 36580 }, { "epoch": 5.968189233278956, "grad_norm": 0.1621881127357483, "learning_rate": 0.0008848097857513227, "loss": 0.0529, "num_input_tokens_seen": 78951744, "step": 36585 }, { "epoch": 5.969004893964111, "grad_norm": 0.10522038489580154, "learning_rate": 0.0008847643333422216, "loss": 0.0245, "num_input_tokens_seen": 78961856, "step": 36590 }, { "epoch": 5.9698205546492655, "grad_norm": 0.005367176607251167, "learning_rate": 0.0008847188731353833, "loss": 0.118, "num_input_tokens_seen": 78972064, "step": 36595 }, { "epoch": 5.970636215334421, "grad_norm": 0.1463940292596817, "learning_rate": 0.0008846734051317289, "loss": 0.1008, "num_input_tokens_seen": 78982656, "step": 36600 }, { "epoch": 5.971451876019576, "grad_norm": 0.09417103230953217, "learning_rate": 0.0008846279293321801, "loss": 0.0668, "num_input_tokens_seen": 78994048, "step": 36605 }, { "epoch": 5.972267536704731, "grad_norm": 0.011723408475518227, "learning_rate": 0.0008845824457376583, "loss": 0.1304, "num_input_tokens_seen": 79005280, "step": 36610 }, { "epoch": 5.973083197389886, "grad_norm": 0.949364960193634, "learning_rate": 0.0008845369543490853, "loss": 0.1086, "num_input_tokens_seen": 79016384, "step": 36615 }, { "epoch": 5.9738988580750405, "grad_norm": 0.009487105533480644, "learning_rate": 0.0008844914551673832, "loss": 0.0561, "num_input_tokens_seen": 79026656, "step": 36620 }, { "epoch": 5.974714518760196, "grad_norm": 0.43934065103530884, "learning_rate": 0.000884445948193474, "loss": 0.0783, "num_input_tokens_seen": 79036576, "step": 36625 }, { "epoch": 5.975530179445351, "grad_norm": 0.042962513864040375, "learning_rate": 0.0008844004334282801, "loss": 0.0733, "num_input_tokens_seen": 79046240, "step": 36630 }, { "epoch": 5.976345840130506, "grad_norm": 0.3583058714866638, "learning_rate": 0.0008843549108727234, "loss": 0.1904, "num_input_tokens_seen": 79056512, "step": 36635 }, { "epoch": 5.977161500815661, "grad_norm": 0.3396947979927063, "learning_rate": 0.0008843093805277271, "loss": 0.2137, "num_input_tokens_seen": 79065760, "step": 36640 }, { "epoch": 5.9779771615008155, "grad_norm": 0.01713588833808899, "learning_rate": 0.0008842638423942136, "loss": 0.0473, "num_input_tokens_seen": 79076128, "step": 36645 }, { "epoch": 5.97879282218597, "grad_norm": 0.09106364846229553, "learning_rate": 0.0008842182964731058, "loss": 0.0424, "num_input_tokens_seen": 79087360, "step": 36650 }, { "epoch": 5.979608482871125, "grad_norm": 0.4537774920463562, "learning_rate": 0.0008841727427653269, "loss": 0.1464, "num_input_tokens_seen": 79099808, "step": 36655 }, { "epoch": 5.980424143556281, "grad_norm": 0.14655371010303497, "learning_rate": 0.0008841271812717999, "loss": 0.0601, "num_input_tokens_seen": 79108000, "step": 36660 }, { "epoch": 5.981239804241436, "grad_norm": 0.05170466750860214, "learning_rate": 0.0008840816119934485, "loss": 0.0305, "num_input_tokens_seen": 79117728, "step": 36665 }, { "epoch": 5.9820554649265905, "grad_norm": 0.27158981561660767, "learning_rate": 0.0008840360349311958, "loss": 0.171, "num_input_tokens_seen": 79128928, "step": 36670 }, { "epoch": 5.982871125611745, "grad_norm": 0.0950988382101059, "learning_rate": 0.0008839904500859656, "loss": 0.0686, "num_input_tokens_seen": 79140544, "step": 36675 }, { "epoch": 5.9836867862969, "grad_norm": 0.013811898417770863, "learning_rate": 0.0008839448574586821, "loss": 0.0501, "num_input_tokens_seen": 79150528, "step": 36680 }, { "epoch": 5.984502446982056, "grad_norm": 0.23695848882198334, "learning_rate": 0.0008838992570502687, "loss": 0.106, "num_input_tokens_seen": 79161664, "step": 36685 }, { "epoch": 5.985318107667211, "grad_norm": 0.29405519366264343, "learning_rate": 0.0008838536488616499, "loss": 0.1543, "num_input_tokens_seen": 79171808, "step": 36690 }, { "epoch": 5.986133768352365, "grad_norm": 0.15233932435512543, "learning_rate": 0.0008838080328937501, "loss": 0.1326, "num_input_tokens_seen": 79182592, "step": 36695 }, { "epoch": 5.98694942903752, "grad_norm": 0.5910404920578003, "learning_rate": 0.0008837624091474935, "loss": 0.1294, "num_input_tokens_seen": 79193344, "step": 36700 }, { "epoch": 5.987765089722675, "grad_norm": 0.32317015528678894, "learning_rate": 0.0008837167776238049, "loss": 0.1999, "num_input_tokens_seen": 79204320, "step": 36705 }, { "epoch": 5.988580750407831, "grad_norm": 0.020873652771115303, "learning_rate": 0.0008836711383236089, "loss": 0.1119, "num_input_tokens_seen": 79215072, "step": 36710 }, { "epoch": 5.989396411092986, "grad_norm": 0.031689584255218506, "learning_rate": 0.0008836254912478308, "loss": 0.0375, "num_input_tokens_seen": 79226816, "step": 36715 }, { "epoch": 5.99021207177814, "grad_norm": 0.4548819661140442, "learning_rate": 0.0008835798363973952, "loss": 0.0451, "num_input_tokens_seen": 79237920, "step": 36720 }, { "epoch": 5.991027732463295, "grad_norm": 0.3833901584148407, "learning_rate": 0.0008835341737732276, "loss": 0.0917, "num_input_tokens_seen": 79249120, "step": 36725 }, { "epoch": 5.99184339314845, "grad_norm": 0.45743510127067566, "learning_rate": 0.0008834885033762536, "loss": 0.0686, "num_input_tokens_seen": 79258432, "step": 36730 }, { "epoch": 5.992659053833605, "grad_norm": 0.05050883814692497, "learning_rate": 0.0008834428252073986, "loss": 0.0372, "num_input_tokens_seen": 79269280, "step": 36735 }, { "epoch": 5.993474714518761, "grad_norm": 0.08462905138731003, "learning_rate": 0.0008833971392675882, "loss": 0.104, "num_input_tokens_seen": 79280128, "step": 36740 }, { "epoch": 5.994290375203915, "grad_norm": 0.1325332224369049, "learning_rate": 0.0008833514455577485, "loss": 0.0848, "num_input_tokens_seen": 79289728, "step": 36745 }, { "epoch": 5.99510603588907, "grad_norm": 0.1361333727836609, "learning_rate": 0.0008833057440788053, "loss": 0.0467, "num_input_tokens_seen": 79301088, "step": 36750 }, { "epoch": 5.995921696574225, "grad_norm": 0.005999071057885885, "learning_rate": 0.000883260034831685, "loss": 0.0627, "num_input_tokens_seen": 79311456, "step": 36755 }, { "epoch": 5.99673735725938, "grad_norm": 0.20180122554302216, "learning_rate": 0.000883214317817314, "loss": 0.1282, "num_input_tokens_seen": 79322304, "step": 36760 }, { "epoch": 5.997553017944535, "grad_norm": 0.0356915220618248, "learning_rate": 0.0008831685930366187, "loss": 0.1549, "num_input_tokens_seen": 79332960, "step": 36765 }, { "epoch": 5.99836867862969, "grad_norm": 0.06853177398443222, "learning_rate": 0.0008831228604905257, "loss": 0.2317, "num_input_tokens_seen": 79344000, "step": 36770 }, { "epoch": 5.999184339314845, "grad_norm": 0.11068347096443176, "learning_rate": 0.0008830771201799619, "loss": 0.1963, "num_input_tokens_seen": 79353824, "step": 36775 }, { "epoch": 6.0, "grad_norm": 0.02611491270363331, "learning_rate": 0.0008830313721058543, "loss": 0.0673, "num_input_tokens_seen": 79364192, "step": 36780 }, { "epoch": 6.0, "eval_loss": 0.12947814166545868, "eval_runtime": 104.471, "eval_samples_per_second": 26.084, "eval_steps_per_second": 6.528, "num_input_tokens_seen": 79364192, "step": 36780 }, { "epoch": 6.000815660685155, "grad_norm": 0.16653205454349518, "learning_rate": 0.00088298561626913, "loss": 0.0389, "num_input_tokens_seen": 79374304, "step": 36785 }, { "epoch": 6.00163132137031, "grad_norm": 0.2460404336452484, "learning_rate": 0.0008829398526707164, "loss": 0.1395, "num_input_tokens_seen": 79385472, "step": 36790 }, { "epoch": 6.002446982055465, "grad_norm": 0.016366058960556984, "learning_rate": 0.0008828940813115408, "loss": 0.0757, "num_input_tokens_seen": 79396256, "step": 36795 }, { "epoch": 6.00326264274062, "grad_norm": 0.2952450215816498, "learning_rate": 0.000882848302192531, "loss": 0.1255, "num_input_tokens_seen": 79407456, "step": 36800 }, { "epoch": 6.004078303425775, "grad_norm": 0.09208202362060547, "learning_rate": 0.0008828025153146147, "loss": 0.1378, "num_input_tokens_seen": 79418464, "step": 36805 }, { "epoch": 6.00489396411093, "grad_norm": 0.02729138545691967, "learning_rate": 0.0008827567206787197, "loss": 0.0352, "num_input_tokens_seen": 79430272, "step": 36810 }, { "epoch": 6.005709624796085, "grad_norm": 0.03895847126841545, "learning_rate": 0.0008827109182857742, "loss": 0.0297, "num_input_tokens_seen": 79440864, "step": 36815 }, { "epoch": 6.006525285481239, "grad_norm": 0.25856494903564453, "learning_rate": 0.0008826651081367065, "loss": 0.0947, "num_input_tokens_seen": 79452640, "step": 36820 }, { "epoch": 6.007340946166395, "grad_norm": 0.013391979038715363, "learning_rate": 0.0008826192902324449, "loss": 0.0213, "num_input_tokens_seen": 79462656, "step": 36825 }, { "epoch": 6.00815660685155, "grad_norm": 0.1911272257566452, "learning_rate": 0.0008825734645739181, "loss": 0.0539, "num_input_tokens_seen": 79474144, "step": 36830 }, { "epoch": 6.008972267536705, "grad_norm": 0.23752671480178833, "learning_rate": 0.0008825276311620546, "loss": 0.0366, "num_input_tokens_seen": 79483392, "step": 36835 }, { "epoch": 6.00978792822186, "grad_norm": 0.10461752861738205, "learning_rate": 0.0008824817899977834, "loss": 0.107, "num_input_tokens_seen": 79495104, "step": 36840 }, { "epoch": 6.010603588907014, "grad_norm": 0.1776762157678604, "learning_rate": 0.0008824359410820335, "loss": 0.2274, "num_input_tokens_seen": 79505824, "step": 36845 }, { "epoch": 6.011419249592169, "grad_norm": 0.1347481608390808, "learning_rate": 0.0008823900844157342, "loss": 0.1716, "num_input_tokens_seen": 79517440, "step": 36850 }, { "epoch": 6.012234910277325, "grad_norm": 0.25701335072517395, "learning_rate": 0.0008823442199998147, "loss": 0.0409, "num_input_tokens_seen": 79528864, "step": 36855 }, { "epoch": 6.01305057096248, "grad_norm": 0.029766852036118507, "learning_rate": 0.0008822983478352044, "loss": 0.083, "num_input_tokens_seen": 79538560, "step": 36860 }, { "epoch": 6.013866231647635, "grad_norm": 0.515079140663147, "learning_rate": 0.0008822524679228332, "loss": 0.1564, "num_input_tokens_seen": 79549600, "step": 36865 }, { "epoch": 6.014681892332789, "grad_norm": 0.0366225391626358, "learning_rate": 0.0008822065802636308, "loss": 0.0674, "num_input_tokens_seen": 79560896, "step": 36870 }, { "epoch": 6.015497553017944, "grad_norm": 0.02549532987177372, "learning_rate": 0.0008821606848585273, "loss": 0.1236, "num_input_tokens_seen": 79571488, "step": 36875 }, { "epoch": 6.0163132137031, "grad_norm": 0.089844711124897, "learning_rate": 0.0008821147817084526, "loss": 0.0186, "num_input_tokens_seen": 79582368, "step": 36880 }, { "epoch": 6.017128874388255, "grad_norm": 0.4777730405330658, "learning_rate": 0.0008820688708143372, "loss": 0.2186, "num_input_tokens_seen": 79593760, "step": 36885 }, { "epoch": 6.0179445350734095, "grad_norm": 0.4216669201850891, "learning_rate": 0.0008820229521771112, "loss": 0.0839, "num_input_tokens_seen": 79603040, "step": 36890 }, { "epoch": 6.018760195758564, "grad_norm": 0.23115593194961548, "learning_rate": 0.0008819770257977058, "loss": 0.0806, "num_input_tokens_seen": 79612928, "step": 36895 }, { "epoch": 6.019575856443719, "grad_norm": 0.033511899411678314, "learning_rate": 0.0008819310916770511, "loss": 0.1122, "num_input_tokens_seen": 79624320, "step": 36900 }, { "epoch": 6.020391517128874, "grad_norm": 0.07824668288230896, "learning_rate": 0.0008818851498160785, "loss": 0.1404, "num_input_tokens_seen": 79635616, "step": 36905 }, { "epoch": 6.02120717781403, "grad_norm": 0.04387859255075455, "learning_rate": 0.0008818392002157188, "loss": 0.0509, "num_input_tokens_seen": 79645152, "step": 36910 }, { "epoch": 6.0220228384991845, "grad_norm": 0.3376307189464569, "learning_rate": 0.0008817932428769033, "loss": 0.0792, "num_input_tokens_seen": 79656864, "step": 36915 }, { "epoch": 6.022838499184339, "grad_norm": 0.6046404838562012, "learning_rate": 0.0008817472778005635, "loss": 0.163, "num_input_tokens_seen": 79668480, "step": 36920 }, { "epoch": 6.023654159869494, "grad_norm": 0.19403912127017975, "learning_rate": 0.0008817013049876308, "loss": 0.0571, "num_input_tokens_seen": 79679776, "step": 36925 }, { "epoch": 6.024469820554649, "grad_norm": 0.04723694175481796, "learning_rate": 0.0008816553244390368, "loss": 0.254, "num_input_tokens_seen": 79690464, "step": 36930 }, { "epoch": 6.025285481239805, "grad_norm": 0.0326119028031826, "learning_rate": 0.0008816093361557136, "loss": 0.0185, "num_input_tokens_seen": 79701696, "step": 36935 }, { "epoch": 6.0261011419249595, "grad_norm": 0.1328343152999878, "learning_rate": 0.0008815633401385932, "loss": 0.1224, "num_input_tokens_seen": 79712032, "step": 36940 }, { "epoch": 6.026916802610114, "grad_norm": 0.032321348786354065, "learning_rate": 0.0008815173363886075, "loss": 0.1284, "num_input_tokens_seen": 79721536, "step": 36945 }, { "epoch": 6.027732463295269, "grad_norm": 0.10826992988586426, "learning_rate": 0.000881471324906689, "loss": 0.1083, "num_input_tokens_seen": 79731456, "step": 36950 }, { "epoch": 6.028548123980424, "grad_norm": 0.11952725797891617, "learning_rate": 0.0008814253056937702, "loss": 0.0405, "num_input_tokens_seen": 79743264, "step": 36955 }, { "epoch": 6.029363784665579, "grad_norm": 0.08245185017585754, "learning_rate": 0.0008813792787507837, "loss": 0.0873, "num_input_tokens_seen": 79755008, "step": 36960 }, { "epoch": 6.0301794453507345, "grad_norm": 0.0556970052421093, "learning_rate": 0.0008813332440786623, "loss": 0.0298, "num_input_tokens_seen": 79766720, "step": 36965 }, { "epoch": 6.030995106035889, "grad_norm": 0.6840552091598511, "learning_rate": 0.0008812872016783389, "loss": 0.0935, "num_input_tokens_seen": 79778080, "step": 36970 }, { "epoch": 6.031810766721044, "grad_norm": 0.19289150834083557, "learning_rate": 0.0008812411515507468, "loss": 0.1668, "num_input_tokens_seen": 79788736, "step": 36975 }, { "epoch": 6.032626427406199, "grad_norm": 0.09962446242570877, "learning_rate": 0.000881195093696819, "loss": 0.1355, "num_input_tokens_seen": 79800576, "step": 36980 }, { "epoch": 6.033442088091354, "grad_norm": 0.4618459641933441, "learning_rate": 0.000881149028117489, "loss": 0.1988, "num_input_tokens_seen": 79812256, "step": 36985 }, { "epoch": 6.034257748776509, "grad_norm": 0.04367858171463013, "learning_rate": 0.0008811029548136906, "loss": 0.1321, "num_input_tokens_seen": 79823744, "step": 36990 }, { "epoch": 6.035073409461664, "grad_norm": 0.12380357831716537, "learning_rate": 0.0008810568737863574, "loss": 0.0689, "num_input_tokens_seen": 79833120, "step": 36995 }, { "epoch": 6.035889070146819, "grad_norm": 0.38375481963157654, "learning_rate": 0.000881010785036423, "loss": 0.1567, "num_input_tokens_seen": 79843232, "step": 37000 }, { "epoch": 6.036704730831974, "grad_norm": 0.013430536724627018, "learning_rate": 0.0008809646885648218, "loss": 0.0563, "num_input_tokens_seen": 79854016, "step": 37005 }, { "epoch": 6.037520391517129, "grad_norm": 0.039799146354198456, "learning_rate": 0.000880918584372488, "loss": 0.105, "num_input_tokens_seen": 79864640, "step": 37010 }, { "epoch": 6.0383360522022835, "grad_norm": 0.1629927009344101, "learning_rate": 0.0008808724724603558, "loss": 0.0764, "num_input_tokens_seen": 79876000, "step": 37015 }, { "epoch": 6.039151712887439, "grad_norm": 0.037636782974004745, "learning_rate": 0.0008808263528293596, "loss": 0.1005, "num_input_tokens_seen": 79887328, "step": 37020 }, { "epoch": 6.039967373572594, "grad_norm": 0.19488772749900818, "learning_rate": 0.0008807802254804344, "loss": 0.0804, "num_input_tokens_seen": 79897024, "step": 37025 }, { "epoch": 6.040783034257749, "grad_norm": 0.006130755878984928, "learning_rate": 0.000880734090414515, "loss": 0.0334, "num_input_tokens_seen": 79907616, "step": 37030 }, { "epoch": 6.041598694942904, "grad_norm": 0.01621272601187229, "learning_rate": 0.000880687947632536, "loss": 0.0952, "num_input_tokens_seen": 79916864, "step": 37035 }, { "epoch": 6.0424143556280585, "grad_norm": 0.17209812998771667, "learning_rate": 0.000880641797135433, "loss": 0.0653, "num_input_tokens_seen": 79926816, "step": 37040 }, { "epoch": 6.043230016313213, "grad_norm": 0.018628334626555443, "learning_rate": 0.000880595638924141, "loss": 0.1279, "num_input_tokens_seen": 79937504, "step": 37045 }, { "epoch": 6.044045676998369, "grad_norm": 0.08120463043451309, "learning_rate": 0.0008805494729995957, "loss": 0.0403, "num_input_tokens_seen": 79948768, "step": 37050 }, { "epoch": 6.044861337683524, "grad_norm": 0.37048813700675964, "learning_rate": 0.0008805032993627324, "loss": 0.1643, "num_input_tokens_seen": 79959328, "step": 37055 }, { "epoch": 6.045676998368679, "grad_norm": 0.6608492136001587, "learning_rate": 0.0008804571180144871, "loss": 0.099, "num_input_tokens_seen": 79970720, "step": 37060 }, { "epoch": 6.0464926590538335, "grad_norm": 0.30889031291007996, "learning_rate": 0.0008804109289557956, "loss": 0.0553, "num_input_tokens_seen": 79982336, "step": 37065 }, { "epoch": 6.047308319738988, "grad_norm": 0.6421380639076233, "learning_rate": 0.0008803647321875942, "loss": 0.1402, "num_input_tokens_seen": 79992064, "step": 37070 }, { "epoch": 6.048123980424143, "grad_norm": 0.07220091670751572, "learning_rate": 0.0008803185277108188, "loss": 0.0705, "num_input_tokens_seen": 80001216, "step": 37075 }, { "epoch": 6.048939641109299, "grad_norm": 0.01507633924484253, "learning_rate": 0.0008802723155264061, "loss": 0.0219, "num_input_tokens_seen": 80011648, "step": 37080 }, { "epoch": 6.049755301794454, "grad_norm": 0.0792185589671135, "learning_rate": 0.0008802260956352924, "loss": 0.0926, "num_input_tokens_seen": 80021856, "step": 37085 }, { "epoch": 6.0505709624796085, "grad_norm": 0.005553775001317263, "learning_rate": 0.0008801798680384145, "loss": 0.2757, "num_input_tokens_seen": 80033120, "step": 37090 }, { "epoch": 6.051386623164763, "grad_norm": 0.12523257732391357, "learning_rate": 0.0008801336327367096, "loss": 0.0638, "num_input_tokens_seen": 80044064, "step": 37095 }, { "epoch": 6.052202283849918, "grad_norm": 0.4345572292804718, "learning_rate": 0.0008800873897311141, "loss": 0.1134, "num_input_tokens_seen": 80054656, "step": 37100 }, { "epoch": 6.053017944535074, "grad_norm": 0.31900763511657715, "learning_rate": 0.0008800411390225655, "loss": 0.1515, "num_input_tokens_seen": 80065696, "step": 37105 }, { "epoch": 6.053833605220229, "grad_norm": 0.15819565951824188, "learning_rate": 0.000879994880612001, "loss": 0.0686, "num_input_tokens_seen": 80075648, "step": 37110 }, { "epoch": 6.054649265905383, "grad_norm": 0.08624502271413803, "learning_rate": 0.0008799486145003583, "loss": 0.0459, "num_input_tokens_seen": 80085728, "step": 37115 }, { "epoch": 6.055464926590538, "grad_norm": 0.0157896988093853, "learning_rate": 0.0008799023406885751, "loss": 0.028, "num_input_tokens_seen": 80097248, "step": 37120 }, { "epoch": 6.056280587275693, "grad_norm": 0.04613260179758072, "learning_rate": 0.0008798560591775889, "loss": 0.0432, "num_input_tokens_seen": 80108288, "step": 37125 }, { "epoch": 6.057096247960848, "grad_norm": 0.03548629581928253, "learning_rate": 0.0008798097699683376, "loss": 0.0547, "num_input_tokens_seen": 80119808, "step": 37130 }, { "epoch": 6.057911908646004, "grad_norm": 0.4876565635204315, "learning_rate": 0.0008797634730617598, "loss": 0.2471, "num_input_tokens_seen": 80131712, "step": 37135 }, { "epoch": 6.058727569331158, "grad_norm": 0.7909441590309143, "learning_rate": 0.0008797171684587933, "loss": 0.1106, "num_input_tokens_seen": 80140992, "step": 37140 }, { "epoch": 6.059543230016313, "grad_norm": 0.04108560457825661, "learning_rate": 0.0008796708561603766, "loss": 0.0789, "num_input_tokens_seen": 80150144, "step": 37145 }, { "epoch": 6.060358890701468, "grad_norm": 0.0329439677298069, "learning_rate": 0.0008796245361674484, "loss": 0.1389, "num_input_tokens_seen": 80161696, "step": 37150 }, { "epoch": 6.061174551386623, "grad_norm": 0.18954244256019592, "learning_rate": 0.0008795782084809473, "loss": 0.1201, "num_input_tokens_seen": 80172128, "step": 37155 }, { "epoch": 6.061990212071779, "grad_norm": 0.033920902758836746, "learning_rate": 0.0008795318731018124, "loss": 0.0314, "num_input_tokens_seen": 80184896, "step": 37160 }, { "epoch": 6.062805872756933, "grad_norm": 0.027078086510300636, "learning_rate": 0.0008794855300309827, "loss": 0.0118, "num_input_tokens_seen": 80195392, "step": 37165 }, { "epoch": 6.063621533442088, "grad_norm": 0.004350304137915373, "learning_rate": 0.0008794391792693973, "loss": 0.0518, "num_input_tokens_seen": 80205280, "step": 37170 }, { "epoch": 6.064437194127243, "grad_norm": 0.15988744795322418, "learning_rate": 0.0008793928208179955, "loss": 0.107, "num_input_tokens_seen": 80215328, "step": 37175 }, { "epoch": 6.065252854812398, "grad_norm": 0.10844093561172485, "learning_rate": 0.000879346454677717, "loss": 0.1112, "num_input_tokens_seen": 80225600, "step": 37180 }, { "epoch": 6.066068515497553, "grad_norm": 0.06532759219408035, "learning_rate": 0.0008793000808495012, "loss": 0.0304, "num_input_tokens_seen": 80235680, "step": 37185 }, { "epoch": 6.066884176182708, "grad_norm": 0.20083583891391754, "learning_rate": 0.0008792536993342882, "loss": 0.1577, "num_input_tokens_seen": 80246112, "step": 37190 }, { "epoch": 6.067699836867863, "grad_norm": 0.037911564111709595, "learning_rate": 0.0008792073101330177, "loss": 0.0537, "num_input_tokens_seen": 80257824, "step": 37195 }, { "epoch": 6.068515497553018, "grad_norm": 0.05030696839094162, "learning_rate": 0.00087916091324663, "loss": 0.015, "num_input_tokens_seen": 80270368, "step": 37200 }, { "epoch": 6.069331158238173, "grad_norm": 0.07539231330156326, "learning_rate": 0.0008791145086760656, "loss": 0.0522, "num_input_tokens_seen": 80280864, "step": 37205 }, { "epoch": 6.070146818923328, "grad_norm": 0.11466151475906372, "learning_rate": 0.0008790680964222647, "loss": 0.0204, "num_input_tokens_seen": 80290496, "step": 37210 }, { "epoch": 6.0709624796084825, "grad_norm": 0.009491103701293468, "learning_rate": 0.000879021676486168, "loss": 0.0588, "num_input_tokens_seen": 80301856, "step": 37215 }, { "epoch": 6.071778140293638, "grad_norm": 0.4161435067653656, "learning_rate": 0.0008789752488687159, "loss": 0.0555, "num_input_tokens_seen": 80314304, "step": 37220 }, { "epoch": 6.072593800978793, "grad_norm": 0.13105103373527527, "learning_rate": 0.00087892881357085, "loss": 0.0758, "num_input_tokens_seen": 80324928, "step": 37225 }, { "epoch": 6.073409461663948, "grad_norm": 0.045078426599502563, "learning_rate": 0.0008788823705935107, "loss": 0.1719, "num_input_tokens_seen": 80335712, "step": 37230 }, { "epoch": 6.074225122349103, "grad_norm": 0.07596886903047562, "learning_rate": 0.0008788359199376396, "loss": 0.0689, "num_input_tokens_seen": 80346112, "step": 37235 }, { "epoch": 6.075040783034257, "grad_norm": 0.1532737910747528, "learning_rate": 0.0008787894616041781, "loss": 0.2429, "num_input_tokens_seen": 80355232, "step": 37240 }, { "epoch": 6.075856443719413, "grad_norm": 0.03510677441954613, "learning_rate": 0.0008787429955940675, "loss": 0.1764, "num_input_tokens_seen": 80366464, "step": 37245 }, { "epoch": 6.076672104404568, "grad_norm": 0.2712423801422119, "learning_rate": 0.0008786965219082497, "loss": 0.2319, "num_input_tokens_seen": 80377696, "step": 37250 }, { "epoch": 6.077487765089723, "grad_norm": 0.009527994319796562, "learning_rate": 0.0008786500405476664, "loss": 0.0082, "num_input_tokens_seen": 80389472, "step": 37255 }, { "epoch": 6.078303425774878, "grad_norm": 0.015354527160525322, "learning_rate": 0.0008786035515132598, "loss": 0.0284, "num_input_tokens_seen": 80400416, "step": 37260 }, { "epoch": 6.079119086460032, "grad_norm": 0.36544013023376465, "learning_rate": 0.0008785570548059718, "loss": 0.1677, "num_input_tokens_seen": 80411488, "step": 37265 }, { "epoch": 6.079934747145187, "grad_norm": 0.012820636853575706, "learning_rate": 0.0008785105504267449, "loss": 0.0872, "num_input_tokens_seen": 80421664, "step": 37270 }, { "epoch": 6.080750407830343, "grad_norm": 0.08892273157835007, "learning_rate": 0.0008784640383765215, "loss": 0.0219, "num_input_tokens_seen": 80432864, "step": 37275 }, { "epoch": 6.081566068515498, "grad_norm": 0.12562714517116547, "learning_rate": 0.0008784175186562442, "loss": 0.1648, "num_input_tokens_seen": 80442080, "step": 37280 }, { "epoch": 6.082381729200653, "grad_norm": 0.018611062318086624, "learning_rate": 0.000878370991266856, "loss": 0.1374, "num_input_tokens_seen": 80452768, "step": 37285 }, { "epoch": 6.083197389885807, "grad_norm": 0.38506850600242615, "learning_rate": 0.0008783244562092996, "loss": 0.0831, "num_input_tokens_seen": 80463264, "step": 37290 }, { "epoch": 6.084013050570962, "grad_norm": 0.10411655902862549, "learning_rate": 0.0008782779134845181, "loss": 0.1503, "num_input_tokens_seen": 80473248, "step": 37295 }, { "epoch": 6.084828711256117, "grad_norm": 0.28278854489326477, "learning_rate": 0.0008782313630934548, "loss": 0.0557, "num_input_tokens_seen": 80482848, "step": 37300 }, { "epoch": 6.085644371941273, "grad_norm": 0.03771654888987541, "learning_rate": 0.0008781848050370531, "loss": 0.0656, "num_input_tokens_seen": 80492128, "step": 37305 }, { "epoch": 6.0864600326264275, "grad_norm": 0.05060260742902756, "learning_rate": 0.0008781382393162566, "loss": 0.0698, "num_input_tokens_seen": 80501376, "step": 37310 }, { "epoch": 6.087275693311582, "grad_norm": 0.09077697992324829, "learning_rate": 0.0008780916659320091, "loss": 0.0166, "num_input_tokens_seen": 80511776, "step": 37315 }, { "epoch": 6.088091353996737, "grad_norm": 0.10223732888698578, "learning_rate": 0.0008780450848852541, "loss": 0.1069, "num_input_tokens_seen": 80523328, "step": 37320 }, { "epoch": 6.088907014681892, "grad_norm": 0.05045846849679947, "learning_rate": 0.0008779984961769361, "loss": 0.0449, "num_input_tokens_seen": 80534304, "step": 37325 }, { "epoch": 6.089722675367048, "grad_norm": 0.38617292046546936, "learning_rate": 0.0008779518998079988, "loss": 0.12, "num_input_tokens_seen": 80544832, "step": 37330 }, { "epoch": 6.0905383360522025, "grad_norm": 0.019382769241929054, "learning_rate": 0.000877905295779387, "loss": 0.0595, "num_input_tokens_seen": 80555104, "step": 37335 }, { "epoch": 6.091353996737357, "grad_norm": 0.007274582050740719, "learning_rate": 0.0008778586840920449, "loss": 0.1076, "num_input_tokens_seen": 80565376, "step": 37340 }, { "epoch": 6.092169657422512, "grad_norm": 0.01822720840573311, "learning_rate": 0.0008778120647469172, "loss": 0.0131, "num_input_tokens_seen": 80575680, "step": 37345 }, { "epoch": 6.092985318107667, "grad_norm": 0.3150451183319092, "learning_rate": 0.0008777654377449487, "loss": 0.0564, "num_input_tokens_seen": 80586112, "step": 37350 }, { "epoch": 6.093800978792822, "grad_norm": 0.04134416580200195, "learning_rate": 0.0008777188030870845, "loss": 0.0247, "num_input_tokens_seen": 80597632, "step": 37355 }, { "epoch": 6.0946166394779775, "grad_norm": 0.07903924584388733, "learning_rate": 0.0008776721607742695, "loss": 0.0964, "num_input_tokens_seen": 80609024, "step": 37360 }, { "epoch": 6.095432300163132, "grad_norm": 0.36570823192596436, "learning_rate": 0.0008776255108074489, "loss": 0.3219, "num_input_tokens_seen": 80619456, "step": 37365 }, { "epoch": 6.096247960848287, "grad_norm": 0.014795482158660889, "learning_rate": 0.0008775788531875685, "loss": 0.0364, "num_input_tokens_seen": 80629920, "step": 37370 }, { "epoch": 6.097063621533442, "grad_norm": 0.22917284071445465, "learning_rate": 0.0008775321879155735, "loss": 0.0637, "num_input_tokens_seen": 80640320, "step": 37375 }, { "epoch": 6.097879282218597, "grad_norm": 0.09315807372331619, "learning_rate": 0.0008774855149924099, "loss": 0.0394, "num_input_tokens_seen": 80649792, "step": 37380 }, { "epoch": 6.0986949429037525, "grad_norm": 0.09622371196746826, "learning_rate": 0.0008774388344190234, "loss": 0.0457, "num_input_tokens_seen": 80661632, "step": 37385 }, { "epoch": 6.099510603588907, "grad_norm": 0.002638316247612238, "learning_rate": 0.0008773921461963601, "loss": 0.0487, "num_input_tokens_seen": 80671904, "step": 37390 }, { "epoch": 6.100326264274062, "grad_norm": 0.16990534961223602, "learning_rate": 0.0008773454503253662, "loss": 0.0839, "num_input_tokens_seen": 80683424, "step": 37395 }, { "epoch": 6.101141924959217, "grad_norm": 0.14464055001735687, "learning_rate": 0.0008772987468069881, "loss": 0.0547, "num_input_tokens_seen": 80692736, "step": 37400 }, { "epoch": 6.101957585644372, "grad_norm": 0.11268597841262817, "learning_rate": 0.0008772520356421723, "loss": 0.089, "num_input_tokens_seen": 80703008, "step": 37405 }, { "epoch": 6.102773246329527, "grad_norm": 0.24785836040973663, "learning_rate": 0.0008772053168318653, "loss": 0.0411, "num_input_tokens_seen": 80714016, "step": 37410 }, { "epoch": 6.103588907014682, "grad_norm": 0.036350879818201065, "learning_rate": 0.000877158590377014, "loss": 0.0196, "num_input_tokens_seen": 80724736, "step": 37415 }, { "epoch": 6.104404567699837, "grad_norm": 0.1829647570848465, "learning_rate": 0.0008771118562785656, "loss": 0.023, "num_input_tokens_seen": 80736768, "step": 37420 }, { "epoch": 6.105220228384992, "grad_norm": 0.0030466562602669, "learning_rate": 0.0008770651145374669, "loss": 0.032, "num_input_tokens_seen": 80747840, "step": 37425 }, { "epoch": 6.106035889070147, "grad_norm": 0.3722741901874542, "learning_rate": 0.0008770183651546653, "loss": 0.0443, "num_input_tokens_seen": 80759712, "step": 37430 }, { "epoch": 6.1068515497553015, "grad_norm": 0.0066236103884875774, "learning_rate": 0.0008769716081311083, "loss": 0.0429, "num_input_tokens_seen": 80770240, "step": 37435 }, { "epoch": 6.107667210440456, "grad_norm": 0.009392821229994297, "learning_rate": 0.0008769248434677434, "loss": 0.2414, "num_input_tokens_seen": 80779936, "step": 37440 }, { "epoch": 6.108482871125612, "grad_norm": 0.21216735243797302, "learning_rate": 0.0008768780711655185, "loss": 0.1942, "num_input_tokens_seen": 80789856, "step": 37445 }, { "epoch": 6.109298531810767, "grad_norm": 0.03186091035604477, "learning_rate": 0.0008768312912253811, "loss": 0.053, "num_input_tokens_seen": 80801952, "step": 37450 }, { "epoch": 6.110114192495922, "grad_norm": 0.33031660318374634, "learning_rate": 0.0008767845036482798, "loss": 0.0719, "num_input_tokens_seen": 80812128, "step": 37455 }, { "epoch": 6.1109298531810765, "grad_norm": 0.14421811699867249, "learning_rate": 0.0008767377084351625, "loss": 0.0585, "num_input_tokens_seen": 80823136, "step": 37460 }, { "epoch": 6.111745513866231, "grad_norm": 0.20838215947151184, "learning_rate": 0.0008766909055869777, "loss": 0.0228, "num_input_tokens_seen": 80833600, "step": 37465 }, { "epoch": 6.112561174551387, "grad_norm": 0.2048577219247818, "learning_rate": 0.0008766440951046736, "loss": 0.1003, "num_input_tokens_seen": 80845248, "step": 37470 }, { "epoch": 6.113376835236542, "grad_norm": 0.2671665549278259, "learning_rate": 0.0008765972769891993, "loss": 0.0478, "num_input_tokens_seen": 80854624, "step": 37475 }, { "epoch": 6.114192495921697, "grad_norm": 0.21815544366836548, "learning_rate": 0.0008765504512415033, "loss": 0.0794, "num_input_tokens_seen": 80866272, "step": 37480 }, { "epoch": 6.1150081566068515, "grad_norm": 0.017273621633648872, "learning_rate": 0.0008765036178625347, "loss": 0.0737, "num_input_tokens_seen": 80875168, "step": 37485 }, { "epoch": 6.115823817292006, "grad_norm": 0.2894923985004425, "learning_rate": 0.0008764567768532427, "loss": 0.0955, "num_input_tokens_seen": 80885888, "step": 37490 }, { "epoch": 6.116639477977161, "grad_norm": 0.43978849053382874, "learning_rate": 0.0008764099282145767, "loss": 0.1646, "num_input_tokens_seen": 80897728, "step": 37495 }, { "epoch": 6.117455138662317, "grad_norm": 0.23947592079639435, "learning_rate": 0.0008763630719474857, "loss": 0.2055, "num_input_tokens_seen": 80908992, "step": 37500 }, { "epoch": 6.118270799347472, "grad_norm": 0.014366867952048779, "learning_rate": 0.0008763162080529199, "loss": 0.0669, "num_input_tokens_seen": 80920064, "step": 37505 }, { "epoch": 6.1190864600326265, "grad_norm": 0.20465487241744995, "learning_rate": 0.0008762693365318286, "loss": 0.0718, "num_input_tokens_seen": 80930784, "step": 37510 }, { "epoch": 6.119902120717781, "grad_norm": 0.012040183879435062, "learning_rate": 0.0008762224573851619, "loss": 0.0472, "num_input_tokens_seen": 80941952, "step": 37515 }, { "epoch": 6.120717781402936, "grad_norm": 0.15415719151496887, "learning_rate": 0.0008761755706138698, "loss": 0.0578, "num_input_tokens_seen": 80953184, "step": 37520 }, { "epoch": 6.121533442088092, "grad_norm": 0.0038683812599629164, "learning_rate": 0.0008761286762189027, "loss": 0.0302, "num_input_tokens_seen": 80964064, "step": 37525 }, { "epoch": 6.122349102773247, "grad_norm": 0.22263197600841522, "learning_rate": 0.0008760817742012106, "loss": 0.08, "num_input_tokens_seen": 80975776, "step": 37530 }, { "epoch": 6.123164763458401, "grad_norm": 0.20978163182735443, "learning_rate": 0.0008760348645617444, "loss": 0.0366, "num_input_tokens_seen": 80985536, "step": 37535 }, { "epoch": 6.123980424143556, "grad_norm": 0.24373182654380798, "learning_rate": 0.0008759879473014545, "loss": 0.0998, "num_input_tokens_seen": 80996096, "step": 37540 }, { "epoch": 6.124796084828711, "grad_norm": 0.12226638197898865, "learning_rate": 0.000875941022421292, "loss": 0.0379, "num_input_tokens_seen": 81007968, "step": 37545 }, { "epoch": 6.125611745513866, "grad_norm": 0.415848046541214, "learning_rate": 0.0008758940899222077, "loss": 0.1026, "num_input_tokens_seen": 81018976, "step": 37550 }, { "epoch": 6.126427406199022, "grad_norm": 0.011912100948393345, "learning_rate": 0.0008758471498051528, "loss": 0.0523, "num_input_tokens_seen": 81029056, "step": 37555 }, { "epoch": 6.127243066884176, "grad_norm": 0.02331371232867241, "learning_rate": 0.0008758002020710787, "loss": 0.0265, "num_input_tokens_seen": 81039712, "step": 37560 }, { "epoch": 6.128058727569331, "grad_norm": 0.00830017775297165, "learning_rate": 0.0008757532467209367, "loss": 0.1197, "num_input_tokens_seen": 81050816, "step": 37565 }, { "epoch": 6.128874388254486, "grad_norm": 0.15961168706417084, "learning_rate": 0.0008757062837556784, "loss": 0.1396, "num_input_tokens_seen": 81060992, "step": 37570 }, { "epoch": 6.129690048939641, "grad_norm": 0.12956620752811432, "learning_rate": 0.0008756593131762557, "loss": 0.0641, "num_input_tokens_seen": 81071968, "step": 37575 }, { "epoch": 6.130505709624796, "grad_norm": 0.14461350440979004, "learning_rate": 0.0008756123349836206, "loss": 0.1831, "num_input_tokens_seen": 81084096, "step": 37580 }, { "epoch": 6.131321370309951, "grad_norm": 0.10241524130105972, "learning_rate": 0.0008755653491787249, "loss": 0.0364, "num_input_tokens_seen": 81094912, "step": 37585 }, { "epoch": 6.132137030995106, "grad_norm": 0.2627876400947571, "learning_rate": 0.000875518355762521, "loss": 0.0586, "num_input_tokens_seen": 81104992, "step": 37590 }, { "epoch": 6.132952691680261, "grad_norm": 0.10247651487588882, "learning_rate": 0.0008754713547359612, "loss": 0.1084, "num_input_tokens_seen": 81114272, "step": 37595 }, { "epoch": 6.133768352365416, "grad_norm": 0.02694053389132023, "learning_rate": 0.0008754243460999982, "loss": 0.0949, "num_input_tokens_seen": 81126336, "step": 37600 }, { "epoch": 6.134584013050571, "grad_norm": 0.13037988543510437, "learning_rate": 0.0008753773298555844, "loss": 0.0619, "num_input_tokens_seen": 81137728, "step": 37605 }, { "epoch": 6.135399673735726, "grad_norm": 0.4558175802230835, "learning_rate": 0.0008753303060036728, "loss": 0.1718, "num_input_tokens_seen": 81148448, "step": 37610 }, { "epoch": 6.136215334420881, "grad_norm": 0.05078926682472229, "learning_rate": 0.0008752832745452166, "loss": 0.138, "num_input_tokens_seen": 81159872, "step": 37615 }, { "epoch": 6.137030995106036, "grad_norm": 0.07354626059532166, "learning_rate": 0.0008752362354811686, "loss": 0.0489, "num_input_tokens_seen": 81170304, "step": 37620 }, { "epoch": 6.137846655791191, "grad_norm": 0.3129079341888428, "learning_rate": 0.0008751891888124823, "loss": 0.0931, "num_input_tokens_seen": 81180192, "step": 37625 }, { "epoch": 6.138662316476346, "grad_norm": 0.31993889808654785, "learning_rate": 0.0008751421345401111, "loss": 0.1613, "num_input_tokens_seen": 81191872, "step": 37630 }, { "epoch": 6.1394779771615005, "grad_norm": 0.2995270788669586, "learning_rate": 0.0008750950726650089, "loss": 0.0922, "num_input_tokens_seen": 81201856, "step": 37635 }, { "epoch": 6.140293637846656, "grad_norm": 0.32987722754478455, "learning_rate": 0.0008750480031881289, "loss": 0.0703, "num_input_tokens_seen": 81212736, "step": 37640 }, { "epoch": 6.141109298531811, "grad_norm": 0.08492878824472427, "learning_rate": 0.0008750009261104255, "loss": 0.1178, "num_input_tokens_seen": 81223680, "step": 37645 }, { "epoch": 6.141924959216966, "grad_norm": 0.21259157359600067, "learning_rate": 0.0008749538414328525, "loss": 0.2082, "num_input_tokens_seen": 81234368, "step": 37650 }, { "epoch": 6.142740619902121, "grad_norm": 0.296414852142334, "learning_rate": 0.0008749067491563643, "loss": 0.1526, "num_input_tokens_seen": 81244736, "step": 37655 }, { "epoch": 6.143556280587275, "grad_norm": 0.1294468641281128, "learning_rate": 0.0008748596492819152, "loss": 0.0526, "num_input_tokens_seen": 81254144, "step": 37660 }, { "epoch": 6.14437194127243, "grad_norm": 0.2406795769929886, "learning_rate": 0.0008748125418104598, "loss": 0.0623, "num_input_tokens_seen": 81265216, "step": 37665 }, { "epoch": 6.145187601957586, "grad_norm": 0.350881963968277, "learning_rate": 0.0008747654267429526, "loss": 0.1353, "num_input_tokens_seen": 81276416, "step": 37670 }, { "epoch": 6.146003262642741, "grad_norm": 0.19518651068210602, "learning_rate": 0.0008747183040803488, "loss": 0.1179, "num_input_tokens_seen": 81287168, "step": 37675 }, { "epoch": 6.146818923327896, "grad_norm": 0.1773224174976349, "learning_rate": 0.000874671173823603, "loss": 0.0698, "num_input_tokens_seen": 81297984, "step": 37680 }, { "epoch": 6.14763458401305, "grad_norm": 0.011056666262447834, "learning_rate": 0.0008746240359736708, "loss": 0.2111, "num_input_tokens_seen": 81308640, "step": 37685 }, { "epoch": 6.148450244698205, "grad_norm": 0.2546042203903198, "learning_rate": 0.0008745768905315072, "loss": 0.0712, "num_input_tokens_seen": 81319296, "step": 37690 }, { "epoch": 6.149265905383361, "grad_norm": 0.5490317940711975, "learning_rate": 0.0008745297374980676, "loss": 0.1745, "num_input_tokens_seen": 81330848, "step": 37695 }, { "epoch": 6.150081566068516, "grad_norm": 0.0984594002366066, "learning_rate": 0.0008744825768743079, "loss": 0.1607, "num_input_tokens_seen": 81342176, "step": 37700 }, { "epoch": 6.150897226753671, "grad_norm": 0.01078155729919672, "learning_rate": 0.0008744354086611837, "loss": 0.0968, "num_input_tokens_seen": 81352416, "step": 37705 }, { "epoch": 6.151712887438825, "grad_norm": 0.0327475480735302, "learning_rate": 0.0008743882328596509, "loss": 0.0983, "num_input_tokens_seen": 81364224, "step": 37710 }, { "epoch": 6.15252854812398, "grad_norm": 0.01576169952750206, "learning_rate": 0.0008743410494706655, "loss": 0.074, "num_input_tokens_seen": 81374912, "step": 37715 }, { "epoch": 6.153344208809135, "grad_norm": 0.27554431557655334, "learning_rate": 0.0008742938584951841, "loss": 0.117, "num_input_tokens_seen": 81385504, "step": 37720 }, { "epoch": 6.154159869494291, "grad_norm": 0.4346618950366974, "learning_rate": 0.0008742466599341625, "loss": 0.1603, "num_input_tokens_seen": 81396288, "step": 37725 }, { "epoch": 6.1549755301794455, "grad_norm": 0.10869763046503067, "learning_rate": 0.0008741994537885578, "loss": 0.026, "num_input_tokens_seen": 81407520, "step": 37730 }, { "epoch": 6.1557911908646, "grad_norm": 0.06256053596735, "learning_rate": 0.0008741522400593265, "loss": 0.0211, "num_input_tokens_seen": 81417088, "step": 37735 }, { "epoch": 6.156606851549755, "grad_norm": 0.011777924373745918, "learning_rate": 0.0008741050187474253, "loss": 0.0465, "num_input_tokens_seen": 81428224, "step": 37740 }, { "epoch": 6.15742251223491, "grad_norm": 0.024991702288389206, "learning_rate": 0.0008740577898538114, "loss": 0.1233, "num_input_tokens_seen": 81440000, "step": 37745 }, { "epoch": 6.158238172920065, "grad_norm": 0.17709702253341675, "learning_rate": 0.0008740105533794417, "loss": 0.1033, "num_input_tokens_seen": 81451264, "step": 37750 }, { "epoch": 6.1590538336052205, "grad_norm": 0.038892246782779694, "learning_rate": 0.0008739633093252738, "loss": 0.0246, "num_input_tokens_seen": 81462144, "step": 37755 }, { "epoch": 6.159869494290375, "grad_norm": 0.16601526737213135, "learning_rate": 0.0008739160576922649, "loss": 0.1504, "num_input_tokens_seen": 81473408, "step": 37760 }, { "epoch": 6.16068515497553, "grad_norm": 0.05186929553747177, "learning_rate": 0.0008738687984813729, "loss": 0.0618, "num_input_tokens_seen": 81484928, "step": 37765 }, { "epoch": 6.161500815660685, "grad_norm": 0.09986516088247299, "learning_rate": 0.0008738215316935554, "loss": 0.0514, "num_input_tokens_seen": 81498208, "step": 37770 }, { "epoch": 6.16231647634584, "grad_norm": 0.21742188930511475, "learning_rate": 0.0008737742573297702, "loss": 0.0901, "num_input_tokens_seen": 81508672, "step": 37775 }, { "epoch": 6.1631321370309955, "grad_norm": 0.045663535594940186, "learning_rate": 0.0008737269753909757, "loss": 0.0743, "num_input_tokens_seen": 81518656, "step": 37780 }, { "epoch": 6.16394779771615, "grad_norm": 0.3040618598461151, "learning_rate": 0.0008736796858781297, "loss": 0.1936, "num_input_tokens_seen": 81529984, "step": 37785 }, { "epoch": 6.164763458401305, "grad_norm": 0.13098281621932983, "learning_rate": 0.0008736323887921911, "loss": 0.1406, "num_input_tokens_seen": 81540096, "step": 37790 }, { "epoch": 6.16557911908646, "grad_norm": 0.0293863657861948, "learning_rate": 0.0008735850841341179, "loss": 0.0109, "num_input_tokens_seen": 81550368, "step": 37795 }, { "epoch": 6.166394779771615, "grad_norm": 0.012408810667693615, "learning_rate": 0.0008735377719048692, "loss": 0.0278, "num_input_tokens_seen": 81562176, "step": 37800 }, { "epoch": 6.16721044045677, "grad_norm": 0.2513691484928131, "learning_rate": 0.0008734904521054037, "loss": 0.1586, "num_input_tokens_seen": 81572992, "step": 37805 }, { "epoch": 6.168026101141925, "grad_norm": 0.2130783647298813, "learning_rate": 0.0008734431247366803, "loss": 0.0539, "num_input_tokens_seen": 81582496, "step": 37810 }, { "epoch": 6.16884176182708, "grad_norm": 0.026805637404322624, "learning_rate": 0.0008733957897996583, "loss": 0.0954, "num_input_tokens_seen": 81593664, "step": 37815 }, { "epoch": 6.169657422512235, "grad_norm": 0.02379719726741314, "learning_rate": 0.0008733484472952969, "loss": 0.0515, "num_input_tokens_seen": 81605408, "step": 37820 }, { "epoch": 6.17047308319739, "grad_norm": 0.01487817894667387, "learning_rate": 0.0008733010972245554, "loss": 0.0293, "num_input_tokens_seen": 81616928, "step": 37825 }, { "epoch": 6.171288743882545, "grad_norm": 0.06261876225471497, "learning_rate": 0.0008732537395883938, "loss": 0.0753, "num_input_tokens_seen": 81627872, "step": 37830 }, { "epoch": 6.1721044045677, "grad_norm": 0.019965844228863716, "learning_rate": 0.0008732063743877716, "loss": 0.06, "num_input_tokens_seen": 81638336, "step": 37835 }, { "epoch": 6.172920065252855, "grad_norm": 0.017746130004525185, "learning_rate": 0.0008731590016236489, "loss": 0.0144, "num_input_tokens_seen": 81648704, "step": 37840 }, { "epoch": 6.17373572593801, "grad_norm": 0.04359278455376625, "learning_rate": 0.0008731116212969856, "loss": 0.0381, "num_input_tokens_seen": 81659424, "step": 37845 }, { "epoch": 6.174551386623165, "grad_norm": 0.056117620319128036, "learning_rate": 0.000873064233408742, "loss": 0.0934, "num_input_tokens_seen": 81670464, "step": 37850 }, { "epoch": 6.1753670473083195, "grad_norm": 0.11717624962329865, "learning_rate": 0.0008730168379598782, "loss": 0.09, "num_input_tokens_seen": 81681792, "step": 37855 }, { "epoch": 6.176182707993474, "grad_norm": 0.27975597977638245, "learning_rate": 0.0008729694349513552, "loss": 0.1407, "num_input_tokens_seen": 81692352, "step": 37860 }, { "epoch": 6.17699836867863, "grad_norm": 0.008886213414371014, "learning_rate": 0.0008729220243841334, "loss": 0.082, "num_input_tokens_seen": 81701920, "step": 37865 }, { "epoch": 6.177814029363785, "grad_norm": 0.3695761263370514, "learning_rate": 0.0008728746062591737, "loss": 0.1001, "num_input_tokens_seen": 81713664, "step": 37870 }, { "epoch": 6.17862969004894, "grad_norm": 0.02611437626183033, "learning_rate": 0.0008728271805774371, "loss": 0.1694, "num_input_tokens_seen": 81725024, "step": 37875 }, { "epoch": 6.1794453507340945, "grad_norm": 0.18484219908714294, "learning_rate": 0.0008727797473398846, "loss": 0.0353, "num_input_tokens_seen": 81734752, "step": 37880 }, { "epoch": 6.180261011419249, "grad_norm": 0.27713656425476074, "learning_rate": 0.0008727323065474778, "loss": 0.1025, "num_input_tokens_seen": 81743392, "step": 37885 }, { "epoch": 6.181076672104404, "grad_norm": 0.026678450405597687, "learning_rate": 0.000872684858201178, "loss": 0.0117, "num_input_tokens_seen": 81755744, "step": 37890 }, { "epoch": 6.18189233278956, "grad_norm": 0.3659737706184387, "learning_rate": 0.0008726374023019465, "loss": 0.3793, "num_input_tokens_seen": 81766368, "step": 37895 }, { "epoch": 6.182707993474715, "grad_norm": 0.28093287348747253, "learning_rate": 0.0008725899388507454, "loss": 0.0374, "num_input_tokens_seen": 81776672, "step": 37900 }, { "epoch": 6.1835236541598695, "grad_norm": 0.009045279584825039, "learning_rate": 0.0008725424678485366, "loss": 0.0808, "num_input_tokens_seen": 81787680, "step": 37905 }, { "epoch": 6.184339314845024, "grad_norm": 0.046027738600969315, "learning_rate": 0.0008724949892962821, "loss": 0.0744, "num_input_tokens_seen": 81798400, "step": 37910 }, { "epoch": 6.185154975530179, "grad_norm": 0.03590027242898941, "learning_rate": 0.0008724475031949441, "loss": 0.2563, "num_input_tokens_seen": 81810240, "step": 37915 }, { "epoch": 6.185970636215335, "grad_norm": 0.02497250586748123, "learning_rate": 0.0008724000095454849, "loss": 0.2208, "num_input_tokens_seen": 81821984, "step": 37920 }, { "epoch": 6.18678629690049, "grad_norm": 0.18172307312488556, "learning_rate": 0.0008723525083488671, "loss": 0.1687, "num_input_tokens_seen": 81833504, "step": 37925 }, { "epoch": 6.1876019575856445, "grad_norm": 0.2802371382713318, "learning_rate": 0.0008723049996060534, "loss": 0.1311, "num_input_tokens_seen": 81843008, "step": 37930 }, { "epoch": 6.188417618270799, "grad_norm": 0.40945982933044434, "learning_rate": 0.0008722574833180065, "loss": 0.1315, "num_input_tokens_seen": 81854272, "step": 37935 }, { "epoch": 6.189233278955954, "grad_norm": 0.38113129138946533, "learning_rate": 0.0008722099594856895, "loss": 0.1452, "num_input_tokens_seen": 81866336, "step": 37940 }, { "epoch": 6.190048939641109, "grad_norm": 0.040621671825647354, "learning_rate": 0.0008721624281100655, "loss": 0.1398, "num_input_tokens_seen": 81878336, "step": 37945 }, { "epoch": 6.190864600326265, "grad_norm": 0.06518690288066864, "learning_rate": 0.0008721148891920978, "loss": 0.0218, "num_input_tokens_seen": 81889536, "step": 37950 }, { "epoch": 6.191680261011419, "grad_norm": 0.02929527871310711, "learning_rate": 0.0008720673427327496, "loss": 0.0468, "num_input_tokens_seen": 81901600, "step": 37955 }, { "epoch": 6.192495921696574, "grad_norm": 0.2593875229358673, "learning_rate": 0.0008720197887329851, "loss": 0.0449, "num_input_tokens_seen": 81912608, "step": 37960 }, { "epoch": 6.193311582381729, "grad_norm": 0.25359100103378296, "learning_rate": 0.0008719722271937673, "loss": 0.1554, "num_input_tokens_seen": 81922464, "step": 37965 }, { "epoch": 6.194127243066884, "grad_norm": 0.0206617284566164, "learning_rate": 0.0008719246581160606, "loss": 0.0237, "num_input_tokens_seen": 81934048, "step": 37970 }, { "epoch": 6.19494290375204, "grad_norm": 0.2102188766002655, "learning_rate": 0.0008718770815008288, "loss": 0.0959, "num_input_tokens_seen": 81944160, "step": 37975 }, { "epoch": 6.195758564437194, "grad_norm": 0.31613877415657043, "learning_rate": 0.0008718294973490362, "loss": 0.2433, "num_input_tokens_seen": 81955392, "step": 37980 }, { "epoch": 6.196574225122349, "grad_norm": 0.09631001204252243, "learning_rate": 0.0008717819056616472, "loss": 0.0627, "num_input_tokens_seen": 81966720, "step": 37985 }, { "epoch": 6.197389885807504, "grad_norm": 0.01937999203801155, "learning_rate": 0.0008717343064396262, "loss": 0.0609, "num_input_tokens_seen": 81977088, "step": 37990 }, { "epoch": 6.198205546492659, "grad_norm": 0.03130558133125305, "learning_rate": 0.0008716866996839378, "loss": 0.074, "num_input_tokens_seen": 81987456, "step": 37995 }, { "epoch": 6.199021207177814, "grad_norm": 0.47428566217422485, "learning_rate": 0.0008716390853955472, "loss": 0.1125, "num_input_tokens_seen": 81998432, "step": 38000 }, { "epoch": 6.199836867862969, "grad_norm": 0.051574766635894775, "learning_rate": 0.0008715914635754187, "loss": 0.0816, "num_input_tokens_seen": 82010080, "step": 38005 }, { "epoch": 6.200652528548124, "grad_norm": 0.1316729485988617, "learning_rate": 0.0008715438342245181, "loss": 0.0276, "num_input_tokens_seen": 82020096, "step": 38010 }, { "epoch": 6.201468189233279, "grad_norm": 0.18838675320148468, "learning_rate": 0.0008714961973438103, "loss": 0.1792, "num_input_tokens_seen": 82030944, "step": 38015 }, { "epoch": 6.202283849918434, "grad_norm": 0.17742425203323364, "learning_rate": 0.0008714485529342606, "loss": 0.0392, "num_input_tokens_seen": 82041984, "step": 38020 }, { "epoch": 6.203099510603589, "grad_norm": 0.5516185760498047, "learning_rate": 0.0008714009009968349, "loss": 0.1515, "num_input_tokens_seen": 82053280, "step": 38025 }, { "epoch": 6.2039151712887435, "grad_norm": 0.04361693188548088, "learning_rate": 0.0008713532415324988, "loss": 0.0365, "num_input_tokens_seen": 82063520, "step": 38030 }, { "epoch": 6.204730831973899, "grad_norm": 0.26263806223869324, "learning_rate": 0.0008713055745422181, "loss": 0.0603, "num_input_tokens_seen": 82075072, "step": 38035 }, { "epoch": 6.205546492659054, "grad_norm": 0.012335437349975109, "learning_rate": 0.000871257900026959, "loss": 0.1391, "num_input_tokens_seen": 82085440, "step": 38040 }, { "epoch": 6.206362153344209, "grad_norm": 0.017645860090851784, "learning_rate": 0.0008712102179876876, "loss": 0.1137, "num_input_tokens_seen": 82096672, "step": 38045 }, { "epoch": 6.207177814029364, "grad_norm": 0.02544236369431019, "learning_rate": 0.0008711625284253701, "loss": 0.028, "num_input_tokens_seen": 82106880, "step": 38050 }, { "epoch": 6.2079934747145185, "grad_norm": 0.2836909890174866, "learning_rate": 0.0008711148313409731, "loss": 0.1114, "num_input_tokens_seen": 82116928, "step": 38055 }, { "epoch": 6.208809135399674, "grad_norm": 0.3551044762134552, "learning_rate": 0.0008710671267354633, "loss": 0.0996, "num_input_tokens_seen": 82125824, "step": 38060 }, { "epoch": 6.209624796084829, "grad_norm": 0.018436262384057045, "learning_rate": 0.0008710194146098074, "loss": 0.0499, "num_input_tokens_seen": 82136896, "step": 38065 }, { "epoch": 6.210440456769984, "grad_norm": 0.6422732472419739, "learning_rate": 0.0008709716949649724, "loss": 0.1313, "num_input_tokens_seen": 82146784, "step": 38070 }, { "epoch": 6.211256117455139, "grad_norm": 0.4167613089084625, "learning_rate": 0.0008709239678019255, "loss": 0.1069, "num_input_tokens_seen": 82156544, "step": 38075 }, { "epoch": 6.212071778140293, "grad_norm": 0.029197990894317627, "learning_rate": 0.0008708762331216338, "loss": 0.0394, "num_input_tokens_seen": 82168192, "step": 38080 }, { "epoch": 6.212887438825448, "grad_norm": 0.02608264423906803, "learning_rate": 0.0008708284909250646, "loss": 0.0375, "num_input_tokens_seen": 82179360, "step": 38085 }, { "epoch": 6.213703099510604, "grad_norm": 0.019517365843057632, "learning_rate": 0.0008707807412131858, "loss": 0.0511, "num_input_tokens_seen": 82189888, "step": 38090 }, { "epoch": 6.214518760195759, "grad_norm": 0.01894032210111618, "learning_rate": 0.0008707329839869649, "loss": 0.0207, "num_input_tokens_seen": 82201408, "step": 38095 }, { "epoch": 6.215334420880914, "grad_norm": 0.27430230379104614, "learning_rate": 0.0008706852192473696, "loss": 0.1675, "num_input_tokens_seen": 82211936, "step": 38100 }, { "epoch": 6.216150081566068, "grad_norm": 0.14594011008739471, "learning_rate": 0.0008706374469953682, "loss": 0.1884, "num_input_tokens_seen": 82223904, "step": 38105 }, { "epoch": 6.216965742251223, "grad_norm": 0.4427279233932495, "learning_rate": 0.0008705896672319286, "loss": 0.0356, "num_input_tokens_seen": 82234656, "step": 38110 }, { "epoch": 6.217781402936378, "grad_norm": 0.38041970133781433, "learning_rate": 0.0008705418799580196, "loss": 0.2883, "num_input_tokens_seen": 82246400, "step": 38115 }, { "epoch": 6.218597063621534, "grad_norm": 0.07448987662792206, "learning_rate": 0.000870494085174609, "loss": 0.0143, "num_input_tokens_seen": 82257376, "step": 38120 }, { "epoch": 6.219412724306689, "grad_norm": 0.07405517250299454, "learning_rate": 0.000870446282882666, "loss": 0.028, "num_input_tokens_seen": 82269376, "step": 38125 }, { "epoch": 6.220228384991843, "grad_norm": 0.32132160663604736, "learning_rate": 0.0008703984730831589, "loss": 0.0551, "num_input_tokens_seen": 82279776, "step": 38130 }, { "epoch": 6.221044045676998, "grad_norm": 0.20271813869476318, "learning_rate": 0.0008703506557770571, "loss": 0.0848, "num_input_tokens_seen": 82291488, "step": 38135 }, { "epoch": 6.221859706362153, "grad_norm": 0.21891963481903076, "learning_rate": 0.0008703028309653293, "loss": 0.0556, "num_input_tokens_seen": 82303296, "step": 38140 }, { "epoch": 6.222675367047309, "grad_norm": 0.07454973459243774, "learning_rate": 0.0008702549986489449, "loss": 0.0876, "num_input_tokens_seen": 82312576, "step": 38145 }, { "epoch": 6.2234910277324635, "grad_norm": 0.009890730492770672, "learning_rate": 0.0008702071588288731, "loss": 0.026, "num_input_tokens_seen": 82324480, "step": 38150 }, { "epoch": 6.224306688417618, "grad_norm": 0.19836236536502838, "learning_rate": 0.0008701593115060837, "loss": 0.0271, "num_input_tokens_seen": 82334528, "step": 38155 }, { "epoch": 6.225122349102773, "grad_norm": 0.006776847876608372, "learning_rate": 0.0008701114566815464, "loss": 0.0255, "num_input_tokens_seen": 82344736, "step": 38160 }, { "epoch": 6.225938009787928, "grad_norm": 0.48327401280403137, "learning_rate": 0.0008700635943562308, "loss": 0.1248, "num_input_tokens_seen": 82355168, "step": 38165 }, { "epoch": 6.226753670473083, "grad_norm": 0.08898193389177322, "learning_rate": 0.0008700157245311071, "loss": 0.0838, "num_input_tokens_seen": 82366080, "step": 38170 }, { "epoch": 6.2275693311582385, "grad_norm": 0.16919302940368652, "learning_rate": 0.0008699678472071453, "loss": 0.1192, "num_input_tokens_seen": 82377088, "step": 38175 }, { "epoch": 6.228384991843393, "grad_norm": 0.18840764462947845, "learning_rate": 0.0008699199623853156, "loss": 0.138, "num_input_tokens_seen": 82387712, "step": 38180 }, { "epoch": 6.229200652528548, "grad_norm": 0.24587798118591309, "learning_rate": 0.0008698720700665888, "loss": 0.1201, "num_input_tokens_seen": 82397728, "step": 38185 }, { "epoch": 6.230016313213703, "grad_norm": 0.4648304283618927, "learning_rate": 0.0008698241702519351, "loss": 0.2054, "num_input_tokens_seen": 82408352, "step": 38190 }, { "epoch": 6.230831973898858, "grad_norm": 0.156012624502182, "learning_rate": 0.0008697762629423254, "loss": 0.144, "num_input_tokens_seen": 82419168, "step": 38195 }, { "epoch": 6.231647634584013, "grad_norm": 0.03668627515435219, "learning_rate": 0.0008697283481387308, "loss": 0.0349, "num_input_tokens_seen": 82429856, "step": 38200 }, { "epoch": 6.232463295269168, "grad_norm": 0.366272896528244, "learning_rate": 0.000869680425842122, "loss": 0.1479, "num_input_tokens_seen": 82441664, "step": 38205 }, { "epoch": 6.233278955954323, "grad_norm": 0.036376986652612686, "learning_rate": 0.0008696324960534706, "loss": 0.1415, "num_input_tokens_seen": 82452512, "step": 38210 }, { "epoch": 6.234094616639478, "grad_norm": 0.2304123193025589, "learning_rate": 0.0008695845587737476, "loss": 0.0644, "num_input_tokens_seen": 82462880, "step": 38215 }, { "epoch": 6.234910277324633, "grad_norm": 0.07836061716079712, "learning_rate": 0.0008695366140039248, "loss": 0.0758, "num_input_tokens_seen": 82474368, "step": 38220 }, { "epoch": 6.235725938009788, "grad_norm": 0.2481348216533661, "learning_rate": 0.0008694886617449738, "loss": 0.0527, "num_input_tokens_seen": 82484576, "step": 38225 }, { "epoch": 6.236541598694943, "grad_norm": 0.24087274074554443, "learning_rate": 0.0008694407019978661, "loss": 0.036, "num_input_tokens_seen": 82494784, "step": 38230 }, { "epoch": 6.237357259380098, "grad_norm": 0.010592986829578876, "learning_rate": 0.0008693927347635741, "loss": 0.0705, "num_input_tokens_seen": 82506176, "step": 38235 }, { "epoch": 6.238172920065253, "grad_norm": 0.513277530670166, "learning_rate": 0.0008693447600430695, "loss": 0.262, "num_input_tokens_seen": 82516544, "step": 38240 }, { "epoch": 6.238988580750408, "grad_norm": 0.04082418978214264, "learning_rate": 0.000869296777837325, "loss": 0.0634, "num_input_tokens_seen": 82527744, "step": 38245 }, { "epoch": 6.239804241435563, "grad_norm": 0.0600874237716198, "learning_rate": 0.0008692487881473128, "loss": 0.1995, "num_input_tokens_seen": 82539616, "step": 38250 }, { "epoch": 6.240619902120717, "grad_norm": 0.33390292525291443, "learning_rate": 0.0008692007909740054, "loss": 0.1939, "num_input_tokens_seen": 82551744, "step": 38255 }, { "epoch": 6.241435562805873, "grad_norm": 0.05315184220671654, "learning_rate": 0.0008691527863183755, "loss": 0.1404, "num_input_tokens_seen": 82562080, "step": 38260 }, { "epoch": 6.242251223491028, "grad_norm": 0.02041463367640972, "learning_rate": 0.0008691047741813963, "loss": 0.0351, "num_input_tokens_seen": 82570688, "step": 38265 }, { "epoch": 6.243066884176183, "grad_norm": 0.5262753963470459, "learning_rate": 0.0008690567545640406, "loss": 0.1362, "num_input_tokens_seen": 82581536, "step": 38270 }, { "epoch": 6.2438825448613375, "grad_norm": 0.5825360417366028, "learning_rate": 0.0008690087274672814, "loss": 0.1279, "num_input_tokens_seen": 82591904, "step": 38275 }, { "epoch": 6.244698205546492, "grad_norm": 0.138711079955101, "learning_rate": 0.0008689606928920923, "loss": 0.0546, "num_input_tokens_seen": 82602496, "step": 38280 }, { "epoch": 6.245513866231648, "grad_norm": 0.04171792417764664, "learning_rate": 0.0008689126508394467, "loss": 0.1912, "num_input_tokens_seen": 82612704, "step": 38285 }, { "epoch": 6.246329526916803, "grad_norm": 0.0177142471075058, "learning_rate": 0.0008688646013103183, "loss": 0.0154, "num_input_tokens_seen": 82624384, "step": 38290 }, { "epoch": 6.247145187601958, "grad_norm": 0.0836564227938652, "learning_rate": 0.0008688165443056808, "loss": 0.0928, "num_input_tokens_seen": 82633632, "step": 38295 }, { "epoch": 6.2479608482871125, "grad_norm": 0.5282182097434998, "learning_rate": 0.0008687684798265081, "loss": 0.2801, "num_input_tokens_seen": 82645024, "step": 38300 }, { "epoch": 6.248776508972267, "grad_norm": 0.1059623435139656, "learning_rate": 0.0008687204078737744, "loss": 0.11, "num_input_tokens_seen": 82655328, "step": 38305 }, { "epoch": 6.249592169657422, "grad_norm": 0.48579680919647217, "learning_rate": 0.0008686723284484538, "loss": 0.068, "num_input_tokens_seen": 82666176, "step": 38310 }, { "epoch": 6.250407830342578, "grad_norm": 0.037487879395484924, "learning_rate": 0.0008686242415515209, "loss": 0.3257, "num_input_tokens_seen": 82677312, "step": 38315 }, { "epoch": 6.251223491027733, "grad_norm": 0.13019028306007385, "learning_rate": 0.00086857614718395, "loss": 0.1451, "num_input_tokens_seen": 82688320, "step": 38320 }, { "epoch": 6.2520391517128875, "grad_norm": 0.012561680749058723, "learning_rate": 0.0008685280453467159, "loss": 0.0446, "num_input_tokens_seen": 82699232, "step": 38325 }, { "epoch": 6.252854812398042, "grad_norm": 0.09618709981441498, "learning_rate": 0.0008684799360407935, "loss": 0.1626, "num_input_tokens_seen": 82710560, "step": 38330 }, { "epoch": 6.253670473083197, "grad_norm": 0.16812492907047272, "learning_rate": 0.0008684318192671576, "loss": 0.1049, "num_input_tokens_seen": 82721152, "step": 38335 }, { "epoch": 6.254486133768353, "grad_norm": 0.0887015163898468, "learning_rate": 0.0008683836950267838, "loss": 0.0507, "num_input_tokens_seen": 82731360, "step": 38340 }, { "epoch": 6.255301794453508, "grad_norm": 0.1159474104642868, "learning_rate": 0.0008683355633206469, "loss": 0.1105, "num_input_tokens_seen": 82740672, "step": 38345 }, { "epoch": 6.2561174551386625, "grad_norm": 0.19158174097537994, "learning_rate": 0.0008682874241497225, "loss": 0.0587, "num_input_tokens_seen": 82752192, "step": 38350 }, { "epoch": 6.256933115823817, "grad_norm": 0.12312944233417511, "learning_rate": 0.0008682392775149863, "loss": 0.105, "num_input_tokens_seen": 82763232, "step": 38355 }, { "epoch": 6.257748776508972, "grad_norm": 0.33153775334358215, "learning_rate": 0.000868191123417414, "loss": 0.1231, "num_input_tokens_seen": 82772416, "step": 38360 }, { "epoch": 6.258564437194127, "grad_norm": 0.09870640933513641, "learning_rate": 0.0008681429618579815, "loss": 0.0997, "num_input_tokens_seen": 82783136, "step": 38365 }, { "epoch": 6.259380097879283, "grad_norm": 0.13201718032360077, "learning_rate": 0.0008680947928376648, "loss": 0.0397, "num_input_tokens_seen": 82792416, "step": 38370 }, { "epoch": 6.260195758564437, "grad_norm": 0.15855617821216583, "learning_rate": 0.0008680466163574402, "loss": 0.1456, "num_input_tokens_seen": 82802528, "step": 38375 }, { "epoch": 6.261011419249592, "grad_norm": 0.05080302804708481, "learning_rate": 0.000867998432418284, "loss": 0.075, "num_input_tokens_seen": 82813216, "step": 38380 }, { "epoch": 6.261827079934747, "grad_norm": 0.11077865958213806, "learning_rate": 0.0008679502410211728, "loss": 0.0559, "num_input_tokens_seen": 82824352, "step": 38385 }, { "epoch": 6.262642740619902, "grad_norm": 0.06897448003292084, "learning_rate": 0.0008679020421670831, "loss": 0.0576, "num_input_tokens_seen": 82835936, "step": 38390 }, { "epoch": 6.263458401305057, "grad_norm": 0.029949642717838287, "learning_rate": 0.0008678538358569918, "loss": 0.0237, "num_input_tokens_seen": 82846368, "step": 38395 }, { "epoch": 6.264274061990212, "grad_norm": 0.018623823300004005, "learning_rate": 0.000867805622091876, "loss": 0.1448, "num_input_tokens_seen": 82857504, "step": 38400 }, { "epoch": 6.265089722675367, "grad_norm": 0.041891224682331085, "learning_rate": 0.0008677574008727126, "loss": 0.1626, "num_input_tokens_seen": 82868192, "step": 38405 }, { "epoch": 6.265905383360522, "grad_norm": 0.3580511808395386, "learning_rate": 0.0008677091722004788, "loss": 0.0355, "num_input_tokens_seen": 82879328, "step": 38410 }, { "epoch": 6.266721044045677, "grad_norm": 0.21657977998256683, "learning_rate": 0.0008676609360761524, "loss": 0.1363, "num_input_tokens_seen": 82890976, "step": 38415 }, { "epoch": 6.267536704730832, "grad_norm": 0.173953577876091, "learning_rate": 0.0008676126925007107, "loss": 0.0733, "num_input_tokens_seen": 82901792, "step": 38420 }, { "epoch": 6.268352365415987, "grad_norm": 0.035258591175079346, "learning_rate": 0.0008675644414751311, "loss": 0.1188, "num_input_tokens_seen": 82913024, "step": 38425 }, { "epoch": 6.269168026101142, "grad_norm": 0.29136592149734497, "learning_rate": 0.0008675161830003921, "loss": 0.1675, "num_input_tokens_seen": 82923168, "step": 38430 }, { "epoch": 6.269983686786297, "grad_norm": 0.028397701680660248, "learning_rate": 0.0008674679170774713, "loss": 0.0752, "num_input_tokens_seen": 82934784, "step": 38435 }, { "epoch": 6.270799347471452, "grad_norm": 0.027016595005989075, "learning_rate": 0.0008674196437073472, "loss": 0.0646, "num_input_tokens_seen": 82946272, "step": 38440 }, { "epoch": 6.271615008156607, "grad_norm": 0.1601172387599945, "learning_rate": 0.0008673713628909978, "loss": 0.0527, "num_input_tokens_seen": 82955904, "step": 38445 }, { "epoch": 6.2724306688417615, "grad_norm": 0.06713040918111801, "learning_rate": 0.0008673230746294016, "loss": 0.1201, "num_input_tokens_seen": 82965376, "step": 38450 }, { "epoch": 6.273246329526917, "grad_norm": 0.18276642262935638, "learning_rate": 0.0008672747789235373, "loss": 0.1369, "num_input_tokens_seen": 82975904, "step": 38455 }, { "epoch": 6.274061990212072, "grad_norm": 0.14935600757598877, "learning_rate": 0.0008672264757743838, "loss": 0.0348, "num_input_tokens_seen": 82986752, "step": 38460 }, { "epoch": 6.274877650897227, "grad_norm": 0.300351083278656, "learning_rate": 0.0008671781651829198, "loss": 0.2014, "num_input_tokens_seen": 82996928, "step": 38465 }, { "epoch": 6.275693311582382, "grad_norm": 0.175311878323555, "learning_rate": 0.0008671298471501246, "loss": 0.0803, "num_input_tokens_seen": 83008320, "step": 38470 }, { "epoch": 6.2765089722675365, "grad_norm": 0.13221049308776855, "learning_rate": 0.0008670815216769771, "loss": 0.0143, "num_input_tokens_seen": 83018496, "step": 38475 }, { "epoch": 6.277324632952691, "grad_norm": 0.121135413646698, "learning_rate": 0.0008670331887644571, "loss": 0.144, "num_input_tokens_seen": 83028768, "step": 38480 }, { "epoch": 6.278140293637847, "grad_norm": 0.22578361630439758, "learning_rate": 0.0008669848484135439, "loss": 0.038, "num_input_tokens_seen": 83039680, "step": 38485 }, { "epoch": 6.278955954323002, "grad_norm": 0.00807348731905222, "learning_rate": 0.0008669365006252172, "loss": 0.0494, "num_input_tokens_seen": 83049600, "step": 38490 }, { "epoch": 6.279771615008157, "grad_norm": 0.1673729568719864, "learning_rate": 0.0008668881454004567, "loss": 0.0359, "num_input_tokens_seen": 83060096, "step": 38495 }, { "epoch": 6.280587275693311, "grad_norm": 0.20853634178638458, "learning_rate": 0.0008668397827402425, "loss": 0.086, "num_input_tokens_seen": 83070240, "step": 38500 }, { "epoch": 6.281402936378466, "grad_norm": 0.07261139899492264, "learning_rate": 0.000866791412645555, "loss": 0.0868, "num_input_tokens_seen": 83080864, "step": 38505 }, { "epoch": 6.282218597063622, "grad_norm": 0.03060084953904152, "learning_rate": 0.000866743035117374, "loss": 0.1523, "num_input_tokens_seen": 83090336, "step": 38510 }, { "epoch": 6.283034257748777, "grad_norm": 0.21546390652656555, "learning_rate": 0.0008666946501566801, "loss": 0.1084, "num_input_tokens_seen": 83101248, "step": 38515 }, { "epoch": 6.283849918433932, "grad_norm": 0.031271666288375854, "learning_rate": 0.000866646257764454, "loss": 0.036, "num_input_tokens_seen": 83111488, "step": 38520 }, { "epoch": 6.284665579119086, "grad_norm": 0.05465102195739746, "learning_rate": 0.0008665978579416763, "loss": 0.0554, "num_input_tokens_seen": 83122528, "step": 38525 }, { "epoch": 6.285481239804241, "grad_norm": 0.0712488442659378, "learning_rate": 0.000866549450689328, "loss": 0.0739, "num_input_tokens_seen": 83133120, "step": 38530 }, { "epoch": 6.286296900489396, "grad_norm": 0.18694248795509338, "learning_rate": 0.0008665010360083902, "loss": 0.0445, "num_input_tokens_seen": 83142912, "step": 38535 }, { "epoch": 6.287112561174552, "grad_norm": 0.015745891258120537, "learning_rate": 0.0008664526138998438, "loss": 0.0765, "num_input_tokens_seen": 83153728, "step": 38540 }, { "epoch": 6.287928221859707, "grad_norm": 0.0336974561214447, "learning_rate": 0.0008664041843646704, "loss": 0.051, "num_input_tokens_seen": 83163840, "step": 38545 }, { "epoch": 6.288743882544861, "grad_norm": 0.005266394466161728, "learning_rate": 0.0008663557474038512, "loss": 0.0382, "num_input_tokens_seen": 83174560, "step": 38550 }, { "epoch": 6.289559543230016, "grad_norm": 0.0909627303481102, "learning_rate": 0.0008663073030183683, "loss": 0.0333, "num_input_tokens_seen": 83185536, "step": 38555 }, { "epoch": 6.290375203915171, "grad_norm": 0.10828143358230591, "learning_rate": 0.000866258851209203, "loss": 0.0388, "num_input_tokens_seen": 83196576, "step": 38560 }, { "epoch": 6.291190864600326, "grad_norm": 0.4044075310230255, "learning_rate": 0.0008662103919773375, "loss": 0.2124, "num_input_tokens_seen": 83205920, "step": 38565 }, { "epoch": 6.2920065252854815, "grad_norm": 0.0093558169901371, "learning_rate": 0.0008661619253237538, "loss": 0.0328, "num_input_tokens_seen": 83217312, "step": 38570 }, { "epoch": 6.292822185970636, "grad_norm": 0.008684294298291206, "learning_rate": 0.0008661134512494343, "loss": 0.032, "num_input_tokens_seen": 83228064, "step": 38575 }, { "epoch": 6.293637846655791, "grad_norm": 0.03013445995748043, "learning_rate": 0.0008660649697553612, "loss": 0.0798, "num_input_tokens_seen": 83237920, "step": 38580 }, { "epoch": 6.294453507340946, "grad_norm": 0.06182802468538284, "learning_rate": 0.000866016480842517, "loss": 0.0589, "num_input_tokens_seen": 83249216, "step": 38585 }, { "epoch": 6.295269168026101, "grad_norm": 0.0091919656842947, "learning_rate": 0.0008659679845118847, "loss": 0.1478, "num_input_tokens_seen": 83259744, "step": 38590 }, { "epoch": 6.2960848287112565, "grad_norm": 0.07095085829496384, "learning_rate": 0.0008659194807644468, "loss": 0.0295, "num_input_tokens_seen": 83270400, "step": 38595 }, { "epoch": 6.296900489396411, "grad_norm": 0.08563302457332611, "learning_rate": 0.0008658709696011864, "loss": 0.0718, "num_input_tokens_seen": 83280640, "step": 38600 }, { "epoch": 6.297716150081566, "grad_norm": 0.080418162047863, "learning_rate": 0.0008658224510230867, "loss": 0.0123, "num_input_tokens_seen": 83289792, "step": 38605 }, { "epoch": 6.298531810766721, "grad_norm": 0.004029197618365288, "learning_rate": 0.0008657739250311309, "loss": 0.2104, "num_input_tokens_seen": 83300928, "step": 38610 }, { "epoch": 6.299347471451876, "grad_norm": 0.013748077675700188, "learning_rate": 0.0008657253916263026, "loss": 0.0841, "num_input_tokens_seen": 83311712, "step": 38615 }, { "epoch": 6.300163132137031, "grad_norm": 0.02718990482389927, "learning_rate": 0.0008656768508095852, "loss": 0.0434, "num_input_tokens_seen": 83323232, "step": 38620 }, { "epoch": 6.300978792822186, "grad_norm": 0.021569428965449333, "learning_rate": 0.0008656283025819626, "loss": 0.0134, "num_input_tokens_seen": 83334368, "step": 38625 }, { "epoch": 6.301794453507341, "grad_norm": 0.04530632495880127, "learning_rate": 0.0008655797469444186, "loss": 0.0333, "num_input_tokens_seen": 83345280, "step": 38630 }, { "epoch": 6.302610114192496, "grad_norm": 0.07367482036352158, "learning_rate": 0.0008655311838979371, "loss": 0.0363, "num_input_tokens_seen": 83355648, "step": 38635 }, { "epoch": 6.303425774877651, "grad_norm": 0.2540670335292816, "learning_rate": 0.0008654826134435028, "loss": 0.0879, "num_input_tokens_seen": 83366688, "step": 38640 }, { "epoch": 6.304241435562806, "grad_norm": 0.40385952591896057, "learning_rate": 0.0008654340355820993, "loss": 0.125, "num_input_tokens_seen": 83376064, "step": 38645 }, { "epoch": 6.30505709624796, "grad_norm": 0.014413048513233662, "learning_rate": 0.0008653854503147117, "loss": 0.029, "num_input_tokens_seen": 83386272, "step": 38650 }, { "epoch": 6.305872756933116, "grad_norm": 0.5913793444633484, "learning_rate": 0.0008653368576423244, "loss": 0.1761, "num_input_tokens_seen": 83396928, "step": 38655 }, { "epoch": 6.306688417618271, "grad_norm": 0.0838056281208992, "learning_rate": 0.0008652882575659222, "loss": 0.0116, "num_input_tokens_seen": 83407552, "step": 38660 }, { "epoch": 6.307504078303426, "grad_norm": 0.23180916905403137, "learning_rate": 0.00086523965008649, "loss": 0.2432, "num_input_tokens_seen": 83418560, "step": 38665 }, { "epoch": 6.308319738988581, "grad_norm": 0.10713952779769897, "learning_rate": 0.0008651910352050129, "loss": 0.0651, "num_input_tokens_seen": 83430304, "step": 38670 }, { "epoch": 6.309135399673735, "grad_norm": 0.03942100703716278, "learning_rate": 0.0008651424129224764, "loss": 0.0417, "num_input_tokens_seen": 83441984, "step": 38675 }, { "epoch": 6.309951060358891, "grad_norm": 0.014397433027625084, "learning_rate": 0.0008650937832398656, "loss": 0.0669, "num_input_tokens_seen": 83453440, "step": 38680 }, { "epoch": 6.310766721044046, "grad_norm": 0.008224395103752613, "learning_rate": 0.0008650451461581661, "loss": 0.0556, "num_input_tokens_seen": 83465280, "step": 38685 }, { "epoch": 6.311582381729201, "grad_norm": 0.15745656192302704, "learning_rate": 0.0008649965016783636, "loss": 0.0732, "num_input_tokens_seen": 83476704, "step": 38690 }, { "epoch": 6.3123980424143555, "grad_norm": 0.09341222047805786, "learning_rate": 0.0008649478498014441, "loss": 0.0312, "num_input_tokens_seen": 83486336, "step": 38695 }, { "epoch": 6.31321370309951, "grad_norm": 0.2951034903526306, "learning_rate": 0.0008648991905283931, "loss": 0.1782, "num_input_tokens_seen": 83496448, "step": 38700 }, { "epoch": 6.314029363784665, "grad_norm": 0.005823482759296894, "learning_rate": 0.0008648505238601974, "loss": 0.0063, "num_input_tokens_seen": 83507456, "step": 38705 }, { "epoch": 6.314845024469821, "grad_norm": 0.5006877779960632, "learning_rate": 0.0008648018497978429, "loss": 0.2367, "num_input_tokens_seen": 83517696, "step": 38710 }, { "epoch": 6.315660685154976, "grad_norm": 0.10921309143304825, "learning_rate": 0.0008647531683423162, "loss": 0.0967, "num_input_tokens_seen": 83527168, "step": 38715 }, { "epoch": 6.3164763458401305, "grad_norm": 0.20777003467082977, "learning_rate": 0.0008647044794946038, "loss": 0.1346, "num_input_tokens_seen": 83538112, "step": 38720 }, { "epoch": 6.317292006525285, "grad_norm": 0.032438185065984726, "learning_rate": 0.0008646557832556925, "loss": 0.0496, "num_input_tokens_seen": 83549600, "step": 38725 }, { "epoch": 6.31810766721044, "grad_norm": 0.01789376139640808, "learning_rate": 0.000864607079626569, "loss": 0.162, "num_input_tokens_seen": 83561920, "step": 38730 }, { "epoch": 6.318923327895595, "grad_norm": 0.19681602716445923, "learning_rate": 0.0008645583686082206, "loss": 0.0593, "num_input_tokens_seen": 83571264, "step": 38735 }, { "epoch": 6.319738988580751, "grad_norm": 0.2806093692779541, "learning_rate": 0.0008645096502016346, "loss": 0.0962, "num_input_tokens_seen": 83581440, "step": 38740 }, { "epoch": 6.3205546492659055, "grad_norm": 0.38697999715805054, "learning_rate": 0.0008644609244077978, "loss": 0.1458, "num_input_tokens_seen": 83592448, "step": 38745 }, { "epoch": 6.32137030995106, "grad_norm": 0.329448401927948, "learning_rate": 0.0008644121912276981, "loss": 0.1007, "num_input_tokens_seen": 83603104, "step": 38750 }, { "epoch": 6.322185970636215, "grad_norm": 0.053843192756175995, "learning_rate": 0.000864363450662323, "loss": 0.072, "num_input_tokens_seen": 83613472, "step": 38755 }, { "epoch": 6.32300163132137, "grad_norm": 0.1324566751718521, "learning_rate": 0.0008643147027126604, "loss": 0.0336, "num_input_tokens_seen": 83625152, "step": 38760 }, { "epoch": 6.323817292006526, "grad_norm": 0.06423362344503403, "learning_rate": 0.0008642659473796984, "loss": 0.0714, "num_input_tokens_seen": 83636224, "step": 38765 }, { "epoch": 6.3246329526916805, "grad_norm": 0.30357682704925537, "learning_rate": 0.0008642171846644245, "loss": 0.0397, "num_input_tokens_seen": 83647328, "step": 38770 }, { "epoch": 6.325448613376835, "grad_norm": 0.11212248355150223, "learning_rate": 0.0008641684145678275, "loss": 0.0645, "num_input_tokens_seen": 83656736, "step": 38775 }, { "epoch": 6.32626427406199, "grad_norm": 0.023727523162961006, "learning_rate": 0.0008641196370908956, "loss": 0.0336, "num_input_tokens_seen": 83666464, "step": 38780 }, { "epoch": 6.327079934747145, "grad_norm": 0.07480015605688095, "learning_rate": 0.0008640708522346173, "loss": 0.0693, "num_input_tokens_seen": 83677120, "step": 38785 }, { "epoch": 6.327895595432301, "grad_norm": 0.01593008264899254, "learning_rate": 0.0008640220599999813, "loss": 0.0241, "num_input_tokens_seen": 83688448, "step": 38790 }, { "epoch": 6.328711256117455, "grad_norm": 0.4655987024307251, "learning_rate": 0.0008639732603879766, "loss": 0.0414, "num_input_tokens_seen": 83698400, "step": 38795 }, { "epoch": 6.32952691680261, "grad_norm": 0.5030547976493835, "learning_rate": 0.0008639244533995919, "loss": 0.2909, "num_input_tokens_seen": 83709440, "step": 38800 }, { "epoch": 6.330342577487765, "grad_norm": 0.06431054323911667, "learning_rate": 0.0008638756390358164, "loss": 0.0324, "num_input_tokens_seen": 83719968, "step": 38805 }, { "epoch": 6.33115823817292, "grad_norm": 0.2710186839103699, "learning_rate": 0.0008638268172976398, "loss": 0.0842, "num_input_tokens_seen": 83730944, "step": 38810 }, { "epoch": 6.331973898858075, "grad_norm": 0.04093080386519432, "learning_rate": 0.0008637779881860509, "loss": 0.0259, "num_input_tokens_seen": 83741760, "step": 38815 }, { "epoch": 6.33278955954323, "grad_norm": 0.0359831340610981, "learning_rate": 0.0008637291517020397, "loss": 0.1097, "num_input_tokens_seen": 83752736, "step": 38820 }, { "epoch": 6.333605220228385, "grad_norm": 0.015018374659121037, "learning_rate": 0.0008636803078465958, "loss": 0.0156, "num_input_tokens_seen": 83762304, "step": 38825 }, { "epoch": 6.33442088091354, "grad_norm": 0.03273087367415428, "learning_rate": 0.000863631456620709, "loss": 0.1036, "num_input_tokens_seen": 83773056, "step": 38830 }, { "epoch": 6.335236541598695, "grad_norm": 0.04437518119812012, "learning_rate": 0.0008635825980253696, "loss": 0.1867, "num_input_tokens_seen": 83782688, "step": 38835 }, { "epoch": 6.33605220228385, "grad_norm": 0.24158930778503418, "learning_rate": 0.0008635337320615675, "loss": 0.0705, "num_input_tokens_seen": 83792768, "step": 38840 }, { "epoch": 6.3368678629690045, "grad_norm": 0.5404110550880432, "learning_rate": 0.0008634848587302932, "loss": 0.2417, "num_input_tokens_seen": 83804192, "step": 38845 }, { "epoch": 6.33768352365416, "grad_norm": 0.11945383250713348, "learning_rate": 0.0008634359780325372, "loss": 0.0953, "num_input_tokens_seen": 83815328, "step": 38850 }, { "epoch": 6.338499184339315, "grad_norm": 0.17123204469680786, "learning_rate": 0.0008633870899692899, "loss": 0.1361, "num_input_tokens_seen": 83827872, "step": 38855 }, { "epoch": 6.33931484502447, "grad_norm": 0.03688203915953636, "learning_rate": 0.0008633381945415422, "loss": 0.1074, "num_input_tokens_seen": 83839456, "step": 38860 }, { "epoch": 6.340130505709625, "grad_norm": 0.14051002264022827, "learning_rate": 0.0008632892917502852, "loss": 0.0444, "num_input_tokens_seen": 83849920, "step": 38865 }, { "epoch": 6.3409461663947795, "grad_norm": 0.03777134418487549, "learning_rate": 0.0008632403815965099, "loss": 0.0269, "num_input_tokens_seen": 83861632, "step": 38870 }, { "epoch": 6.341761827079935, "grad_norm": 0.23245501518249512, "learning_rate": 0.0008631914640812073, "loss": 0.2025, "num_input_tokens_seen": 83873344, "step": 38875 }, { "epoch": 6.34257748776509, "grad_norm": 0.03078058362007141, "learning_rate": 0.000863142539205369, "loss": 0.0186, "num_input_tokens_seen": 83884576, "step": 38880 }, { "epoch": 6.343393148450245, "grad_norm": 0.49583736062049866, "learning_rate": 0.0008630936069699864, "loss": 0.1544, "num_input_tokens_seen": 83894912, "step": 38885 }, { "epoch": 6.3442088091354, "grad_norm": 0.4590871334075928, "learning_rate": 0.0008630446673760513, "loss": 0.143, "num_input_tokens_seen": 83905856, "step": 38890 }, { "epoch": 6.3450244698205545, "grad_norm": 0.17231425642967224, "learning_rate": 0.0008629957204245555, "loss": 0.1081, "num_input_tokens_seen": 83916576, "step": 38895 }, { "epoch": 6.345840130505709, "grad_norm": 0.40050846338272095, "learning_rate": 0.000862946766116491, "loss": 0.0553, "num_input_tokens_seen": 83927392, "step": 38900 }, { "epoch": 6.346655791190865, "grad_norm": 0.03248974308371544, "learning_rate": 0.0008628978044528496, "loss": 0.1059, "num_input_tokens_seen": 83938272, "step": 38905 }, { "epoch": 6.34747145187602, "grad_norm": 0.5102570652961731, "learning_rate": 0.000862848835434624, "loss": 0.2209, "num_input_tokens_seen": 83948512, "step": 38910 }, { "epoch": 6.348287112561175, "grad_norm": 0.07462694495916367, "learning_rate": 0.0008627998590628065, "loss": 0.0589, "num_input_tokens_seen": 83958880, "step": 38915 }, { "epoch": 6.349102773246329, "grad_norm": 0.025697840377688408, "learning_rate": 0.0008627508753383895, "loss": 0.0338, "num_input_tokens_seen": 83970176, "step": 38920 }, { "epoch": 6.349918433931484, "grad_norm": 0.05003189295530319, "learning_rate": 0.0008627018842623657, "loss": 0.0572, "num_input_tokens_seen": 83981088, "step": 38925 }, { "epoch": 6.350734094616639, "grad_norm": 0.04974864795804024, "learning_rate": 0.0008626528858357283, "loss": 0.0327, "num_input_tokens_seen": 83992768, "step": 38930 }, { "epoch": 6.351549755301795, "grad_norm": 0.23812498152256012, "learning_rate": 0.0008626038800594703, "loss": 0.1277, "num_input_tokens_seen": 84004704, "step": 38935 }, { "epoch": 6.35236541598695, "grad_norm": 0.03692536801099777, "learning_rate": 0.0008625548669345842, "loss": 0.0595, "num_input_tokens_seen": 84016384, "step": 38940 }, { "epoch": 6.353181076672104, "grad_norm": 0.012938307598233223, "learning_rate": 0.0008625058464620641, "loss": 0.1121, "num_input_tokens_seen": 84027360, "step": 38945 }, { "epoch": 6.353996737357259, "grad_norm": 0.031014638021588326, "learning_rate": 0.0008624568186429031, "loss": 0.0291, "num_input_tokens_seen": 84037568, "step": 38950 }, { "epoch": 6.354812398042414, "grad_norm": 0.05073463171720505, "learning_rate": 0.0008624077834780948, "loss": 0.0228, "num_input_tokens_seen": 84048128, "step": 38955 }, { "epoch": 6.35562805872757, "grad_norm": 0.430327445268631, "learning_rate": 0.000862358740968633, "loss": 0.1307, "num_input_tokens_seen": 84059712, "step": 38960 }, { "epoch": 6.356443719412725, "grad_norm": 0.012945786118507385, "learning_rate": 0.0008623096911155117, "loss": 0.0601, "num_input_tokens_seen": 84070720, "step": 38965 }, { "epoch": 6.357259380097879, "grad_norm": 0.03251238167285919, "learning_rate": 0.000862260633919725, "loss": 0.0111, "num_input_tokens_seen": 84081600, "step": 38970 }, { "epoch": 6.358075040783034, "grad_norm": 0.15969786047935486, "learning_rate": 0.0008622115693822668, "loss": 0.0322, "num_input_tokens_seen": 84091648, "step": 38975 }, { "epoch": 6.358890701468189, "grad_norm": 0.0553104467689991, "learning_rate": 0.0008621624975041316, "loss": 0.0196, "num_input_tokens_seen": 84103040, "step": 38980 }, { "epoch": 6.359706362153344, "grad_norm": 0.20528584718704224, "learning_rate": 0.0008621134182863142, "loss": 0.1321, "num_input_tokens_seen": 84112768, "step": 38985 }, { "epoch": 6.3605220228384995, "grad_norm": 0.013696528971195221, "learning_rate": 0.0008620643317298088, "loss": 0.0636, "num_input_tokens_seen": 84123872, "step": 38990 }, { "epoch": 6.361337683523654, "grad_norm": 0.08747237175703049, "learning_rate": 0.0008620152378356105, "loss": 0.111, "num_input_tokens_seen": 84134496, "step": 38995 }, { "epoch": 6.362153344208809, "grad_norm": 0.21477220952510834, "learning_rate": 0.0008619661366047141, "loss": 0.0621, "num_input_tokens_seen": 84143424, "step": 39000 }, { "epoch": 6.362969004893964, "grad_norm": 0.5396068692207336, "learning_rate": 0.0008619170280381148, "loss": 0.2592, "num_input_tokens_seen": 84154080, "step": 39005 }, { "epoch": 6.363784665579119, "grad_norm": 0.2671952545642853, "learning_rate": 0.0008618679121368078, "loss": 0.1085, "num_input_tokens_seen": 84164608, "step": 39010 }, { "epoch": 6.364600326264274, "grad_norm": 0.39645931124687195, "learning_rate": 0.0008618187889017886, "loss": 0.1769, "num_input_tokens_seen": 84175488, "step": 39015 }, { "epoch": 6.365415986949429, "grad_norm": 0.11818519979715347, "learning_rate": 0.0008617696583340524, "loss": 0.0553, "num_input_tokens_seen": 84185344, "step": 39020 }, { "epoch": 6.366231647634584, "grad_norm": 0.08012297004461288, "learning_rate": 0.0008617205204345952, "loss": 0.0333, "num_input_tokens_seen": 84196864, "step": 39025 }, { "epoch": 6.367047308319739, "grad_norm": 0.3775355815887451, "learning_rate": 0.000861671375204413, "loss": 0.0295, "num_input_tokens_seen": 84208256, "step": 39030 }, { "epoch": 6.367862969004894, "grad_norm": 0.14818525314331055, "learning_rate": 0.0008616222226445014, "loss": 0.1181, "num_input_tokens_seen": 84219552, "step": 39035 }, { "epoch": 6.368678629690049, "grad_norm": 1.031343936920166, "learning_rate": 0.0008615730627558566, "loss": 0.0773, "num_input_tokens_seen": 84228832, "step": 39040 }, { "epoch": 6.369494290375204, "grad_norm": 0.01182447001338005, "learning_rate": 0.0008615238955394753, "loss": 0.0982, "num_input_tokens_seen": 84239424, "step": 39045 }, { "epoch": 6.370309951060359, "grad_norm": 0.17530785501003265, "learning_rate": 0.0008614747209963534, "loss": 0.1524, "num_input_tokens_seen": 84249856, "step": 39050 }, { "epoch": 6.371125611745514, "grad_norm": 0.11523699015378952, "learning_rate": 0.0008614255391274877, "loss": 0.0846, "num_input_tokens_seen": 84260288, "step": 39055 }, { "epoch": 6.371941272430669, "grad_norm": 0.012937724590301514, "learning_rate": 0.0008613763499338751, "loss": 0.1368, "num_input_tokens_seen": 84272000, "step": 39060 }, { "epoch": 6.372756933115824, "grad_norm": 0.013417487032711506, "learning_rate": 0.0008613271534165121, "loss": 0.1449, "num_input_tokens_seen": 84283776, "step": 39065 }, { "epoch": 6.373572593800978, "grad_norm": 0.4308937191963196, "learning_rate": 0.0008612779495763963, "loss": 0.1517, "num_input_tokens_seen": 84293888, "step": 39070 }, { "epoch": 6.374388254486134, "grad_norm": 0.00686118146404624, "learning_rate": 0.0008612287384145243, "loss": 0.0952, "num_input_tokens_seen": 84303680, "step": 39075 }, { "epoch": 6.375203915171289, "grad_norm": 0.2582622468471527, "learning_rate": 0.0008611795199318937, "loss": 0.1234, "num_input_tokens_seen": 84313312, "step": 39080 }, { "epoch": 6.376019575856444, "grad_norm": 0.03790220990777016, "learning_rate": 0.000861130294129502, "loss": 0.113, "num_input_tokens_seen": 84323936, "step": 39085 }, { "epoch": 6.376835236541599, "grad_norm": 0.1956367790699005, "learning_rate": 0.0008610810610083466, "loss": 0.086, "num_input_tokens_seen": 84334400, "step": 39090 }, { "epoch": 6.377650897226753, "grad_norm": 0.25849488377571106, "learning_rate": 0.0008610318205694256, "loss": 0.1042, "num_input_tokens_seen": 84345824, "step": 39095 }, { "epoch": 6.378466557911908, "grad_norm": 0.03146745637059212, "learning_rate": 0.0008609825728137366, "loss": 0.0721, "num_input_tokens_seen": 84357120, "step": 39100 }, { "epoch": 6.379282218597064, "grad_norm": 0.17641200125217438, "learning_rate": 0.000860933317742278, "loss": 0.1612, "num_input_tokens_seen": 84368704, "step": 39105 }, { "epoch": 6.380097879282219, "grad_norm": 0.04577714949846268, "learning_rate": 0.0008608840553560478, "loss": 0.1009, "num_input_tokens_seen": 84379648, "step": 39110 }, { "epoch": 6.3809135399673735, "grad_norm": 0.26288214325904846, "learning_rate": 0.0008608347856560443, "loss": 0.0911, "num_input_tokens_seen": 84390176, "step": 39115 }, { "epoch": 6.381729200652528, "grad_norm": 0.0820598155260086, "learning_rate": 0.0008607855086432663, "loss": 0.0453, "num_input_tokens_seen": 84400320, "step": 39120 }, { "epoch": 6.382544861337683, "grad_norm": 0.10501711815595627, "learning_rate": 0.0008607362243187121, "loss": 0.0374, "num_input_tokens_seen": 84411264, "step": 39125 }, { "epoch": 6.383360522022839, "grad_norm": 0.11721144616603851, "learning_rate": 0.0008606869326833809, "loss": 0.0362, "num_input_tokens_seen": 84423072, "step": 39130 }, { "epoch": 6.384176182707994, "grad_norm": 0.3540067970752716, "learning_rate": 0.0008606376337382711, "loss": 0.0697, "num_input_tokens_seen": 84433376, "step": 39135 }, { "epoch": 6.3849918433931485, "grad_norm": 0.016656190156936646, "learning_rate": 0.0008605883274843824, "loss": 0.1581, "num_input_tokens_seen": 84444832, "step": 39140 }, { "epoch": 6.385807504078303, "grad_norm": 0.2347029745578766, "learning_rate": 0.0008605390139227137, "loss": 0.0328, "num_input_tokens_seen": 84454912, "step": 39145 }, { "epoch": 6.386623164763458, "grad_norm": 0.052898161113262177, "learning_rate": 0.0008604896930542645, "loss": 0.0829, "num_input_tokens_seen": 84465344, "step": 39150 }, { "epoch": 6.387438825448613, "grad_norm": 0.03472793474793434, "learning_rate": 0.0008604403648800346, "loss": 0.0561, "num_input_tokens_seen": 84474432, "step": 39155 }, { "epoch": 6.388254486133769, "grad_norm": 0.5999219417572021, "learning_rate": 0.0008603910294010231, "loss": 0.1212, "num_input_tokens_seen": 84484992, "step": 39160 }, { "epoch": 6.3890701468189235, "grad_norm": 0.30784058570861816, "learning_rate": 0.0008603416866182305, "loss": 0.1322, "num_input_tokens_seen": 84494368, "step": 39165 }, { "epoch": 6.389885807504078, "grad_norm": 0.48889613151550293, "learning_rate": 0.0008602923365326563, "loss": 0.0841, "num_input_tokens_seen": 84504704, "step": 39170 }, { "epoch": 6.390701468189233, "grad_norm": 0.11101984977722168, "learning_rate": 0.000860242979145301, "loss": 0.07, "num_input_tokens_seen": 84515744, "step": 39175 }, { "epoch": 6.391517128874388, "grad_norm": 0.013271992094814777, "learning_rate": 0.0008601936144571646, "loss": 0.1196, "num_input_tokens_seen": 84525920, "step": 39180 }, { "epoch": 6.392332789559543, "grad_norm": 0.27621543407440186, "learning_rate": 0.0008601442424692476, "loss": 0.0518, "num_input_tokens_seen": 84537216, "step": 39185 }, { "epoch": 6.3931484502446985, "grad_norm": 0.44594940543174744, "learning_rate": 0.0008600948631825508, "loss": 0.16, "num_input_tokens_seen": 84548960, "step": 39190 }, { "epoch": 6.393964110929853, "grad_norm": 0.1410282552242279, "learning_rate": 0.0008600454765980747, "loss": 0.0719, "num_input_tokens_seen": 84560416, "step": 39195 }, { "epoch": 6.394779771615008, "grad_norm": 0.42135584354400635, "learning_rate": 0.0008599960827168204, "loss": 0.1206, "num_input_tokens_seen": 84571072, "step": 39200 }, { "epoch": 6.395595432300163, "grad_norm": 0.16869409382343292, "learning_rate": 0.0008599466815397886, "loss": 0.0407, "num_input_tokens_seen": 84581568, "step": 39205 }, { "epoch": 6.396411092985318, "grad_norm": 0.0462958924472332, "learning_rate": 0.0008598972730679809, "loss": 0.1401, "num_input_tokens_seen": 84592096, "step": 39210 }, { "epoch": 6.397226753670473, "grad_norm": 0.019148118793964386, "learning_rate": 0.0008598478573023982, "loss": 0.1378, "num_input_tokens_seen": 84603232, "step": 39215 }, { "epoch": 6.398042414355628, "grad_norm": 0.3690910041332245, "learning_rate": 0.0008597984342440421, "loss": 0.1459, "num_input_tokens_seen": 84615008, "step": 39220 }, { "epoch": 6.398858075040783, "grad_norm": 0.3056583106517792, "learning_rate": 0.0008597490038939145, "loss": 0.0676, "num_input_tokens_seen": 84627200, "step": 39225 }, { "epoch": 6.399673735725938, "grad_norm": 0.11962539702653885, "learning_rate": 0.0008596995662530169, "loss": 0.0379, "num_input_tokens_seen": 84637856, "step": 39230 }, { "epoch": 6.400489396411093, "grad_norm": 0.2644346058368683, "learning_rate": 0.0008596501213223514, "loss": 0.0501, "num_input_tokens_seen": 84649248, "step": 39235 }, { "epoch": 6.401305057096248, "grad_norm": 0.21092559397220612, "learning_rate": 0.0008596006691029196, "loss": 0.0582, "num_input_tokens_seen": 84659104, "step": 39240 }, { "epoch": 6.402120717781403, "grad_norm": 0.4548172652721405, "learning_rate": 0.0008595512095957244, "loss": 0.1284, "num_input_tokens_seen": 84670432, "step": 39245 }, { "epoch": 6.402936378466558, "grad_norm": 0.2206522822380066, "learning_rate": 0.0008595017428017677, "loss": 0.1089, "num_input_tokens_seen": 84681408, "step": 39250 }, { "epoch": 6.403752039151713, "grad_norm": 0.8364840149879456, "learning_rate": 0.000859452268722052, "loss": 0.0751, "num_input_tokens_seen": 84691232, "step": 39255 }, { "epoch": 6.404567699836868, "grad_norm": 0.012925582006573677, "learning_rate": 0.0008594027873575803, "loss": 0.0757, "num_input_tokens_seen": 84702528, "step": 39260 }, { "epoch": 6.4053833605220225, "grad_norm": 0.032570790499448776, "learning_rate": 0.0008593532987093551, "loss": 0.0301, "num_input_tokens_seen": 84712192, "step": 39265 }, { "epoch": 6.406199021207178, "grad_norm": 0.011080892756581306, "learning_rate": 0.0008593038027783793, "loss": 0.019, "num_input_tokens_seen": 84722720, "step": 39270 }, { "epoch": 6.407014681892333, "grad_norm": 0.6286371946334839, "learning_rate": 0.0008592542995656563, "loss": 0.0913, "num_input_tokens_seen": 84731776, "step": 39275 }, { "epoch": 6.407830342577488, "grad_norm": 0.0572502501308918, "learning_rate": 0.000859204789072189, "loss": 0.0486, "num_input_tokens_seen": 84743008, "step": 39280 }, { "epoch": 6.408646003262643, "grad_norm": 0.009955662302672863, "learning_rate": 0.0008591552712989812, "loss": 0.257, "num_input_tokens_seen": 84753408, "step": 39285 }, { "epoch": 6.4094616639477975, "grad_norm": 0.3026174008846283, "learning_rate": 0.0008591057462470359, "loss": 0.0355, "num_input_tokens_seen": 84764160, "step": 39290 }, { "epoch": 6.410277324632952, "grad_norm": 0.060678862035274506, "learning_rate": 0.0008590562139173573, "loss": 0.0856, "num_input_tokens_seen": 84774016, "step": 39295 }, { "epoch": 6.411092985318108, "grad_norm": 0.03247638791799545, "learning_rate": 0.000859006674310949, "loss": 0.0574, "num_input_tokens_seen": 84785440, "step": 39300 }, { "epoch": 6.411908646003263, "grad_norm": 0.4034556746482849, "learning_rate": 0.000858957127428815, "loss": 0.0518, "num_input_tokens_seen": 84797728, "step": 39305 }, { "epoch": 6.412724306688418, "grad_norm": 0.07276295125484467, "learning_rate": 0.0008589075732719594, "loss": 0.0365, "num_input_tokens_seen": 84808000, "step": 39310 }, { "epoch": 6.4135399673735725, "grad_norm": 0.09909258037805557, "learning_rate": 0.0008588580118413867, "loss": 0.1781, "num_input_tokens_seen": 84819520, "step": 39315 }, { "epoch": 6.414355628058727, "grad_norm": 0.3723595142364502, "learning_rate": 0.0008588084431381009, "loss": 0.1781, "num_input_tokens_seen": 84829984, "step": 39320 }, { "epoch": 6.415171288743883, "grad_norm": 0.33445391058921814, "learning_rate": 0.000858758867163107, "loss": 0.1307, "num_input_tokens_seen": 84841184, "step": 39325 }, { "epoch": 6.415986949429038, "grad_norm": 0.006616996601223946, "learning_rate": 0.0008587092839174096, "loss": 0.0462, "num_input_tokens_seen": 84850816, "step": 39330 }, { "epoch": 6.416802610114193, "grad_norm": 0.3705193102359772, "learning_rate": 0.0008586596934020132, "loss": 0.0959, "num_input_tokens_seen": 84862144, "step": 39335 }, { "epoch": 6.417618270799347, "grad_norm": 0.008181090466678143, "learning_rate": 0.0008586100956179234, "loss": 0.0224, "num_input_tokens_seen": 84873792, "step": 39340 }, { "epoch": 6.418433931484502, "grad_norm": 0.021409897133708, "learning_rate": 0.000858560490566145, "loss": 0.0418, "num_input_tokens_seen": 84884448, "step": 39345 }, { "epoch": 6.419249592169657, "grad_norm": 0.09967610239982605, "learning_rate": 0.0008585108782476834, "loss": 0.109, "num_input_tokens_seen": 84895552, "step": 39350 }, { "epoch": 6.420065252854813, "grad_norm": 0.2945992350578308, "learning_rate": 0.000858461258663544, "loss": 0.2387, "num_input_tokens_seen": 84907008, "step": 39355 }, { "epoch": 6.420880913539968, "grad_norm": 0.5458860993385315, "learning_rate": 0.0008584116318147324, "loss": 0.0809, "num_input_tokens_seen": 84916704, "step": 39360 }, { "epoch": 6.421696574225122, "grad_norm": 0.03361166641116142, "learning_rate": 0.0008583619977022546, "loss": 0.0102, "num_input_tokens_seen": 84928064, "step": 39365 }, { "epoch": 6.422512234910277, "grad_norm": 0.1437065601348877, "learning_rate": 0.000858312356327116, "loss": 0.0967, "num_input_tokens_seen": 84939456, "step": 39370 }, { "epoch": 6.423327895595432, "grad_norm": 0.23535189032554626, "learning_rate": 0.0008582627076903232, "loss": 0.0648, "num_input_tokens_seen": 84948992, "step": 39375 }, { "epoch": 6.424143556280587, "grad_norm": 0.16909369826316833, "learning_rate": 0.0008582130517928821, "loss": 0.0617, "num_input_tokens_seen": 84959968, "step": 39380 }, { "epoch": 6.424959216965743, "grad_norm": 0.07231950759887695, "learning_rate": 0.000858163388635799, "loss": 0.0156, "num_input_tokens_seen": 84970944, "step": 39385 }, { "epoch": 6.425774877650897, "grad_norm": 0.061435140669345856, "learning_rate": 0.0008581137182200806, "loss": 0.0499, "num_input_tokens_seen": 84982496, "step": 39390 }, { "epoch": 6.426590538336052, "grad_norm": 0.0060627213679254055, "learning_rate": 0.0008580640405467333, "loss": 0.0774, "num_input_tokens_seen": 84993536, "step": 39395 }, { "epoch": 6.427406199021207, "grad_norm": 0.0698898509144783, "learning_rate": 0.0008580143556167638, "loss": 0.1262, "num_input_tokens_seen": 85003520, "step": 39400 }, { "epoch": 6.428221859706362, "grad_norm": 0.019863862544298172, "learning_rate": 0.0008579646634311795, "loss": 0.1402, "num_input_tokens_seen": 85012672, "step": 39405 }, { "epoch": 6.4290375203915175, "grad_norm": 0.01478732842952013, "learning_rate": 0.0008579149639909872, "loss": 0.0458, "num_input_tokens_seen": 85023552, "step": 39410 }, { "epoch": 6.429853181076672, "grad_norm": 0.06148804351687431, "learning_rate": 0.0008578652572971939, "loss": 0.0371, "num_input_tokens_seen": 85035360, "step": 39415 }, { "epoch": 6.430668841761827, "grad_norm": 0.0714406743645668, "learning_rate": 0.0008578155433508073, "loss": 0.0375, "num_input_tokens_seen": 85044480, "step": 39420 }, { "epoch": 6.431484502446982, "grad_norm": 0.05120420455932617, "learning_rate": 0.0008577658221528349, "loss": 0.0385, "num_input_tokens_seen": 85055520, "step": 39425 }, { "epoch": 6.432300163132137, "grad_norm": 0.004446776583790779, "learning_rate": 0.000857716093704284, "loss": 0.0591, "num_input_tokens_seen": 85065824, "step": 39430 }, { "epoch": 6.433115823817292, "grad_norm": 0.01311581488698721, "learning_rate": 0.0008576663580061628, "loss": 0.0349, "num_input_tokens_seen": 85075232, "step": 39435 }, { "epoch": 6.433931484502447, "grad_norm": 0.22832755744457245, "learning_rate": 0.0008576166150594792, "loss": 0.165, "num_input_tokens_seen": 85084800, "step": 39440 }, { "epoch": 6.434747145187602, "grad_norm": 0.2552063465118408, "learning_rate": 0.0008575668648652411, "loss": 0.0727, "num_input_tokens_seen": 85094880, "step": 39445 }, { "epoch": 6.435562805872757, "grad_norm": 0.011848578229546547, "learning_rate": 0.0008575171074244568, "loss": 0.0133, "num_input_tokens_seen": 85105024, "step": 39450 }, { "epoch": 6.436378466557912, "grad_norm": 0.232289656996727, "learning_rate": 0.000857467342738135, "loss": 0.0667, "num_input_tokens_seen": 85114016, "step": 39455 }, { "epoch": 6.437194127243067, "grad_norm": 0.3870263397693634, "learning_rate": 0.000857417570807284, "loss": 0.0868, "num_input_tokens_seen": 85125312, "step": 39460 }, { "epoch": 6.438009787928221, "grad_norm": 0.00902845524251461, "learning_rate": 0.0008573677916329124, "loss": 0.0622, "num_input_tokens_seen": 85136032, "step": 39465 }, { "epoch": 6.438825448613377, "grad_norm": 0.10346759110689163, "learning_rate": 0.0008573180052160291, "loss": 0.0652, "num_input_tokens_seen": 85146976, "step": 39470 }, { "epoch": 6.439641109298532, "grad_norm": 0.01170378178358078, "learning_rate": 0.0008572682115576433, "loss": 0.0712, "num_input_tokens_seen": 85159072, "step": 39475 }, { "epoch": 6.440456769983687, "grad_norm": 0.1264829933643341, "learning_rate": 0.0008572184106587638, "loss": 0.0275, "num_input_tokens_seen": 85171232, "step": 39480 }, { "epoch": 6.441272430668842, "grad_norm": 0.020200500264763832, "learning_rate": 0.0008571686025204002, "loss": 0.0336, "num_input_tokens_seen": 85181120, "step": 39485 }, { "epoch": 6.442088091353996, "grad_norm": 0.03258857876062393, "learning_rate": 0.0008571187871435616, "loss": 0.1062, "num_input_tokens_seen": 85191680, "step": 39490 }, { "epoch": 6.442903752039152, "grad_norm": 0.07585708051919937, "learning_rate": 0.0008570689645292579, "loss": 0.1603, "num_input_tokens_seen": 85202944, "step": 39495 }, { "epoch": 6.443719412724307, "grad_norm": 0.5404019951820374, "learning_rate": 0.0008570191346784986, "loss": 0.0772, "num_input_tokens_seen": 85213344, "step": 39500 }, { "epoch": 6.444535073409462, "grad_norm": 0.09979365766048431, "learning_rate": 0.0008569692975922935, "loss": 0.0923, "num_input_tokens_seen": 85224352, "step": 39505 }, { "epoch": 6.445350734094617, "grad_norm": 0.09445372968912125, "learning_rate": 0.0008569194532716529, "loss": 0.0822, "num_input_tokens_seen": 85233920, "step": 39510 }, { "epoch": 6.446166394779771, "grad_norm": 0.37658143043518066, "learning_rate": 0.0008568696017175868, "loss": 0.0396, "num_input_tokens_seen": 85245056, "step": 39515 }, { "epoch": 6.446982055464926, "grad_norm": 0.41861656308174133, "learning_rate": 0.0008568197429311054, "loss": 0.1113, "num_input_tokens_seen": 85256384, "step": 39520 }, { "epoch": 6.447797716150082, "grad_norm": 0.008790227584540844, "learning_rate": 0.0008567698769132193, "loss": 0.0187, "num_input_tokens_seen": 85266208, "step": 39525 }, { "epoch": 6.448613376835237, "grad_norm": 0.007207643240690231, "learning_rate": 0.0008567200036649391, "loss": 0.0331, "num_input_tokens_seen": 85276832, "step": 39530 }, { "epoch": 6.4494290375203915, "grad_norm": 0.4155519902706146, "learning_rate": 0.0008566701231872753, "loss": 0.0574, "num_input_tokens_seen": 85286528, "step": 39535 }, { "epoch": 6.450244698205546, "grad_norm": 0.005324701778590679, "learning_rate": 0.0008566202354812392, "loss": 0.218, "num_input_tokens_seen": 85298272, "step": 39540 }, { "epoch": 6.451060358890701, "grad_norm": 0.014250691048800945, "learning_rate": 0.0008565703405478415, "loss": 0.0813, "num_input_tokens_seen": 85308896, "step": 39545 }, { "epoch": 6.451876019575856, "grad_norm": 0.06505730748176575, "learning_rate": 0.0008565204383880937, "loss": 0.1595, "num_input_tokens_seen": 85318272, "step": 39550 }, { "epoch": 6.452691680261012, "grad_norm": 0.12621276080608368, "learning_rate": 0.0008564705290030068, "loss": 0.0367, "num_input_tokens_seen": 85329888, "step": 39555 }, { "epoch": 6.4535073409461665, "grad_norm": 0.6045138239860535, "learning_rate": 0.0008564206123935924, "loss": 0.2, "num_input_tokens_seen": 85339648, "step": 39560 }, { "epoch": 6.454323001631321, "grad_norm": 0.006620241794735193, "learning_rate": 0.0008563706885608622, "loss": 0.0151, "num_input_tokens_seen": 85351040, "step": 39565 }, { "epoch": 6.455138662316476, "grad_norm": 0.6453996300697327, "learning_rate": 0.0008563207575058279, "loss": 0.2335, "num_input_tokens_seen": 85361120, "step": 39570 }, { "epoch": 6.455954323001631, "grad_norm": 0.11755736917257309, "learning_rate": 0.0008562708192295012, "loss": 0.0733, "num_input_tokens_seen": 85372256, "step": 39575 }, { "epoch": 6.456769983686787, "grad_norm": 0.47685131430625916, "learning_rate": 0.0008562208737328947, "loss": 0.1609, "num_input_tokens_seen": 85381664, "step": 39580 }, { "epoch": 6.4575856443719415, "grad_norm": 0.012666283175349236, "learning_rate": 0.0008561709210170201, "loss": 0.0769, "num_input_tokens_seen": 85392832, "step": 39585 }, { "epoch": 6.458401305057096, "grad_norm": 0.15514227747917175, "learning_rate": 0.00085612096108289, "loss": 0.313, "num_input_tokens_seen": 85403648, "step": 39590 }, { "epoch": 6.459216965742251, "grad_norm": 0.1315620243549347, "learning_rate": 0.0008560709939315169, "loss": 0.0844, "num_input_tokens_seen": 85413408, "step": 39595 }, { "epoch": 6.460032626427406, "grad_norm": 0.36999809741973877, "learning_rate": 0.0008560210195639133, "loss": 0.0986, "num_input_tokens_seen": 85423680, "step": 39600 }, { "epoch": 6.460848287112561, "grad_norm": 0.12215088307857513, "learning_rate": 0.0008559710379810922, "loss": 0.0901, "num_input_tokens_seen": 85433600, "step": 39605 }, { "epoch": 6.4616639477977165, "grad_norm": 0.023609763011336327, "learning_rate": 0.0008559210491840664, "loss": 0.2226, "num_input_tokens_seen": 85444896, "step": 39610 }, { "epoch": 6.462479608482871, "grad_norm": 0.22625789046287537, "learning_rate": 0.0008558710531738489, "loss": 0.1351, "num_input_tokens_seen": 85454528, "step": 39615 }, { "epoch": 6.463295269168026, "grad_norm": 0.16243544220924377, "learning_rate": 0.0008558210499514532, "loss": 0.1958, "num_input_tokens_seen": 85465920, "step": 39620 }, { "epoch": 6.464110929853181, "grad_norm": 0.03166147321462631, "learning_rate": 0.0008557710395178926, "loss": 0.1011, "num_input_tokens_seen": 85477536, "step": 39625 }, { "epoch": 6.464926590538336, "grad_norm": 0.021285058930516243, "learning_rate": 0.0008557210218741805, "loss": 0.0413, "num_input_tokens_seen": 85489824, "step": 39630 }, { "epoch": 6.465742251223491, "grad_norm": 0.3112122714519501, "learning_rate": 0.0008556709970213305, "loss": 0.1496, "num_input_tokens_seen": 85500480, "step": 39635 }, { "epoch": 6.466557911908646, "grad_norm": 0.06069442257285118, "learning_rate": 0.0008556209649603566, "loss": 0.1098, "num_input_tokens_seen": 85510144, "step": 39640 }, { "epoch": 6.467373572593801, "grad_norm": 0.08355006575584412, "learning_rate": 0.0008555709256922728, "loss": 0.1476, "num_input_tokens_seen": 85519712, "step": 39645 }, { "epoch": 6.468189233278956, "grad_norm": 0.12182510644197464, "learning_rate": 0.0008555208792180931, "loss": 0.2313, "num_input_tokens_seen": 85530816, "step": 39650 }, { "epoch": 6.469004893964111, "grad_norm": 0.16057415306568146, "learning_rate": 0.0008554708255388317, "loss": 0.0719, "num_input_tokens_seen": 85541152, "step": 39655 }, { "epoch": 6.4698205546492655, "grad_norm": 0.018787473440170288, "learning_rate": 0.0008554207646555032, "loss": 0.0391, "num_input_tokens_seen": 85551424, "step": 39660 }, { "epoch": 6.470636215334421, "grad_norm": 0.03167731687426567, "learning_rate": 0.0008553706965691218, "loss": 0.0588, "num_input_tokens_seen": 85562208, "step": 39665 }, { "epoch": 6.471451876019576, "grad_norm": 0.026085861027240753, "learning_rate": 0.0008553206212807026, "loss": 0.0577, "num_input_tokens_seen": 85572832, "step": 39670 }, { "epoch": 6.472267536704731, "grad_norm": 0.04006026312708855, "learning_rate": 0.0008552705387912602, "loss": 0.0505, "num_input_tokens_seen": 85583232, "step": 39675 }, { "epoch": 6.473083197389886, "grad_norm": 0.021557439118623734, "learning_rate": 0.0008552204491018096, "loss": 0.1147, "num_input_tokens_seen": 85593056, "step": 39680 }, { "epoch": 6.4738988580750405, "grad_norm": 0.2893325984477997, "learning_rate": 0.000855170352213366, "loss": 0.1673, "num_input_tokens_seen": 85603456, "step": 39685 }, { "epoch": 6.474714518760196, "grad_norm": 0.1294337958097458, "learning_rate": 0.0008551202481269446, "loss": 0.0414, "num_input_tokens_seen": 85613568, "step": 39690 }, { "epoch": 6.475530179445351, "grad_norm": 0.026840096339583397, "learning_rate": 0.000855070136843561, "loss": 0.0382, "num_input_tokens_seen": 85623744, "step": 39695 }, { "epoch": 6.476345840130506, "grad_norm": 0.286678284406662, "learning_rate": 0.0008550200183642304, "loss": 0.2243, "num_input_tokens_seen": 85634176, "step": 39700 }, { "epoch": 6.477161500815661, "grad_norm": 0.07949990779161453, "learning_rate": 0.000854969892689969, "loss": 0.1249, "num_input_tokens_seen": 85644672, "step": 39705 }, { "epoch": 6.4779771615008155, "grad_norm": 0.14659525454044342, "learning_rate": 0.0008549197598217923, "loss": 0.0846, "num_input_tokens_seen": 85655808, "step": 39710 }, { "epoch": 6.47879282218597, "grad_norm": 0.019031165167689323, "learning_rate": 0.0008548696197607165, "loss": 0.0911, "num_input_tokens_seen": 85667488, "step": 39715 }, { "epoch": 6.479608482871126, "grad_norm": 0.006379369180649519, "learning_rate": 0.0008548194725077576, "loss": 0.0648, "num_input_tokens_seen": 85679168, "step": 39720 }, { "epoch": 6.480424143556281, "grad_norm": 0.22886744141578674, "learning_rate": 0.000854769318063932, "loss": 0.103, "num_input_tokens_seen": 85688992, "step": 39725 }, { "epoch": 6.481239804241436, "grad_norm": 0.02675086073577404, "learning_rate": 0.0008547191564302561, "loss": 0.0853, "num_input_tokens_seen": 85699776, "step": 39730 }, { "epoch": 6.4820554649265905, "grad_norm": 0.13034316897392273, "learning_rate": 0.0008546689876077464, "loss": 0.1189, "num_input_tokens_seen": 85711136, "step": 39735 }, { "epoch": 6.482871125611745, "grad_norm": 0.05041171610355377, "learning_rate": 0.0008546188115974198, "loss": 0.1698, "num_input_tokens_seen": 85721568, "step": 39740 }, { "epoch": 6.4836867862969, "grad_norm": 0.3870621621608734, "learning_rate": 0.0008545686284002932, "loss": 0.1274, "num_input_tokens_seen": 85732608, "step": 39745 }, { "epoch": 6.484502446982056, "grad_norm": 0.16718515753746033, "learning_rate": 0.0008545184380173835, "loss": 0.0865, "num_input_tokens_seen": 85741984, "step": 39750 }, { "epoch": 6.485318107667211, "grad_norm": 0.22091160714626312, "learning_rate": 0.0008544682404497079, "loss": 0.043, "num_input_tokens_seen": 85753632, "step": 39755 }, { "epoch": 6.486133768352365, "grad_norm": 0.33305469155311584, "learning_rate": 0.0008544180356982838, "loss": 0.1023, "num_input_tokens_seen": 85763488, "step": 39760 }, { "epoch": 6.48694942903752, "grad_norm": 0.1044543981552124, "learning_rate": 0.0008543678237641284, "loss": 0.1745, "num_input_tokens_seen": 85774624, "step": 39765 }, { "epoch": 6.487765089722675, "grad_norm": 0.11706213653087616, "learning_rate": 0.0008543176046482597, "loss": 0.0333, "num_input_tokens_seen": 85785440, "step": 39770 }, { "epoch": 6.488580750407831, "grad_norm": 0.45858094096183777, "learning_rate": 0.0008542673783516952, "loss": 0.2093, "num_input_tokens_seen": 85797280, "step": 39775 }, { "epoch": 6.489396411092986, "grad_norm": 0.03951876610517502, "learning_rate": 0.0008542171448754528, "loss": 0.1535, "num_input_tokens_seen": 85808928, "step": 39780 }, { "epoch": 6.49021207177814, "grad_norm": 0.14754879474639893, "learning_rate": 0.0008541669042205507, "loss": 0.076, "num_input_tokens_seen": 85818176, "step": 39785 }, { "epoch": 6.491027732463295, "grad_norm": 0.19696387648582458, "learning_rate": 0.0008541166563880069, "loss": 0.1179, "num_input_tokens_seen": 85828384, "step": 39790 }, { "epoch": 6.49184339314845, "grad_norm": 0.46504220366477966, "learning_rate": 0.00085406640137884, "loss": 0.0609, "num_input_tokens_seen": 85838752, "step": 39795 }, { "epoch": 6.492659053833605, "grad_norm": 0.4072239398956299, "learning_rate": 0.0008540161391940681, "loss": 0.1374, "num_input_tokens_seen": 85850336, "step": 39800 }, { "epoch": 6.493474714518761, "grad_norm": 0.075205497443676, "learning_rate": 0.0008539658698347102, "loss": 0.0711, "num_input_tokens_seen": 85862272, "step": 39805 }, { "epoch": 6.494290375203915, "grad_norm": 0.2597215175628662, "learning_rate": 0.0008539155933017848, "loss": 0.0748, "num_input_tokens_seen": 85872224, "step": 39810 }, { "epoch": 6.49510603588907, "grad_norm": 0.03566305339336395, "learning_rate": 0.0008538653095963109, "loss": 0.1159, "num_input_tokens_seen": 85883552, "step": 39815 }, { "epoch": 6.495921696574225, "grad_norm": 0.19262714684009552, "learning_rate": 0.0008538150187193076, "loss": 0.053, "num_input_tokens_seen": 85894848, "step": 39820 }, { "epoch": 6.49673735725938, "grad_norm": 0.11648371070623398, "learning_rate": 0.0008537647206717942, "loss": 0.0693, "num_input_tokens_seen": 85906272, "step": 39825 }, { "epoch": 6.497553017944535, "grad_norm": 0.2878781855106354, "learning_rate": 0.00085371441545479, "loss": 0.0672, "num_input_tokens_seen": 85914976, "step": 39830 }, { "epoch": 6.49836867862969, "grad_norm": 0.37304550409317017, "learning_rate": 0.0008536641030693143, "loss": 0.1354, "num_input_tokens_seen": 85926240, "step": 39835 }, { "epoch": 6.499184339314845, "grad_norm": 0.019629567861557007, "learning_rate": 0.000853613783516387, "loss": 0.0167, "num_input_tokens_seen": 85938752, "step": 39840 }, { "epoch": 6.5, "grad_norm": 0.007871031761169434, "learning_rate": 0.0008535634567970277, "loss": 0.0226, "num_input_tokens_seen": 85949344, "step": 39845 }, { "epoch": 6.500815660685155, "grad_norm": 0.10063948482275009, "learning_rate": 0.0008535131229122565, "loss": 0.04, "num_input_tokens_seen": 85960320, "step": 39850 }, { "epoch": 6.50163132137031, "grad_norm": 0.4401071071624756, "learning_rate": 0.0008534627818630933, "loss": 0.2351, "num_input_tokens_seen": 85970272, "step": 39855 }, { "epoch": 6.502446982055465, "grad_norm": 0.5636202096939087, "learning_rate": 0.0008534124336505585, "loss": 0.127, "num_input_tokens_seen": 85980608, "step": 39860 }, { "epoch": 6.50326264274062, "grad_norm": 0.003848550608381629, "learning_rate": 0.0008533620782756724, "loss": 0.0384, "num_input_tokens_seen": 85991936, "step": 39865 }, { "epoch": 6.504078303425775, "grad_norm": 0.030977876856923103, "learning_rate": 0.0008533117157394556, "loss": 0.0721, "num_input_tokens_seen": 86003744, "step": 39870 }, { "epoch": 6.50489396411093, "grad_norm": 0.15690048038959503, "learning_rate": 0.0008532613460429285, "loss": 0.0433, "num_input_tokens_seen": 86014528, "step": 39875 }, { "epoch": 6.505709624796085, "grad_norm": 0.06793689727783203, "learning_rate": 0.0008532109691871122, "loss": 0.0812, "num_input_tokens_seen": 86026528, "step": 39880 }, { "epoch": 6.506525285481239, "grad_norm": 0.016750985756516457, "learning_rate": 0.0008531605851730275, "loss": 0.0248, "num_input_tokens_seen": 86037824, "step": 39885 }, { "epoch": 6.507340946166395, "grad_norm": 0.34783661365509033, "learning_rate": 0.0008531101940016954, "loss": 0.0656, "num_input_tokens_seen": 86049120, "step": 39890 }, { "epoch": 6.50815660685155, "grad_norm": 0.3815332353115082, "learning_rate": 0.0008530597956741374, "loss": 0.2015, "num_input_tokens_seen": 86059872, "step": 39895 }, { "epoch": 6.508972267536705, "grad_norm": 0.18855412304401398, "learning_rate": 0.0008530093901913748, "loss": 0.0731, "num_input_tokens_seen": 86071648, "step": 39900 }, { "epoch": 6.50978792822186, "grad_norm": 0.2578810155391693, "learning_rate": 0.000852958977554429, "loss": 0.0934, "num_input_tokens_seen": 86082240, "step": 39905 }, { "epoch": 6.510603588907014, "grad_norm": 0.024510657414793968, "learning_rate": 0.0008529085577643217, "loss": 0.0136, "num_input_tokens_seen": 86091776, "step": 39910 }, { "epoch": 6.511419249592169, "grad_norm": 0.18999266624450684, "learning_rate": 0.0008528581308220748, "loss": 0.0381, "num_input_tokens_seen": 86102336, "step": 39915 }, { "epoch": 6.512234910277325, "grad_norm": 0.021141933277249336, "learning_rate": 0.0008528076967287103, "loss": 0.0165, "num_input_tokens_seen": 86113824, "step": 39920 }, { "epoch": 6.51305057096248, "grad_norm": 0.2783777415752411, "learning_rate": 0.0008527572554852502, "loss": 0.1418, "num_input_tokens_seen": 86124768, "step": 39925 }, { "epoch": 6.513866231647635, "grad_norm": 0.45041847229003906, "learning_rate": 0.0008527068070927169, "loss": 0.168, "num_input_tokens_seen": 86135552, "step": 39930 }, { "epoch": 6.514681892332789, "grad_norm": 0.015398653224110603, "learning_rate": 0.0008526563515521327, "loss": 0.0792, "num_input_tokens_seen": 86145792, "step": 39935 }, { "epoch": 6.515497553017944, "grad_norm": 0.03405394405126572, "learning_rate": 0.0008526058888645202, "loss": 0.0955, "num_input_tokens_seen": 86156448, "step": 39940 }, { "epoch": 6.5163132137031, "grad_norm": 0.014512408524751663, "learning_rate": 0.000852555419030902, "loss": 0.0269, "num_input_tokens_seen": 86167968, "step": 39945 }, { "epoch": 6.517128874388255, "grad_norm": 0.06708409637212753, "learning_rate": 0.000852504942052301, "loss": 0.0226, "num_input_tokens_seen": 86178464, "step": 39950 }, { "epoch": 6.5179445350734095, "grad_norm": 0.2663254737854004, "learning_rate": 0.0008524544579297402, "loss": 0.0912, "num_input_tokens_seen": 86189728, "step": 39955 }, { "epoch": 6.518760195758564, "grad_norm": 0.01599900797009468, "learning_rate": 0.0008524039666642424, "loss": 0.0658, "num_input_tokens_seen": 86201280, "step": 39960 }, { "epoch": 6.519575856443719, "grad_norm": 0.21366745233535767, "learning_rate": 0.0008523534682568315, "loss": 0.0491, "num_input_tokens_seen": 86212800, "step": 39965 }, { "epoch": 6.520391517128875, "grad_norm": 0.03452513739466667, "learning_rate": 0.0008523029627085306, "loss": 0.061, "num_input_tokens_seen": 86224224, "step": 39970 }, { "epoch": 6.52120717781403, "grad_norm": 0.21651406586170197, "learning_rate": 0.000852252450020363, "loss": 0.1226, "num_input_tokens_seen": 86234432, "step": 39975 }, { "epoch": 6.5220228384991845, "grad_norm": 0.03710886090993881, "learning_rate": 0.0008522019301933528, "loss": 0.1263, "num_input_tokens_seen": 86245024, "step": 39980 }, { "epoch": 6.522838499184339, "grad_norm": 0.5214162468910217, "learning_rate": 0.0008521514032285236, "loss": 0.2043, "num_input_tokens_seen": 86256736, "step": 39985 }, { "epoch": 6.523654159869494, "grad_norm": 0.03660536929965019, "learning_rate": 0.0008521008691268994, "loss": 0.1257, "num_input_tokens_seen": 86268320, "step": 39990 }, { "epoch": 6.524469820554649, "grad_norm": 0.1708569973707199, "learning_rate": 0.0008520503278895045, "loss": 0.0542, "num_input_tokens_seen": 86279680, "step": 39995 }, { "epoch": 6.525285481239804, "grad_norm": 0.018384849652647972, "learning_rate": 0.0008519997795173632, "loss": 0.1252, "num_input_tokens_seen": 86290528, "step": 40000 }, { "epoch": 6.5261011419249595, "grad_norm": 0.390406996011734, "learning_rate": 0.0008519492240114996, "loss": 0.0902, "num_input_tokens_seen": 86301024, "step": 40005 }, { "epoch": 6.526916802610114, "grad_norm": 0.08911449462175369, "learning_rate": 0.0008518986613729387, "loss": 0.0306, "num_input_tokens_seen": 86311776, "step": 40010 }, { "epoch": 6.527732463295269, "grad_norm": 0.20538145303726196, "learning_rate": 0.0008518480916027049, "loss": 0.1207, "num_input_tokens_seen": 86323072, "step": 40015 }, { "epoch": 6.528548123980424, "grad_norm": 0.02925197221338749, "learning_rate": 0.0008517975147018233, "loss": 0.1667, "num_input_tokens_seen": 86334880, "step": 40020 }, { "epoch": 6.529363784665579, "grad_norm": 0.011222249828279018, "learning_rate": 0.0008517469306713187, "loss": 0.0231, "num_input_tokens_seen": 86345408, "step": 40025 }, { "epoch": 6.5301794453507345, "grad_norm": 0.011386829428374767, "learning_rate": 0.0008516963395122163, "loss": 0.0499, "num_input_tokens_seen": 86356448, "step": 40030 }, { "epoch": 6.530995106035889, "grad_norm": 0.29350894689559937, "learning_rate": 0.0008516457412255414, "loss": 0.1197, "num_input_tokens_seen": 86366976, "step": 40035 }, { "epoch": 6.531810766721044, "grad_norm": 0.36121800541877747, "learning_rate": 0.0008515951358123195, "loss": 0.2583, "num_input_tokens_seen": 86377824, "step": 40040 }, { "epoch": 6.532626427406199, "grad_norm": 0.028843572363257408, "learning_rate": 0.0008515445232735761, "loss": 0.056, "num_input_tokens_seen": 86387232, "step": 40045 }, { "epoch": 6.533442088091354, "grad_norm": 0.025559019297361374, "learning_rate": 0.0008514939036103371, "loss": 0.0608, "num_input_tokens_seen": 86399744, "step": 40050 }, { "epoch": 6.5342577487765094, "grad_norm": 0.13452427089214325, "learning_rate": 0.0008514432768236282, "loss": 0.0527, "num_input_tokens_seen": 86410240, "step": 40055 }, { "epoch": 6.535073409461664, "grad_norm": 0.24296768009662628, "learning_rate": 0.0008513926429144754, "loss": 0.107, "num_input_tokens_seen": 86420512, "step": 40060 }, { "epoch": 6.535889070146819, "grad_norm": 0.20541426539421082, "learning_rate": 0.0008513420018839049, "loss": 0.0542, "num_input_tokens_seen": 86432192, "step": 40065 }, { "epoch": 6.536704730831974, "grad_norm": 0.020763792097568512, "learning_rate": 0.0008512913537329431, "loss": 0.0787, "num_input_tokens_seen": 86442912, "step": 40070 }, { "epoch": 6.537520391517129, "grad_norm": 0.05833481252193451, "learning_rate": 0.0008512406984626162, "loss": 0.1367, "num_input_tokens_seen": 86453024, "step": 40075 }, { "epoch": 6.5383360522022835, "grad_norm": 0.0294799767434597, "learning_rate": 0.0008511900360739512, "loss": 0.08, "num_input_tokens_seen": 86463840, "step": 40080 }, { "epoch": 6.539151712887438, "grad_norm": 0.11328484117984772, "learning_rate": 0.0008511393665679745, "loss": 0.0767, "num_input_tokens_seen": 86474432, "step": 40085 }, { "epoch": 6.539967373572594, "grad_norm": 0.3191603422164917, "learning_rate": 0.000851088689945713, "loss": 0.1044, "num_input_tokens_seen": 86485344, "step": 40090 }, { "epoch": 6.540783034257749, "grad_norm": 0.10024445503950119, "learning_rate": 0.0008510380062081939, "loss": 0.0848, "num_input_tokens_seen": 86496832, "step": 40095 }, { "epoch": 6.541598694942904, "grad_norm": 0.23384661972522736, "learning_rate": 0.0008509873153564443, "loss": 0.0368, "num_input_tokens_seen": 86508896, "step": 40100 }, { "epoch": 6.5424143556280585, "grad_norm": 0.29073745012283325, "learning_rate": 0.0008509366173914914, "loss": 0.0822, "num_input_tokens_seen": 86519072, "step": 40105 }, { "epoch": 6.543230016313213, "grad_norm": 0.07561987638473511, "learning_rate": 0.0008508859123143628, "loss": 0.0228, "num_input_tokens_seen": 86529600, "step": 40110 }, { "epoch": 6.544045676998369, "grad_norm": 0.036076486110687256, "learning_rate": 0.0008508352001260861, "loss": 0.1099, "num_input_tokens_seen": 86540544, "step": 40115 }, { "epoch": 6.544861337683524, "grad_norm": 0.018983732908964157, "learning_rate": 0.000850784480827689, "loss": 0.0386, "num_input_tokens_seen": 86549056, "step": 40120 }, { "epoch": 6.545676998368679, "grad_norm": 0.22001031041145325, "learning_rate": 0.0008507337544201994, "loss": 0.0305, "num_input_tokens_seen": 86560096, "step": 40125 }, { "epoch": 6.5464926590538335, "grad_norm": 0.006830132100731134, "learning_rate": 0.0008506830209046453, "loss": 0.0536, "num_input_tokens_seen": 86570752, "step": 40130 }, { "epoch": 6.547308319738988, "grad_norm": 0.0041461121290922165, "learning_rate": 0.000850632280282055, "loss": 0.0425, "num_input_tokens_seen": 86581696, "step": 40135 }, { "epoch": 6.548123980424144, "grad_norm": 0.5802711248397827, "learning_rate": 0.0008505815325534565, "loss": 0.0954, "num_input_tokens_seen": 86592864, "step": 40140 }, { "epoch": 6.548939641109299, "grad_norm": 0.1495361030101776, "learning_rate": 0.0008505307777198788, "loss": 0.0592, "num_input_tokens_seen": 86604992, "step": 40145 }, { "epoch": 6.549755301794454, "grad_norm": 0.8144291043281555, "learning_rate": 0.0008504800157823501, "loss": 0.0632, "num_input_tokens_seen": 86616576, "step": 40150 }, { "epoch": 6.5505709624796085, "grad_norm": 0.12730781733989716, "learning_rate": 0.000850429246741899, "loss": 0.0427, "num_input_tokens_seen": 86626752, "step": 40155 }, { "epoch": 6.551386623164763, "grad_norm": 0.13173724710941315, "learning_rate": 0.0008503784705995549, "loss": 0.1192, "num_input_tokens_seen": 86638048, "step": 40160 }, { "epoch": 6.552202283849918, "grad_norm": 0.021391939371824265, "learning_rate": 0.0008503276873563465, "loss": 0.1415, "num_input_tokens_seen": 86648000, "step": 40165 }, { "epoch": 6.553017944535073, "grad_norm": 0.09386082738637924, "learning_rate": 0.0008502768970133032, "loss": 0.0577, "num_input_tokens_seen": 86660096, "step": 40170 }, { "epoch": 6.553833605220229, "grad_norm": 0.009469651617109776, "learning_rate": 0.0008502260995714543, "loss": 0.0256, "num_input_tokens_seen": 86671200, "step": 40175 }, { "epoch": 6.554649265905383, "grad_norm": 0.01482017245143652, "learning_rate": 0.0008501752950318292, "loss": 0.0504, "num_input_tokens_seen": 86682304, "step": 40180 }, { "epoch": 6.555464926590538, "grad_norm": 0.03399678319692612, "learning_rate": 0.0008501244833954573, "loss": 0.0889, "num_input_tokens_seen": 86692768, "step": 40185 }, { "epoch": 6.556280587275693, "grad_norm": 0.6753520965576172, "learning_rate": 0.0008500736646633686, "loss": 0.1323, "num_input_tokens_seen": 86701952, "step": 40190 }, { "epoch": 6.557096247960848, "grad_norm": 0.41494840383529663, "learning_rate": 0.0008500228388365933, "loss": 0.0476, "num_input_tokens_seen": 86712704, "step": 40195 }, { "epoch": 6.557911908646004, "grad_norm": 0.6417130827903748, "learning_rate": 0.0008499720059161608, "loss": 0.2146, "num_input_tokens_seen": 86723264, "step": 40200 }, { "epoch": 6.558727569331158, "grad_norm": 0.1450645625591278, "learning_rate": 0.0008499211659031018, "loss": 0.2825, "num_input_tokens_seen": 86736032, "step": 40205 }, { "epoch": 6.559543230016313, "grad_norm": 0.10557205975055695, "learning_rate": 0.0008498703187984465, "loss": 0.1004, "num_input_tokens_seen": 86746784, "step": 40210 }, { "epoch": 6.560358890701468, "grad_norm": 0.267245352268219, "learning_rate": 0.0008498194646032253, "loss": 0.0572, "num_input_tokens_seen": 86757760, "step": 40215 }, { "epoch": 6.561174551386623, "grad_norm": 0.4595361351966858, "learning_rate": 0.0008497686033184687, "loss": 0.1548, "num_input_tokens_seen": 86768416, "step": 40220 }, { "epoch": 6.561990212071779, "grad_norm": 0.07745881378650665, "learning_rate": 0.0008497177349452077, "loss": 0.0244, "num_input_tokens_seen": 86779872, "step": 40225 }, { "epoch": 6.562805872756933, "grad_norm": 0.04713252931833267, "learning_rate": 0.0008496668594844733, "loss": 0.05, "num_input_tokens_seen": 86790176, "step": 40230 }, { "epoch": 6.563621533442088, "grad_norm": 0.0702272430062294, "learning_rate": 0.0008496159769372964, "loss": 0.0697, "num_input_tokens_seen": 86800992, "step": 40235 }, { "epoch": 6.564437194127243, "grad_norm": 0.06800079345703125, "learning_rate": 0.0008495650873047081, "loss": 0.0318, "num_input_tokens_seen": 86811488, "step": 40240 }, { "epoch": 6.565252854812398, "grad_norm": 0.006683358456939459, "learning_rate": 0.0008495141905877398, "loss": 0.0164, "num_input_tokens_seen": 86822624, "step": 40245 }, { "epoch": 6.566068515497553, "grad_norm": 0.12418615818023682, "learning_rate": 0.0008494632867874232, "loss": 0.0615, "num_input_tokens_seen": 86834560, "step": 40250 }, { "epoch": 6.566884176182708, "grad_norm": 0.038725290447473526, "learning_rate": 0.0008494123759047897, "loss": 0.0559, "num_input_tokens_seen": 86845600, "step": 40255 }, { "epoch": 6.567699836867863, "grad_norm": 0.05363617464900017, "learning_rate": 0.0008493614579408712, "loss": 0.0225, "num_input_tokens_seen": 86854880, "step": 40260 }, { "epoch": 6.568515497553018, "grad_norm": 0.04508909210562706, "learning_rate": 0.0008493105328966995, "loss": 0.0292, "num_input_tokens_seen": 86866304, "step": 40265 }, { "epoch": 6.569331158238173, "grad_norm": 0.005186726804822683, "learning_rate": 0.0008492596007733066, "loss": 0.0773, "num_input_tokens_seen": 86878624, "step": 40270 }, { "epoch": 6.570146818923328, "grad_norm": 0.09585123509168625, "learning_rate": 0.0008492086615717251, "loss": 0.0396, "num_input_tokens_seen": 86889600, "step": 40275 }, { "epoch": 6.5709624796084825, "grad_norm": 0.5959972739219666, "learning_rate": 0.0008491577152929867, "loss": 0.0775, "num_input_tokens_seen": 86898944, "step": 40280 }, { "epoch": 6.571778140293638, "grad_norm": 0.15105099976062775, "learning_rate": 0.0008491067619381247, "loss": 0.0352, "num_input_tokens_seen": 86909696, "step": 40285 }, { "epoch": 6.572593800978793, "grad_norm": 0.12814867496490479, "learning_rate": 0.0008490558015081711, "loss": 0.0186, "num_input_tokens_seen": 86920832, "step": 40290 }, { "epoch": 6.573409461663948, "grad_norm": 0.805547297000885, "learning_rate": 0.0008490048340041587, "loss": 0.0704, "num_input_tokens_seen": 86931872, "step": 40295 }, { "epoch": 6.574225122349103, "grad_norm": 0.0016842829063534737, "learning_rate": 0.0008489538594271209, "loss": 0.164, "num_input_tokens_seen": 86942528, "step": 40300 }, { "epoch": 6.575040783034257, "grad_norm": 0.033892672508955, "learning_rate": 0.0008489028777780901, "loss": 0.1135, "num_input_tokens_seen": 86953888, "step": 40305 }, { "epoch": 6.575856443719413, "grad_norm": 0.06621178984642029, "learning_rate": 0.0008488518890581002, "loss": 0.0782, "num_input_tokens_seen": 86964896, "step": 40310 }, { "epoch": 6.576672104404568, "grad_norm": 0.3112511932849884, "learning_rate": 0.0008488008932681841, "loss": 0.0362, "num_input_tokens_seen": 86976416, "step": 40315 }, { "epoch": 6.577487765089723, "grad_norm": 0.034096796065568924, "learning_rate": 0.0008487498904093753, "loss": 0.0594, "num_input_tokens_seen": 86987360, "step": 40320 }, { "epoch": 6.578303425774878, "grad_norm": 0.008582952432334423, "learning_rate": 0.0008486988804827077, "loss": 0.1113, "num_input_tokens_seen": 86997952, "step": 40325 }, { "epoch": 6.579119086460032, "grad_norm": 0.17364229261875153, "learning_rate": 0.0008486478634892149, "loss": 0.0892, "num_input_tokens_seen": 87009952, "step": 40330 }, { "epoch": 6.579934747145187, "grad_norm": 0.026833603158593178, "learning_rate": 0.0008485968394299308, "loss": 0.0348, "num_input_tokens_seen": 87021152, "step": 40335 }, { "epoch": 6.580750407830343, "grad_norm": 0.059796661138534546, "learning_rate": 0.0008485458083058896, "loss": 0.0281, "num_input_tokens_seen": 87031328, "step": 40340 }, { "epoch": 6.581566068515498, "grad_norm": 0.4125504791736603, "learning_rate": 0.0008484947701181254, "loss": 0.0622, "num_input_tokens_seen": 87041536, "step": 40345 }, { "epoch": 6.582381729200653, "grad_norm": 0.14283022284507751, "learning_rate": 0.0008484437248676726, "loss": 0.0574, "num_input_tokens_seen": 87053344, "step": 40350 }, { "epoch": 6.583197389885807, "grad_norm": 0.00402618246152997, "learning_rate": 0.0008483926725555655, "loss": 0.0981, "num_input_tokens_seen": 87064768, "step": 40355 }, { "epoch": 6.584013050570962, "grad_norm": 0.024538861587643623, "learning_rate": 0.0008483416131828392, "loss": 0.1654, "num_input_tokens_seen": 87075904, "step": 40360 }, { "epoch": 6.584828711256117, "grad_norm": 0.08286990225315094, "learning_rate": 0.000848290546750528, "loss": 0.0538, "num_input_tokens_seen": 87085888, "step": 40365 }, { "epoch": 6.585644371941273, "grad_norm": 0.03729123994708061, "learning_rate": 0.0008482394732596672, "loss": 0.0568, "num_input_tokens_seen": 87096672, "step": 40370 }, { "epoch": 6.5864600326264275, "grad_norm": 0.2476803958415985, "learning_rate": 0.0008481883927112917, "loss": 0.2241, "num_input_tokens_seen": 87107456, "step": 40375 }, { "epoch": 6.587275693311582, "grad_norm": 0.07604549080133438, "learning_rate": 0.0008481373051064365, "loss": 0.1907, "num_input_tokens_seen": 87119680, "step": 40380 }, { "epoch": 6.588091353996737, "grad_norm": 0.11326006054878235, "learning_rate": 0.0008480862104461374, "loss": 0.0744, "num_input_tokens_seen": 87130048, "step": 40385 }, { "epoch": 6.588907014681892, "grad_norm": 0.03263407200574875, "learning_rate": 0.0008480351087314295, "loss": 0.0609, "num_input_tokens_seen": 87141568, "step": 40390 }, { "epoch": 6.589722675367048, "grad_norm": 0.13409337401390076, "learning_rate": 0.0008479839999633487, "loss": 0.1305, "num_input_tokens_seen": 87151904, "step": 40395 }, { "epoch": 6.5905383360522025, "grad_norm": 0.17076104879379272, "learning_rate": 0.0008479328841429306, "loss": 0.0714, "num_input_tokens_seen": 87163456, "step": 40400 }, { "epoch": 6.591353996737357, "grad_norm": 0.4619564712047577, "learning_rate": 0.0008478817612712113, "loss": 0.2133, "num_input_tokens_seen": 87173792, "step": 40405 }, { "epoch": 6.592169657422512, "grad_norm": 0.32536450028419495, "learning_rate": 0.0008478306313492267, "loss": 0.1721, "num_input_tokens_seen": 87185088, "step": 40410 }, { "epoch": 6.592985318107667, "grad_norm": 0.08550991117954254, "learning_rate": 0.0008477794943780132, "loss": 0.0323, "num_input_tokens_seen": 87196512, "step": 40415 }, { "epoch": 6.593800978792823, "grad_norm": 0.27229124307632446, "learning_rate": 0.0008477283503586072, "loss": 0.0408, "num_input_tokens_seen": 87207712, "step": 40420 }, { "epoch": 6.5946166394779775, "grad_norm": 0.2587302327156067, "learning_rate": 0.0008476771992920449, "loss": 0.0667, "num_input_tokens_seen": 87219264, "step": 40425 }, { "epoch": 6.595432300163132, "grad_norm": 0.18160952627658844, "learning_rate": 0.0008476260411793631, "loss": 0.1131, "num_input_tokens_seen": 87230624, "step": 40430 }, { "epoch": 6.596247960848287, "grad_norm": 0.014219757169485092, "learning_rate": 0.0008475748760215984, "loss": 0.0473, "num_input_tokens_seen": 87241280, "step": 40435 }, { "epoch": 6.597063621533442, "grad_norm": 0.24040021002292633, "learning_rate": 0.0008475237038197882, "loss": 0.0368, "num_input_tokens_seen": 87251360, "step": 40440 }, { "epoch": 6.597879282218597, "grad_norm": 0.05186044052243233, "learning_rate": 0.0008474725245749691, "loss": 0.022, "num_input_tokens_seen": 87261824, "step": 40445 }, { "epoch": 6.598694942903752, "grad_norm": 0.014359881170094013, "learning_rate": 0.0008474213382881786, "loss": 0.0586, "num_input_tokens_seen": 87271360, "step": 40450 }, { "epoch": 6.599510603588907, "grad_norm": 0.5133166313171387, "learning_rate": 0.0008473701449604539, "loss": 0.1218, "num_input_tokens_seen": 87281440, "step": 40455 }, { "epoch": 6.600326264274062, "grad_norm": 0.3423725664615631, "learning_rate": 0.0008473189445928325, "loss": 0.2036, "num_input_tokens_seen": 87291808, "step": 40460 }, { "epoch": 6.601141924959217, "grad_norm": 0.024262959137558937, "learning_rate": 0.0008472677371863521, "loss": 0.2433, "num_input_tokens_seen": 87302560, "step": 40465 }, { "epoch": 6.601957585644372, "grad_norm": 0.26843392848968506, "learning_rate": 0.0008472165227420505, "loss": 0.0794, "num_input_tokens_seen": 87313696, "step": 40470 }, { "epoch": 6.602773246329527, "grad_norm": 0.015822848305106163, "learning_rate": 0.0008471653012609655, "loss": 0.0422, "num_input_tokens_seen": 87323328, "step": 40475 }, { "epoch": 6.603588907014682, "grad_norm": 0.05093332752585411, "learning_rate": 0.0008471140727441353, "loss": 0.1338, "num_input_tokens_seen": 87334272, "step": 40480 }, { "epoch": 6.604404567699837, "grad_norm": 0.02642383798956871, "learning_rate": 0.0008470628371925981, "loss": 0.1228, "num_input_tokens_seen": 87344896, "step": 40485 }, { "epoch": 6.605220228384992, "grad_norm": 0.2249574363231659, "learning_rate": 0.0008470115946073922, "loss": 0.2112, "num_input_tokens_seen": 87356576, "step": 40490 }, { "epoch": 6.606035889070147, "grad_norm": 0.24390633404254913, "learning_rate": 0.0008469603449895562, "loss": 0.0407, "num_input_tokens_seen": 87366816, "step": 40495 }, { "epoch": 6.6068515497553015, "grad_norm": 0.28147757053375244, "learning_rate": 0.0008469090883401286, "loss": 0.0698, "num_input_tokens_seen": 87377312, "step": 40500 }, { "epoch": 6.607667210440457, "grad_norm": 0.014178567565977573, "learning_rate": 0.0008468578246601482, "loss": 0.0746, "num_input_tokens_seen": 87387808, "step": 40505 }, { "epoch": 6.608482871125612, "grad_norm": 0.026559146121144295, "learning_rate": 0.000846806553950654, "loss": 0.0774, "num_input_tokens_seen": 87398688, "step": 40510 }, { "epoch": 6.609298531810767, "grad_norm": 0.3430555462837219, "learning_rate": 0.0008467552762126851, "loss": 0.1377, "num_input_tokens_seen": 87409376, "step": 40515 }, { "epoch": 6.610114192495922, "grad_norm": 0.2148626148700714, "learning_rate": 0.0008467039914472805, "loss": 0.0954, "num_input_tokens_seen": 87420576, "step": 40520 }, { "epoch": 6.6109298531810765, "grad_norm": 0.2175336629152298, "learning_rate": 0.0008466526996554797, "loss": 0.151, "num_input_tokens_seen": 87431424, "step": 40525 }, { "epoch": 6.611745513866231, "grad_norm": 0.05649390444159508, "learning_rate": 0.0008466014008383224, "loss": 0.0507, "num_input_tokens_seen": 87442112, "step": 40530 }, { "epoch": 6.612561174551386, "grad_norm": 0.014867009595036507, "learning_rate": 0.0008465500949968479, "loss": 0.0444, "num_input_tokens_seen": 87453728, "step": 40535 }, { "epoch": 6.613376835236542, "grad_norm": 0.21989694237709045, "learning_rate": 0.000846498782132096, "loss": 0.0689, "num_input_tokens_seen": 87465408, "step": 40540 }, { "epoch": 6.614192495921697, "grad_norm": 0.1529182642698288, "learning_rate": 0.0008464474622451067, "loss": 0.0272, "num_input_tokens_seen": 87475744, "step": 40545 }, { "epoch": 6.6150081566068515, "grad_norm": 0.028404152020812035, "learning_rate": 0.0008463961353369202, "loss": 0.1158, "num_input_tokens_seen": 87485056, "step": 40550 }, { "epoch": 6.615823817292006, "grad_norm": 0.03244340419769287, "learning_rate": 0.0008463448014085765, "loss": 0.1028, "num_input_tokens_seen": 87495968, "step": 40555 }, { "epoch": 6.616639477977161, "grad_norm": 0.5371663570404053, "learning_rate": 0.000846293460461116, "loss": 0.1263, "num_input_tokens_seen": 87507392, "step": 40560 }, { "epoch": 6.617455138662317, "grad_norm": 0.19859786331653595, "learning_rate": 0.0008462421124955792, "loss": 0.1069, "num_input_tokens_seen": 87518048, "step": 40565 }, { "epoch": 6.618270799347472, "grad_norm": 0.05986612290143967, "learning_rate": 0.0008461907575130069, "loss": 0.075, "num_input_tokens_seen": 87529824, "step": 40570 }, { "epoch": 6.6190864600326265, "grad_norm": 0.34898674488067627, "learning_rate": 0.0008461393955144397, "loss": 0.1074, "num_input_tokens_seen": 87541280, "step": 40575 }, { "epoch": 6.619902120717781, "grad_norm": 0.04001203924417496, "learning_rate": 0.0008460880265009185, "loss": 0.1155, "num_input_tokens_seen": 87551808, "step": 40580 }, { "epoch": 6.620717781402936, "grad_norm": 0.43334755301475525, "learning_rate": 0.0008460366504734843, "loss": 0.1557, "num_input_tokens_seen": 87563104, "step": 40585 }, { "epoch": 6.621533442088092, "grad_norm": 0.016394706442952156, "learning_rate": 0.0008459852674331785, "loss": 0.0248, "num_input_tokens_seen": 87572896, "step": 40590 }, { "epoch": 6.622349102773247, "grad_norm": 0.0805009976029396, "learning_rate": 0.0008459338773810424, "loss": 0.2179, "num_input_tokens_seen": 87582720, "step": 40595 }, { "epoch": 6.623164763458401, "grad_norm": 0.10477925091981888, "learning_rate": 0.0008458824803181174, "loss": 0.062, "num_input_tokens_seen": 87594272, "step": 40600 }, { "epoch": 6.623980424143556, "grad_norm": 0.059833824634552, "learning_rate": 0.0008458310762454451, "loss": 0.0378, "num_input_tokens_seen": 87604064, "step": 40605 }, { "epoch": 6.624796084828711, "grad_norm": 0.061222560703754425, "learning_rate": 0.0008457796651640672, "loss": 0.025, "num_input_tokens_seen": 87615264, "step": 40610 }, { "epoch": 6.625611745513866, "grad_norm": 0.13462354242801666, "learning_rate": 0.0008457282470750259, "loss": 0.1074, "num_input_tokens_seen": 87626240, "step": 40615 }, { "epoch": 6.626427406199021, "grad_norm": 0.0192709993571043, "learning_rate": 0.0008456768219793631, "loss": 0.0892, "num_input_tokens_seen": 87637760, "step": 40620 }, { "epoch": 6.627243066884176, "grad_norm": 0.009318066760897636, "learning_rate": 0.000845625389878121, "loss": 0.105, "num_input_tokens_seen": 87647552, "step": 40625 }, { "epoch": 6.628058727569331, "grad_norm": 0.40362200140953064, "learning_rate": 0.0008455739507723418, "loss": 0.1518, "num_input_tokens_seen": 87658400, "step": 40630 }, { "epoch": 6.628874388254486, "grad_norm": 0.0877603217959404, "learning_rate": 0.0008455225046630681, "loss": 0.0835, "num_input_tokens_seen": 87668960, "step": 40635 }, { "epoch": 6.629690048939641, "grad_norm": 0.035944268107414246, "learning_rate": 0.0008454710515513426, "loss": 0.0627, "num_input_tokens_seen": 87679392, "step": 40640 }, { "epoch": 6.630505709624796, "grad_norm": 0.1840999871492386, "learning_rate": 0.0008454195914382079, "loss": 0.0611, "num_input_tokens_seen": 87689568, "step": 40645 }, { "epoch": 6.631321370309951, "grad_norm": 0.11143762618303299, "learning_rate": 0.0008453681243247071, "loss": 0.0522, "num_input_tokens_seen": 87700480, "step": 40650 }, { "epoch": 6.632137030995106, "grad_norm": 0.007782479282468557, "learning_rate": 0.000845316650211883, "loss": 0.1254, "num_input_tokens_seen": 87710912, "step": 40655 }, { "epoch": 6.632952691680261, "grad_norm": 0.03462053835391998, "learning_rate": 0.0008452651691007789, "loss": 0.2556, "num_input_tokens_seen": 87723840, "step": 40660 }, { "epoch": 6.633768352365416, "grad_norm": 0.2330126166343689, "learning_rate": 0.0008452136809924384, "loss": 0.0863, "num_input_tokens_seen": 87734976, "step": 40665 }, { "epoch": 6.634584013050571, "grad_norm": 0.18118293583393097, "learning_rate": 0.0008451621858879043, "loss": 0.1008, "num_input_tokens_seen": 87745696, "step": 40670 }, { "epoch": 6.635399673735726, "grad_norm": 0.2609518766403198, "learning_rate": 0.000845110683788221, "loss": 0.0974, "num_input_tokens_seen": 87756992, "step": 40675 }, { "epoch": 6.636215334420881, "grad_norm": 0.051894452422857285, "learning_rate": 0.0008450591746944319, "loss": 0.0301, "num_input_tokens_seen": 87767296, "step": 40680 }, { "epoch": 6.637030995106036, "grad_norm": 0.009920398704707623, "learning_rate": 0.0008450076586075805, "loss": 0.0329, "num_input_tokens_seen": 87777984, "step": 40685 }, { "epoch": 6.637846655791191, "grad_norm": 0.13161221146583557, "learning_rate": 0.0008449561355287116, "loss": 0.1711, "num_input_tokens_seen": 87789216, "step": 40690 }, { "epoch": 6.638662316476346, "grad_norm": 0.2799515128135681, "learning_rate": 0.000844904605458869, "loss": 0.0974, "num_input_tokens_seen": 87798976, "step": 40695 }, { "epoch": 6.6394779771615005, "grad_norm": 0.12181621789932251, "learning_rate": 0.0008448530683990968, "loss": 0.0558, "num_input_tokens_seen": 87810464, "step": 40700 }, { "epoch": 6.640293637846656, "grad_norm": 0.13613636791706085, "learning_rate": 0.0008448015243504398, "loss": 0.0309, "num_input_tokens_seen": 87820320, "step": 40705 }, { "epoch": 6.641109298531811, "grad_norm": 0.00638515455648303, "learning_rate": 0.0008447499733139426, "loss": 0.0971, "num_input_tokens_seen": 87831488, "step": 40710 }, { "epoch": 6.641924959216966, "grad_norm": 0.007513434160500765, "learning_rate": 0.0008446984152906496, "loss": 0.1206, "num_input_tokens_seen": 87842784, "step": 40715 }, { "epoch": 6.642740619902121, "grad_norm": 0.03965970501303673, "learning_rate": 0.0008446468502816061, "loss": 0.242, "num_input_tokens_seen": 87852608, "step": 40720 }, { "epoch": 6.643556280587275, "grad_norm": 0.012965613976120949, "learning_rate": 0.000844595278287857, "loss": 0.0868, "num_input_tokens_seen": 87864160, "step": 40725 }, { "epoch": 6.64437194127243, "grad_norm": 0.12301129102706909, "learning_rate": 0.0008445436993104473, "loss": 0.0311, "num_input_tokens_seen": 87875104, "step": 40730 }, { "epoch": 6.645187601957586, "grad_norm": 0.002742187352851033, "learning_rate": 0.0008444921133504225, "loss": 0.093, "num_input_tokens_seen": 87887296, "step": 40735 }, { "epoch": 6.646003262642741, "grad_norm": 0.47137582302093506, "learning_rate": 0.0008444405204088281, "loss": 0.0287, "num_input_tokens_seen": 87898336, "step": 40740 }, { "epoch": 6.646818923327896, "grad_norm": 0.03939202055335045, "learning_rate": 0.0008443889204867095, "loss": 0.1327, "num_input_tokens_seen": 87908512, "step": 40745 }, { "epoch": 6.64763458401305, "grad_norm": 0.7502223253250122, "learning_rate": 0.0008443373135851125, "loss": 0.1587, "num_input_tokens_seen": 87919072, "step": 40750 }, { "epoch": 6.648450244698205, "grad_norm": 0.18294845521450043, "learning_rate": 0.0008442856997050832, "loss": 0.0967, "num_input_tokens_seen": 87930016, "step": 40755 }, { "epoch": 6.649265905383361, "grad_norm": 0.024886323139071465, "learning_rate": 0.0008442340788476672, "loss": 0.0086, "num_input_tokens_seen": 87940640, "step": 40760 }, { "epoch": 6.650081566068516, "grad_norm": 0.2929039001464844, "learning_rate": 0.0008441824510139111, "loss": 0.1237, "num_input_tokens_seen": 87952032, "step": 40765 }, { "epoch": 6.650897226753671, "grad_norm": 0.16838286817073822, "learning_rate": 0.0008441308162048609, "loss": 0.0893, "num_input_tokens_seen": 87961920, "step": 40770 }, { "epoch": 6.651712887438825, "grad_norm": 0.13611488044261932, "learning_rate": 0.0008440791744215632, "loss": 0.0916, "num_input_tokens_seen": 87974112, "step": 40775 }, { "epoch": 6.65252854812398, "grad_norm": 0.013770214281976223, "learning_rate": 0.0008440275256650644, "loss": 0.0642, "num_input_tokens_seen": 87984416, "step": 40780 }, { "epoch": 6.653344208809135, "grad_norm": 0.027458546683192253, "learning_rate": 0.0008439758699364115, "loss": 0.1236, "num_input_tokens_seen": 87995360, "step": 40785 }, { "epoch": 6.654159869494291, "grad_norm": 0.5511561632156372, "learning_rate": 0.0008439242072366511, "loss": 0.066, "num_input_tokens_seen": 88006272, "step": 40790 }, { "epoch": 6.6549755301794455, "grad_norm": 0.17879268527030945, "learning_rate": 0.0008438725375668305, "loss": 0.0388, "num_input_tokens_seen": 88016672, "step": 40795 }, { "epoch": 6.6557911908646, "grad_norm": 0.15943937003612518, "learning_rate": 0.0008438208609279967, "loss": 0.0717, "num_input_tokens_seen": 88026656, "step": 40800 }, { "epoch": 6.656606851549755, "grad_norm": 0.05186906084418297, "learning_rate": 0.0008437691773211969, "loss": 0.1144, "num_input_tokens_seen": 88037984, "step": 40805 }, { "epoch": 6.65742251223491, "grad_norm": 0.04879816621541977, "learning_rate": 0.0008437174867474786, "loss": 0.0679, "num_input_tokens_seen": 88048960, "step": 40810 }, { "epoch": 6.658238172920065, "grad_norm": 0.4712921380996704, "learning_rate": 0.0008436657892078895, "loss": 0.0934, "num_input_tokens_seen": 88060128, "step": 40815 }, { "epoch": 6.6590538336052205, "grad_norm": 0.4637976288795471, "learning_rate": 0.0008436140847034772, "loss": 0.1336, "num_input_tokens_seen": 88071808, "step": 40820 }, { "epoch": 6.659869494290375, "grad_norm": 0.29070261120796204, "learning_rate": 0.0008435623732352895, "loss": 0.13, "num_input_tokens_seen": 88082624, "step": 40825 }, { "epoch": 6.66068515497553, "grad_norm": 0.09856334328651428, "learning_rate": 0.0008435106548043745, "loss": 0.0986, "num_input_tokens_seen": 88094720, "step": 40830 }, { "epoch": 6.661500815660685, "grad_norm": 0.24137462675571442, "learning_rate": 0.0008434589294117802, "loss": 0.0666, "num_input_tokens_seen": 88105408, "step": 40835 }, { "epoch": 6.66231647634584, "grad_norm": 0.09692644327878952, "learning_rate": 0.0008434071970585551, "loss": 0.0419, "num_input_tokens_seen": 88115328, "step": 40840 }, { "epoch": 6.6631321370309955, "grad_norm": 0.08034862577915192, "learning_rate": 0.0008433554577457475, "loss": 0.2059, "num_input_tokens_seen": 88126880, "step": 40845 }, { "epoch": 6.66394779771615, "grad_norm": 0.10260270535945892, "learning_rate": 0.000843303711474406, "loss": 0.0277, "num_input_tokens_seen": 88136896, "step": 40850 }, { "epoch": 6.664763458401305, "grad_norm": 0.2504258453845978, "learning_rate": 0.0008432519582455792, "loss": 0.1051, "num_input_tokens_seen": 88147968, "step": 40855 }, { "epoch": 6.66557911908646, "grad_norm": 0.6285250186920166, "learning_rate": 0.0008432001980603161, "loss": 0.1962, "num_input_tokens_seen": 88159072, "step": 40860 }, { "epoch": 6.666394779771615, "grad_norm": 0.025775015354156494, "learning_rate": 0.0008431484309196656, "loss": 0.1216, "num_input_tokens_seen": 88169152, "step": 40865 }, { "epoch": 6.6672104404567705, "grad_norm": 0.13468480110168457, "learning_rate": 0.0008430966568246768, "loss": 0.1983, "num_input_tokens_seen": 88180352, "step": 40870 }, { "epoch": 6.668026101141925, "grad_norm": 0.4416242837905884, "learning_rate": 0.0008430448757763989, "loss": 0.0369, "num_input_tokens_seen": 88190304, "step": 40875 }, { "epoch": 6.66884176182708, "grad_norm": 0.41449350118637085, "learning_rate": 0.0008429930877758814, "loss": 0.0938, "num_input_tokens_seen": 88199744, "step": 40880 }, { "epoch": 6.669657422512235, "grad_norm": 0.013919606804847717, "learning_rate": 0.000842941292824174, "loss": 0.0386, "num_input_tokens_seen": 88210240, "step": 40885 }, { "epoch": 6.67047308319739, "grad_norm": 0.3339521884918213, "learning_rate": 0.0008428894909223261, "loss": 0.0861, "num_input_tokens_seen": 88221568, "step": 40890 }, { "epoch": 6.671288743882545, "grad_norm": 0.052577439695596695, "learning_rate": 0.0008428376820713879, "loss": 0.0718, "num_input_tokens_seen": 88232064, "step": 40895 }, { "epoch": 6.672104404567699, "grad_norm": 0.19718241691589355, "learning_rate": 0.000842785866272409, "loss": 0.047, "num_input_tokens_seen": 88243264, "step": 40900 }, { "epoch": 6.672920065252855, "grad_norm": 0.0698908194899559, "learning_rate": 0.0008427340435264397, "loss": 0.0934, "num_input_tokens_seen": 88255616, "step": 40905 }, { "epoch": 6.67373572593801, "grad_norm": 0.09876023232936859, "learning_rate": 0.0008426822138345302, "loss": 0.0584, "num_input_tokens_seen": 88265536, "step": 40910 }, { "epoch": 6.674551386623165, "grad_norm": 0.08884736150503159, "learning_rate": 0.0008426303771977311, "loss": 0.1988, "num_input_tokens_seen": 88275552, "step": 40915 }, { "epoch": 6.6753670473083195, "grad_norm": 0.540953516960144, "learning_rate": 0.0008425785336170925, "loss": 0.2093, "num_input_tokens_seen": 88286208, "step": 40920 }, { "epoch": 6.676182707993474, "grad_norm": 0.2763039469718933, "learning_rate": 0.0008425266830936654, "loss": 0.2348, "num_input_tokens_seen": 88297312, "step": 40925 }, { "epoch": 6.67699836867863, "grad_norm": 0.03222949057817459, "learning_rate": 0.0008424748256285005, "loss": 0.0748, "num_input_tokens_seen": 88307360, "step": 40930 }, { "epoch": 6.677814029363785, "grad_norm": 0.1555800586938858, "learning_rate": 0.0008424229612226488, "loss": 0.1322, "num_input_tokens_seen": 88317696, "step": 40935 }, { "epoch": 6.67862969004894, "grad_norm": 0.05882593244314194, "learning_rate": 0.0008423710898771614, "loss": 0.24, "num_input_tokens_seen": 88328512, "step": 40940 }, { "epoch": 6.6794453507340945, "grad_norm": 0.058301255106925964, "learning_rate": 0.0008423192115930897, "loss": 0.0578, "num_input_tokens_seen": 88338432, "step": 40945 }, { "epoch": 6.680261011419249, "grad_norm": 0.6262646317481995, "learning_rate": 0.0008422673263714848, "loss": 0.1129, "num_input_tokens_seen": 88350048, "step": 40950 }, { "epoch": 6.681076672104405, "grad_norm": 0.0638042464852333, "learning_rate": 0.0008422154342133983, "loss": 0.0424, "num_input_tokens_seen": 88361056, "step": 40955 }, { "epoch": 6.68189233278956, "grad_norm": 0.0358414463698864, "learning_rate": 0.0008421635351198819, "loss": 0.0451, "num_input_tokens_seen": 88370720, "step": 40960 }, { "epoch": 6.682707993474715, "grad_norm": 0.05145019665360451, "learning_rate": 0.0008421116290919875, "loss": 0.0356, "num_input_tokens_seen": 88382304, "step": 40965 }, { "epoch": 6.6835236541598695, "grad_norm": 0.1512133628129959, "learning_rate": 0.0008420597161307668, "loss": 0.108, "num_input_tokens_seen": 88393216, "step": 40970 }, { "epoch": 6.684339314845024, "grad_norm": 0.027534687891602516, "learning_rate": 0.0008420077962372721, "loss": 0.1198, "num_input_tokens_seen": 88404896, "step": 40975 }, { "epoch": 6.685154975530179, "grad_norm": 0.04689402878284454, "learning_rate": 0.0008419558694125555, "loss": 0.0556, "num_input_tokens_seen": 88416544, "step": 40980 }, { "epoch": 6.685970636215334, "grad_norm": 0.233490452170372, "learning_rate": 0.0008419039356576695, "loss": 0.0984, "num_input_tokens_seen": 88427072, "step": 40985 }, { "epoch": 6.68678629690049, "grad_norm": 0.17801909148693085, "learning_rate": 0.0008418519949736664, "loss": 0.0336, "num_input_tokens_seen": 88437632, "step": 40990 }, { "epoch": 6.6876019575856445, "grad_norm": 0.15150417387485504, "learning_rate": 0.000841800047361599, "loss": 0.041, "num_input_tokens_seen": 88446560, "step": 40995 }, { "epoch": 6.688417618270799, "grad_norm": 0.12716467678546906, "learning_rate": 0.00084174809282252, "loss": 0.1365, "num_input_tokens_seen": 88456800, "step": 41000 }, { "epoch": 6.689233278955954, "grad_norm": 0.26064029335975647, "learning_rate": 0.0008416961313574824, "loss": 0.1104, "num_input_tokens_seen": 88467424, "step": 41005 }, { "epoch": 6.690048939641109, "grad_norm": 0.3407898247241974, "learning_rate": 0.0008416441629675391, "loss": 0.0785, "num_input_tokens_seen": 88478592, "step": 41010 }, { "epoch": 6.690864600326265, "grad_norm": 0.1734040528535843, "learning_rate": 0.0008415921876537436, "loss": 0.043, "num_input_tokens_seen": 88491328, "step": 41015 }, { "epoch": 6.691680261011419, "grad_norm": 0.03416004776954651, "learning_rate": 0.000841540205417149, "loss": 0.114, "num_input_tokens_seen": 88502848, "step": 41020 }, { "epoch": 6.692495921696574, "grad_norm": 0.05283604934811592, "learning_rate": 0.0008414882162588089, "loss": 0.0535, "num_input_tokens_seen": 88512704, "step": 41025 }, { "epoch": 6.693311582381729, "grad_norm": 0.05132652074098587, "learning_rate": 0.0008414362201797768, "loss": 0.114, "num_input_tokens_seen": 88523648, "step": 41030 }, { "epoch": 6.694127243066884, "grad_norm": 0.07435958087444305, "learning_rate": 0.0008413842171811066, "loss": 0.0651, "num_input_tokens_seen": 88533568, "step": 41035 }, { "epoch": 6.69494290375204, "grad_norm": 0.0728800818324089, "learning_rate": 0.0008413322072638523, "loss": 0.0153, "num_input_tokens_seen": 88544160, "step": 41040 }, { "epoch": 6.695758564437194, "grad_norm": 0.1306280791759491, "learning_rate": 0.0008412801904290677, "loss": 0.033, "num_input_tokens_seen": 88555264, "step": 41045 }, { "epoch": 6.696574225122349, "grad_norm": 0.08100580424070358, "learning_rate": 0.000841228166677807, "loss": 0.1464, "num_input_tokens_seen": 88566368, "step": 41050 }, { "epoch": 6.697389885807504, "grad_norm": 0.3710465729236603, "learning_rate": 0.0008411761360111248, "loss": 0.1677, "num_input_tokens_seen": 88575904, "step": 41055 }, { "epoch": 6.698205546492659, "grad_norm": 0.08936677873134613, "learning_rate": 0.0008411240984300752, "loss": 0.0636, "num_input_tokens_seen": 88587680, "step": 41060 }, { "epoch": 6.699021207177814, "grad_norm": 0.005481621716171503, "learning_rate": 0.0008410720539357132, "loss": 0.0418, "num_input_tokens_seen": 88597440, "step": 41065 }, { "epoch": 6.699836867862969, "grad_norm": 0.5715301036834717, "learning_rate": 0.0008410200025290933, "loss": 0.2765, "num_input_tokens_seen": 88608256, "step": 41070 }, { "epoch": 6.700652528548124, "grad_norm": 0.1810375154018402, "learning_rate": 0.0008409679442112703, "loss": 0.2123, "num_input_tokens_seen": 88619456, "step": 41075 }, { "epoch": 6.701468189233279, "grad_norm": 0.05662427097558975, "learning_rate": 0.0008409158789832994, "loss": 0.0851, "num_input_tokens_seen": 88630720, "step": 41080 }, { "epoch": 6.702283849918434, "grad_norm": 0.014287220314145088, "learning_rate": 0.0008408638068462357, "loss": 0.1564, "num_input_tokens_seen": 88641888, "step": 41085 }, { "epoch": 6.703099510603589, "grad_norm": 0.014776411466300488, "learning_rate": 0.0008408117278011347, "loss": 0.1829, "num_input_tokens_seen": 88651328, "step": 41090 }, { "epoch": 6.7039151712887435, "grad_norm": 0.26656484603881836, "learning_rate": 0.0008407596418490515, "loss": 0.0347, "num_input_tokens_seen": 88663456, "step": 41095 }, { "epoch": 6.704730831973899, "grad_norm": 0.3139142394065857, "learning_rate": 0.0008407075489910421, "loss": 0.0746, "num_input_tokens_seen": 88673984, "step": 41100 }, { "epoch": 6.705546492659054, "grad_norm": 0.13080765306949615, "learning_rate": 0.0008406554492281618, "loss": 0.0282, "num_input_tokens_seen": 88684736, "step": 41105 }, { "epoch": 6.706362153344209, "grad_norm": 0.12310944497585297, "learning_rate": 0.0008406033425614667, "loss": 0.173, "num_input_tokens_seen": 88694464, "step": 41110 }, { "epoch": 6.707177814029364, "grad_norm": 0.3507760167121887, "learning_rate": 0.0008405512289920129, "loss": 0.0953, "num_input_tokens_seen": 88706336, "step": 41115 }, { "epoch": 6.7079934747145185, "grad_norm": 0.020901674404740334, "learning_rate": 0.0008404991085208562, "loss": 0.187, "num_input_tokens_seen": 88717696, "step": 41120 }, { "epoch": 6.708809135399674, "grad_norm": 0.023080123588442802, "learning_rate": 0.0008404469811490534, "loss": 0.0306, "num_input_tokens_seen": 88727968, "step": 41125 }, { "epoch": 6.709624796084829, "grad_norm": 0.02298988215625286, "learning_rate": 0.0008403948468776604, "loss": 0.1768, "num_input_tokens_seen": 88739296, "step": 41130 }, { "epoch": 6.710440456769984, "grad_norm": 0.24384532868862152, "learning_rate": 0.0008403427057077342, "loss": 0.1251, "num_input_tokens_seen": 88750528, "step": 41135 }, { "epoch": 6.711256117455139, "grad_norm": 0.05696605145931244, "learning_rate": 0.0008402905576403312, "loss": 0.0197, "num_input_tokens_seen": 88761440, "step": 41140 }, { "epoch": 6.712071778140293, "grad_norm": 0.20630264282226562, "learning_rate": 0.0008402384026765084, "loss": 0.0386, "num_input_tokens_seen": 88771648, "step": 41145 }, { "epoch": 6.712887438825448, "grad_norm": 0.08208222687244415, "learning_rate": 0.0008401862408173226, "loss": 0.0729, "num_input_tokens_seen": 88783104, "step": 41150 }, { "epoch": 6.713703099510604, "grad_norm": 0.10429111123085022, "learning_rate": 0.0008401340720638313, "loss": 0.1029, "num_input_tokens_seen": 88793984, "step": 41155 }, { "epoch": 6.714518760195759, "grad_norm": 0.3674009442329407, "learning_rate": 0.0008400818964170913, "loss": 0.1221, "num_input_tokens_seen": 88804640, "step": 41160 }, { "epoch": 6.715334420880914, "grad_norm": 0.2793924808502197, "learning_rate": 0.0008400297138781605, "loss": 0.1791, "num_input_tokens_seen": 88814880, "step": 41165 }, { "epoch": 6.716150081566068, "grad_norm": 0.21336986124515533, "learning_rate": 0.0008399775244480961, "loss": 0.06, "num_input_tokens_seen": 88825440, "step": 41170 }, { "epoch": 6.716965742251223, "grad_norm": 0.18469882011413574, "learning_rate": 0.0008399253281279557, "loss": 0.1613, "num_input_tokens_seen": 88836416, "step": 41175 }, { "epoch": 6.717781402936378, "grad_norm": 0.36405104398727417, "learning_rate": 0.0008398731249187975, "loss": 0.0983, "num_input_tokens_seen": 88847264, "step": 41180 }, { "epoch": 6.718597063621534, "grad_norm": 0.031871143728494644, "learning_rate": 0.0008398209148216793, "loss": 0.1404, "num_input_tokens_seen": 88858272, "step": 41185 }, { "epoch": 6.719412724306689, "grad_norm": 0.04225146770477295, "learning_rate": 0.000839768697837659, "loss": 0.203, "num_input_tokens_seen": 88870752, "step": 41190 }, { "epoch": 6.720228384991843, "grad_norm": 0.03307630494236946, "learning_rate": 0.0008397164739677951, "loss": 0.0462, "num_input_tokens_seen": 88880000, "step": 41195 }, { "epoch": 6.721044045676998, "grad_norm": 0.036664701998233795, "learning_rate": 0.0008396642432131459, "loss": 0.0295, "num_input_tokens_seen": 88889888, "step": 41200 }, { "epoch": 6.721859706362153, "grad_norm": 0.033187851309776306, "learning_rate": 0.0008396120055747698, "loss": 0.0187, "num_input_tokens_seen": 88900736, "step": 41205 }, { "epoch": 6.722675367047309, "grad_norm": 0.19586631655693054, "learning_rate": 0.0008395597610537257, "loss": 0.1104, "num_input_tokens_seen": 88911904, "step": 41210 }, { "epoch": 6.7234910277324635, "grad_norm": 0.4715951383113861, "learning_rate": 0.0008395075096510723, "loss": 0.1216, "num_input_tokens_seen": 88921920, "step": 41215 }, { "epoch": 6.724306688417618, "grad_norm": 0.18069659173488617, "learning_rate": 0.0008394552513678684, "loss": 0.047, "num_input_tokens_seen": 88933120, "step": 41220 }, { "epoch": 6.725122349102773, "grad_norm": 0.004841307643800974, "learning_rate": 0.0008394029862051733, "loss": 0.0342, "num_input_tokens_seen": 88944512, "step": 41225 }, { "epoch": 6.725938009787928, "grad_norm": 0.028462974354624748, "learning_rate": 0.0008393507141640461, "loss": 0.1596, "num_input_tokens_seen": 88955616, "step": 41230 }, { "epoch": 6.726753670473083, "grad_norm": 0.7897561192512512, "learning_rate": 0.0008392984352455461, "loss": 0.1138, "num_input_tokens_seen": 88967328, "step": 41235 }, { "epoch": 6.7275693311582385, "grad_norm": 0.17723752558231354, "learning_rate": 0.0008392461494507331, "loss": 0.0323, "num_input_tokens_seen": 88978880, "step": 41240 }, { "epoch": 6.728384991843393, "grad_norm": 0.5891535878181458, "learning_rate": 0.0008391938567806663, "loss": 0.0733, "num_input_tokens_seen": 88988736, "step": 41245 }, { "epoch": 6.729200652528548, "grad_norm": 0.35494551062583923, "learning_rate": 0.0008391415572364058, "loss": 0.2917, "num_input_tokens_seen": 88998944, "step": 41250 }, { "epoch": 6.730016313213703, "grad_norm": 0.03493521362543106, "learning_rate": 0.0008390892508190113, "loss": 0.0826, "num_input_tokens_seen": 89008288, "step": 41255 }, { "epoch": 6.730831973898858, "grad_norm": 0.013462793081998825, "learning_rate": 0.000839036937529543, "loss": 0.0309, "num_input_tokens_seen": 89019200, "step": 41260 }, { "epoch": 6.731647634584013, "grad_norm": 0.12517182528972626, "learning_rate": 0.0008389846173690611, "loss": 0.1976, "num_input_tokens_seen": 89031200, "step": 41265 }, { "epoch": 6.732463295269168, "grad_norm": 0.04248917102813721, "learning_rate": 0.0008389322903386261, "loss": 0.1249, "num_input_tokens_seen": 89041664, "step": 41270 }, { "epoch": 6.733278955954323, "grad_norm": 0.01735823228955269, "learning_rate": 0.0008388799564392979, "loss": 0.1422, "num_input_tokens_seen": 89051680, "step": 41275 }, { "epoch": 6.734094616639478, "grad_norm": 0.47924238443374634, "learning_rate": 0.0008388276156721377, "loss": 0.1338, "num_input_tokens_seen": 89063264, "step": 41280 }, { "epoch": 6.734910277324633, "grad_norm": 0.028898166492581367, "learning_rate": 0.0008387752680382062, "loss": 0.0291, "num_input_tokens_seen": 89073888, "step": 41285 }, { "epoch": 6.735725938009788, "grad_norm": 0.1791941523551941, "learning_rate": 0.0008387229135385638, "loss": 0.0544, "num_input_tokens_seen": 89085344, "step": 41290 }, { "epoch": 6.736541598694943, "grad_norm": 0.2314877212047577, "learning_rate": 0.0008386705521742719, "loss": 0.0734, "num_input_tokens_seen": 89096064, "step": 41295 }, { "epoch": 6.737357259380098, "grad_norm": 0.32113394141197205, "learning_rate": 0.0008386181839463918, "loss": 0.1872, "num_input_tokens_seen": 89107264, "step": 41300 }, { "epoch": 6.738172920065253, "grad_norm": 0.03045910969376564, "learning_rate": 0.0008385658088559845, "loss": 0.0539, "num_input_tokens_seen": 89117280, "step": 41305 }, { "epoch": 6.738988580750408, "grad_norm": 0.014223791658878326, "learning_rate": 0.0008385134269041116, "loss": 0.0432, "num_input_tokens_seen": 89127616, "step": 41310 }, { "epoch": 6.739804241435563, "grad_norm": 0.1209477111697197, "learning_rate": 0.0008384610380918347, "loss": 0.043, "num_input_tokens_seen": 89138624, "step": 41315 }, { "epoch": 6.740619902120718, "grad_norm": 0.2953536808490753, "learning_rate": 0.0008384086424202156, "loss": 0.1778, "num_input_tokens_seen": 89149920, "step": 41320 }, { "epoch": 6.741435562805873, "grad_norm": 0.036753665655851364, "learning_rate": 0.0008383562398903157, "loss": 0.0341, "num_input_tokens_seen": 89160640, "step": 41325 }, { "epoch": 6.742251223491028, "grad_norm": 0.3572719991207123, "learning_rate": 0.0008383038305031976, "loss": 0.2165, "num_input_tokens_seen": 89170336, "step": 41330 }, { "epoch": 6.743066884176183, "grad_norm": 0.13514456152915955, "learning_rate": 0.0008382514142599234, "loss": 0.0446, "num_input_tokens_seen": 89182112, "step": 41335 }, { "epoch": 6.7438825448613375, "grad_norm": 0.15911409258842468, "learning_rate": 0.0008381989911615548, "loss": 0.1281, "num_input_tokens_seen": 89193664, "step": 41340 }, { "epoch": 6.744698205546492, "grad_norm": 0.1512255072593689, "learning_rate": 0.0008381465612091549, "loss": 0.1984, "num_input_tokens_seen": 89205152, "step": 41345 }, { "epoch": 6.745513866231647, "grad_norm": 0.30731382966041565, "learning_rate": 0.0008380941244037858, "loss": 0.0807, "num_input_tokens_seen": 89216544, "step": 41350 }, { "epoch": 6.746329526916803, "grad_norm": 0.012128636240959167, "learning_rate": 0.0008380416807465106, "loss": 0.0267, "num_input_tokens_seen": 89227200, "step": 41355 }, { "epoch": 6.747145187601958, "grad_norm": 0.03304027020931244, "learning_rate": 0.0008379892302383916, "loss": 0.0317, "num_input_tokens_seen": 89238336, "step": 41360 }, { "epoch": 6.7479608482871125, "grad_norm": 0.3328229486942291, "learning_rate": 0.0008379367728804923, "loss": 0.0639, "num_input_tokens_seen": 89248640, "step": 41365 }, { "epoch": 6.748776508972267, "grad_norm": 0.050051089376211166, "learning_rate": 0.0008378843086738755, "loss": 0.0437, "num_input_tokens_seen": 89258976, "step": 41370 }, { "epoch": 6.749592169657422, "grad_norm": 0.17837372422218323, "learning_rate": 0.0008378318376196046, "loss": 0.0175, "num_input_tokens_seen": 89269472, "step": 41375 }, { "epoch": 6.750407830342578, "grad_norm": 0.08706925809383392, "learning_rate": 0.0008377793597187428, "loss": 0.0237, "num_input_tokens_seen": 89279136, "step": 41380 }, { "epoch": 6.751223491027733, "grad_norm": 0.014418494887650013, "learning_rate": 0.000837726874972354, "loss": 0.0293, "num_input_tokens_seen": 89288704, "step": 41385 }, { "epoch": 6.7520391517128875, "grad_norm": 0.06211232393980026, "learning_rate": 0.0008376743833815015, "loss": 0.1712, "num_input_tokens_seen": 89299616, "step": 41390 }, { "epoch": 6.752854812398042, "grad_norm": 0.20374055206775665, "learning_rate": 0.0008376218849472493, "loss": 0.071, "num_input_tokens_seen": 89310848, "step": 41395 }, { "epoch": 6.753670473083197, "grad_norm": 0.048559244722127914, "learning_rate": 0.0008375693796706613, "loss": 0.0178, "num_input_tokens_seen": 89321344, "step": 41400 }, { "epoch": 6.754486133768353, "grad_norm": 0.007528493646532297, "learning_rate": 0.0008375168675528016, "loss": 0.2189, "num_input_tokens_seen": 89332512, "step": 41405 }, { "epoch": 6.755301794453508, "grad_norm": 0.5078732371330261, "learning_rate": 0.0008374643485947342, "loss": 0.1106, "num_input_tokens_seen": 89343616, "step": 41410 }, { "epoch": 6.7561174551386625, "grad_norm": 0.02055232785642147, "learning_rate": 0.0008374118227975238, "loss": 0.0823, "num_input_tokens_seen": 89354560, "step": 41415 }, { "epoch": 6.756933115823817, "grad_norm": 0.1295469105243683, "learning_rate": 0.0008373592901622349, "loss": 0.0971, "num_input_tokens_seen": 89364064, "step": 41420 }, { "epoch": 6.757748776508972, "grad_norm": 0.18277381360530853, "learning_rate": 0.0008373067506899319, "loss": 0.1143, "num_input_tokens_seen": 89373792, "step": 41425 }, { "epoch": 6.758564437194127, "grad_norm": 0.03403567895293236, "learning_rate": 0.0008372542043816797, "loss": 0.0321, "num_input_tokens_seen": 89384384, "step": 41430 }, { "epoch": 6.759380097879282, "grad_norm": 0.13269227743148804, "learning_rate": 0.0008372016512385432, "loss": 0.105, "num_input_tokens_seen": 89395680, "step": 41435 }, { "epoch": 6.760195758564437, "grad_norm": 0.04528535529971123, "learning_rate": 0.0008371490912615875, "loss": 0.219, "num_input_tokens_seen": 89407552, "step": 41440 }, { "epoch": 6.761011419249592, "grad_norm": 0.11880362033843994, "learning_rate": 0.0008370965244518778, "loss": 0.0979, "num_input_tokens_seen": 89419328, "step": 41445 }, { "epoch": 6.761827079934747, "grad_norm": 0.30906984210014343, "learning_rate": 0.0008370439508104794, "loss": 0.0793, "num_input_tokens_seen": 89430272, "step": 41450 }, { "epoch": 6.762642740619902, "grad_norm": 0.1346231997013092, "learning_rate": 0.0008369913703384576, "loss": 0.0558, "num_input_tokens_seen": 89441632, "step": 41455 }, { "epoch": 6.763458401305057, "grad_norm": 0.09207511693239212, "learning_rate": 0.0008369387830368785, "loss": 0.2237, "num_input_tokens_seen": 89451808, "step": 41460 }, { "epoch": 6.764274061990212, "grad_norm": 0.2747217118740082, "learning_rate": 0.0008368861889068071, "loss": 0.129, "num_input_tokens_seen": 89463008, "step": 41465 }, { "epoch": 6.765089722675367, "grad_norm": 0.015168285928666592, "learning_rate": 0.0008368335879493099, "loss": 0.0366, "num_input_tokens_seen": 89474816, "step": 41470 }, { "epoch": 6.765905383360522, "grad_norm": 0.34787145256996155, "learning_rate": 0.0008367809801654529, "loss": 0.064, "num_input_tokens_seen": 89485024, "step": 41475 }, { "epoch": 6.766721044045677, "grad_norm": 0.056495532393455505, "learning_rate": 0.0008367283655563018, "loss": 0.0529, "num_input_tokens_seen": 89494528, "step": 41480 }, { "epoch": 6.767536704730832, "grad_norm": 0.02767673134803772, "learning_rate": 0.0008366757441229235, "loss": 0.1012, "num_input_tokens_seen": 89506112, "step": 41485 }, { "epoch": 6.768352365415987, "grad_norm": 0.5915576219558716, "learning_rate": 0.000836623115866384, "loss": 0.0983, "num_input_tokens_seen": 89516480, "step": 41490 }, { "epoch": 6.769168026101142, "grad_norm": 0.31049564480781555, "learning_rate": 0.00083657048078775, "loss": 0.0552, "num_input_tokens_seen": 89526304, "step": 41495 }, { "epoch": 6.769983686786297, "grad_norm": 0.030810778960585594, "learning_rate": 0.0008365178388880883, "loss": 0.055, "num_input_tokens_seen": 89536672, "step": 41500 }, { "epoch": 6.770799347471452, "grad_norm": 0.5391156673431396, "learning_rate": 0.0008364651901684657, "loss": 0.1294, "num_input_tokens_seen": 89547072, "step": 41505 }, { "epoch": 6.771615008156607, "grad_norm": 0.055158261209726334, "learning_rate": 0.0008364125346299492, "loss": 0.1407, "num_input_tokens_seen": 89559328, "step": 41510 }, { "epoch": 6.7724306688417615, "grad_norm": 0.014132093638181686, "learning_rate": 0.0008363598722736057, "loss": 0.1442, "num_input_tokens_seen": 89568608, "step": 41515 }, { "epoch": 6.773246329526917, "grad_norm": 0.10845532268285751, "learning_rate": 0.0008363072031005028, "loss": 0.1553, "num_input_tokens_seen": 89580032, "step": 41520 }, { "epoch": 6.774061990212072, "grad_norm": 0.3795512616634369, "learning_rate": 0.0008362545271117079, "loss": 0.09, "num_input_tokens_seen": 89589984, "step": 41525 }, { "epoch": 6.774877650897227, "grad_norm": 0.09278767555952072, "learning_rate": 0.0008362018443082884, "loss": 0.0916, "num_input_tokens_seen": 89599424, "step": 41530 }, { "epoch": 6.775693311582382, "grad_norm": 0.01544953789561987, "learning_rate": 0.000836149154691312, "loss": 0.0936, "num_input_tokens_seen": 89611552, "step": 41535 }, { "epoch": 6.7765089722675365, "grad_norm": 0.20057585835456848, "learning_rate": 0.0008360964582618465, "loss": 0.0385, "num_input_tokens_seen": 89623008, "step": 41540 }, { "epoch": 6.777324632952691, "grad_norm": 0.01042347401380539, "learning_rate": 0.0008360437550209599, "loss": 0.0621, "num_input_tokens_seen": 89634432, "step": 41545 }, { "epoch": 6.778140293637847, "grad_norm": 0.12281845510005951, "learning_rate": 0.0008359910449697203, "loss": 0.0533, "num_input_tokens_seen": 89645216, "step": 41550 }, { "epoch": 6.778955954323002, "grad_norm": 0.2375798523426056, "learning_rate": 0.0008359383281091961, "loss": 0.1552, "num_input_tokens_seen": 89655424, "step": 41555 }, { "epoch": 6.779771615008157, "grad_norm": 0.32664602994918823, "learning_rate": 0.0008358856044404553, "loss": 0.0577, "num_input_tokens_seen": 89665504, "step": 41560 }, { "epoch": 6.780587275693311, "grad_norm": 0.2531193196773529, "learning_rate": 0.0008358328739645668, "loss": 0.1113, "num_input_tokens_seen": 89675072, "step": 41565 }, { "epoch": 6.781402936378466, "grad_norm": 0.33266544342041016, "learning_rate": 0.000835780136682599, "loss": 0.2339, "num_input_tokens_seen": 89686016, "step": 41570 }, { "epoch": 6.782218597063622, "grad_norm": 0.11532379686832428, "learning_rate": 0.0008357273925956208, "loss": 0.0409, "num_input_tokens_seen": 89696416, "step": 41575 }, { "epoch": 6.783034257748777, "grad_norm": 0.04395350441336632, "learning_rate": 0.000835674641704701, "loss": 0.0131, "num_input_tokens_seen": 89706848, "step": 41580 }, { "epoch": 6.783849918433932, "grad_norm": 0.08121004700660706, "learning_rate": 0.0008356218840109089, "loss": 0.0459, "num_input_tokens_seen": 89718912, "step": 41585 }, { "epoch": 6.784665579119086, "grad_norm": 0.3389904797077179, "learning_rate": 0.0008355691195153134, "loss": 0.0902, "num_input_tokens_seen": 89729824, "step": 41590 }, { "epoch": 6.785481239804241, "grad_norm": 0.08917682617902756, "learning_rate": 0.000835516348218984, "loss": 0.1176, "num_input_tokens_seen": 89740128, "step": 41595 }, { "epoch": 6.786296900489396, "grad_norm": 0.3284420669078827, "learning_rate": 0.0008354635701229902, "loss": 0.1368, "num_input_tokens_seen": 89751552, "step": 41600 }, { "epoch": 6.787112561174552, "grad_norm": 0.2035391926765442, "learning_rate": 0.0008354107852284016, "loss": 0.0515, "num_input_tokens_seen": 89763392, "step": 41605 }, { "epoch": 6.787928221859707, "grad_norm": 0.015781525522470474, "learning_rate": 0.0008353579935362881, "loss": 0.1125, "num_input_tokens_seen": 89773696, "step": 41610 }, { "epoch": 6.788743882544861, "grad_norm": 0.16875390708446503, "learning_rate": 0.0008353051950477192, "loss": 0.1294, "num_input_tokens_seen": 89784704, "step": 41615 }, { "epoch": 6.789559543230016, "grad_norm": 0.016785863786935806, "learning_rate": 0.0008352523897637652, "loss": 0.1971, "num_input_tokens_seen": 89796352, "step": 41620 }, { "epoch": 6.790375203915171, "grad_norm": 0.40442395210266113, "learning_rate": 0.0008351995776854962, "loss": 0.1024, "num_input_tokens_seen": 89805504, "step": 41625 }, { "epoch": 6.791190864600326, "grad_norm": 0.03135751187801361, "learning_rate": 0.0008351467588139827, "loss": 0.0343, "num_input_tokens_seen": 89817024, "step": 41630 }, { "epoch": 6.7920065252854815, "grad_norm": 0.0776589959859848, "learning_rate": 0.0008350939331502949, "loss": 0.0794, "num_input_tokens_seen": 89827872, "step": 41635 }, { "epoch": 6.792822185970636, "grad_norm": 0.01409236341714859, "learning_rate": 0.0008350411006955033, "loss": 0.0345, "num_input_tokens_seen": 89838112, "step": 41640 }, { "epoch": 6.793637846655791, "grad_norm": 0.1448766589164734, "learning_rate": 0.0008349882614506789, "loss": 0.0616, "num_input_tokens_seen": 89848224, "step": 41645 }, { "epoch": 6.794453507340946, "grad_norm": 0.08187838643789291, "learning_rate": 0.0008349354154168924, "loss": 0.1314, "num_input_tokens_seen": 89858336, "step": 41650 }, { "epoch": 6.795269168026101, "grad_norm": 0.17778705060482025, "learning_rate": 0.0008348825625952148, "loss": 0.1122, "num_input_tokens_seen": 89869408, "step": 41655 }, { "epoch": 6.7960848287112565, "grad_norm": 0.038378339260816574, "learning_rate": 0.0008348297029867172, "loss": 0.1022, "num_input_tokens_seen": 89880800, "step": 41660 }, { "epoch": 6.796900489396411, "grad_norm": 0.4019589424133301, "learning_rate": 0.0008347768365924709, "loss": 0.082, "num_input_tokens_seen": 89892384, "step": 41665 }, { "epoch": 6.797716150081566, "grad_norm": 0.15764068067073822, "learning_rate": 0.0008347239634135474, "loss": 0.0476, "num_input_tokens_seen": 89903008, "step": 41670 }, { "epoch": 6.798531810766721, "grad_norm": 0.3939819931983948, "learning_rate": 0.0008346710834510181, "loss": 0.206, "num_input_tokens_seen": 89914912, "step": 41675 }, { "epoch": 6.799347471451876, "grad_norm": 0.03844374418258667, "learning_rate": 0.0008346181967059548, "loss": 0.0318, "num_input_tokens_seen": 89926784, "step": 41680 }, { "epoch": 6.800163132137031, "grad_norm": 0.1659233272075653, "learning_rate": 0.0008345653031794292, "loss": 0.1384, "num_input_tokens_seen": 89937824, "step": 41685 }, { "epoch": 6.800978792822186, "grad_norm": 0.26507705450057983, "learning_rate": 0.0008345124028725133, "loss": 0.1069, "num_input_tokens_seen": 89948320, "step": 41690 }, { "epoch": 6.801794453507341, "grad_norm": 0.26495927572250366, "learning_rate": 0.0008344594957862792, "loss": 0.1016, "num_input_tokens_seen": 89958656, "step": 41695 }, { "epoch": 6.802610114192496, "grad_norm": 0.027208911255002022, "learning_rate": 0.000834406581921799, "loss": 0.0716, "num_input_tokens_seen": 89969920, "step": 41700 }, { "epoch": 6.803425774877651, "grad_norm": 0.10096990317106247, "learning_rate": 0.0008343536612801454, "loss": 0.204, "num_input_tokens_seen": 89981632, "step": 41705 }, { "epoch": 6.804241435562806, "grad_norm": 0.0225969348102808, "learning_rate": 0.0008343007338623906, "loss": 0.0871, "num_input_tokens_seen": 89993376, "step": 41710 }, { "epoch": 6.80505709624796, "grad_norm": 0.0448911227285862, "learning_rate": 0.0008342477996696074, "loss": 0.1006, "num_input_tokens_seen": 90004768, "step": 41715 }, { "epoch": 6.805872756933116, "grad_norm": 0.06597235053777695, "learning_rate": 0.0008341948587028684, "loss": 0.1887, "num_input_tokens_seen": 90014720, "step": 41720 }, { "epoch": 6.806688417618271, "grad_norm": 0.22813384234905243, "learning_rate": 0.0008341419109632466, "loss": 0.1741, "num_input_tokens_seen": 90026528, "step": 41725 }, { "epoch": 6.807504078303426, "grad_norm": 0.02713964320719242, "learning_rate": 0.0008340889564518153, "loss": 0.1598, "num_input_tokens_seen": 90037152, "step": 41730 }, { "epoch": 6.808319738988581, "grad_norm": 0.051184266805648804, "learning_rate": 0.0008340359951696472, "loss": 0.1529, "num_input_tokens_seen": 90046336, "step": 41735 }, { "epoch": 6.809135399673735, "grad_norm": 0.1838841438293457, "learning_rate": 0.0008339830271178162, "loss": 0.3389, "num_input_tokens_seen": 90056160, "step": 41740 }, { "epoch": 6.809951060358891, "grad_norm": 0.11482001841068268, "learning_rate": 0.0008339300522973952, "loss": 0.132, "num_input_tokens_seen": 90066400, "step": 41745 }, { "epoch": 6.810766721044046, "grad_norm": 0.06716779619455338, "learning_rate": 0.0008338770707094583, "loss": 0.091, "num_input_tokens_seen": 90077920, "step": 41750 }, { "epoch": 6.811582381729201, "grad_norm": 0.18572188913822174, "learning_rate": 0.0008338240823550789, "loss": 0.052, "num_input_tokens_seen": 90088352, "step": 41755 }, { "epoch": 6.8123980424143555, "grad_norm": 0.295441597700119, "learning_rate": 0.000833771087235331, "loss": 0.092, "num_input_tokens_seen": 90099488, "step": 41760 }, { "epoch": 6.81321370309951, "grad_norm": 0.3770286738872528, "learning_rate": 0.0008337180853512885, "loss": 0.2118, "num_input_tokens_seen": 90109760, "step": 41765 }, { "epoch": 6.814029363784666, "grad_norm": 0.08099472522735596, "learning_rate": 0.0008336650767040258, "loss": 0.0605, "num_input_tokens_seen": 90120320, "step": 41770 }, { "epoch": 6.814845024469821, "grad_norm": 0.01928417570888996, "learning_rate": 0.000833612061294617, "loss": 0.0727, "num_input_tokens_seen": 90129440, "step": 41775 }, { "epoch": 6.815660685154976, "grad_norm": 0.03533562272787094, "learning_rate": 0.0008335590391241365, "loss": 0.022, "num_input_tokens_seen": 90139616, "step": 41780 }, { "epoch": 6.8164763458401305, "grad_norm": 0.031532347202301025, "learning_rate": 0.000833506010193659, "loss": 0.0239, "num_input_tokens_seen": 90150016, "step": 41785 }, { "epoch": 6.817292006525285, "grad_norm": 0.2744573950767517, "learning_rate": 0.000833452974504259, "loss": 0.1345, "num_input_tokens_seen": 90160672, "step": 41790 }, { "epoch": 6.81810766721044, "grad_norm": 0.09152977913618088, "learning_rate": 0.0008333999320570116, "loss": 0.0434, "num_input_tokens_seen": 90171584, "step": 41795 }, { "epoch": 6.818923327895595, "grad_norm": 0.03734327107667923, "learning_rate": 0.0008333468828529916, "loss": 0.0704, "num_input_tokens_seen": 90181440, "step": 41800 }, { "epoch": 6.819738988580751, "grad_norm": 0.4962945878505707, "learning_rate": 0.0008332938268932742, "loss": 0.0771, "num_input_tokens_seen": 90192000, "step": 41805 }, { "epoch": 6.8205546492659055, "grad_norm": 0.1765662431716919, "learning_rate": 0.0008332407641789344, "loss": 0.0621, "num_input_tokens_seen": 90202976, "step": 41810 }, { "epoch": 6.82137030995106, "grad_norm": 0.339362233877182, "learning_rate": 0.0008331876947110478, "loss": 0.1536, "num_input_tokens_seen": 90213696, "step": 41815 }, { "epoch": 6.822185970636215, "grad_norm": 0.05223323032259941, "learning_rate": 0.00083313461849069, "loss": 0.0296, "num_input_tokens_seen": 90224064, "step": 41820 }, { "epoch": 6.82300163132137, "grad_norm": 0.12776850163936615, "learning_rate": 0.0008330815355189365, "loss": 0.0993, "num_input_tokens_seen": 90235936, "step": 41825 }, { "epoch": 6.823817292006526, "grad_norm": 0.015993699431419373, "learning_rate": 0.0008330284457968631, "loss": 0.0587, "num_input_tokens_seen": 90246944, "step": 41830 }, { "epoch": 6.8246329526916805, "grad_norm": 0.45065009593963623, "learning_rate": 0.0008329753493255458, "loss": 0.1267, "num_input_tokens_seen": 90257664, "step": 41835 }, { "epoch": 6.825448613376835, "grad_norm": 0.011275382712483406, "learning_rate": 0.0008329222461060606, "loss": 0.0568, "num_input_tokens_seen": 90268640, "step": 41840 }, { "epoch": 6.82626427406199, "grad_norm": 0.3012131452560425, "learning_rate": 0.0008328691361394838, "loss": 0.0753, "num_input_tokens_seen": 90280928, "step": 41845 }, { "epoch": 6.827079934747145, "grad_norm": 0.4809798002243042, "learning_rate": 0.0008328160194268916, "loss": 0.0762, "num_input_tokens_seen": 90291936, "step": 41850 }, { "epoch": 6.827895595432301, "grad_norm": 0.1640678495168686, "learning_rate": 0.0008327628959693606, "loss": 0.0682, "num_input_tokens_seen": 90302944, "step": 41855 }, { "epoch": 6.828711256117455, "grad_norm": 0.0352686271071434, "learning_rate": 0.0008327097657679674, "loss": 0.1123, "num_input_tokens_seen": 90313536, "step": 41860 }, { "epoch": 6.82952691680261, "grad_norm": 0.4338096082210541, "learning_rate": 0.0008326566288237887, "loss": 0.0911, "num_input_tokens_seen": 90324192, "step": 41865 }, { "epoch": 6.830342577487765, "grad_norm": 0.2676042914390564, "learning_rate": 0.0008326034851379014, "loss": 0.069, "num_input_tokens_seen": 90335584, "step": 41870 }, { "epoch": 6.83115823817292, "grad_norm": 0.2711193859577179, "learning_rate": 0.0008325503347113826, "loss": 0.0498, "num_input_tokens_seen": 90345088, "step": 41875 }, { "epoch": 6.831973898858075, "grad_norm": 0.011488964781165123, "learning_rate": 0.0008324971775453094, "loss": 0.0309, "num_input_tokens_seen": 90355872, "step": 41880 }, { "epoch": 6.8327895595432295, "grad_norm": 0.3368357717990875, "learning_rate": 0.0008324440136407591, "loss": 0.0917, "num_input_tokens_seen": 90365664, "step": 41885 }, { "epoch": 6.833605220228385, "grad_norm": 0.03656940162181854, "learning_rate": 0.000832390842998809, "loss": 0.0443, "num_input_tokens_seen": 90376736, "step": 41890 }, { "epoch": 6.83442088091354, "grad_norm": 0.005492472089827061, "learning_rate": 0.0008323376656205369, "loss": 0.0357, "num_input_tokens_seen": 90387072, "step": 41895 }, { "epoch": 6.835236541598695, "grad_norm": 0.10960322618484497, "learning_rate": 0.0008322844815070204, "loss": 0.0316, "num_input_tokens_seen": 90397696, "step": 41900 }, { "epoch": 6.83605220228385, "grad_norm": 0.008288799785077572, "learning_rate": 0.0008322312906593373, "loss": 0.0126, "num_input_tokens_seen": 90408320, "step": 41905 }, { "epoch": 6.8368678629690045, "grad_norm": 0.03051341138780117, "learning_rate": 0.0008321780930785657, "loss": 0.1618, "num_input_tokens_seen": 90419136, "step": 41910 }, { "epoch": 6.83768352365416, "grad_norm": 0.08481857180595398, "learning_rate": 0.0008321248887657836, "loss": 0.1373, "num_input_tokens_seen": 90430624, "step": 41915 }, { "epoch": 6.838499184339315, "grad_norm": 0.18655864894390106, "learning_rate": 0.0008320716777220694, "loss": 0.0463, "num_input_tokens_seen": 90443008, "step": 41920 }, { "epoch": 6.83931484502447, "grad_norm": 0.022143732756376266, "learning_rate": 0.0008320184599485012, "loss": 0.1137, "num_input_tokens_seen": 90454784, "step": 41925 }, { "epoch": 6.840130505709625, "grad_norm": 0.13375002145767212, "learning_rate": 0.0008319652354461577, "loss": 0.1143, "num_input_tokens_seen": 90465120, "step": 41930 }, { "epoch": 6.8409461663947795, "grad_norm": 0.7048324346542358, "learning_rate": 0.0008319120042161179, "loss": 0.1247, "num_input_tokens_seen": 90475552, "step": 41935 }, { "epoch": 6.841761827079935, "grad_norm": 0.032683949917554855, "learning_rate": 0.00083185876625946, "loss": 0.0576, "num_input_tokens_seen": 90484448, "step": 41940 }, { "epoch": 6.84257748776509, "grad_norm": 0.24643513560295105, "learning_rate": 0.0008318055215772633, "loss": 0.1177, "num_input_tokens_seen": 90495648, "step": 41945 }, { "epoch": 6.843393148450245, "grad_norm": 0.12778688967227936, "learning_rate": 0.0008317522701706066, "loss": 0.1804, "num_input_tokens_seen": 90506112, "step": 41950 }, { "epoch": 6.8442088091354, "grad_norm": 0.021788502112030983, "learning_rate": 0.0008316990120405695, "loss": 0.059, "num_input_tokens_seen": 90516352, "step": 41955 }, { "epoch": 6.8450244698205545, "grad_norm": 0.18001769483089447, "learning_rate": 0.0008316457471882311, "loss": 0.1122, "num_input_tokens_seen": 90527200, "step": 41960 }, { "epoch": 6.845840130505709, "grad_norm": 0.04440588876605034, "learning_rate": 0.0008315924756146708, "loss": 0.1331, "num_input_tokens_seen": 90537952, "step": 41965 }, { "epoch": 6.846655791190865, "grad_norm": 0.5375038981437683, "learning_rate": 0.0008315391973209685, "loss": 0.1029, "num_input_tokens_seen": 90548640, "step": 41970 }, { "epoch": 6.84747145187602, "grad_norm": 0.44407784938812256, "learning_rate": 0.0008314859123082037, "loss": 0.174, "num_input_tokens_seen": 90558528, "step": 41975 }, { "epoch": 6.848287112561175, "grad_norm": 0.35079607367515564, "learning_rate": 0.0008314326205774563, "loss": 0.0979, "num_input_tokens_seen": 90568288, "step": 41980 }, { "epoch": 6.849102773246329, "grad_norm": 0.08829190582036972, "learning_rate": 0.0008313793221298065, "loss": 0.0647, "num_input_tokens_seen": 90579584, "step": 41985 }, { "epoch": 6.849918433931484, "grad_norm": 0.0738891065120697, "learning_rate": 0.0008313260169663343, "loss": 0.0676, "num_input_tokens_seen": 90591040, "step": 41990 }, { "epoch": 6.850734094616639, "grad_norm": 0.007565811742097139, "learning_rate": 0.00083127270508812, "loss": 0.0215, "num_input_tokens_seen": 90601280, "step": 41995 }, { "epoch": 6.851549755301795, "grad_norm": 0.16407953202724457, "learning_rate": 0.0008312193864962442, "loss": 0.1503, "num_input_tokens_seen": 90611488, "step": 42000 }, { "epoch": 6.85236541598695, "grad_norm": 0.034260187298059464, "learning_rate": 0.0008311660611917873, "loss": 0.0237, "num_input_tokens_seen": 90623488, "step": 42005 }, { "epoch": 6.853181076672104, "grad_norm": 0.0960346907377243, "learning_rate": 0.00083111272917583, "loss": 0.1119, "num_input_tokens_seen": 90633792, "step": 42010 }, { "epoch": 6.853996737357259, "grad_norm": 0.1372324377298355, "learning_rate": 0.0008310593904494532, "loss": 0.1331, "num_input_tokens_seen": 90645024, "step": 42015 }, { "epoch": 6.854812398042414, "grad_norm": 0.0768456682562828, "learning_rate": 0.000831006045013738, "loss": 0.123, "num_input_tokens_seen": 90655008, "step": 42020 }, { "epoch": 6.85562805872757, "grad_norm": 0.041058801114559174, "learning_rate": 0.0008309526928697653, "loss": 0.0906, "num_input_tokens_seen": 90665152, "step": 42025 }, { "epoch": 6.856443719412725, "grad_norm": 0.22541950643062592, "learning_rate": 0.0008308993340186164, "loss": 0.0391, "num_input_tokens_seen": 90676576, "step": 42030 }, { "epoch": 6.857259380097879, "grad_norm": 0.27193722128868103, "learning_rate": 0.0008308459684613727, "loss": 0.0419, "num_input_tokens_seen": 90687488, "step": 42035 }, { "epoch": 6.858075040783034, "grad_norm": 0.5081382989883423, "learning_rate": 0.0008307925961991158, "loss": 0.1473, "num_input_tokens_seen": 90699552, "step": 42040 }, { "epoch": 6.858890701468189, "grad_norm": 0.016883132979273796, "learning_rate": 0.0008307392172329273, "loss": 0.0655, "num_input_tokens_seen": 90709184, "step": 42045 }, { "epoch": 6.859706362153344, "grad_norm": 0.24830155074596405, "learning_rate": 0.000830685831563889, "loss": 0.2123, "num_input_tokens_seen": 90719680, "step": 42050 }, { "epoch": 6.8605220228384995, "grad_norm": 0.1347314864397049, "learning_rate": 0.0008306324391930827, "loss": 0.1225, "num_input_tokens_seen": 90730432, "step": 42055 }, { "epoch": 6.861337683523654, "grad_norm": 0.03565412014722824, "learning_rate": 0.0008305790401215906, "loss": 0.1897, "num_input_tokens_seen": 90741216, "step": 42060 }, { "epoch": 6.862153344208809, "grad_norm": 0.03335873410105705, "learning_rate": 0.000830525634350495, "loss": 0.0268, "num_input_tokens_seen": 90751008, "step": 42065 }, { "epoch": 6.862969004893964, "grad_norm": 0.48377206921577454, "learning_rate": 0.0008304722218808782, "loss": 0.2044, "num_input_tokens_seen": 90761152, "step": 42070 }, { "epoch": 6.863784665579119, "grad_norm": 0.05339043587446213, "learning_rate": 0.0008304188027138225, "loss": 0.0294, "num_input_tokens_seen": 90772192, "step": 42075 }, { "epoch": 6.864600326264274, "grad_norm": 0.05580843985080719, "learning_rate": 0.0008303653768504105, "loss": 0.0845, "num_input_tokens_seen": 90784416, "step": 42080 }, { "epoch": 6.865415986949429, "grad_norm": 0.07382971048355103, "learning_rate": 0.000830311944291725, "loss": 0.0905, "num_input_tokens_seen": 90794784, "step": 42085 }, { "epoch": 6.866231647634584, "grad_norm": 0.11433255672454834, "learning_rate": 0.0008302585050388491, "loss": 0.2879, "num_input_tokens_seen": 90806400, "step": 42090 }, { "epoch": 6.867047308319739, "grad_norm": 0.1618431955575943, "learning_rate": 0.0008302050590928656, "loss": 0.1247, "num_input_tokens_seen": 90816864, "step": 42095 }, { "epoch": 6.867862969004894, "grad_norm": 0.013313322328031063, "learning_rate": 0.0008301516064548577, "loss": 0.0272, "num_input_tokens_seen": 90827776, "step": 42100 }, { "epoch": 6.868678629690049, "grad_norm": 0.06353279203176498, "learning_rate": 0.0008300981471259086, "loss": 0.1912, "num_input_tokens_seen": 90838400, "step": 42105 }, { "epoch": 6.869494290375204, "grad_norm": 0.03974494710564613, "learning_rate": 0.0008300446811071018, "loss": 0.0428, "num_input_tokens_seen": 90849280, "step": 42110 }, { "epoch": 6.870309951060359, "grad_norm": 0.47459807991981506, "learning_rate": 0.0008299912083995208, "loss": 0.1151, "num_input_tokens_seen": 90859104, "step": 42115 }, { "epoch": 6.871125611745514, "grad_norm": 0.021005159243941307, "learning_rate": 0.0008299377290042493, "loss": 0.1131, "num_input_tokens_seen": 90870848, "step": 42120 }, { "epoch": 6.871941272430669, "grad_norm": 0.07266499102115631, "learning_rate": 0.0008298842429223714, "loss": 0.0756, "num_input_tokens_seen": 90880704, "step": 42125 }, { "epoch": 6.872756933115824, "grad_norm": 0.02477717585861683, "learning_rate": 0.0008298307501549706, "loss": 0.034, "num_input_tokens_seen": 90891232, "step": 42130 }, { "epoch": 6.873572593800979, "grad_norm": 0.21119016408920288, "learning_rate": 0.0008297772507031314, "loss": 0.0602, "num_input_tokens_seen": 90901920, "step": 42135 }, { "epoch": 6.874388254486134, "grad_norm": 0.1970050036907196, "learning_rate": 0.0008297237445679378, "loss": 0.2374, "num_input_tokens_seen": 90913088, "step": 42140 }, { "epoch": 6.875203915171289, "grad_norm": 0.07823794335126877, "learning_rate": 0.0008296702317504741, "loss": 0.1049, "num_input_tokens_seen": 90923616, "step": 42145 }, { "epoch": 6.876019575856444, "grad_norm": 0.06535633653402328, "learning_rate": 0.0008296167122518252, "loss": 0.0671, "num_input_tokens_seen": 90934080, "step": 42150 }, { "epoch": 6.876835236541599, "grad_norm": 0.3889283835887909, "learning_rate": 0.0008295631860730752, "loss": 0.1804, "num_input_tokens_seen": 90945120, "step": 42155 }, { "epoch": 6.877650897226753, "grad_norm": 0.21959878504276276, "learning_rate": 0.0008295096532153093, "loss": 0.1076, "num_input_tokens_seen": 90956928, "step": 42160 }, { "epoch": 6.878466557911908, "grad_norm": 0.39505910873413086, "learning_rate": 0.0008294561136796122, "loss": 0.1461, "num_input_tokens_seen": 90967296, "step": 42165 }, { "epoch": 6.879282218597064, "grad_norm": 0.3026389479637146, "learning_rate": 0.000829402567467069, "loss": 0.2614, "num_input_tokens_seen": 90977536, "step": 42170 }, { "epoch": 6.880097879282219, "grad_norm": 0.3187331259250641, "learning_rate": 0.000829349014578765, "loss": 0.1179, "num_input_tokens_seen": 90988768, "step": 42175 }, { "epoch": 6.8809135399673735, "grad_norm": 0.061212677508592606, "learning_rate": 0.0008292954550157853, "loss": 0.0373, "num_input_tokens_seen": 90999872, "step": 42180 }, { "epoch": 6.881729200652528, "grad_norm": 0.03337814658880234, "learning_rate": 0.0008292418887792155, "loss": 0.1848, "num_input_tokens_seen": 91010272, "step": 42185 }, { "epoch": 6.882544861337683, "grad_norm": 0.046024419367313385, "learning_rate": 0.0008291883158701413, "loss": 0.0621, "num_input_tokens_seen": 91021760, "step": 42190 }, { "epoch": 6.883360522022839, "grad_norm": 0.10031213611364365, "learning_rate": 0.000829134736289648, "loss": 0.0579, "num_input_tokens_seen": 91030848, "step": 42195 }, { "epoch": 6.884176182707994, "grad_norm": 0.18108637630939484, "learning_rate": 0.0008290811500388219, "loss": 0.1119, "num_input_tokens_seen": 91042816, "step": 42200 }, { "epoch": 6.8849918433931485, "grad_norm": 0.04681359604001045, "learning_rate": 0.0008290275571187488, "loss": 0.0308, "num_input_tokens_seen": 91054816, "step": 42205 }, { "epoch": 6.885807504078303, "grad_norm": 0.3081273138523102, "learning_rate": 0.0008289739575305148, "loss": 0.127, "num_input_tokens_seen": 91065344, "step": 42210 }, { "epoch": 6.886623164763458, "grad_norm": 0.2042372226715088, "learning_rate": 0.0008289203512752063, "loss": 0.0722, "num_input_tokens_seen": 91076352, "step": 42215 }, { "epoch": 6.887438825448614, "grad_norm": 0.040237005800008774, "learning_rate": 0.0008288667383539097, "loss": 0.1122, "num_input_tokens_seen": 91087936, "step": 42220 }, { "epoch": 6.888254486133769, "grad_norm": 0.027607537806034088, "learning_rate": 0.0008288131187677112, "loss": 0.0564, "num_input_tokens_seen": 91098720, "step": 42225 }, { "epoch": 6.8890701468189235, "grad_norm": 0.038924481719732285, "learning_rate": 0.000828759492517698, "loss": 0.0799, "num_input_tokens_seen": 91109600, "step": 42230 }, { "epoch": 6.889885807504078, "grad_norm": 0.11821479350328445, "learning_rate": 0.0008287058596049563, "loss": 0.2705, "num_input_tokens_seen": 91120064, "step": 42235 }, { "epoch": 6.890701468189233, "grad_norm": 0.029640715569257736, "learning_rate": 0.0008286522200305738, "loss": 0.0508, "num_input_tokens_seen": 91130912, "step": 42240 }, { "epoch": 6.891517128874388, "grad_norm": 0.09412707388401031, "learning_rate": 0.0008285985737956367, "loss": 0.1158, "num_input_tokens_seen": 91139904, "step": 42245 }, { "epoch": 6.892332789559543, "grad_norm": 0.0739709660410881, "learning_rate": 0.0008285449209012328, "loss": 0.1029, "num_input_tokens_seen": 91150816, "step": 42250 }, { "epoch": 6.8931484502446985, "grad_norm": 0.0238915104418993, "learning_rate": 0.0008284912613484493, "loss": 0.0782, "num_input_tokens_seen": 91161152, "step": 42255 }, { "epoch": 6.893964110929853, "grad_norm": 0.056844599545001984, "learning_rate": 0.0008284375951383738, "loss": 0.065, "num_input_tokens_seen": 91171456, "step": 42260 }, { "epoch": 6.894779771615008, "grad_norm": 0.02075403556227684, "learning_rate": 0.0008283839222720935, "loss": 0.2126, "num_input_tokens_seen": 91181792, "step": 42265 }, { "epoch": 6.895595432300163, "grad_norm": 0.04865756630897522, "learning_rate": 0.0008283302427506966, "loss": 0.0583, "num_input_tokens_seen": 91192832, "step": 42270 }, { "epoch": 6.896411092985318, "grad_norm": 0.1098991259932518, "learning_rate": 0.0008282765565752708, "loss": 0.117, "num_input_tokens_seen": 91203584, "step": 42275 }, { "epoch": 6.897226753670473, "grad_norm": 0.038176391273736954, "learning_rate": 0.0008282228637469042, "loss": 0.0684, "num_input_tokens_seen": 91213280, "step": 42280 }, { "epoch": 6.898042414355628, "grad_norm": 0.026600686833262444, "learning_rate": 0.0008281691642666848, "loss": 0.0259, "num_input_tokens_seen": 91224032, "step": 42285 }, { "epoch": 6.898858075040783, "grad_norm": 0.09274445474147797, "learning_rate": 0.000828115458135701, "loss": 0.0546, "num_input_tokens_seen": 91234848, "step": 42290 }, { "epoch": 6.899673735725938, "grad_norm": 0.22010569274425507, "learning_rate": 0.0008280617453550412, "loss": 0.0811, "num_input_tokens_seen": 91243264, "step": 42295 }, { "epoch": 6.900489396411093, "grad_norm": 0.019967537373304367, "learning_rate": 0.0008280080259257939, "loss": 0.0235, "num_input_tokens_seen": 91254592, "step": 42300 }, { "epoch": 6.901305057096248, "grad_norm": 0.3995530605316162, "learning_rate": 0.0008279542998490479, "loss": 0.1232, "num_input_tokens_seen": 91266496, "step": 42305 }, { "epoch": 6.902120717781403, "grad_norm": 0.20655997097492218, "learning_rate": 0.000827900567125892, "loss": 0.1524, "num_input_tokens_seen": 91275232, "step": 42310 }, { "epoch": 6.902936378466558, "grad_norm": 0.02706918679177761, "learning_rate": 0.0008278468277574152, "loss": 0.0744, "num_input_tokens_seen": 91286496, "step": 42315 }, { "epoch": 6.903752039151713, "grad_norm": 0.26955166459083557, "learning_rate": 0.0008277930817447063, "loss": 0.1426, "num_input_tokens_seen": 91297664, "step": 42320 }, { "epoch": 6.904567699836868, "grad_norm": 0.03640248253941536, "learning_rate": 0.000827739329088855, "loss": 0.1739, "num_input_tokens_seen": 91307584, "step": 42325 }, { "epoch": 6.9053833605220225, "grad_norm": 0.21425504982471466, "learning_rate": 0.0008276855697909502, "loss": 0.1853, "num_input_tokens_seen": 91316960, "step": 42330 }, { "epoch": 6.906199021207177, "grad_norm": 0.0758684054017067, "learning_rate": 0.0008276318038520818, "loss": 0.0432, "num_input_tokens_seen": 91327840, "step": 42335 }, { "epoch": 6.907014681892333, "grad_norm": 0.03310230374336243, "learning_rate": 0.0008275780312733392, "loss": 0.0208, "num_input_tokens_seen": 91337408, "step": 42340 }, { "epoch": 6.907830342577488, "grad_norm": 0.012968582101166248, "learning_rate": 0.0008275242520558124, "loss": 0.0545, "num_input_tokens_seen": 91346400, "step": 42345 }, { "epoch": 6.908646003262643, "grad_norm": 0.22500422596931458, "learning_rate": 0.000827470466200591, "loss": 0.0513, "num_input_tokens_seen": 91357088, "step": 42350 }, { "epoch": 6.9094616639477975, "grad_norm": 0.0027959051076322794, "learning_rate": 0.0008274166737087652, "loss": 0.0691, "num_input_tokens_seen": 91366784, "step": 42355 }, { "epoch": 6.910277324632952, "grad_norm": 0.5848742127418518, "learning_rate": 0.000827362874581425, "loss": 0.162, "num_input_tokens_seen": 91377472, "step": 42360 }, { "epoch": 6.911092985318108, "grad_norm": 0.03475257381796837, "learning_rate": 0.000827309068819661, "loss": 0.0452, "num_input_tokens_seen": 91388256, "step": 42365 }, { "epoch": 6.911908646003263, "grad_norm": 0.3094511926174164, "learning_rate": 0.0008272552564245635, "loss": 0.0408, "num_input_tokens_seen": 91399648, "step": 42370 }, { "epoch": 6.912724306688418, "grad_norm": 0.050988491624593735, "learning_rate": 0.000827201437397223, "loss": 0.0973, "num_input_tokens_seen": 91410656, "step": 42375 }, { "epoch": 6.9135399673735725, "grad_norm": 0.485505074262619, "learning_rate": 0.0008271476117387303, "loss": 0.0692, "num_input_tokens_seen": 91420576, "step": 42380 }, { "epoch": 6.914355628058727, "grad_norm": 0.6088079810142517, "learning_rate": 0.0008270937794501763, "loss": 0.0872, "num_input_tokens_seen": 91432032, "step": 42385 }, { "epoch": 6.915171288743883, "grad_norm": 0.014471979811787605, "learning_rate": 0.0008270399405326519, "loss": 0.0369, "num_input_tokens_seen": 91442912, "step": 42390 }, { "epoch": 6.915986949429038, "grad_norm": 0.043920159339904785, "learning_rate": 0.0008269860949872484, "loss": 0.1708, "num_input_tokens_seen": 91454272, "step": 42395 }, { "epoch": 6.916802610114193, "grad_norm": 0.15794876217842102, "learning_rate": 0.0008269322428150565, "loss": 0.0553, "num_input_tokens_seen": 91466432, "step": 42400 }, { "epoch": 6.917618270799347, "grad_norm": 0.056662287563085556, "learning_rate": 0.0008268783840171682, "loss": 0.0367, "num_input_tokens_seen": 91477888, "step": 42405 }, { "epoch": 6.918433931484502, "grad_norm": 0.011091602966189384, "learning_rate": 0.0008268245185946748, "loss": 0.0872, "num_input_tokens_seen": 91487328, "step": 42410 }, { "epoch": 6.919249592169657, "grad_norm": 0.04144521430134773, "learning_rate": 0.0008267706465486677, "loss": 0.0742, "num_input_tokens_seen": 91497760, "step": 42415 }, { "epoch": 6.920065252854813, "grad_norm": 0.04122058302164078, "learning_rate": 0.000826716767880239, "loss": 0.0463, "num_input_tokens_seen": 91508352, "step": 42420 }, { "epoch": 6.920880913539968, "grad_norm": 0.018312150612473488, "learning_rate": 0.0008266628825904807, "loss": 0.1371, "num_input_tokens_seen": 91520768, "step": 42425 }, { "epoch": 6.921696574225122, "grad_norm": 0.3369309902191162, "learning_rate": 0.0008266089906804845, "loss": 0.0744, "num_input_tokens_seen": 91531264, "step": 42430 }, { "epoch": 6.922512234910277, "grad_norm": 0.34164267778396606, "learning_rate": 0.0008265550921513428, "loss": 0.1934, "num_input_tokens_seen": 91543008, "step": 42435 }, { "epoch": 6.923327895595432, "grad_norm": 0.14824849367141724, "learning_rate": 0.000826501187004148, "loss": 0.1051, "num_input_tokens_seen": 91553952, "step": 42440 }, { "epoch": 6.924143556280587, "grad_norm": 0.02646949701011181, "learning_rate": 0.0008264472752399923, "loss": 0.0698, "num_input_tokens_seen": 91564672, "step": 42445 }, { "epoch": 6.924959216965743, "grad_norm": 0.10538527369499207, "learning_rate": 0.0008263933568599687, "loss": 0.1226, "num_input_tokens_seen": 91576352, "step": 42450 }, { "epoch": 6.925774877650897, "grad_norm": 0.016175301745533943, "learning_rate": 0.0008263394318651693, "loss": 0.0518, "num_input_tokens_seen": 91587616, "step": 42455 }, { "epoch": 6.926590538336052, "grad_norm": 0.010527018457651138, "learning_rate": 0.0008262855002566876, "loss": 0.02, "num_input_tokens_seen": 91598688, "step": 42460 }, { "epoch": 6.927406199021207, "grad_norm": 1.0308940410614014, "learning_rate": 0.0008262315620356163, "loss": 0.1851, "num_input_tokens_seen": 91609856, "step": 42465 }, { "epoch": 6.928221859706362, "grad_norm": 0.36434733867645264, "learning_rate": 0.0008261776172030484, "loss": 0.2748, "num_input_tokens_seen": 91621536, "step": 42470 }, { "epoch": 6.9290375203915175, "grad_norm": 0.03316596522927284, "learning_rate": 0.0008261236657600773, "loss": 0.0933, "num_input_tokens_seen": 91631904, "step": 42475 }, { "epoch": 6.929853181076672, "grad_norm": 0.28905561566352844, "learning_rate": 0.0008260697077077964, "loss": 0.0724, "num_input_tokens_seen": 91642592, "step": 42480 }, { "epoch": 6.930668841761827, "grad_norm": 0.021420542150735855, "learning_rate": 0.0008260157430472992, "loss": 0.1527, "num_input_tokens_seen": 91653952, "step": 42485 }, { "epoch": 6.931484502446982, "grad_norm": 0.28145018219947815, "learning_rate": 0.0008259617717796795, "loss": 0.051, "num_input_tokens_seen": 91664544, "step": 42490 }, { "epoch": 6.932300163132137, "grad_norm": 0.006104886066168547, "learning_rate": 0.0008259077939060309, "loss": 0.0912, "num_input_tokens_seen": 91675040, "step": 42495 }, { "epoch": 6.933115823817292, "grad_norm": 0.11542543768882751, "learning_rate": 0.0008258538094274475, "loss": 0.0454, "num_input_tokens_seen": 91685056, "step": 42500 }, { "epoch": 6.933931484502447, "grad_norm": 0.006111524999141693, "learning_rate": 0.0008257998183450233, "loss": 0.0345, "num_input_tokens_seen": 91696256, "step": 42505 }, { "epoch": 6.934747145187602, "grad_norm": 0.05116130784153938, "learning_rate": 0.0008257458206598524, "loss": 0.0131, "num_input_tokens_seen": 91708096, "step": 42510 }, { "epoch": 6.935562805872757, "grad_norm": 0.09470958262681961, "learning_rate": 0.0008256918163730291, "loss": 0.0613, "num_input_tokens_seen": 91719072, "step": 42515 }, { "epoch": 6.936378466557912, "grad_norm": 0.03251597285270691, "learning_rate": 0.0008256378054856482, "loss": 0.0818, "num_input_tokens_seen": 91730432, "step": 42520 }, { "epoch": 6.937194127243067, "grad_norm": 0.06378404796123505, "learning_rate": 0.000825583787998804, "loss": 0.0124, "num_input_tokens_seen": 91739840, "step": 42525 }, { "epoch": 6.938009787928221, "grad_norm": 0.23776055872440338, "learning_rate": 0.0008255297639135912, "loss": 0.038, "num_input_tokens_seen": 91750976, "step": 42530 }, { "epoch": 6.938825448613377, "grad_norm": 0.1733567714691162, "learning_rate": 0.000825475733231105, "loss": 0.0708, "num_input_tokens_seen": 91763616, "step": 42535 }, { "epoch": 6.939641109298532, "grad_norm": 0.04321181774139404, "learning_rate": 0.0008254216959524399, "loss": 0.0906, "num_input_tokens_seen": 91775424, "step": 42540 }, { "epoch": 6.940456769983687, "grad_norm": 0.21850253641605377, "learning_rate": 0.0008253676520786914, "loss": 0.0583, "num_input_tokens_seen": 91785728, "step": 42545 }, { "epoch": 6.941272430668842, "grad_norm": 0.4650218188762665, "learning_rate": 0.0008253136016109547, "loss": 0.1277, "num_input_tokens_seen": 91795520, "step": 42550 }, { "epoch": 6.942088091353996, "grad_norm": 0.09119347482919693, "learning_rate": 0.0008252595445503253, "loss": 0.1808, "num_input_tokens_seen": 91805984, "step": 42555 }, { "epoch": 6.942903752039152, "grad_norm": 0.029528966173529625, "learning_rate": 0.0008252054808978984, "loss": 0.0238, "num_input_tokens_seen": 91817184, "step": 42560 }, { "epoch": 6.943719412724307, "grad_norm": 0.07109400629997253, "learning_rate": 0.0008251514106547698, "loss": 0.1174, "num_input_tokens_seen": 91829120, "step": 42565 }, { "epoch": 6.944535073409462, "grad_norm": 0.2692277133464813, "learning_rate": 0.0008250973338220356, "loss": 0.1108, "num_input_tokens_seen": 91839584, "step": 42570 }, { "epoch": 6.945350734094617, "grad_norm": 0.022252218797802925, "learning_rate": 0.0008250432504007914, "loss": 0.0286, "num_input_tokens_seen": 91850464, "step": 42575 }, { "epoch": 6.946166394779771, "grad_norm": 0.06307028979063034, "learning_rate": 0.0008249891603921334, "loss": 0.1098, "num_input_tokens_seen": 91861856, "step": 42580 }, { "epoch": 6.946982055464927, "grad_norm": 0.02056192234158516, "learning_rate": 0.0008249350637971577, "loss": 0.0552, "num_input_tokens_seen": 91873536, "step": 42585 }, { "epoch": 6.947797716150082, "grad_norm": 0.08320312201976776, "learning_rate": 0.0008248809606169609, "loss": 0.1782, "num_input_tokens_seen": 91883968, "step": 42590 }, { "epoch": 6.948613376835237, "grad_norm": 0.061043594032526016, "learning_rate": 0.0008248268508526393, "loss": 0.1449, "num_input_tokens_seen": 91895136, "step": 42595 }, { "epoch": 6.9494290375203915, "grad_norm": 0.022441010922193527, "learning_rate": 0.0008247727345052894, "loss": 0.016, "num_input_tokens_seen": 91906144, "step": 42600 }, { "epoch": 6.950244698205546, "grad_norm": 0.3586297035217285, "learning_rate": 0.000824718611576008, "loss": 0.0819, "num_input_tokens_seen": 91916480, "step": 42605 }, { "epoch": 6.951060358890701, "grad_norm": 0.5544759035110474, "learning_rate": 0.0008246644820658922, "loss": 0.2251, "num_input_tokens_seen": 91926176, "step": 42610 }, { "epoch": 6.951876019575856, "grad_norm": 0.012227549217641354, "learning_rate": 0.0008246103459760385, "loss": 0.0588, "num_input_tokens_seen": 91936320, "step": 42615 }, { "epoch": 6.952691680261012, "grad_norm": 0.03646855428814888, "learning_rate": 0.0008245562033075446, "loss": 0.0221, "num_input_tokens_seen": 91947808, "step": 42620 }, { "epoch": 6.9535073409461665, "grad_norm": 0.05324862152338028, "learning_rate": 0.0008245020540615074, "loss": 0.137, "num_input_tokens_seen": 91959488, "step": 42625 }, { "epoch": 6.954323001631321, "grad_norm": 0.027974702417850494, "learning_rate": 0.0008244478982390245, "loss": 0.0584, "num_input_tokens_seen": 91970368, "step": 42630 }, { "epoch": 6.955138662316476, "grad_norm": 0.044085096567869186, "learning_rate": 0.0008243937358411933, "loss": 0.0811, "num_input_tokens_seen": 91981184, "step": 42635 }, { "epoch": 6.955954323001631, "grad_norm": 0.026197049766778946, "learning_rate": 0.0008243395668691113, "loss": 0.0815, "num_input_tokens_seen": 91992928, "step": 42640 }, { "epoch": 6.956769983686787, "grad_norm": 0.033517785370349884, "learning_rate": 0.0008242853913238769, "loss": 0.1617, "num_input_tokens_seen": 92003616, "step": 42645 }, { "epoch": 6.9575856443719415, "grad_norm": 0.304104208946228, "learning_rate": 0.0008242312092065873, "loss": 0.1419, "num_input_tokens_seen": 92014816, "step": 42650 }, { "epoch": 6.958401305057096, "grad_norm": 0.09259176254272461, "learning_rate": 0.0008241770205183412, "loss": 0.14, "num_input_tokens_seen": 92026624, "step": 42655 }, { "epoch": 6.959216965742251, "grad_norm": 0.07539231330156326, "learning_rate": 0.0008241228252602364, "loss": 0.1212, "num_input_tokens_seen": 92036416, "step": 42660 }, { "epoch": 6.960032626427406, "grad_norm": 0.017154810950160027, "learning_rate": 0.0008240686234333714, "loss": 0.0392, "num_input_tokens_seen": 92045952, "step": 42665 }, { "epoch": 6.960848287112562, "grad_norm": 0.1912280172109604, "learning_rate": 0.0008240144150388446, "loss": 0.1696, "num_input_tokens_seen": 92056224, "step": 42670 }, { "epoch": 6.9616639477977165, "grad_norm": 0.30025991797447205, "learning_rate": 0.0008239602000777548, "loss": 0.2112, "num_input_tokens_seen": 92067648, "step": 42675 }, { "epoch": 6.962479608482871, "grad_norm": 0.025300297886133194, "learning_rate": 0.0008239059785512005, "loss": 0.1166, "num_input_tokens_seen": 92078048, "step": 42680 }, { "epoch": 6.963295269168026, "grad_norm": 0.03309603035449982, "learning_rate": 0.0008238517504602805, "loss": 0.1593, "num_input_tokens_seen": 92088832, "step": 42685 }, { "epoch": 6.964110929853181, "grad_norm": 0.24542009830474854, "learning_rate": 0.0008237975158060939, "loss": 0.0589, "num_input_tokens_seen": 92098336, "step": 42690 }, { "epoch": 6.964926590538336, "grad_norm": 0.04161224514245987, "learning_rate": 0.0008237432745897402, "loss": 0.095, "num_input_tokens_seen": 92107840, "step": 42695 }, { "epoch": 6.9657422512234906, "grad_norm": 0.08136509358882904, "learning_rate": 0.000823689026812318, "loss": 0.1886, "num_input_tokens_seen": 92119456, "step": 42700 }, { "epoch": 6.966557911908646, "grad_norm": 0.07352624088525772, "learning_rate": 0.0008236347724749274, "loss": 0.1582, "num_input_tokens_seen": 92131232, "step": 42705 }, { "epoch": 6.967373572593801, "grad_norm": 0.2750554084777832, "learning_rate": 0.0008235805115786672, "loss": 0.0911, "num_input_tokens_seen": 92141728, "step": 42710 }, { "epoch": 6.968189233278956, "grad_norm": 0.14867351949214935, "learning_rate": 0.0008235262441246376, "loss": 0.1407, "num_input_tokens_seen": 92151584, "step": 42715 }, { "epoch": 6.969004893964111, "grad_norm": 0.10934548079967499, "learning_rate": 0.0008234719701139384, "loss": 0.0756, "num_input_tokens_seen": 92162464, "step": 42720 }, { "epoch": 6.9698205546492655, "grad_norm": 0.09491892158985138, "learning_rate": 0.0008234176895476692, "loss": 0.101, "num_input_tokens_seen": 92172672, "step": 42725 }, { "epoch": 6.970636215334421, "grad_norm": 0.13320164382457733, "learning_rate": 0.0008233634024269302, "loss": 0.0309, "num_input_tokens_seen": 92183040, "step": 42730 }, { "epoch": 6.971451876019576, "grad_norm": 0.2806028127670288, "learning_rate": 0.0008233091087528217, "loss": 0.1204, "num_input_tokens_seen": 92192992, "step": 42735 }, { "epoch": 6.972267536704731, "grad_norm": 0.29792845249176025, "learning_rate": 0.000823254808526444, "loss": 0.1618, "num_input_tokens_seen": 92204544, "step": 42740 }, { "epoch": 6.973083197389886, "grad_norm": 0.06293490529060364, "learning_rate": 0.0008232005017488975, "loss": 0.0903, "num_input_tokens_seen": 92215424, "step": 42745 }, { "epoch": 6.9738988580750405, "grad_norm": 0.10183566063642502, "learning_rate": 0.0008231461884212828, "loss": 0.0445, "num_input_tokens_seen": 92225888, "step": 42750 }, { "epoch": 6.974714518760196, "grad_norm": 0.021720802411437035, "learning_rate": 0.0008230918685447006, "loss": 0.0793, "num_input_tokens_seen": 92237344, "step": 42755 }, { "epoch": 6.975530179445351, "grad_norm": 0.32610127329826355, "learning_rate": 0.000823037542120252, "loss": 0.1967, "num_input_tokens_seen": 92247616, "step": 42760 }, { "epoch": 6.976345840130506, "grad_norm": 0.1894839107990265, "learning_rate": 0.0008229832091490377, "loss": 0.0894, "num_input_tokens_seen": 92258880, "step": 42765 }, { "epoch": 6.977161500815661, "grad_norm": 0.3397112190723419, "learning_rate": 0.0008229288696321588, "loss": 0.1929, "num_input_tokens_seen": 92268544, "step": 42770 }, { "epoch": 6.9779771615008155, "grad_norm": 0.14287039637565613, "learning_rate": 0.0008228745235707169, "loss": 0.0328, "num_input_tokens_seen": 92279104, "step": 42775 }, { "epoch": 6.97879282218597, "grad_norm": 0.011046701110899448, "learning_rate": 0.000822820170965813, "loss": 0.0375, "num_input_tokens_seen": 92289440, "step": 42780 }, { "epoch": 6.979608482871125, "grad_norm": 0.0907086730003357, "learning_rate": 0.0008227658118185491, "loss": 0.0295, "num_input_tokens_seen": 92300832, "step": 42785 }, { "epoch": 6.980424143556281, "grad_norm": 0.13725587725639343, "learning_rate": 0.0008227114461300262, "loss": 0.0215, "num_input_tokens_seen": 92311712, "step": 42790 }, { "epoch": 6.981239804241436, "grad_norm": 0.13284292817115784, "learning_rate": 0.0008226570739013466, "loss": 0.0354, "num_input_tokens_seen": 92322464, "step": 42795 }, { "epoch": 6.9820554649265905, "grad_norm": 0.3152807950973511, "learning_rate": 0.0008226026951336121, "loss": 0.0489, "num_input_tokens_seen": 92331296, "step": 42800 }, { "epoch": 6.982871125611745, "grad_norm": 0.022204695269465446, "learning_rate": 0.0008225483098279247, "loss": 0.0367, "num_input_tokens_seen": 92341952, "step": 42805 }, { "epoch": 6.9836867862969, "grad_norm": 0.040940165519714355, "learning_rate": 0.0008224939179853868, "loss": 0.1, "num_input_tokens_seen": 92351840, "step": 42810 }, { "epoch": 6.984502446982056, "grad_norm": 0.007488491013646126, "learning_rate": 0.0008224395196071003, "loss": 0.1013, "num_input_tokens_seen": 92363488, "step": 42815 }, { "epoch": 6.985318107667211, "grad_norm": 0.011530367657542229, "learning_rate": 0.000822385114694168, "loss": 0.0427, "num_input_tokens_seen": 92374848, "step": 42820 }, { "epoch": 6.986133768352365, "grad_norm": 0.06060733273625374, "learning_rate": 0.0008223307032476923, "loss": 0.0236, "num_input_tokens_seen": 92385728, "step": 42825 }, { "epoch": 6.98694942903752, "grad_norm": 0.06938505917787552, "learning_rate": 0.0008222762852687762, "loss": 0.2083, "num_input_tokens_seen": 92396576, "step": 42830 }, { "epoch": 6.987765089722675, "grad_norm": 0.055980414152145386, "learning_rate": 0.0008222218607585221, "loss": 0.0548, "num_input_tokens_seen": 92406912, "step": 42835 }, { "epoch": 6.988580750407831, "grad_norm": 0.7430779933929443, "learning_rate": 0.0008221674297180334, "loss": 0.1927, "num_input_tokens_seen": 92417728, "step": 42840 }, { "epoch": 6.989396411092986, "grad_norm": 0.1786465048789978, "learning_rate": 0.000822112992148413, "loss": 0.1435, "num_input_tokens_seen": 92428800, "step": 42845 }, { "epoch": 6.99021207177814, "grad_norm": 0.16406850516796112, "learning_rate": 0.000822058548050764, "loss": 0.1241, "num_input_tokens_seen": 92440448, "step": 42850 }, { "epoch": 6.991027732463295, "grad_norm": 0.32208719849586487, "learning_rate": 0.0008220040974261901, "loss": 0.0448, "num_input_tokens_seen": 92450496, "step": 42855 }, { "epoch": 6.99184339314845, "grad_norm": 0.14926102757453918, "learning_rate": 0.0008219496402757948, "loss": 0.0447, "num_input_tokens_seen": 92461056, "step": 42860 }, { "epoch": 6.992659053833605, "grad_norm": 0.14845219254493713, "learning_rate": 0.0008218951766006815, "loss": 0.11, "num_input_tokens_seen": 92471456, "step": 42865 }, { "epoch": 6.993474714518761, "grad_norm": 0.23424747586250305, "learning_rate": 0.0008218407064019541, "loss": 0.0238, "num_input_tokens_seen": 92482400, "step": 42870 }, { "epoch": 6.994290375203915, "grad_norm": 0.044552795588970184, "learning_rate": 0.0008217862296807165, "loss": 0.0498, "num_input_tokens_seen": 92494304, "step": 42875 }, { "epoch": 6.99510603588907, "grad_norm": 0.038800694048404694, "learning_rate": 0.0008217317464380727, "loss": 0.1241, "num_input_tokens_seen": 92506176, "step": 42880 }, { "epoch": 6.995921696574225, "grad_norm": 0.05844039469957352, "learning_rate": 0.0008216772566751269, "loss": 0.0389, "num_input_tokens_seen": 92516800, "step": 42885 }, { "epoch": 6.99673735725938, "grad_norm": 0.023771530017256737, "learning_rate": 0.0008216227603929835, "loss": 0.0625, "num_input_tokens_seen": 92527232, "step": 42890 }, { "epoch": 6.997553017944535, "grad_norm": 0.015153403393924236, "learning_rate": 0.0008215682575927468, "loss": 0.0395, "num_input_tokens_seen": 92538240, "step": 42895 }, { "epoch": 6.99836867862969, "grad_norm": 0.061845388263463974, "learning_rate": 0.0008215137482755215, "loss": 0.0968, "num_input_tokens_seen": 92548320, "step": 42900 }, { "epoch": 6.999184339314845, "grad_norm": 0.07697153836488724, "learning_rate": 0.0008214592324424122, "loss": 0.1661, "num_input_tokens_seen": 92558976, "step": 42905 }, { "epoch": 7.0, "grad_norm": 0.009140337817370892, "learning_rate": 0.0008214047100945236, "loss": 0.0444, "num_input_tokens_seen": 92568704, "step": 42910 }, { "epoch": 7.0, "eval_loss": 0.13612739741802216, "eval_runtime": 104.6578, "eval_samples_per_second": 26.037, "eval_steps_per_second": 6.516, "num_input_tokens_seen": 92568704, "step": 42910 }, { "epoch": 7.000815660685155, "grad_norm": 0.05621584504842758, "learning_rate": 0.0008213501812329609, "loss": 0.0444, "num_input_tokens_seen": 92579584, "step": 42915 }, { "epoch": 7.00163132137031, "grad_norm": 0.023878080770373344, "learning_rate": 0.0008212956458588292, "loss": 0.0244, "num_input_tokens_seen": 92591392, "step": 42920 }, { "epoch": 7.002446982055465, "grad_norm": 0.08467896282672882, "learning_rate": 0.0008212411039732336, "loss": 0.1651, "num_input_tokens_seen": 92601760, "step": 42925 }, { "epoch": 7.00326264274062, "grad_norm": 0.01949106529355049, "learning_rate": 0.0008211865555772795, "loss": 0.0223, "num_input_tokens_seen": 92610592, "step": 42930 }, { "epoch": 7.004078303425775, "grad_norm": 0.05664452537894249, "learning_rate": 0.0008211320006720723, "loss": 0.0312, "num_input_tokens_seen": 92620704, "step": 42935 }, { "epoch": 7.00489396411093, "grad_norm": 0.23780561983585358, "learning_rate": 0.000821077439258718, "loss": 0.0454, "num_input_tokens_seen": 92631392, "step": 42940 }, { "epoch": 7.005709624796085, "grad_norm": 0.27732589840888977, "learning_rate": 0.0008210228713383218, "loss": 0.0927, "num_input_tokens_seen": 92642496, "step": 42945 }, { "epoch": 7.006525285481239, "grad_norm": 0.16691787540912628, "learning_rate": 0.00082096829691199, "loss": 0.1303, "num_input_tokens_seen": 92653152, "step": 42950 }, { "epoch": 7.007340946166395, "grad_norm": 0.4045598804950714, "learning_rate": 0.0008209137159808284, "loss": 0.0752, "num_input_tokens_seen": 92663328, "step": 42955 }, { "epoch": 7.00815660685155, "grad_norm": 0.3367452919483185, "learning_rate": 0.0008208591285459434, "loss": 0.14, "num_input_tokens_seen": 92675232, "step": 42960 }, { "epoch": 7.008972267536705, "grad_norm": 0.0086878826841712, "learning_rate": 0.0008208045346084409, "loss": 0.0065, "num_input_tokens_seen": 92685312, "step": 42965 }, { "epoch": 7.00978792822186, "grad_norm": 0.2903786599636078, "learning_rate": 0.0008207499341694278, "loss": 0.076, "num_input_tokens_seen": 92697312, "step": 42970 }, { "epoch": 7.010603588907014, "grad_norm": 0.3355376422405243, "learning_rate": 0.0008206953272300102, "loss": 0.08, "num_input_tokens_seen": 92707904, "step": 42975 }, { "epoch": 7.011419249592169, "grad_norm": 0.01161609124392271, "learning_rate": 0.000820640713791295, "loss": 0.0698, "num_input_tokens_seen": 92719104, "step": 42980 }, { "epoch": 7.012234910277325, "grad_norm": 0.19665658473968506, "learning_rate": 0.000820586093854389, "loss": 0.0351, "num_input_tokens_seen": 92729568, "step": 42985 }, { "epoch": 7.01305057096248, "grad_norm": 0.01043128501623869, "learning_rate": 0.0008205314674203989, "loss": 0.0821, "num_input_tokens_seen": 92740384, "step": 42990 }, { "epoch": 7.013866231647635, "grad_norm": 0.0891595259308815, "learning_rate": 0.0008204768344904323, "loss": 0.0213, "num_input_tokens_seen": 92749856, "step": 42995 }, { "epoch": 7.014681892332789, "grad_norm": 0.10561666637659073, "learning_rate": 0.0008204221950655959, "loss": 0.045, "num_input_tokens_seen": 92761664, "step": 43000 }, { "epoch": 7.015497553017944, "grad_norm": 0.016842138022184372, "learning_rate": 0.0008203675491469973, "loss": 0.0364, "num_input_tokens_seen": 92771456, "step": 43005 }, { "epoch": 7.0163132137031, "grad_norm": 0.1803014874458313, "learning_rate": 0.0008203128967357438, "loss": 0.0525, "num_input_tokens_seen": 92781728, "step": 43010 }, { "epoch": 7.017128874388255, "grad_norm": 0.03186974674463272, "learning_rate": 0.0008202582378329433, "loss": 0.2492, "num_input_tokens_seen": 92792672, "step": 43015 }, { "epoch": 7.0179445350734095, "grad_norm": 0.19018307328224182, "learning_rate": 0.0008202035724397032, "loss": 0.116, "num_input_tokens_seen": 92802656, "step": 43020 }, { "epoch": 7.018760195758564, "grad_norm": 0.06839246302843094, "learning_rate": 0.0008201489005571316, "loss": 0.0612, "num_input_tokens_seen": 92813056, "step": 43025 }, { "epoch": 7.019575856443719, "grad_norm": 0.3688131272792816, "learning_rate": 0.0008200942221863363, "loss": 0.0986, "num_input_tokens_seen": 92823936, "step": 43030 }, { "epoch": 7.020391517128874, "grad_norm": 0.033726051449775696, "learning_rate": 0.0008200395373284255, "loss": 0.1244, "num_input_tokens_seen": 92834944, "step": 43035 }, { "epoch": 7.02120717781403, "grad_norm": 0.014913531020283699, "learning_rate": 0.0008199848459845077, "loss": 0.0592, "num_input_tokens_seen": 92846848, "step": 43040 }, { "epoch": 7.0220228384991845, "grad_norm": 0.07623451948165894, "learning_rate": 0.0008199301481556907, "loss": 0.0929, "num_input_tokens_seen": 92858496, "step": 43045 }, { "epoch": 7.022838499184339, "grad_norm": 0.05421224609017372, "learning_rate": 0.0008198754438430836, "loss": 0.0192, "num_input_tokens_seen": 92869536, "step": 43050 }, { "epoch": 7.023654159869494, "grad_norm": 0.3889907896518707, "learning_rate": 0.000819820733047795, "loss": 0.0681, "num_input_tokens_seen": 92880192, "step": 43055 }, { "epoch": 7.024469820554649, "grad_norm": 0.0350983701646328, "learning_rate": 0.0008197660157709333, "loss": 0.0159, "num_input_tokens_seen": 92892352, "step": 43060 }, { "epoch": 7.025285481239805, "grad_norm": 0.3771122097969055, "learning_rate": 0.0008197112920136076, "loss": 0.0735, "num_input_tokens_seen": 92904672, "step": 43065 }, { "epoch": 7.0261011419249595, "grad_norm": 0.16802431643009186, "learning_rate": 0.000819656561776927, "loss": 0.034, "num_input_tokens_seen": 92915968, "step": 43070 }, { "epoch": 7.026916802610114, "grad_norm": 0.006825015880167484, "learning_rate": 0.0008196018250620008, "loss": 0.0235, "num_input_tokens_seen": 92926976, "step": 43075 }, { "epoch": 7.027732463295269, "grad_norm": 0.3640078604221344, "learning_rate": 0.0008195470818699381, "loss": 0.0673, "num_input_tokens_seen": 92938080, "step": 43080 }, { "epoch": 7.028548123980424, "grad_norm": 0.4565901458263397, "learning_rate": 0.0008194923322018484, "loss": 0.1444, "num_input_tokens_seen": 92949312, "step": 43085 }, { "epoch": 7.029363784665579, "grad_norm": 0.046069759875535965, "learning_rate": 0.0008194375760588413, "loss": 0.0505, "num_input_tokens_seen": 92960736, "step": 43090 }, { "epoch": 7.0301794453507345, "grad_norm": 0.31401526927948, "learning_rate": 0.0008193828134420265, "loss": 0.1288, "num_input_tokens_seen": 92970560, "step": 43095 }, { "epoch": 7.030995106035889, "grad_norm": 0.4128706455230713, "learning_rate": 0.0008193280443525138, "loss": 0.2038, "num_input_tokens_seen": 92980480, "step": 43100 }, { "epoch": 7.031810766721044, "grad_norm": 0.01692216843366623, "learning_rate": 0.0008192732687914131, "loss": 0.1471, "num_input_tokens_seen": 92990432, "step": 43105 }, { "epoch": 7.032626427406199, "grad_norm": 0.039115238934755325, "learning_rate": 0.0008192184867598347, "loss": 0.0982, "num_input_tokens_seen": 93001792, "step": 43110 }, { "epoch": 7.033442088091354, "grad_norm": 0.3791634142398834, "learning_rate": 0.0008191636982588887, "loss": 0.1503, "num_input_tokens_seen": 93012480, "step": 43115 }, { "epoch": 7.034257748776509, "grad_norm": 0.006412103306502104, "learning_rate": 0.0008191089032896855, "loss": 0.0298, "num_input_tokens_seen": 93023424, "step": 43120 }, { "epoch": 7.035073409461664, "grad_norm": 0.035452552139759064, "learning_rate": 0.0008190541018533353, "loss": 0.0514, "num_input_tokens_seen": 93034624, "step": 43125 }, { "epoch": 7.035889070146819, "grad_norm": 0.04379449039697647, "learning_rate": 0.0008189992939509491, "loss": 0.255, "num_input_tokens_seen": 93045888, "step": 43130 }, { "epoch": 7.036704730831974, "grad_norm": 0.19535574316978455, "learning_rate": 0.0008189444795836377, "loss": 0.0354, "num_input_tokens_seen": 93056800, "step": 43135 }, { "epoch": 7.037520391517129, "grad_norm": 0.01829916425049305, "learning_rate": 0.0008188896587525118, "loss": 0.0098, "num_input_tokens_seen": 93066848, "step": 43140 }, { "epoch": 7.0383360522022835, "grad_norm": 0.259156197309494, "learning_rate": 0.0008188348314586823, "loss": 0.0639, "num_input_tokens_seen": 93077088, "step": 43145 }, { "epoch": 7.039151712887439, "grad_norm": 0.04192090779542923, "learning_rate": 0.0008187799977032605, "loss": 0.0758, "num_input_tokens_seen": 93087008, "step": 43150 }, { "epoch": 7.039967373572594, "grad_norm": 0.44756826758384705, "learning_rate": 0.0008187251574873576, "loss": 0.1659, "num_input_tokens_seen": 93096608, "step": 43155 }, { "epoch": 7.040783034257749, "grad_norm": 0.1987903118133545, "learning_rate": 0.0008186703108120852, "loss": 0.0795, "num_input_tokens_seen": 93107168, "step": 43160 }, { "epoch": 7.041598694942904, "grad_norm": 0.21643608808517456, "learning_rate": 0.0008186154576785545, "loss": 0.1841, "num_input_tokens_seen": 93118656, "step": 43165 }, { "epoch": 7.0424143556280585, "grad_norm": 0.08364047110080719, "learning_rate": 0.0008185605980878775, "loss": 0.0932, "num_input_tokens_seen": 93129568, "step": 43170 }, { "epoch": 7.043230016313213, "grad_norm": 0.06223098933696747, "learning_rate": 0.0008185057320411658, "loss": 0.0281, "num_input_tokens_seen": 93140800, "step": 43175 }, { "epoch": 7.044045676998369, "grad_norm": 0.20272275805473328, "learning_rate": 0.0008184508595395314, "loss": 0.0375, "num_input_tokens_seen": 93152128, "step": 43180 }, { "epoch": 7.044861337683524, "grad_norm": 0.2920411229133606, "learning_rate": 0.0008183959805840863, "loss": 0.055, "num_input_tokens_seen": 93163072, "step": 43185 }, { "epoch": 7.045676998368679, "grad_norm": 0.03556692227721214, "learning_rate": 0.0008183410951759429, "loss": 0.0614, "num_input_tokens_seen": 93173600, "step": 43190 }, { "epoch": 7.0464926590538335, "grad_norm": 0.11350714415311813, "learning_rate": 0.0008182862033162131, "loss": 0.1825, "num_input_tokens_seen": 93184576, "step": 43195 }, { "epoch": 7.047308319738988, "grad_norm": 0.2556411027908325, "learning_rate": 0.0008182313050060098, "loss": 0.1432, "num_input_tokens_seen": 93195840, "step": 43200 }, { "epoch": 7.048123980424143, "grad_norm": 0.29032012820243835, "learning_rate": 0.0008181764002464454, "loss": 0.1651, "num_input_tokens_seen": 93206656, "step": 43205 }, { "epoch": 7.048939641109299, "grad_norm": 0.07633698731660843, "learning_rate": 0.0008181214890386326, "loss": 0.1127, "num_input_tokens_seen": 93218464, "step": 43210 }, { "epoch": 7.049755301794454, "grad_norm": 0.024120373651385307, "learning_rate": 0.0008180665713836842, "loss": 0.0815, "num_input_tokens_seen": 93229056, "step": 43215 }, { "epoch": 7.0505709624796085, "grad_norm": 0.34292569756507874, "learning_rate": 0.0008180116472827133, "loss": 0.168, "num_input_tokens_seen": 93240096, "step": 43220 }, { "epoch": 7.051386623164763, "grad_norm": 0.01862553134560585, "learning_rate": 0.000817956716736833, "loss": 0.0456, "num_input_tokens_seen": 93250592, "step": 43225 }, { "epoch": 7.052202283849918, "grad_norm": 0.5628551840782166, "learning_rate": 0.0008179017797471562, "loss": 0.1486, "num_input_tokens_seen": 93261696, "step": 43230 }, { "epoch": 7.053017944535074, "grad_norm": 0.06941450387239456, "learning_rate": 0.0008178468363147968, "loss": 0.0171, "num_input_tokens_seen": 93272384, "step": 43235 }, { "epoch": 7.053833605220229, "grad_norm": 0.31638649106025696, "learning_rate": 0.000817791886440868, "loss": 0.1605, "num_input_tokens_seen": 93283904, "step": 43240 }, { "epoch": 7.054649265905383, "grad_norm": 0.01983080431818962, "learning_rate": 0.0008177369301264834, "loss": 0.0439, "num_input_tokens_seen": 93294848, "step": 43245 }, { "epoch": 7.055464926590538, "grad_norm": 0.12355517596006393, "learning_rate": 0.0008176819673727569, "loss": 0.0551, "num_input_tokens_seen": 93304704, "step": 43250 }, { "epoch": 7.056280587275693, "grad_norm": 0.10293567180633545, "learning_rate": 0.0008176269981808023, "loss": 0.0275, "num_input_tokens_seen": 93316224, "step": 43255 }, { "epoch": 7.057096247960848, "grad_norm": 0.13578490912914276, "learning_rate": 0.0008175720225517337, "loss": 0.0455, "num_input_tokens_seen": 93327008, "step": 43260 }, { "epoch": 7.057911908646004, "grad_norm": 0.3371644616127014, "learning_rate": 0.0008175170404866652, "loss": 0.0699, "num_input_tokens_seen": 93338304, "step": 43265 }, { "epoch": 7.058727569331158, "grad_norm": 0.39034247398376465, "learning_rate": 0.0008174620519867109, "loss": 0.0595, "num_input_tokens_seen": 93349888, "step": 43270 }, { "epoch": 7.059543230016313, "grad_norm": 0.008789513260126114, "learning_rate": 0.0008174070570529854, "loss": 0.0732, "num_input_tokens_seen": 93360896, "step": 43275 }, { "epoch": 7.060358890701468, "grad_norm": 0.37617579102516174, "learning_rate": 0.0008173520556866035, "loss": 0.0789, "num_input_tokens_seen": 93372448, "step": 43280 }, { "epoch": 7.061174551386623, "grad_norm": 0.14513754844665527, "learning_rate": 0.0008172970478886794, "loss": 0.0631, "num_input_tokens_seen": 93384576, "step": 43285 }, { "epoch": 7.061990212071779, "grad_norm": 0.00973565224558115, "learning_rate": 0.0008172420336603281, "loss": 0.0153, "num_input_tokens_seen": 93395488, "step": 43290 }, { "epoch": 7.062805872756933, "grad_norm": 0.03684699162840843, "learning_rate": 0.0008171870130026646, "loss": 0.0773, "num_input_tokens_seen": 93406016, "step": 43295 }, { "epoch": 7.063621533442088, "grad_norm": 0.013263503089547157, "learning_rate": 0.000817131985916804, "loss": 0.0653, "num_input_tokens_seen": 93416256, "step": 43300 }, { "epoch": 7.064437194127243, "grad_norm": 0.009219163097441196, "learning_rate": 0.0008170769524038613, "loss": 0.1393, "num_input_tokens_seen": 93427584, "step": 43305 }, { "epoch": 7.065252854812398, "grad_norm": 0.37303683161735535, "learning_rate": 0.0008170219124649518, "loss": 0.1174, "num_input_tokens_seen": 93438400, "step": 43310 }, { "epoch": 7.066068515497553, "grad_norm": 0.005729866214096546, "learning_rate": 0.0008169668661011912, "loss": 0.0509, "num_input_tokens_seen": 93448256, "step": 43315 }, { "epoch": 7.066884176182708, "grad_norm": 0.013649282045662403, "learning_rate": 0.0008169118133136951, "loss": 0.0992, "num_input_tokens_seen": 93457984, "step": 43320 }, { "epoch": 7.067699836867863, "grad_norm": 0.3667338192462921, "learning_rate": 0.0008168567541035788, "loss": 0.1621, "num_input_tokens_seen": 93468864, "step": 43325 }, { "epoch": 7.068515497553018, "grad_norm": 0.10405556112527847, "learning_rate": 0.0008168016884719585, "loss": 0.0505, "num_input_tokens_seen": 93479424, "step": 43330 }, { "epoch": 7.069331158238173, "grad_norm": 0.05104127153754234, "learning_rate": 0.0008167466164199499, "loss": 0.0658, "num_input_tokens_seen": 93490944, "step": 43335 }, { "epoch": 7.070146818923328, "grad_norm": 0.14969474077224731, "learning_rate": 0.0008166915379486697, "loss": 0.1075, "num_input_tokens_seen": 93501312, "step": 43340 }, { "epoch": 7.0709624796084825, "grad_norm": 0.07914786040782928, "learning_rate": 0.0008166364530592334, "loss": 0.1163, "num_input_tokens_seen": 93512288, "step": 43345 }, { "epoch": 7.071778140293638, "grad_norm": 0.2548310160636902, "learning_rate": 0.0008165813617527579, "loss": 0.0503, "num_input_tokens_seen": 93522944, "step": 43350 }, { "epoch": 7.072593800978793, "grad_norm": 0.37040266394615173, "learning_rate": 0.0008165262640303595, "loss": 0.0737, "num_input_tokens_seen": 93534624, "step": 43355 }, { "epoch": 7.073409461663948, "grad_norm": 0.23089298605918884, "learning_rate": 0.0008164711598931546, "loss": 0.1023, "num_input_tokens_seen": 93544832, "step": 43360 }, { "epoch": 7.074225122349103, "grad_norm": 0.1670691817998886, "learning_rate": 0.0008164160493422604, "loss": 0.0425, "num_input_tokens_seen": 93555264, "step": 43365 }, { "epoch": 7.075040783034257, "grad_norm": 0.04714969918131828, "learning_rate": 0.0008163609323787934, "loss": 0.1158, "num_input_tokens_seen": 93565568, "step": 43370 }, { "epoch": 7.075856443719413, "grad_norm": 0.04111149162054062, "learning_rate": 0.0008163058090038709, "loss": 0.047, "num_input_tokens_seen": 93576736, "step": 43375 }, { "epoch": 7.076672104404568, "grad_norm": 0.026524124667048454, "learning_rate": 0.0008162506792186099, "loss": 0.0161, "num_input_tokens_seen": 93586496, "step": 43380 }, { "epoch": 7.077487765089723, "grad_norm": 0.03092281147837639, "learning_rate": 0.0008161955430241276, "loss": 0.0684, "num_input_tokens_seen": 93597280, "step": 43385 }, { "epoch": 7.078303425774878, "grad_norm": 0.16600550711154938, "learning_rate": 0.0008161404004215415, "loss": 0.1307, "num_input_tokens_seen": 93608864, "step": 43390 }, { "epoch": 7.079119086460032, "grad_norm": 0.09903465956449509, "learning_rate": 0.0008160852514119692, "loss": 0.0398, "num_input_tokens_seen": 93620224, "step": 43395 }, { "epoch": 7.079934747145187, "grad_norm": 0.12294992804527283, "learning_rate": 0.0008160300959965284, "loss": 0.0772, "num_input_tokens_seen": 93630816, "step": 43400 }, { "epoch": 7.080750407830343, "grad_norm": 0.04933968931436539, "learning_rate": 0.0008159749341763367, "loss": 0.052, "num_input_tokens_seen": 93641792, "step": 43405 }, { "epoch": 7.081566068515498, "grad_norm": 0.044081125408411026, "learning_rate": 0.000815919765952512, "loss": 0.0403, "num_input_tokens_seen": 93653440, "step": 43410 }, { "epoch": 7.082381729200653, "grad_norm": 0.019500134512782097, "learning_rate": 0.0008158645913261726, "loss": 0.0294, "num_input_tokens_seen": 93664128, "step": 43415 }, { "epoch": 7.083197389885807, "grad_norm": 0.04038427025079727, "learning_rate": 0.0008158094102984366, "loss": 0.0977, "num_input_tokens_seen": 93675456, "step": 43420 }, { "epoch": 7.084013050570962, "grad_norm": 0.06313642859458923, "learning_rate": 0.0008157542228704221, "loss": 0.0257, "num_input_tokens_seen": 93686880, "step": 43425 }, { "epoch": 7.084828711256117, "grad_norm": 0.230858713388443, "learning_rate": 0.0008156990290432478, "loss": 0.3001, "num_input_tokens_seen": 93699136, "step": 43430 }, { "epoch": 7.085644371941273, "grad_norm": 0.3827678859233856, "learning_rate": 0.0008156438288180321, "loss": 0.1552, "num_input_tokens_seen": 93710720, "step": 43435 }, { "epoch": 7.0864600326264275, "grad_norm": 0.026266539469361305, "learning_rate": 0.0008155886221958939, "loss": 0.0615, "num_input_tokens_seen": 93721504, "step": 43440 }, { "epoch": 7.087275693311582, "grad_norm": 0.14249640703201294, "learning_rate": 0.0008155334091779518, "loss": 0.2254, "num_input_tokens_seen": 93731264, "step": 43445 }, { "epoch": 7.088091353996737, "grad_norm": 0.5585278272628784, "learning_rate": 0.0008154781897653251, "loss": 0.1239, "num_input_tokens_seen": 93742560, "step": 43450 }, { "epoch": 7.088907014681892, "grad_norm": 0.0663832351565361, "learning_rate": 0.0008154229639591324, "loss": 0.1218, "num_input_tokens_seen": 93752608, "step": 43455 }, { "epoch": 7.089722675367048, "grad_norm": 0.2317977398633957, "learning_rate": 0.0008153677317604935, "loss": 0.0986, "num_input_tokens_seen": 93762784, "step": 43460 }, { "epoch": 7.0905383360522025, "grad_norm": 0.04973970353603363, "learning_rate": 0.0008153124931705271, "loss": 0.0738, "num_input_tokens_seen": 93773536, "step": 43465 }, { "epoch": 7.091353996737357, "grad_norm": 0.020977241918444633, "learning_rate": 0.0008152572481903533, "loss": 0.0785, "num_input_tokens_seen": 93784416, "step": 43470 }, { "epoch": 7.092169657422512, "grad_norm": 0.2582453489303589, "learning_rate": 0.0008152019968210913, "loss": 0.3735, "num_input_tokens_seen": 93796032, "step": 43475 }, { "epoch": 7.092985318107667, "grad_norm": 0.06173722818493843, "learning_rate": 0.0008151467390638611, "loss": 0.1283, "num_input_tokens_seen": 93806976, "step": 43480 }, { "epoch": 7.093800978792822, "grad_norm": 0.058044400066137314, "learning_rate": 0.0008150914749197823, "loss": 0.131, "num_input_tokens_seen": 93817696, "step": 43485 }, { "epoch": 7.0946166394779775, "grad_norm": 0.11192791908979416, "learning_rate": 0.0008150362043899751, "loss": 0.1088, "num_input_tokens_seen": 93828576, "step": 43490 }, { "epoch": 7.095432300163132, "grad_norm": 0.15583764016628265, "learning_rate": 0.0008149809274755595, "loss": 0.1011, "num_input_tokens_seen": 93839808, "step": 43495 }, { "epoch": 7.096247960848287, "grad_norm": 0.25816863775253296, "learning_rate": 0.0008149256441776559, "loss": 0.1314, "num_input_tokens_seen": 93851008, "step": 43500 }, { "epoch": 7.097063621533442, "grad_norm": 0.05049748718738556, "learning_rate": 0.0008148703544973846, "loss": 0.0573, "num_input_tokens_seen": 93860384, "step": 43505 }, { "epoch": 7.097879282218597, "grad_norm": 0.028491158038377762, "learning_rate": 0.000814815058435866, "loss": 0.128, "num_input_tokens_seen": 93870240, "step": 43510 }, { "epoch": 7.0986949429037525, "grad_norm": 0.15648305416107178, "learning_rate": 0.0008147597559942211, "loss": 0.0476, "num_input_tokens_seen": 93880704, "step": 43515 }, { "epoch": 7.099510603588907, "grad_norm": 0.08021257072687149, "learning_rate": 0.0008147044471735703, "loss": 0.0781, "num_input_tokens_seen": 93892128, "step": 43520 }, { "epoch": 7.100326264274062, "grad_norm": 0.06722480058670044, "learning_rate": 0.0008146491319750346, "loss": 0.1002, "num_input_tokens_seen": 93901472, "step": 43525 }, { "epoch": 7.101141924959217, "grad_norm": 0.015327050350606441, "learning_rate": 0.0008145938103997352, "loss": 0.0101, "num_input_tokens_seen": 93912864, "step": 43530 }, { "epoch": 7.101957585644372, "grad_norm": 0.013162313960492611, "learning_rate": 0.0008145384824487931, "loss": 0.0364, "num_input_tokens_seen": 93923744, "step": 43535 }, { "epoch": 7.102773246329527, "grad_norm": 0.4006735682487488, "learning_rate": 0.0008144831481233296, "loss": 0.169, "num_input_tokens_seen": 93934496, "step": 43540 }, { "epoch": 7.103588907014682, "grad_norm": 0.023454079404473305, "learning_rate": 0.0008144278074244662, "loss": 0.0429, "num_input_tokens_seen": 93945312, "step": 43545 }, { "epoch": 7.104404567699837, "grad_norm": 0.02436712011694908, "learning_rate": 0.0008143724603533243, "loss": 0.0489, "num_input_tokens_seen": 93955904, "step": 43550 }, { "epoch": 7.105220228384992, "grad_norm": 0.012012063525617123, "learning_rate": 0.0008143171069110258, "loss": 0.1966, "num_input_tokens_seen": 93967808, "step": 43555 }, { "epoch": 7.106035889070147, "grad_norm": 0.28120508790016174, "learning_rate": 0.0008142617470986924, "loss": 0.0561, "num_input_tokens_seen": 93979104, "step": 43560 }, { "epoch": 7.1068515497553015, "grad_norm": 0.04824819043278694, "learning_rate": 0.000814206380917446, "loss": 0.083, "num_input_tokens_seen": 93991040, "step": 43565 }, { "epoch": 7.107667210440456, "grad_norm": 0.016212094575166702, "learning_rate": 0.0008141510083684087, "loss": 0.0888, "num_input_tokens_seen": 94001472, "step": 43570 }, { "epoch": 7.108482871125612, "grad_norm": 0.007858370430767536, "learning_rate": 0.0008140956294527026, "loss": 0.043, "num_input_tokens_seen": 94012352, "step": 43575 }, { "epoch": 7.109298531810767, "grad_norm": 0.040593452751636505, "learning_rate": 0.00081404024417145, "loss": 0.1022, "num_input_tokens_seen": 94023968, "step": 43580 }, { "epoch": 7.110114192495922, "grad_norm": 0.1286478340625763, "learning_rate": 0.0008139848525257737, "loss": 0.023, "num_input_tokens_seen": 94033664, "step": 43585 }, { "epoch": 7.1109298531810765, "grad_norm": 0.05951720476150513, "learning_rate": 0.000813929454516796, "loss": 0.0972, "num_input_tokens_seen": 94045408, "step": 43590 }, { "epoch": 7.111745513866231, "grad_norm": 0.008042372763156891, "learning_rate": 0.0008138740501456396, "loss": 0.1135, "num_input_tokens_seen": 94056768, "step": 43595 }, { "epoch": 7.112561174551387, "grad_norm": 0.009623503312468529, "learning_rate": 0.0008138186394134275, "loss": 0.0409, "num_input_tokens_seen": 94066368, "step": 43600 }, { "epoch": 7.113376835236542, "grad_norm": 0.10521452873945236, "learning_rate": 0.0008137632223212824, "loss": 0.0372, "num_input_tokens_seen": 94076544, "step": 43605 }, { "epoch": 7.114192495921697, "grad_norm": 0.05702836811542511, "learning_rate": 0.0008137077988703276, "loss": 0.0874, "num_input_tokens_seen": 94088128, "step": 43610 }, { "epoch": 7.1150081566068515, "grad_norm": 0.06025119498372078, "learning_rate": 0.0008136523690616864, "loss": 0.1068, "num_input_tokens_seen": 94099296, "step": 43615 }, { "epoch": 7.115823817292006, "grad_norm": 0.017126720398664474, "learning_rate": 0.000813596932896482, "loss": 0.0537, "num_input_tokens_seen": 94108928, "step": 43620 }, { "epoch": 7.116639477977161, "grad_norm": 0.1938651204109192, "learning_rate": 0.000813541490375838, "loss": 0.0214, "num_input_tokens_seen": 94119200, "step": 43625 }, { "epoch": 7.117455138662317, "grad_norm": 0.0051362658850848675, "learning_rate": 0.0008134860415008778, "loss": 0.0905, "num_input_tokens_seen": 94129792, "step": 43630 }, { "epoch": 7.118270799347472, "grad_norm": 0.012279465794563293, "learning_rate": 0.0008134305862727253, "loss": 0.0289, "num_input_tokens_seen": 94139712, "step": 43635 }, { "epoch": 7.1190864600326265, "grad_norm": 0.22891195118427277, "learning_rate": 0.0008133751246925046, "loss": 0.05, "num_input_tokens_seen": 94151008, "step": 43640 }, { "epoch": 7.119902120717781, "grad_norm": 0.03631577268242836, "learning_rate": 0.0008133196567613391, "loss": 0.0466, "num_input_tokens_seen": 94162080, "step": 43645 }, { "epoch": 7.120717781402936, "grad_norm": 0.5401914715766907, "learning_rate": 0.0008132641824803534, "loss": 0.1044, "num_input_tokens_seen": 94172160, "step": 43650 }, { "epoch": 7.121533442088092, "grad_norm": 0.07123128324747086, "learning_rate": 0.0008132087018506716, "loss": 0.1806, "num_input_tokens_seen": 94182304, "step": 43655 }, { "epoch": 7.122349102773247, "grad_norm": 0.09127622097730637, "learning_rate": 0.0008131532148734182, "loss": 0.0467, "num_input_tokens_seen": 94192800, "step": 43660 }, { "epoch": 7.123164763458401, "grad_norm": 0.26450788974761963, "learning_rate": 0.0008130977215497177, "loss": 0.0904, "num_input_tokens_seen": 94203680, "step": 43665 }, { "epoch": 7.123980424143556, "grad_norm": 0.14121392369270325, "learning_rate": 0.0008130422218806945, "loss": 0.0984, "num_input_tokens_seen": 94214624, "step": 43670 }, { "epoch": 7.124796084828711, "grad_norm": 0.5717516541481018, "learning_rate": 0.0008129867158674737, "loss": 0.2634, "num_input_tokens_seen": 94225088, "step": 43675 }, { "epoch": 7.125611745513866, "grad_norm": 0.795817494392395, "learning_rate": 0.00081293120351118, "loss": 0.3187, "num_input_tokens_seen": 94236032, "step": 43680 }, { "epoch": 7.126427406199022, "grad_norm": 0.056841589510440826, "learning_rate": 0.0008128756848129386, "loss": 0.0952, "num_input_tokens_seen": 94247392, "step": 43685 }, { "epoch": 7.127243066884176, "grad_norm": 0.012434907257556915, "learning_rate": 0.0008128201597738744, "loss": 0.0181, "num_input_tokens_seen": 94258176, "step": 43690 }, { "epoch": 7.128058727569331, "grad_norm": 0.01829572767019272, "learning_rate": 0.0008127646283951129, "loss": 0.0939, "num_input_tokens_seen": 94269376, "step": 43695 }, { "epoch": 7.128874388254486, "grad_norm": 0.009943843819200993, "learning_rate": 0.0008127090906777793, "loss": 0.1748, "num_input_tokens_seen": 94280128, "step": 43700 }, { "epoch": 7.129690048939641, "grad_norm": 0.022619767114520073, "learning_rate": 0.0008126535466229993, "loss": 0.0397, "num_input_tokens_seen": 94290720, "step": 43705 }, { "epoch": 7.130505709624796, "grad_norm": 0.02261081524193287, "learning_rate": 0.0008125979962318987, "loss": 0.0456, "num_input_tokens_seen": 94302400, "step": 43710 }, { "epoch": 7.131321370309951, "grad_norm": 0.5030266046524048, "learning_rate": 0.000812542439505603, "loss": 0.1053, "num_input_tokens_seen": 94313536, "step": 43715 }, { "epoch": 7.132137030995106, "grad_norm": 0.3340288996696472, "learning_rate": 0.0008124868764452384, "loss": 0.0578, "num_input_tokens_seen": 94324800, "step": 43720 }, { "epoch": 7.132952691680261, "grad_norm": 0.03445883095264435, "learning_rate": 0.0008124313070519307, "loss": 0.0145, "num_input_tokens_seen": 94335904, "step": 43725 }, { "epoch": 7.133768352365416, "grad_norm": 0.45275720953941345, "learning_rate": 0.0008123757313268064, "loss": 0.1154, "num_input_tokens_seen": 94347232, "step": 43730 }, { "epoch": 7.134584013050571, "grad_norm": 0.46623119711875916, "learning_rate": 0.0008123201492709915, "loss": 0.149, "num_input_tokens_seen": 94358272, "step": 43735 }, { "epoch": 7.135399673735726, "grad_norm": 0.12243355810642242, "learning_rate": 0.0008122645608856125, "loss": 0.058, "num_input_tokens_seen": 94369728, "step": 43740 }, { "epoch": 7.136215334420881, "grad_norm": 0.27705639600753784, "learning_rate": 0.0008122089661717961, "loss": 0.0648, "num_input_tokens_seen": 94381856, "step": 43745 }, { "epoch": 7.137030995106036, "grad_norm": 0.20928199589252472, "learning_rate": 0.000812153365130669, "loss": 0.096, "num_input_tokens_seen": 94392384, "step": 43750 }, { "epoch": 7.137846655791191, "grad_norm": 0.07331326603889465, "learning_rate": 0.0008120977577633578, "loss": 0.0441, "num_input_tokens_seen": 94402848, "step": 43755 }, { "epoch": 7.138662316476346, "grad_norm": 0.02677987329661846, "learning_rate": 0.0008120421440709897, "loss": 0.054, "num_input_tokens_seen": 94415072, "step": 43760 }, { "epoch": 7.1394779771615005, "grad_norm": 0.03160775825381279, "learning_rate": 0.0008119865240546918, "loss": 0.0396, "num_input_tokens_seen": 94425920, "step": 43765 }, { "epoch": 7.140293637846656, "grad_norm": 0.1440647393465042, "learning_rate": 0.000811930897715591, "loss": 0.1156, "num_input_tokens_seen": 94435840, "step": 43770 }, { "epoch": 7.141109298531811, "grad_norm": 0.4107045829296112, "learning_rate": 0.0008118752650548151, "loss": 0.2557, "num_input_tokens_seen": 94446752, "step": 43775 }, { "epoch": 7.141924959216966, "grad_norm": 0.34999459981918335, "learning_rate": 0.0008118196260734911, "loss": 0.227, "num_input_tokens_seen": 94457440, "step": 43780 }, { "epoch": 7.142740619902121, "grad_norm": 0.04697418957948685, "learning_rate": 0.000811763980772747, "loss": 0.0449, "num_input_tokens_seen": 94468224, "step": 43785 }, { "epoch": 7.143556280587275, "grad_norm": 0.05708957836031914, "learning_rate": 0.0008117083291537102, "loss": 0.035, "num_input_tokens_seen": 94479424, "step": 43790 }, { "epoch": 7.14437194127243, "grad_norm": 0.02532113902270794, "learning_rate": 0.0008116526712175087, "loss": 0.0406, "num_input_tokens_seen": 94489792, "step": 43795 }, { "epoch": 7.145187601957586, "grad_norm": 0.024616217240691185, "learning_rate": 0.0008115970069652705, "loss": 0.0418, "num_input_tokens_seen": 94500608, "step": 43800 }, { "epoch": 7.146003262642741, "grad_norm": 0.23185306787490845, "learning_rate": 0.0008115413363981237, "loss": 0.0902, "num_input_tokens_seen": 94511936, "step": 43805 }, { "epoch": 7.146818923327896, "grad_norm": 0.03193129971623421, "learning_rate": 0.0008114856595171963, "loss": 0.0539, "num_input_tokens_seen": 94522464, "step": 43810 }, { "epoch": 7.14763458401305, "grad_norm": 0.11275696754455566, "learning_rate": 0.000811429976323617, "loss": 0.0323, "num_input_tokens_seen": 94533600, "step": 43815 }, { "epoch": 7.148450244698205, "grad_norm": 0.0039909291081130505, "learning_rate": 0.0008113742868185142, "loss": 0.0362, "num_input_tokens_seen": 94544928, "step": 43820 }, { "epoch": 7.149265905383361, "grad_norm": 0.025235064327716827, "learning_rate": 0.0008113185910030163, "loss": 0.0554, "num_input_tokens_seen": 94556224, "step": 43825 }, { "epoch": 7.150081566068516, "grad_norm": 0.3476864993572235, "learning_rate": 0.0008112628888782523, "loss": 0.0528, "num_input_tokens_seen": 94567200, "step": 43830 }, { "epoch": 7.150897226753671, "grad_norm": 0.33536460995674133, "learning_rate": 0.0008112071804453511, "loss": 0.0752, "num_input_tokens_seen": 94578272, "step": 43835 }, { "epoch": 7.151712887438825, "grad_norm": 0.31544187664985657, "learning_rate": 0.0008111514657054415, "loss": 0.0506, "num_input_tokens_seen": 94590080, "step": 43840 }, { "epoch": 7.15252854812398, "grad_norm": 0.07480638474225998, "learning_rate": 0.0008110957446596527, "loss": 0.0977, "num_input_tokens_seen": 94600224, "step": 43845 }, { "epoch": 7.153344208809135, "grad_norm": 0.25543704628944397, "learning_rate": 0.0008110400173091142, "loss": 0.0756, "num_input_tokens_seen": 94610592, "step": 43850 }, { "epoch": 7.154159869494291, "grad_norm": 0.41889941692352295, "learning_rate": 0.0008109842836549549, "loss": 0.2045, "num_input_tokens_seen": 94620640, "step": 43855 }, { "epoch": 7.1549755301794455, "grad_norm": 0.1074126809835434, "learning_rate": 0.0008109285436983047, "loss": 0.0626, "num_input_tokens_seen": 94630144, "step": 43860 }, { "epoch": 7.1557911908646, "grad_norm": 0.5262054204940796, "learning_rate": 0.000810872797440293, "loss": 0.0444, "num_input_tokens_seen": 94642080, "step": 43865 }, { "epoch": 7.156606851549755, "grad_norm": 0.022135723382234573, "learning_rate": 0.0008108170448820498, "loss": 0.0176, "num_input_tokens_seen": 94653120, "step": 43870 }, { "epoch": 7.15742251223491, "grad_norm": 0.19672982394695282, "learning_rate": 0.0008107612860247049, "loss": 0.1477, "num_input_tokens_seen": 94663264, "step": 43875 }, { "epoch": 7.158238172920065, "grad_norm": 0.027926815673708916, "learning_rate": 0.0008107055208693882, "loss": 0.0077, "num_input_tokens_seen": 94675104, "step": 43880 }, { "epoch": 7.1590538336052205, "grad_norm": 0.826599657535553, "learning_rate": 0.00081064974941723, "loss": 0.0841, "num_input_tokens_seen": 94684992, "step": 43885 }, { "epoch": 7.159869494290375, "grad_norm": 0.017097435891628265, "learning_rate": 0.0008105939716693606, "loss": 0.02, "num_input_tokens_seen": 94695840, "step": 43890 }, { "epoch": 7.16068515497553, "grad_norm": 0.17803454399108887, "learning_rate": 0.0008105381876269104, "loss": 0.1178, "num_input_tokens_seen": 94706208, "step": 43895 }, { "epoch": 7.161500815660685, "grad_norm": 0.07060866802930832, "learning_rate": 0.0008104823972910098, "loss": 0.0329, "num_input_tokens_seen": 94717536, "step": 43900 }, { "epoch": 7.16231647634584, "grad_norm": 0.10126982629299164, "learning_rate": 0.0008104266006627895, "loss": 0.0419, "num_input_tokens_seen": 94726400, "step": 43905 }, { "epoch": 7.1631321370309955, "grad_norm": 0.02352185919880867, "learning_rate": 0.0008103707977433804, "loss": 0.0448, "num_input_tokens_seen": 94736992, "step": 43910 }, { "epoch": 7.16394779771615, "grad_norm": 0.008209151215851307, "learning_rate": 0.0008103149885339134, "loss": 0.0254, "num_input_tokens_seen": 94748992, "step": 43915 }, { "epoch": 7.164763458401305, "grad_norm": 0.029053283855319023, "learning_rate": 0.0008102591730355193, "loss": 0.0118, "num_input_tokens_seen": 94760160, "step": 43920 }, { "epoch": 7.16557911908646, "grad_norm": 0.6342970132827759, "learning_rate": 0.0008102033512493297, "loss": 0.1172, "num_input_tokens_seen": 94770528, "step": 43925 }, { "epoch": 7.166394779771615, "grad_norm": 0.0024230489507317543, "learning_rate": 0.0008101475231764756, "loss": 0.0218, "num_input_tokens_seen": 94780992, "step": 43930 }, { "epoch": 7.16721044045677, "grad_norm": 0.1245429590344429, "learning_rate": 0.0008100916888180884, "loss": 0.1695, "num_input_tokens_seen": 94793312, "step": 43935 }, { "epoch": 7.168026101141925, "grad_norm": 0.1881529837846756, "learning_rate": 0.0008100358481752998, "loss": 0.0873, "num_input_tokens_seen": 94804000, "step": 43940 }, { "epoch": 7.16884176182708, "grad_norm": 0.003708864329382777, "learning_rate": 0.0008099800012492415, "loss": 0.0448, "num_input_tokens_seen": 94813088, "step": 43945 }, { "epoch": 7.169657422512235, "grad_norm": 0.4791778028011322, "learning_rate": 0.0008099241480410451, "loss": 0.103, "num_input_tokens_seen": 94824544, "step": 43950 }, { "epoch": 7.17047308319739, "grad_norm": 0.017841247841715813, "learning_rate": 0.0008098682885518427, "loss": 0.0291, "num_input_tokens_seen": 94834752, "step": 43955 }, { "epoch": 7.171288743882545, "grad_norm": 0.12681090831756592, "learning_rate": 0.0008098124227827663, "loss": 0.0402, "num_input_tokens_seen": 94846144, "step": 43960 }, { "epoch": 7.1721044045677, "grad_norm": 0.10033146291971207, "learning_rate": 0.0008097565507349482, "loss": 0.1695, "num_input_tokens_seen": 94857376, "step": 43965 }, { "epoch": 7.172920065252855, "grad_norm": 0.4251362681388855, "learning_rate": 0.0008097006724095208, "loss": 0.1019, "num_input_tokens_seen": 94867552, "step": 43970 }, { "epoch": 7.17373572593801, "grad_norm": 0.17401285469532013, "learning_rate": 0.0008096447878076161, "loss": 0.0224, "num_input_tokens_seen": 94878976, "step": 43975 }, { "epoch": 7.174551386623165, "grad_norm": 0.11046452075242996, "learning_rate": 0.0008095888969303672, "loss": 0.0432, "num_input_tokens_seen": 94887648, "step": 43980 }, { "epoch": 7.1753670473083195, "grad_norm": 0.06511957198381424, "learning_rate": 0.0008095329997789063, "loss": 0.0701, "num_input_tokens_seen": 94897952, "step": 43985 }, { "epoch": 7.176182707993474, "grad_norm": 0.012178033590316772, "learning_rate": 0.0008094770963543667, "loss": 0.0192, "num_input_tokens_seen": 94909824, "step": 43990 }, { "epoch": 7.17699836867863, "grad_norm": 1.0476908683776855, "learning_rate": 0.0008094211866578812, "loss": 0.1172, "num_input_tokens_seen": 94920192, "step": 43995 }, { "epoch": 7.177814029363785, "grad_norm": 0.18471547961235046, "learning_rate": 0.0008093652706905827, "loss": 0.022, "num_input_tokens_seen": 94932032, "step": 44000 }, { "epoch": 7.17862969004894, "grad_norm": 0.25224941968917847, "learning_rate": 0.0008093093484536045, "loss": 0.0475, "num_input_tokens_seen": 94943968, "step": 44005 }, { "epoch": 7.1794453507340945, "grad_norm": 0.26341569423675537, "learning_rate": 0.0008092534199480801, "loss": 0.1056, "num_input_tokens_seen": 94953792, "step": 44010 }, { "epoch": 7.180261011419249, "grad_norm": 0.4182332456111908, "learning_rate": 0.0008091974851751427, "loss": 0.3237, "num_input_tokens_seen": 94963904, "step": 44015 }, { "epoch": 7.181076672104404, "grad_norm": 0.6529690623283386, "learning_rate": 0.0008091415441359261, "loss": 0.1023, "num_input_tokens_seen": 94975456, "step": 44020 }, { "epoch": 7.18189233278956, "grad_norm": 0.28972285985946655, "learning_rate": 0.000809085596831564, "loss": 0.1061, "num_input_tokens_seen": 94985056, "step": 44025 }, { "epoch": 7.182707993474715, "grad_norm": 0.06278565526008606, "learning_rate": 0.0008090296432631901, "loss": 0.023, "num_input_tokens_seen": 94996448, "step": 44030 }, { "epoch": 7.1835236541598695, "grad_norm": 0.06026162579655647, "learning_rate": 0.0008089736834319384, "loss": 0.0244, "num_input_tokens_seen": 95008160, "step": 44035 }, { "epoch": 7.184339314845024, "grad_norm": 0.005305078346282244, "learning_rate": 0.0008089177173389431, "loss": 0.2488, "num_input_tokens_seen": 95018080, "step": 44040 }, { "epoch": 7.185154975530179, "grad_norm": 0.008626905269920826, "learning_rate": 0.0008088617449853382, "loss": 0.0386, "num_input_tokens_seen": 95028256, "step": 44045 }, { "epoch": 7.185970636215335, "grad_norm": 0.02194381132721901, "learning_rate": 0.0008088057663722583, "loss": 0.0562, "num_input_tokens_seen": 95038368, "step": 44050 }, { "epoch": 7.18678629690049, "grad_norm": 0.00783210713416338, "learning_rate": 0.000808749781500838, "loss": 0.0535, "num_input_tokens_seen": 95047808, "step": 44055 }, { "epoch": 7.1876019575856445, "grad_norm": 0.043280526995658875, "learning_rate": 0.0008086937903722114, "loss": 0.0362, "num_input_tokens_seen": 95058560, "step": 44060 }, { "epoch": 7.188417618270799, "grad_norm": 0.03245038911700249, "learning_rate": 0.0008086377929875137, "loss": 0.0505, "num_input_tokens_seen": 95068384, "step": 44065 }, { "epoch": 7.189233278955954, "grad_norm": 0.09135684370994568, "learning_rate": 0.0008085817893478797, "loss": 0.1605, "num_input_tokens_seen": 95078304, "step": 44070 }, { "epoch": 7.190048939641109, "grad_norm": 0.29165327548980713, "learning_rate": 0.0008085257794544441, "loss": 0.194, "num_input_tokens_seen": 95088928, "step": 44075 }, { "epoch": 7.190864600326265, "grad_norm": 0.15445441007614136, "learning_rate": 0.0008084697633083422, "loss": 0.1129, "num_input_tokens_seen": 95098496, "step": 44080 }, { "epoch": 7.191680261011419, "grad_norm": 0.341234415769577, "learning_rate": 0.0008084137409107093, "loss": 0.0792, "num_input_tokens_seen": 95109920, "step": 44085 }, { "epoch": 7.192495921696574, "grad_norm": 0.6282361149787903, "learning_rate": 0.0008083577122626806, "loss": 0.1526, "num_input_tokens_seen": 95121056, "step": 44090 }, { "epoch": 7.193311582381729, "grad_norm": 0.3828500509262085, "learning_rate": 0.0008083016773653917, "loss": 0.0878, "num_input_tokens_seen": 95133056, "step": 44095 }, { "epoch": 7.194127243066884, "grad_norm": 0.037887729704380035, "learning_rate": 0.0008082456362199783, "loss": 0.0903, "num_input_tokens_seen": 95143616, "step": 44100 }, { "epoch": 7.19494290375204, "grad_norm": 0.05093253031373024, "learning_rate": 0.000808189588827576, "loss": 0.023, "num_input_tokens_seen": 95153952, "step": 44105 }, { "epoch": 7.195758564437194, "grad_norm": 0.18737812340259552, "learning_rate": 0.0008081335351893206, "loss": 0.2304, "num_input_tokens_seen": 95165984, "step": 44110 }, { "epoch": 7.196574225122349, "grad_norm": 0.372313916683197, "learning_rate": 0.0008080774753063485, "loss": 0.1438, "num_input_tokens_seen": 95176576, "step": 44115 }, { "epoch": 7.197389885807504, "grad_norm": 0.03473867103457451, "learning_rate": 0.0008080214091797953, "loss": 0.0661, "num_input_tokens_seen": 95188640, "step": 44120 }, { "epoch": 7.198205546492659, "grad_norm": 0.18864575028419495, "learning_rate": 0.0008079653368107975, "loss": 0.0381, "num_input_tokens_seen": 95198336, "step": 44125 }, { "epoch": 7.199021207177814, "grad_norm": 0.026059413328766823, "learning_rate": 0.0008079092582004915, "loss": 0.0664, "num_input_tokens_seen": 95209440, "step": 44130 }, { "epoch": 7.199836867862969, "grad_norm": 0.025319736450910568, "learning_rate": 0.0008078531733500137, "loss": 0.1248, "num_input_tokens_seen": 95220160, "step": 44135 }, { "epoch": 7.200652528548124, "grad_norm": 0.2258998602628708, "learning_rate": 0.000807797082260501, "loss": 0.0986, "num_input_tokens_seen": 95230592, "step": 44140 }, { "epoch": 7.201468189233279, "grad_norm": 0.32872721552848816, "learning_rate": 0.0008077409849330898, "loss": 0.0948, "num_input_tokens_seen": 95241472, "step": 44145 }, { "epoch": 7.202283849918434, "grad_norm": 0.00851452350616455, "learning_rate": 0.0008076848813689171, "loss": 0.0111, "num_input_tokens_seen": 95254272, "step": 44150 }, { "epoch": 7.203099510603589, "grad_norm": 0.16321249306201935, "learning_rate": 0.0008076287715691201, "loss": 0.024, "num_input_tokens_seen": 95265344, "step": 44155 }, { "epoch": 7.2039151712887435, "grad_norm": 0.29166531562805176, "learning_rate": 0.0008075726555348357, "loss": 0.066, "num_input_tokens_seen": 95275168, "step": 44160 }, { "epoch": 7.204730831973899, "grad_norm": 0.5991953015327454, "learning_rate": 0.0008075165332672013, "loss": 0.1775, "num_input_tokens_seen": 95286496, "step": 44165 }, { "epoch": 7.205546492659054, "grad_norm": 0.42374148964881897, "learning_rate": 0.0008074604047673542, "loss": 0.1075, "num_input_tokens_seen": 95295968, "step": 44170 }, { "epoch": 7.206362153344209, "grad_norm": 0.08390222489833832, "learning_rate": 0.000807404270036432, "loss": 0.0598, "num_input_tokens_seen": 95307648, "step": 44175 }, { "epoch": 7.207177814029364, "grad_norm": 0.31629419326782227, "learning_rate": 0.0008073481290755723, "loss": 0.2415, "num_input_tokens_seen": 95317952, "step": 44180 }, { "epoch": 7.2079934747145185, "grad_norm": 0.0912608653306961, "learning_rate": 0.0008072919818859128, "loss": 0.0804, "num_input_tokens_seen": 95329696, "step": 44185 }, { "epoch": 7.208809135399674, "grad_norm": 0.009148065000772476, "learning_rate": 0.0008072358284685915, "loss": 0.0417, "num_input_tokens_seen": 95340928, "step": 44190 }, { "epoch": 7.209624796084829, "grad_norm": 0.029757576063275337, "learning_rate": 0.0008071796688247463, "loss": 0.0614, "num_input_tokens_seen": 95351712, "step": 44195 }, { "epoch": 7.210440456769984, "grad_norm": 0.04428458213806152, "learning_rate": 0.0008071235029555155, "loss": 0.022, "num_input_tokens_seen": 95363392, "step": 44200 }, { "epoch": 7.211256117455139, "grad_norm": 0.011975009925663471, "learning_rate": 0.0008070673308620373, "loss": 0.0164, "num_input_tokens_seen": 95373920, "step": 44205 }, { "epoch": 7.212071778140293, "grad_norm": 0.16730913519859314, "learning_rate": 0.0008070111525454501, "loss": 0.139, "num_input_tokens_seen": 95384352, "step": 44210 }, { "epoch": 7.212887438825448, "grad_norm": 0.163056880235672, "learning_rate": 0.0008069549680068923, "loss": 0.027, "num_input_tokens_seen": 95396128, "step": 44215 }, { "epoch": 7.213703099510604, "grad_norm": 0.05390625447034836, "learning_rate": 0.0008068987772475029, "loss": 0.0288, "num_input_tokens_seen": 95407168, "step": 44220 }, { "epoch": 7.214518760195759, "grad_norm": 0.322605162858963, "learning_rate": 0.0008068425802684204, "loss": 0.1112, "num_input_tokens_seen": 95418112, "step": 44225 }, { "epoch": 7.215334420880914, "grad_norm": 0.22637002170085907, "learning_rate": 0.0008067863770707838, "loss": 0.0355, "num_input_tokens_seen": 95428160, "step": 44230 }, { "epoch": 7.216150081566068, "grad_norm": 0.05112895369529724, "learning_rate": 0.0008067301676557319, "loss": 0.0934, "num_input_tokens_seen": 95439200, "step": 44235 }, { "epoch": 7.216965742251223, "grad_norm": 0.4358474910259247, "learning_rate": 0.0008066739520244042, "loss": 0.1622, "num_input_tokens_seen": 95451072, "step": 44240 }, { "epoch": 7.217781402936378, "grad_norm": 0.3051150143146515, "learning_rate": 0.0008066177301779396, "loss": 0.0421, "num_input_tokens_seen": 95461536, "step": 44245 }, { "epoch": 7.218597063621534, "grad_norm": 0.1421228051185608, "learning_rate": 0.0008065615021174779, "loss": 0.0337, "num_input_tokens_seen": 95471328, "step": 44250 }, { "epoch": 7.219412724306689, "grad_norm": 0.010581164620816708, "learning_rate": 0.0008065052678441584, "loss": 0.2603, "num_input_tokens_seen": 95479968, "step": 44255 }, { "epoch": 7.220228384991843, "grad_norm": 0.03444560617208481, "learning_rate": 0.0008064490273591209, "loss": 0.0298, "num_input_tokens_seen": 95491936, "step": 44260 }, { "epoch": 7.221044045676998, "grad_norm": 0.06758686900138855, "learning_rate": 0.000806392780663505, "loss": 0.2402, "num_input_tokens_seen": 95503488, "step": 44265 }, { "epoch": 7.221859706362153, "grad_norm": 0.252352774143219, "learning_rate": 0.0008063365277584508, "loss": 0.1126, "num_input_tokens_seen": 95514496, "step": 44270 }, { "epoch": 7.222675367047309, "grad_norm": 0.02394126169383526, "learning_rate": 0.0008062802686450982, "loss": 0.0075, "num_input_tokens_seen": 95524992, "step": 44275 }, { "epoch": 7.2234910277324635, "grad_norm": 0.15020652115345, "learning_rate": 0.0008062240033245875, "loss": 0.0673, "num_input_tokens_seen": 95536096, "step": 44280 }, { "epoch": 7.224306688417618, "grad_norm": 0.16681607067584991, "learning_rate": 0.0008061677317980587, "loss": 0.0551, "num_input_tokens_seen": 95546592, "step": 44285 }, { "epoch": 7.225122349102773, "grad_norm": 0.06704016029834747, "learning_rate": 0.0008061114540666525, "loss": 0.039, "num_input_tokens_seen": 95558432, "step": 44290 }, { "epoch": 7.225938009787928, "grad_norm": 0.08653748035430908, "learning_rate": 0.0008060551701315093, "loss": 0.0519, "num_input_tokens_seen": 95567360, "step": 44295 }, { "epoch": 7.226753670473083, "grad_norm": 0.01804112270474434, "learning_rate": 0.00080599887999377, "loss": 0.0822, "num_input_tokens_seen": 95579456, "step": 44300 }, { "epoch": 7.2275693311582385, "grad_norm": 0.057349514216184616, "learning_rate": 0.0008059425836545751, "loss": 0.0346, "num_input_tokens_seen": 95590176, "step": 44305 }, { "epoch": 7.228384991843393, "grad_norm": 0.16346199810504913, "learning_rate": 0.0008058862811150657, "loss": 0.118, "num_input_tokens_seen": 95601632, "step": 44310 }, { "epoch": 7.229200652528548, "grad_norm": 0.21547041833400726, "learning_rate": 0.0008058299723763826, "loss": 0.0447, "num_input_tokens_seen": 95612960, "step": 44315 }, { "epoch": 7.230016313213703, "grad_norm": 0.1597173660993576, "learning_rate": 0.0008057736574396673, "loss": 0.0173, "num_input_tokens_seen": 95624416, "step": 44320 }, { "epoch": 7.230831973898858, "grad_norm": 0.0055515579879283905, "learning_rate": 0.000805717336306061, "loss": 0.1017, "num_input_tokens_seen": 95635968, "step": 44325 }, { "epoch": 7.231647634584013, "grad_norm": 0.0865335538983345, "learning_rate": 0.000805661008976705, "loss": 0.0291, "num_input_tokens_seen": 95645824, "step": 44330 }, { "epoch": 7.232463295269168, "grad_norm": 0.07298435270786285, "learning_rate": 0.0008056046754527406, "loss": 0.0902, "num_input_tokens_seen": 95657056, "step": 44335 }, { "epoch": 7.233278955954323, "grad_norm": 0.005751929711550474, "learning_rate": 0.00080554833573531, "loss": 0.0454, "num_input_tokens_seen": 95667744, "step": 44340 }, { "epoch": 7.234094616639478, "grad_norm": 0.02267143316566944, "learning_rate": 0.0008054919898255548, "loss": 0.0505, "num_input_tokens_seen": 95678528, "step": 44345 }, { "epoch": 7.234910277324633, "grad_norm": 0.024015581235289574, "learning_rate": 0.0008054356377246168, "loss": 0.0372, "num_input_tokens_seen": 95688768, "step": 44350 }, { "epoch": 7.235725938009788, "grad_norm": 0.06957139819860458, "learning_rate": 0.0008053792794336381, "loss": 0.0266, "num_input_tokens_seen": 95698944, "step": 44355 }, { "epoch": 7.236541598694943, "grad_norm": 0.07766461372375488, "learning_rate": 0.0008053229149537611, "loss": 0.0143, "num_input_tokens_seen": 95707008, "step": 44360 }, { "epoch": 7.237357259380098, "grad_norm": 0.571931779384613, "learning_rate": 0.0008052665442861278, "loss": 0.0674, "num_input_tokens_seen": 95717728, "step": 44365 }, { "epoch": 7.238172920065253, "grad_norm": 0.15675191581249237, "learning_rate": 0.0008052101674318805, "loss": 0.1895, "num_input_tokens_seen": 95729152, "step": 44370 }, { "epoch": 7.238988580750408, "grad_norm": 0.05943544581532478, "learning_rate": 0.0008051537843921623, "loss": 0.0215, "num_input_tokens_seen": 95741280, "step": 44375 }, { "epoch": 7.239804241435563, "grad_norm": 0.5166104435920715, "learning_rate": 0.0008050973951681153, "loss": 0.1907, "num_input_tokens_seen": 95751840, "step": 44380 }, { "epoch": 7.240619902120717, "grad_norm": 0.5448908805847168, "learning_rate": 0.0008050409997608827, "loss": 0.1867, "num_input_tokens_seen": 95763936, "step": 44385 }, { "epoch": 7.241435562805873, "grad_norm": 0.260073721408844, "learning_rate": 0.0008049845981716072, "loss": 0.1445, "num_input_tokens_seen": 95772864, "step": 44390 }, { "epoch": 7.242251223491028, "grad_norm": 0.03557813540101051, "learning_rate": 0.0008049281904014318, "loss": 0.0842, "num_input_tokens_seen": 95782944, "step": 44395 }, { "epoch": 7.243066884176183, "grad_norm": 0.08776529878377914, "learning_rate": 0.0008048717764514999, "loss": 0.0517, "num_input_tokens_seen": 95793728, "step": 44400 }, { "epoch": 7.2438825448613375, "grad_norm": 0.09808285534381866, "learning_rate": 0.0008048153563229548, "loss": 0.1737, "num_input_tokens_seen": 95803872, "step": 44405 }, { "epoch": 7.244698205546492, "grad_norm": 0.3908463418483734, "learning_rate": 0.0008047589300169398, "loss": 0.1451, "num_input_tokens_seen": 95814784, "step": 44410 }, { "epoch": 7.245513866231648, "grad_norm": 0.3702581524848938, "learning_rate": 0.0008047024975345983, "loss": 0.3422, "num_input_tokens_seen": 95826528, "step": 44415 }, { "epoch": 7.246329526916803, "grad_norm": 0.1768919676542282, "learning_rate": 0.0008046460588770743, "loss": 0.0915, "num_input_tokens_seen": 95836832, "step": 44420 }, { "epoch": 7.247145187601958, "grad_norm": 0.15638843178749084, "learning_rate": 0.0008045896140455114, "loss": 0.0745, "num_input_tokens_seen": 95848256, "step": 44425 }, { "epoch": 7.2479608482871125, "grad_norm": 0.26909416913986206, "learning_rate": 0.0008045331630410535, "loss": 0.0863, "num_input_tokens_seen": 95859424, "step": 44430 }, { "epoch": 7.248776508972267, "grad_norm": 0.41958698630332947, "learning_rate": 0.0008044767058648448, "loss": 0.2005, "num_input_tokens_seen": 95870240, "step": 44435 }, { "epoch": 7.249592169657422, "grad_norm": 0.06381496042013168, "learning_rate": 0.0008044202425180293, "loss": 0.098, "num_input_tokens_seen": 95881152, "step": 44440 }, { "epoch": 7.250407830342578, "grad_norm": 0.03571905568242073, "learning_rate": 0.0008043637730017515, "loss": 0.0632, "num_input_tokens_seen": 95890816, "step": 44445 }, { "epoch": 7.251223491027733, "grad_norm": 0.4943830966949463, "learning_rate": 0.0008043072973171557, "loss": 0.2426, "num_input_tokens_seen": 95902080, "step": 44450 }, { "epoch": 7.2520391517128875, "grad_norm": 0.24512127041816711, "learning_rate": 0.0008042508154653865, "loss": 0.1203, "num_input_tokens_seen": 95912928, "step": 44455 }, { "epoch": 7.252854812398042, "grad_norm": 0.18231791257858276, "learning_rate": 0.0008041943274475886, "loss": 0.1148, "num_input_tokens_seen": 95924128, "step": 44460 }, { "epoch": 7.253670473083197, "grad_norm": 0.34111642837524414, "learning_rate": 0.0008041378332649067, "loss": 0.1262, "num_input_tokens_seen": 95934592, "step": 44465 }, { "epoch": 7.254486133768353, "grad_norm": 0.07699751853942871, "learning_rate": 0.0008040813329184857, "loss": 0.033, "num_input_tokens_seen": 95945088, "step": 44470 }, { "epoch": 7.255301794453508, "grad_norm": 0.0859646275639534, "learning_rate": 0.000804024826409471, "loss": 0.0592, "num_input_tokens_seen": 95956640, "step": 44475 }, { "epoch": 7.2561174551386625, "grad_norm": 0.34625181555747986, "learning_rate": 0.0008039683137390073, "loss": 0.1868, "num_input_tokens_seen": 95967360, "step": 44480 }, { "epoch": 7.256933115823817, "grad_norm": 0.213846355676651, "learning_rate": 0.0008039117949082401, "loss": 0.1257, "num_input_tokens_seen": 95977376, "step": 44485 }, { "epoch": 7.257748776508972, "grad_norm": 0.33502423763275146, "learning_rate": 0.0008038552699183148, "loss": 0.1028, "num_input_tokens_seen": 95987392, "step": 44490 }, { "epoch": 7.258564437194127, "grad_norm": 0.028653619810938835, "learning_rate": 0.0008037987387703771, "loss": 0.1027, "num_input_tokens_seen": 95997696, "step": 44495 }, { "epoch": 7.259380097879283, "grad_norm": 0.10009276866912842, "learning_rate": 0.0008037422014655725, "loss": 0.0971, "num_input_tokens_seen": 96008832, "step": 44500 }, { "epoch": 7.260195758564437, "grad_norm": 0.2052982747554779, "learning_rate": 0.0008036856580050469, "loss": 0.0549, "num_input_tokens_seen": 96018880, "step": 44505 }, { "epoch": 7.261011419249592, "grad_norm": 0.2997707724571228, "learning_rate": 0.000803629108389946, "loss": 0.1406, "num_input_tokens_seen": 96029024, "step": 44510 }, { "epoch": 7.261827079934747, "grad_norm": 0.01411355659365654, "learning_rate": 0.0008035725526214164, "loss": 0.1515, "num_input_tokens_seen": 96039264, "step": 44515 }, { "epoch": 7.262642740619902, "grad_norm": 0.36338305473327637, "learning_rate": 0.0008035159907006037, "loss": 0.1377, "num_input_tokens_seen": 96049920, "step": 44520 }, { "epoch": 7.263458401305057, "grad_norm": 0.30852988362312317, "learning_rate": 0.0008034594226286545, "loss": 0.1791, "num_input_tokens_seen": 96060320, "step": 44525 }, { "epoch": 7.264274061990212, "grad_norm": 0.04857664555311203, "learning_rate": 0.0008034028484067149, "loss": 0.0261, "num_input_tokens_seen": 96071520, "step": 44530 }, { "epoch": 7.265089722675367, "grad_norm": 0.007870425470173359, "learning_rate": 0.0008033462680359319, "loss": 0.0783, "num_input_tokens_seen": 96083264, "step": 44535 }, { "epoch": 7.265905383360522, "grad_norm": 0.04948393255472183, "learning_rate": 0.000803289681517452, "loss": 0.0461, "num_input_tokens_seen": 96094080, "step": 44540 }, { "epoch": 7.266721044045677, "grad_norm": 0.31275731325149536, "learning_rate": 0.0008032330888524217, "loss": 0.0846, "num_input_tokens_seen": 96103968, "step": 44545 }, { "epoch": 7.267536704730832, "grad_norm": 0.27960896492004395, "learning_rate": 0.0008031764900419885, "loss": 0.0546, "num_input_tokens_seen": 96115712, "step": 44550 }, { "epoch": 7.268352365415987, "grad_norm": 0.1881726235151291, "learning_rate": 0.000803119885087299, "loss": 0.1151, "num_input_tokens_seen": 96126560, "step": 44555 }, { "epoch": 7.269168026101142, "grad_norm": 0.03182040527462959, "learning_rate": 0.0008030632739895004, "loss": 0.0574, "num_input_tokens_seen": 96137728, "step": 44560 }, { "epoch": 7.269983686786297, "grad_norm": 0.01518465019762516, "learning_rate": 0.0008030066567497401, "loss": 0.0328, "num_input_tokens_seen": 96149920, "step": 44565 }, { "epoch": 7.270799347471452, "grad_norm": 0.2792905867099762, "learning_rate": 0.0008029500333691656, "loss": 0.194, "num_input_tokens_seen": 96161472, "step": 44570 }, { "epoch": 7.271615008156607, "grad_norm": 0.03350904956459999, "learning_rate": 0.0008028934038489243, "loss": 0.1606, "num_input_tokens_seen": 96171648, "step": 44575 }, { "epoch": 7.2724306688417615, "grad_norm": 0.19551445543766022, "learning_rate": 0.000802836768190164, "loss": 0.0779, "num_input_tokens_seen": 96182048, "step": 44580 }, { "epoch": 7.273246329526917, "grad_norm": 0.09546945244073868, "learning_rate": 0.0008027801263940322, "loss": 0.0368, "num_input_tokens_seen": 96193600, "step": 44585 }, { "epoch": 7.274061990212072, "grad_norm": 0.35585927963256836, "learning_rate": 0.0008027234784616773, "loss": 0.176, "num_input_tokens_seen": 96204992, "step": 44590 }, { "epoch": 7.274877650897227, "grad_norm": 0.058231059461832047, "learning_rate": 0.0008026668243942469, "loss": 0.12, "num_input_tokens_seen": 96215168, "step": 44595 }, { "epoch": 7.275693311582382, "grad_norm": 0.009627573192119598, "learning_rate": 0.0008026101641928895, "loss": 0.0878, "num_input_tokens_seen": 96224672, "step": 44600 }, { "epoch": 7.2765089722675365, "grad_norm": 0.045792192220687866, "learning_rate": 0.000802553497858753, "loss": 0.0545, "num_input_tokens_seen": 96235712, "step": 44605 }, { "epoch": 7.277324632952691, "grad_norm": 0.17084085941314697, "learning_rate": 0.0008024968253929861, "loss": 0.0461, "num_input_tokens_seen": 96246496, "step": 44610 }, { "epoch": 7.278140293637847, "grad_norm": 0.03394502028822899, "learning_rate": 0.0008024401467967375, "loss": 0.0733, "num_input_tokens_seen": 96257056, "step": 44615 }, { "epoch": 7.278955954323002, "grad_norm": 0.01889253407716751, "learning_rate": 0.0008023834620711555, "loss": 0.1089, "num_input_tokens_seen": 96267424, "step": 44620 }, { "epoch": 7.279771615008157, "grad_norm": 0.16721847653388977, "learning_rate": 0.000802326771217389, "loss": 0.0433, "num_input_tokens_seen": 96278752, "step": 44625 }, { "epoch": 7.280587275693311, "grad_norm": 0.19981302320957184, "learning_rate": 0.0008022700742365871, "loss": 0.0731, "num_input_tokens_seen": 96289600, "step": 44630 }, { "epoch": 7.281402936378466, "grad_norm": 0.14188116788864136, "learning_rate": 0.0008022133711298987, "loss": 0.1148, "num_input_tokens_seen": 96300672, "step": 44635 }, { "epoch": 7.282218597063622, "grad_norm": 0.17594295740127563, "learning_rate": 0.0008021566618984728, "loss": 0.0401, "num_input_tokens_seen": 96311328, "step": 44640 }, { "epoch": 7.283034257748777, "grad_norm": 0.10706756263971329, "learning_rate": 0.0008020999465434589, "loss": 0.0385, "num_input_tokens_seen": 96322880, "step": 44645 }, { "epoch": 7.283849918433932, "grad_norm": 0.183606818318367, "learning_rate": 0.0008020432250660063, "loss": 0.04, "num_input_tokens_seen": 96334336, "step": 44650 }, { "epoch": 7.284665579119086, "grad_norm": 0.3366658091545105, "learning_rate": 0.0008019864974672646, "loss": 0.167, "num_input_tokens_seen": 96345760, "step": 44655 }, { "epoch": 7.285481239804241, "grad_norm": 0.3044050335884094, "learning_rate": 0.0008019297637483836, "loss": 0.0585, "num_input_tokens_seen": 96355776, "step": 44660 }, { "epoch": 7.286296900489396, "grad_norm": 0.8694233894348145, "learning_rate": 0.0008018730239105127, "loss": 0.077, "num_input_tokens_seen": 96366432, "step": 44665 }, { "epoch": 7.287112561174552, "grad_norm": 0.04380872845649719, "learning_rate": 0.000801816277954802, "loss": 0.1025, "num_input_tokens_seen": 96378432, "step": 44670 }, { "epoch": 7.287928221859707, "grad_norm": 0.013474180363118649, "learning_rate": 0.0008017595258824016, "loss": 0.0354, "num_input_tokens_seen": 96389984, "step": 44675 }, { "epoch": 7.288743882544861, "grad_norm": 0.062481589615345, "learning_rate": 0.0008017027676944617, "loss": 0.0216, "num_input_tokens_seen": 96399360, "step": 44680 }, { "epoch": 7.289559543230016, "grad_norm": 0.2023136168718338, "learning_rate": 0.0008016460033921323, "loss": 0.0763, "num_input_tokens_seen": 96408672, "step": 44685 }, { "epoch": 7.290375203915171, "grad_norm": 0.04362421855330467, "learning_rate": 0.0008015892329765642, "loss": 0.0155, "num_input_tokens_seen": 96420320, "step": 44690 }, { "epoch": 7.291190864600326, "grad_norm": 0.06963168829679489, "learning_rate": 0.0008015324564489075, "loss": 0.037, "num_input_tokens_seen": 96430816, "step": 44695 }, { "epoch": 7.2920065252854815, "grad_norm": 0.4110236167907715, "learning_rate": 0.0008014756738103132, "loss": 0.1014, "num_input_tokens_seen": 96441408, "step": 44700 }, { "epoch": 7.292822185970636, "grad_norm": 0.23684130609035492, "learning_rate": 0.0008014188850619318, "loss": 0.0907, "num_input_tokens_seen": 96452544, "step": 44705 }, { "epoch": 7.293637846655791, "grad_norm": 0.0586276575922966, "learning_rate": 0.0008013620902049143, "loss": 0.0411, "num_input_tokens_seen": 96463456, "step": 44710 }, { "epoch": 7.294453507340946, "grad_norm": 0.03127945587038994, "learning_rate": 0.0008013052892404118, "loss": 0.0189, "num_input_tokens_seen": 96474144, "step": 44715 }, { "epoch": 7.295269168026101, "grad_norm": 0.008146870881319046, "learning_rate": 0.0008012484821695754, "loss": 0.096, "num_input_tokens_seen": 96484768, "step": 44720 }, { "epoch": 7.2960848287112565, "grad_norm": 0.024679630994796753, "learning_rate": 0.0008011916689935563, "loss": 0.0219, "num_input_tokens_seen": 96494912, "step": 44725 }, { "epoch": 7.296900489396411, "grad_norm": 0.004978868179023266, "learning_rate": 0.000801134849713506, "loss": 0.0054, "num_input_tokens_seen": 96506016, "step": 44730 }, { "epoch": 7.297716150081566, "grad_norm": 0.047816094011068344, "learning_rate": 0.0008010780243305758, "loss": 0.1117, "num_input_tokens_seen": 96516352, "step": 44735 }, { "epoch": 7.298531810766721, "grad_norm": 0.0098728584125638, "learning_rate": 0.0008010211928459177, "loss": 0.0393, "num_input_tokens_seen": 96527488, "step": 44740 }, { "epoch": 7.299347471451876, "grad_norm": 0.48066943883895874, "learning_rate": 0.0008009643552606831, "loss": 0.1177, "num_input_tokens_seen": 96539040, "step": 44745 }, { "epoch": 7.300163132137031, "grad_norm": 0.05421772971749306, "learning_rate": 0.0008009075115760243, "loss": 0.2061, "num_input_tokens_seen": 96549952, "step": 44750 }, { "epoch": 7.300978792822186, "grad_norm": 0.018347647041082382, "learning_rate": 0.0008008506617930926, "loss": 0.0396, "num_input_tokens_seen": 96562080, "step": 44755 }, { "epoch": 7.301794453507341, "grad_norm": 0.056630175560712814, "learning_rate": 0.000800793805913041, "loss": 0.0129, "num_input_tokens_seen": 96573440, "step": 44760 }, { "epoch": 7.302610114192496, "grad_norm": 0.10636483132839203, "learning_rate": 0.0008007369439370211, "loss": 0.0172, "num_input_tokens_seen": 96584192, "step": 44765 }, { "epoch": 7.303425774877651, "grad_norm": 0.11897508054971695, "learning_rate": 0.0008006800758661856, "loss": 0.0724, "num_input_tokens_seen": 96595136, "step": 44770 }, { "epoch": 7.304241435562806, "grad_norm": 0.012469706125557423, "learning_rate": 0.000800623201701687, "loss": 0.0456, "num_input_tokens_seen": 96604928, "step": 44775 }, { "epoch": 7.30505709624796, "grad_norm": 0.3245897591114044, "learning_rate": 0.0008005663214446777, "loss": 0.0409, "num_input_tokens_seen": 96616288, "step": 44780 }, { "epoch": 7.305872756933116, "grad_norm": 0.10663722455501556, "learning_rate": 0.0008005094350963107, "loss": 0.2466, "num_input_tokens_seen": 96627168, "step": 44785 }, { "epoch": 7.306688417618271, "grad_norm": 0.057368312031030655, "learning_rate": 0.0008004525426577387, "loss": 0.0531, "num_input_tokens_seen": 96637632, "step": 44790 }, { "epoch": 7.307504078303426, "grad_norm": 0.015169876627624035, "learning_rate": 0.0008003956441301149, "loss": 0.061, "num_input_tokens_seen": 96647744, "step": 44795 }, { "epoch": 7.308319738988581, "grad_norm": 0.3878147006034851, "learning_rate": 0.0008003387395145922, "loss": 0.1464, "num_input_tokens_seen": 96658016, "step": 44800 }, { "epoch": 7.309135399673735, "grad_norm": 0.2443951815366745, "learning_rate": 0.0008002818288123239, "loss": 0.0931, "num_input_tokens_seen": 96669824, "step": 44805 }, { "epoch": 7.309951060358891, "grad_norm": 0.3824320435523987, "learning_rate": 0.0008002249120244635, "loss": 0.2427, "num_input_tokens_seen": 96678784, "step": 44810 }, { "epoch": 7.310766721044046, "grad_norm": 0.5043080449104309, "learning_rate": 0.0008001679891521642, "loss": 0.1544, "num_input_tokens_seen": 96688896, "step": 44815 }, { "epoch": 7.311582381729201, "grad_norm": 0.39147958159446716, "learning_rate": 0.00080011106019658, "loss": 0.2142, "num_input_tokens_seen": 96700256, "step": 44820 }, { "epoch": 7.3123980424143555, "grad_norm": 0.0821220800280571, "learning_rate": 0.0008000541251588644, "loss": 0.0806, "num_input_tokens_seen": 96710752, "step": 44825 }, { "epoch": 7.31321370309951, "grad_norm": 0.2268812358379364, "learning_rate": 0.0007999971840401714, "loss": 0.0776, "num_input_tokens_seen": 96721088, "step": 44830 }, { "epoch": 7.314029363784665, "grad_norm": 0.01954479143023491, "learning_rate": 0.0007999402368416548, "loss": 0.0616, "num_input_tokens_seen": 96732960, "step": 44835 }, { "epoch": 7.314845024469821, "grad_norm": 0.31298768520355225, "learning_rate": 0.0007998832835644687, "loss": 0.0981, "num_input_tokens_seen": 96743328, "step": 44840 }, { "epoch": 7.315660685154976, "grad_norm": 0.3063667416572571, "learning_rate": 0.0007998263242097675, "loss": 0.1421, "num_input_tokens_seen": 96754112, "step": 44845 }, { "epoch": 7.3164763458401305, "grad_norm": 0.2220902293920517, "learning_rate": 0.0007997693587787056, "loss": 0.0575, "num_input_tokens_seen": 96763872, "step": 44850 }, { "epoch": 7.317292006525285, "grad_norm": 0.3629862368106842, "learning_rate": 0.0007997123872724373, "loss": 0.1345, "num_input_tokens_seen": 96774496, "step": 44855 }, { "epoch": 7.31810766721044, "grad_norm": 0.04192895442247391, "learning_rate": 0.0007996554096921172, "loss": 0.0265, "num_input_tokens_seen": 96784416, "step": 44860 }, { "epoch": 7.318923327895595, "grad_norm": 0.016884543001651764, "learning_rate": 0.0007995984260389001, "loss": 0.1263, "num_input_tokens_seen": 96795424, "step": 44865 }, { "epoch": 7.319738988580751, "grad_norm": 0.23910973966121674, "learning_rate": 0.0007995414363139408, "loss": 0.0794, "num_input_tokens_seen": 96806688, "step": 44870 }, { "epoch": 7.3205546492659055, "grad_norm": 0.055270466953516006, "learning_rate": 0.0007994844405183944, "loss": 0.1299, "num_input_tokens_seen": 96817472, "step": 44875 }, { "epoch": 7.32137030995106, "grad_norm": 0.03465723618865013, "learning_rate": 0.0007994274386534158, "loss": 0.0568, "num_input_tokens_seen": 96830112, "step": 44880 }, { "epoch": 7.322185970636215, "grad_norm": 0.11128037422895432, "learning_rate": 0.0007993704307201604, "loss": 0.0729, "num_input_tokens_seen": 96841376, "step": 44885 }, { "epoch": 7.32300163132137, "grad_norm": 0.49349164962768555, "learning_rate": 0.0007993134167197833, "loss": 0.182, "num_input_tokens_seen": 96852672, "step": 44890 }, { "epoch": 7.323817292006526, "grad_norm": 0.03250536322593689, "learning_rate": 0.0007992563966534403, "loss": 0.051, "num_input_tokens_seen": 96863424, "step": 44895 }, { "epoch": 7.3246329526916805, "grad_norm": 0.10508814454078674, "learning_rate": 0.0007991993705222867, "loss": 0.0967, "num_input_tokens_seen": 96874592, "step": 44900 }, { "epoch": 7.325448613376835, "grad_norm": 0.4611310660839081, "learning_rate": 0.0007991423383274782, "loss": 0.1714, "num_input_tokens_seen": 96884992, "step": 44905 }, { "epoch": 7.32626427406199, "grad_norm": 0.1679326444864273, "learning_rate": 0.0007990853000701708, "loss": 0.1146, "num_input_tokens_seen": 96895040, "step": 44910 }, { "epoch": 7.327079934747145, "grad_norm": 0.04367463290691376, "learning_rate": 0.0007990282557515204, "loss": 0.0977, "num_input_tokens_seen": 96905856, "step": 44915 }, { "epoch": 7.327895595432301, "grad_norm": 0.2549987733364105, "learning_rate": 0.0007989712053726829, "loss": 0.0943, "num_input_tokens_seen": 96916352, "step": 44920 }, { "epoch": 7.328711256117455, "grad_norm": 0.020827017724514008, "learning_rate": 0.0007989141489348149, "loss": 0.0184, "num_input_tokens_seen": 96926560, "step": 44925 }, { "epoch": 7.32952691680261, "grad_norm": 0.024547185748815536, "learning_rate": 0.0007988570864390723, "loss": 0.0431, "num_input_tokens_seen": 96938176, "step": 44930 }, { "epoch": 7.330342577487765, "grad_norm": 0.35543200373649597, "learning_rate": 0.0007988000178866117, "loss": 0.1014, "num_input_tokens_seen": 96950368, "step": 44935 }, { "epoch": 7.33115823817292, "grad_norm": 0.07484564930200577, "learning_rate": 0.0007987429432785897, "loss": 0.0653, "num_input_tokens_seen": 96960032, "step": 44940 }, { "epoch": 7.331973898858075, "grad_norm": 0.3434649407863617, "learning_rate": 0.000798685862616163, "loss": 0.0596, "num_input_tokens_seen": 96970592, "step": 44945 }, { "epoch": 7.33278955954323, "grad_norm": 0.007049186620861292, "learning_rate": 0.0007986287759004884, "loss": 0.0081, "num_input_tokens_seen": 96981760, "step": 44950 }, { "epoch": 7.333605220228385, "grad_norm": 0.008865798823535442, "learning_rate": 0.000798571683132723, "loss": 0.0924, "num_input_tokens_seen": 96992544, "step": 44955 }, { "epoch": 7.33442088091354, "grad_norm": 0.40072038769721985, "learning_rate": 0.0007985145843140233, "loss": 0.0821, "num_input_tokens_seen": 97004000, "step": 44960 }, { "epoch": 7.335236541598695, "grad_norm": 0.08721873164176941, "learning_rate": 0.0007984574794455472, "loss": 0.0449, "num_input_tokens_seen": 97015904, "step": 44965 }, { "epoch": 7.33605220228385, "grad_norm": 0.2823683023452759, "learning_rate": 0.0007984003685284516, "loss": 0.1214, "num_input_tokens_seen": 97027072, "step": 44970 }, { "epoch": 7.3368678629690045, "grad_norm": 0.10194271802902222, "learning_rate": 0.0007983432515638937, "loss": 0.0398, "num_input_tokens_seen": 97038912, "step": 44975 }, { "epoch": 7.33768352365416, "grad_norm": 0.03468884155154228, "learning_rate": 0.0007982861285530317, "loss": 0.0338, "num_input_tokens_seen": 97049440, "step": 44980 }, { "epoch": 7.338499184339315, "grad_norm": 0.06911741942167282, "learning_rate": 0.0007982289994970227, "loss": 0.0594, "num_input_tokens_seen": 97058688, "step": 44985 }, { "epoch": 7.33931484502447, "grad_norm": 0.09313280880451202, "learning_rate": 0.0007981718643970246, "loss": 0.0561, "num_input_tokens_seen": 97068768, "step": 44990 }, { "epoch": 7.340130505709625, "grad_norm": 0.44375500082969666, "learning_rate": 0.0007981147232541956, "loss": 0.1094, "num_input_tokens_seen": 97080320, "step": 44995 }, { "epoch": 7.3409461663947795, "grad_norm": 0.03189815953373909, "learning_rate": 0.0007980575760696935, "loss": 0.0398, "num_input_tokens_seen": 97092608, "step": 45000 }, { "epoch": 7.341761827079935, "grad_norm": 0.056526027619838715, "learning_rate": 0.0007980004228446765, "loss": 0.0353, "num_input_tokens_seen": 97102944, "step": 45005 }, { "epoch": 7.34257748776509, "grad_norm": 0.016778087243437767, "learning_rate": 0.0007979432635803029, "loss": 0.0448, "num_input_tokens_seen": 97114848, "step": 45010 }, { "epoch": 7.343393148450245, "grad_norm": 0.009796129539608955, "learning_rate": 0.000797886098277731, "loss": 0.0061, "num_input_tokens_seen": 97124864, "step": 45015 }, { "epoch": 7.3442088091354, "grad_norm": 0.034303128719329834, "learning_rate": 0.0007978289269381196, "loss": 0.0199, "num_input_tokens_seen": 97135264, "step": 45020 }, { "epoch": 7.3450244698205545, "grad_norm": 0.5176885724067688, "learning_rate": 0.0007977717495626271, "loss": 0.1796, "num_input_tokens_seen": 97145280, "step": 45025 }, { "epoch": 7.345840130505709, "grad_norm": 0.4619094431400299, "learning_rate": 0.0007977145661524123, "loss": 0.2377, "num_input_tokens_seen": 97156288, "step": 45030 }, { "epoch": 7.346655791190865, "grad_norm": 0.027681073173880577, "learning_rate": 0.000797657376708634, "loss": 0.0136, "num_input_tokens_seen": 97165984, "step": 45035 }, { "epoch": 7.34747145187602, "grad_norm": 0.7673472166061401, "learning_rate": 0.0007976001812324516, "loss": 0.078, "num_input_tokens_seen": 97176768, "step": 45040 }, { "epoch": 7.348287112561175, "grad_norm": 0.1298486292362213, "learning_rate": 0.0007975429797250239, "loss": 0.1191, "num_input_tokens_seen": 97185952, "step": 45045 }, { "epoch": 7.349102773246329, "grad_norm": 0.08000068366527557, "learning_rate": 0.0007974857721875102, "loss": 0.0366, "num_input_tokens_seen": 97196000, "step": 45050 }, { "epoch": 7.349918433931484, "grad_norm": 0.07098639011383057, "learning_rate": 0.0007974285586210701, "loss": 0.1072, "num_input_tokens_seen": 97207840, "step": 45055 }, { "epoch": 7.350734094616639, "grad_norm": 0.10400813072919846, "learning_rate": 0.0007973713390268629, "loss": 0.0365, "num_input_tokens_seen": 97218912, "step": 45060 }, { "epoch": 7.351549755301795, "grad_norm": 0.12377993762493134, "learning_rate": 0.0007973141134060483, "loss": 0.0209, "num_input_tokens_seen": 97229984, "step": 45065 }, { "epoch": 7.35236541598695, "grad_norm": 0.06517606973648071, "learning_rate": 0.0007972568817597857, "loss": 0.189, "num_input_tokens_seen": 97241792, "step": 45070 }, { "epoch": 7.353181076672104, "grad_norm": 0.15152716636657715, "learning_rate": 0.0007971996440892356, "loss": 0.0403, "num_input_tokens_seen": 97252128, "step": 45075 }, { "epoch": 7.353996737357259, "grad_norm": 0.11698517203330994, "learning_rate": 0.0007971424003955577, "loss": 0.0294, "num_input_tokens_seen": 97262368, "step": 45080 }, { "epoch": 7.354812398042414, "grad_norm": 0.1156277135014534, "learning_rate": 0.0007970851506799119, "loss": 0.0786, "num_input_tokens_seen": 97272192, "step": 45085 }, { "epoch": 7.35562805872757, "grad_norm": 0.39749982953071594, "learning_rate": 0.0007970278949434588, "loss": 0.0749, "num_input_tokens_seen": 97282656, "step": 45090 }, { "epoch": 7.356443719412725, "grad_norm": 0.17117774486541748, "learning_rate": 0.0007969706331873586, "loss": 0.1604, "num_input_tokens_seen": 97294240, "step": 45095 }, { "epoch": 7.357259380097879, "grad_norm": 0.2364300638437271, "learning_rate": 0.0007969133654127718, "loss": 0.0544, "num_input_tokens_seen": 97304768, "step": 45100 }, { "epoch": 7.358075040783034, "grad_norm": 0.004655746277421713, "learning_rate": 0.0007968560916208589, "loss": 0.1326, "num_input_tokens_seen": 97316032, "step": 45105 }, { "epoch": 7.358890701468189, "grad_norm": 0.05007951334118843, "learning_rate": 0.0007967988118127808, "loss": 0.0745, "num_input_tokens_seen": 97326304, "step": 45110 }, { "epoch": 7.359706362153344, "grad_norm": 0.020962875336408615, "learning_rate": 0.0007967415259896982, "loss": 0.0157, "num_input_tokens_seen": 97335776, "step": 45115 }, { "epoch": 7.3605220228384995, "grad_norm": 0.2636253535747528, "learning_rate": 0.0007966842341527722, "loss": 0.0353, "num_input_tokens_seen": 97347712, "step": 45120 }, { "epoch": 7.361337683523654, "grad_norm": 0.8048582077026367, "learning_rate": 0.0007966269363031637, "loss": 0.0571, "num_input_tokens_seen": 97359168, "step": 45125 }, { "epoch": 7.362153344208809, "grad_norm": 0.01369799580425024, "learning_rate": 0.0007965696324420342, "loss": 0.0305, "num_input_tokens_seen": 97368576, "step": 45130 }, { "epoch": 7.362969004893964, "grad_norm": 0.30398988723754883, "learning_rate": 0.0007965123225705447, "loss": 0.0217, "num_input_tokens_seen": 97380000, "step": 45135 }, { "epoch": 7.363784665579119, "grad_norm": 0.10101661086082458, "learning_rate": 0.000796455006689857, "loss": 0.114, "num_input_tokens_seen": 97390400, "step": 45140 }, { "epoch": 7.364600326264274, "grad_norm": 0.2408280372619629, "learning_rate": 0.0007963976848011324, "loss": 0.0854, "num_input_tokens_seen": 97401472, "step": 45145 }, { "epoch": 7.365415986949429, "grad_norm": 0.13443009555339813, "learning_rate": 0.0007963403569055328, "loss": 0.2045, "num_input_tokens_seen": 97411904, "step": 45150 }, { "epoch": 7.366231647634584, "grad_norm": 0.012114451266825199, "learning_rate": 0.0007962830230042197, "loss": 0.0113, "num_input_tokens_seen": 97422976, "step": 45155 }, { "epoch": 7.367047308319739, "grad_norm": 0.0056746890768408775, "learning_rate": 0.0007962256830983556, "loss": 0.02, "num_input_tokens_seen": 97432672, "step": 45160 }, { "epoch": 7.367862969004894, "grad_norm": 0.11288474500179291, "learning_rate": 0.0007961683371891019, "loss": 0.1311, "num_input_tokens_seen": 97443072, "step": 45165 }, { "epoch": 7.368678629690049, "grad_norm": 0.06006375327706337, "learning_rate": 0.0007961109852776214, "loss": 0.0172, "num_input_tokens_seen": 97452864, "step": 45170 }, { "epoch": 7.369494290375204, "grad_norm": 0.3917618691921234, "learning_rate": 0.0007960536273650761, "loss": 0.2746, "num_input_tokens_seen": 97463872, "step": 45175 }, { "epoch": 7.370309951060359, "grad_norm": 0.4076634347438812, "learning_rate": 0.0007959962634526285, "loss": 0.1019, "num_input_tokens_seen": 97475392, "step": 45180 }, { "epoch": 7.371125611745514, "grad_norm": 0.0073449090123176575, "learning_rate": 0.0007959388935414411, "loss": 0.0228, "num_input_tokens_seen": 97487168, "step": 45185 }, { "epoch": 7.371941272430669, "grad_norm": 0.3821856677532196, "learning_rate": 0.0007958815176326764, "loss": 0.2224, "num_input_tokens_seen": 97498048, "step": 45190 }, { "epoch": 7.372756933115824, "grad_norm": 0.30424386262893677, "learning_rate": 0.0007958241357274976, "loss": 0.1261, "num_input_tokens_seen": 97508192, "step": 45195 }, { "epoch": 7.373572593800978, "grad_norm": 0.3336222171783447, "learning_rate": 0.0007957667478270674, "loss": 0.0613, "num_input_tokens_seen": 97519456, "step": 45200 }, { "epoch": 7.374388254486134, "grad_norm": 0.30917349457740784, "learning_rate": 0.0007957093539325489, "loss": 0.0824, "num_input_tokens_seen": 97530368, "step": 45205 }, { "epoch": 7.375203915171289, "grad_norm": 0.23479588329792023, "learning_rate": 0.000795651954045105, "loss": 0.0724, "num_input_tokens_seen": 97542016, "step": 45210 }, { "epoch": 7.376019575856444, "grad_norm": 0.22930112481117249, "learning_rate": 0.0007955945481658992, "loss": 0.0199, "num_input_tokens_seen": 97552320, "step": 45215 }, { "epoch": 7.376835236541599, "grad_norm": 0.297370970249176, "learning_rate": 0.0007955371362960951, "loss": 0.0378, "num_input_tokens_seen": 97563296, "step": 45220 }, { "epoch": 7.377650897226753, "grad_norm": 0.009774671867489815, "learning_rate": 0.000795479718436856, "loss": 0.1071, "num_input_tokens_seen": 97573216, "step": 45225 }, { "epoch": 7.378466557911908, "grad_norm": 0.06250493973493576, "learning_rate": 0.0007954222945893455, "loss": 0.132, "num_input_tokens_seen": 97583808, "step": 45230 }, { "epoch": 7.379282218597064, "grad_norm": 0.061288438737392426, "learning_rate": 0.0007953648647547274, "loss": 0.1291, "num_input_tokens_seen": 97594880, "step": 45235 }, { "epoch": 7.380097879282219, "grad_norm": 0.28206875920295715, "learning_rate": 0.0007953074289341655, "loss": 0.074, "num_input_tokens_seen": 97604608, "step": 45240 }, { "epoch": 7.3809135399673735, "grad_norm": 0.055965863168239594, "learning_rate": 0.0007952499871288241, "loss": 0.063, "num_input_tokens_seen": 97615552, "step": 45245 }, { "epoch": 7.381729200652528, "grad_norm": 0.4205869734287262, "learning_rate": 0.0007951925393398672, "loss": 0.072, "num_input_tokens_seen": 97626624, "step": 45250 }, { "epoch": 7.382544861337683, "grad_norm": 0.008700661361217499, "learning_rate": 0.0007951350855684588, "loss": 0.0622, "num_input_tokens_seen": 97637664, "step": 45255 }, { "epoch": 7.383360522022839, "grad_norm": 0.3483394682407379, "learning_rate": 0.0007950776258157637, "loss": 0.054, "num_input_tokens_seen": 97647648, "step": 45260 }, { "epoch": 7.384176182707994, "grad_norm": 0.11331566423177719, "learning_rate": 0.000795020160082946, "loss": 0.0643, "num_input_tokens_seen": 97659520, "step": 45265 }, { "epoch": 7.3849918433931485, "grad_norm": 0.2991211712360382, "learning_rate": 0.0007949626883711707, "loss": 0.21, "num_input_tokens_seen": 97668704, "step": 45270 }, { "epoch": 7.385807504078303, "grad_norm": 0.0452130064368248, "learning_rate": 0.0007949052106816022, "loss": 0.0119, "num_input_tokens_seen": 97679264, "step": 45275 }, { "epoch": 7.386623164763458, "grad_norm": 0.23242086172103882, "learning_rate": 0.0007948477270154056, "loss": 0.0472, "num_input_tokens_seen": 97689472, "step": 45280 }, { "epoch": 7.387438825448613, "grad_norm": 0.240744948387146, "learning_rate": 0.0007947902373737456, "loss": 0.1917, "num_input_tokens_seen": 97700480, "step": 45285 }, { "epoch": 7.388254486133769, "grad_norm": 0.04855697602033615, "learning_rate": 0.0007947327417577875, "loss": 0.0092, "num_input_tokens_seen": 97710528, "step": 45290 }, { "epoch": 7.3890701468189235, "grad_norm": 0.257434606552124, "learning_rate": 0.0007946752401686966, "loss": 0.0641, "num_input_tokens_seen": 97721216, "step": 45295 }, { "epoch": 7.389885807504078, "grad_norm": 0.015980631113052368, "learning_rate": 0.000794617732607638, "loss": 0.0963, "num_input_tokens_seen": 97731552, "step": 45300 }, { "epoch": 7.390701468189233, "grad_norm": 0.037306156009435654, "learning_rate": 0.0007945602190757775, "loss": 0.1829, "num_input_tokens_seen": 97742176, "step": 45305 }, { "epoch": 7.391517128874388, "grad_norm": 0.014900156296789646, "learning_rate": 0.0007945026995742803, "loss": 0.0677, "num_input_tokens_seen": 97752320, "step": 45310 }, { "epoch": 7.392332789559543, "grad_norm": 0.024418281391263008, "learning_rate": 0.0007944451741043124, "loss": 0.0734, "num_input_tokens_seen": 97762720, "step": 45315 }, { "epoch": 7.3931484502446985, "grad_norm": 0.07962283492088318, "learning_rate": 0.0007943876426670395, "loss": 0.0745, "num_input_tokens_seen": 97772960, "step": 45320 }, { "epoch": 7.393964110929853, "grad_norm": 0.2690376937389374, "learning_rate": 0.0007943301052636276, "loss": 0.0444, "num_input_tokens_seen": 97783360, "step": 45325 }, { "epoch": 7.394779771615008, "grad_norm": 0.0465802438557148, "learning_rate": 0.0007942725618952426, "loss": 0.0449, "num_input_tokens_seen": 97793536, "step": 45330 }, { "epoch": 7.395595432300163, "grad_norm": 0.0454472079873085, "learning_rate": 0.000794215012563051, "loss": 0.0968, "num_input_tokens_seen": 97804992, "step": 45335 }, { "epoch": 7.396411092985318, "grad_norm": 0.004776742774993181, "learning_rate": 0.0007941574572682187, "loss": 0.0441, "num_input_tokens_seen": 97816384, "step": 45340 }, { "epoch": 7.397226753670473, "grad_norm": 0.2122475653886795, "learning_rate": 0.0007940998960119126, "loss": 0.048, "num_input_tokens_seen": 97828032, "step": 45345 }, { "epoch": 7.398042414355628, "grad_norm": 0.48974356055259705, "learning_rate": 0.0007940423287952989, "loss": 0.1111, "num_input_tokens_seen": 97840160, "step": 45350 }, { "epoch": 7.398858075040783, "grad_norm": 0.02755374275147915, "learning_rate": 0.0007939847556195443, "loss": 0.0606, "num_input_tokens_seen": 97851040, "step": 45355 }, { "epoch": 7.399673735725938, "grad_norm": 0.3815477192401886, "learning_rate": 0.0007939271764858158, "loss": 0.1565, "num_input_tokens_seen": 97861408, "step": 45360 }, { "epoch": 7.400489396411093, "grad_norm": 0.01289767399430275, "learning_rate": 0.0007938695913952802, "loss": 0.0418, "num_input_tokens_seen": 97872160, "step": 45365 }, { "epoch": 7.401305057096248, "grad_norm": 0.028448032215237617, "learning_rate": 0.0007938120003491045, "loss": 0.1215, "num_input_tokens_seen": 97882848, "step": 45370 }, { "epoch": 7.402120717781403, "grad_norm": 0.05195760354399681, "learning_rate": 0.0007937544033484558, "loss": 0.018, "num_input_tokens_seen": 97895072, "step": 45375 }, { "epoch": 7.402936378466558, "grad_norm": 0.034554410725831985, "learning_rate": 0.0007936968003945015, "loss": 0.0503, "num_input_tokens_seen": 97906656, "step": 45380 }, { "epoch": 7.403752039151713, "grad_norm": 0.4184320569038391, "learning_rate": 0.0007936391914884092, "loss": 0.128, "num_input_tokens_seen": 97918208, "step": 45385 }, { "epoch": 7.404567699836868, "grad_norm": 0.31847238540649414, "learning_rate": 0.0007935815766313459, "loss": 0.0848, "num_input_tokens_seen": 97928800, "step": 45390 }, { "epoch": 7.4053833605220225, "grad_norm": 0.030809953808784485, "learning_rate": 0.0007935239558244795, "loss": 0.0483, "num_input_tokens_seen": 97938624, "step": 45395 }, { "epoch": 7.406199021207178, "grad_norm": 0.19012406468391418, "learning_rate": 0.000793466329068978, "loss": 0.0627, "num_input_tokens_seen": 97949024, "step": 45400 }, { "epoch": 7.407014681892333, "grad_norm": 0.1277860403060913, "learning_rate": 0.000793408696366009, "loss": 0.1475, "num_input_tokens_seen": 97961184, "step": 45405 }, { "epoch": 7.407830342577488, "grad_norm": 0.4970308840274811, "learning_rate": 0.0007933510577167404, "loss": 0.0798, "num_input_tokens_seen": 97972800, "step": 45410 }, { "epoch": 7.408646003262643, "grad_norm": 0.06367593258619308, "learning_rate": 0.0007932934131223406, "loss": 0.0427, "num_input_tokens_seen": 97984096, "step": 45415 }, { "epoch": 7.4094616639477975, "grad_norm": 0.3649691343307495, "learning_rate": 0.0007932357625839776, "loss": 0.1419, "num_input_tokens_seen": 97996000, "step": 45420 }, { "epoch": 7.410277324632952, "grad_norm": 0.03377820923924446, "learning_rate": 0.0007931781061028201, "loss": 0.0215, "num_input_tokens_seen": 98007296, "step": 45425 }, { "epoch": 7.411092985318108, "grad_norm": 0.40231409668922424, "learning_rate": 0.0007931204436800361, "loss": 0.0512, "num_input_tokens_seen": 98017216, "step": 45430 }, { "epoch": 7.411908646003263, "grad_norm": 0.07036181539297104, "learning_rate": 0.0007930627753167945, "loss": 0.1167, "num_input_tokens_seen": 98028960, "step": 45435 }, { "epoch": 7.412724306688418, "grad_norm": 0.039319396018981934, "learning_rate": 0.0007930051010142641, "loss": 0.0196, "num_input_tokens_seen": 98039552, "step": 45440 }, { "epoch": 7.4135399673735725, "grad_norm": 0.13210931420326233, "learning_rate": 0.0007929474207736136, "loss": 0.1208, "num_input_tokens_seen": 98051456, "step": 45445 }, { "epoch": 7.414355628058727, "grad_norm": 0.3385019302368164, "learning_rate": 0.000792889734596012, "loss": 0.2457, "num_input_tokens_seen": 98063712, "step": 45450 }, { "epoch": 7.415171288743883, "grad_norm": 0.06650233268737793, "learning_rate": 0.0007928320424826284, "loss": 0.0794, "num_input_tokens_seen": 98074560, "step": 45455 }, { "epoch": 7.415986949429038, "grad_norm": 0.07601414620876312, "learning_rate": 0.0007927743444346317, "loss": 0.034, "num_input_tokens_seen": 98084896, "step": 45460 }, { "epoch": 7.416802610114193, "grad_norm": 0.03215811401605606, "learning_rate": 0.0007927166404531916, "loss": 0.0209, "num_input_tokens_seen": 98096160, "step": 45465 }, { "epoch": 7.417618270799347, "grad_norm": 0.3975168764591217, "learning_rate": 0.0007926589305394776, "loss": 0.1569, "num_input_tokens_seen": 98107424, "step": 45470 }, { "epoch": 7.418433931484502, "grad_norm": 0.2210247814655304, "learning_rate": 0.0007926012146946591, "loss": 0.1018, "num_input_tokens_seen": 98117376, "step": 45475 }, { "epoch": 7.419249592169657, "grad_norm": 0.41766196489334106, "learning_rate": 0.0007925434929199058, "loss": 0.1518, "num_input_tokens_seen": 98127744, "step": 45480 }, { "epoch": 7.420065252854813, "grad_norm": 0.13558121025562286, "learning_rate": 0.0007924857652163873, "loss": 0.0329, "num_input_tokens_seen": 98138944, "step": 45485 }, { "epoch": 7.420880913539968, "grad_norm": 0.13868281245231628, "learning_rate": 0.0007924280315852739, "loss": 0.031, "num_input_tokens_seen": 98149696, "step": 45490 }, { "epoch": 7.421696574225122, "grad_norm": 0.15936878323554993, "learning_rate": 0.0007923702920277355, "loss": 0.0422, "num_input_tokens_seen": 98160928, "step": 45495 }, { "epoch": 7.422512234910277, "grad_norm": 0.009117166511714458, "learning_rate": 0.0007923125465449421, "loss": 0.157, "num_input_tokens_seen": 98172128, "step": 45500 }, { "epoch": 7.423327895595432, "grad_norm": 0.008292265236377716, "learning_rate": 0.0007922547951380643, "loss": 0.0095, "num_input_tokens_seen": 98182688, "step": 45505 }, { "epoch": 7.424143556280587, "grad_norm": 0.33745118975639343, "learning_rate": 0.0007921970378082722, "loss": 0.1746, "num_input_tokens_seen": 98193664, "step": 45510 }, { "epoch": 7.424959216965743, "grad_norm": 0.016101593151688576, "learning_rate": 0.0007921392745567364, "loss": 0.0239, "num_input_tokens_seen": 98204128, "step": 45515 }, { "epoch": 7.425774877650897, "grad_norm": 0.01119141187518835, "learning_rate": 0.0007920815053846277, "loss": 0.0712, "num_input_tokens_seen": 98214624, "step": 45520 }, { "epoch": 7.426590538336052, "grad_norm": 0.02829412929713726, "learning_rate": 0.0007920237302931167, "loss": 0.0465, "num_input_tokens_seen": 98226176, "step": 45525 }, { "epoch": 7.427406199021207, "grad_norm": 0.1702958345413208, "learning_rate": 0.0007919659492833744, "loss": 0.0309, "num_input_tokens_seen": 98238048, "step": 45530 }, { "epoch": 7.428221859706362, "grad_norm": 0.09760595113039017, "learning_rate": 0.0007919081623565717, "loss": 0.0407, "num_input_tokens_seen": 98249696, "step": 45535 }, { "epoch": 7.4290375203915175, "grad_norm": 0.22956730425357819, "learning_rate": 0.0007918503695138799, "loss": 0.0179, "num_input_tokens_seen": 98261248, "step": 45540 }, { "epoch": 7.429853181076672, "grad_norm": 0.0504307895898819, "learning_rate": 0.0007917925707564699, "loss": 0.0555, "num_input_tokens_seen": 98271552, "step": 45545 }, { "epoch": 7.430668841761827, "grad_norm": 0.27703818678855896, "learning_rate": 0.0007917347660855134, "loss": 0.0974, "num_input_tokens_seen": 98283264, "step": 45550 }, { "epoch": 7.431484502446982, "grad_norm": 0.08327177911996841, "learning_rate": 0.0007916769555021819, "loss": 0.1149, "num_input_tokens_seen": 98292512, "step": 45555 }, { "epoch": 7.432300163132137, "grad_norm": 0.3279486298561096, "learning_rate": 0.0007916191390076468, "loss": 0.1365, "num_input_tokens_seen": 98303360, "step": 45560 }, { "epoch": 7.433115823817292, "grad_norm": 0.051596276462078094, "learning_rate": 0.0007915613166030799, "loss": 0.0325, "num_input_tokens_seen": 98313568, "step": 45565 }, { "epoch": 7.433931484502447, "grad_norm": 0.22452832758426666, "learning_rate": 0.0007915034882896528, "loss": 0.1062, "num_input_tokens_seen": 98324000, "step": 45570 }, { "epoch": 7.434747145187602, "grad_norm": 0.020875144749879837, "learning_rate": 0.0007914456540685379, "loss": 0.0104, "num_input_tokens_seen": 98335648, "step": 45575 }, { "epoch": 7.435562805872757, "grad_norm": 0.3853148818016052, "learning_rate": 0.0007913878139409072, "loss": 0.1357, "num_input_tokens_seen": 98347232, "step": 45580 }, { "epoch": 7.436378466557912, "grad_norm": 0.014520413242280483, "learning_rate": 0.0007913299679079326, "loss": 0.0908, "num_input_tokens_seen": 98359104, "step": 45585 }, { "epoch": 7.437194127243067, "grad_norm": 0.07019753754138947, "learning_rate": 0.000791272115970787, "loss": 0.0835, "num_input_tokens_seen": 98369504, "step": 45590 }, { "epoch": 7.438009787928221, "grad_norm": 0.5184402465820312, "learning_rate": 0.0007912142581306421, "loss": 0.2607, "num_input_tokens_seen": 98379648, "step": 45595 }, { "epoch": 7.438825448613377, "grad_norm": 0.02248012088239193, "learning_rate": 0.0007911563943886709, "loss": 0.09, "num_input_tokens_seen": 98390368, "step": 45600 }, { "epoch": 7.439641109298532, "grad_norm": 0.061573971062898636, "learning_rate": 0.000791098524746046, "loss": 0.0734, "num_input_tokens_seen": 98402016, "step": 45605 }, { "epoch": 7.440456769983687, "grad_norm": 0.4289899468421936, "learning_rate": 0.0007910406492039404, "loss": 0.0695, "num_input_tokens_seen": 98413216, "step": 45610 }, { "epoch": 7.441272430668842, "grad_norm": 0.04155628755688667, "learning_rate": 0.0007909827677635267, "loss": 0.0855, "num_input_tokens_seen": 98423520, "step": 45615 }, { "epoch": 7.442088091353996, "grad_norm": 0.03025716356933117, "learning_rate": 0.000790924880425978, "loss": 0.1739, "num_input_tokens_seen": 98432704, "step": 45620 }, { "epoch": 7.442903752039152, "grad_norm": 0.6800268292427063, "learning_rate": 0.0007908669871924676, "loss": 0.2932, "num_input_tokens_seen": 98443136, "step": 45625 }, { "epoch": 7.443719412724307, "grad_norm": 0.19001524150371552, "learning_rate": 0.0007908090880641688, "loss": 0.0256, "num_input_tokens_seen": 98454464, "step": 45630 }, { "epoch": 7.444535073409462, "grad_norm": 0.19488872587680817, "learning_rate": 0.0007907511830422547, "loss": 0.0604, "num_input_tokens_seen": 98466784, "step": 45635 }, { "epoch": 7.445350734094617, "grad_norm": 0.007901213131844997, "learning_rate": 0.0007906932721278992, "loss": 0.0504, "num_input_tokens_seen": 98477728, "step": 45640 }, { "epoch": 7.446166394779771, "grad_norm": 0.028696853667497635, "learning_rate": 0.0007906353553222757, "loss": 0.0409, "num_input_tokens_seen": 98489184, "step": 45645 }, { "epoch": 7.446982055464926, "grad_norm": 0.09648387134075165, "learning_rate": 0.000790577432626558, "loss": 0.0939, "num_input_tokens_seen": 98500960, "step": 45650 }, { "epoch": 7.447797716150082, "grad_norm": 0.02138504385948181, "learning_rate": 0.0007905195040419202, "loss": 0.0336, "num_input_tokens_seen": 98510912, "step": 45655 }, { "epoch": 7.448613376835237, "grad_norm": 0.018604503944516182, "learning_rate": 0.0007904615695695359, "loss": 0.1288, "num_input_tokens_seen": 98521760, "step": 45660 }, { "epoch": 7.4494290375203915, "grad_norm": 0.06388849765062332, "learning_rate": 0.0007904036292105794, "loss": 0.1434, "num_input_tokens_seen": 98531072, "step": 45665 }, { "epoch": 7.450244698205546, "grad_norm": 0.4496106505393982, "learning_rate": 0.000790345682966225, "loss": 0.1502, "num_input_tokens_seen": 98541792, "step": 45670 }, { "epoch": 7.451060358890701, "grad_norm": 0.32725685834884644, "learning_rate": 0.000790287730837647, "loss": 0.0864, "num_input_tokens_seen": 98552320, "step": 45675 }, { "epoch": 7.451876019575856, "grad_norm": 0.0599450021982193, "learning_rate": 0.0007902297728260199, "loss": 0.2011, "num_input_tokens_seen": 98563904, "step": 45680 }, { "epoch": 7.452691680261012, "grad_norm": 0.22208617627620697, "learning_rate": 0.0007901718089325183, "loss": 0.145, "num_input_tokens_seen": 98574080, "step": 45685 }, { "epoch": 7.4535073409461665, "grad_norm": 0.03218377009034157, "learning_rate": 0.0007901138391583169, "loss": 0.0409, "num_input_tokens_seen": 98585984, "step": 45690 }, { "epoch": 7.454323001631321, "grad_norm": 0.1486860066652298, "learning_rate": 0.0007900558635045904, "loss": 0.0282, "num_input_tokens_seen": 98597280, "step": 45695 }, { "epoch": 7.455138662316476, "grad_norm": 0.01838448829948902, "learning_rate": 0.000789997881972514, "loss": 0.0371, "num_input_tokens_seen": 98608256, "step": 45700 }, { "epoch": 7.455954323001631, "grad_norm": 0.2720358073711395, "learning_rate": 0.0007899398945632626, "loss": 0.1165, "num_input_tokens_seen": 98619424, "step": 45705 }, { "epoch": 7.456769983686787, "grad_norm": 0.04772483929991722, "learning_rate": 0.0007898819012780114, "loss": 0.0749, "num_input_tokens_seen": 98629920, "step": 45710 }, { "epoch": 7.4575856443719415, "grad_norm": 0.008635647594928741, "learning_rate": 0.0007898239021179356, "loss": 0.0318, "num_input_tokens_seen": 98641408, "step": 45715 }, { "epoch": 7.458401305057096, "grad_norm": 0.029868699610233307, "learning_rate": 0.000789765897084211, "loss": 0.0455, "num_input_tokens_seen": 98652416, "step": 45720 }, { "epoch": 7.459216965742251, "grad_norm": 0.037980929017066956, "learning_rate": 0.0007897078861780127, "loss": 0.0132, "num_input_tokens_seen": 98662592, "step": 45725 }, { "epoch": 7.460032626427406, "grad_norm": 0.08652736991643906, "learning_rate": 0.0007896498694005168, "loss": 0.0679, "num_input_tokens_seen": 98672672, "step": 45730 }, { "epoch": 7.460848287112561, "grad_norm": 0.1138477474451065, "learning_rate": 0.0007895918467528987, "loss": 0.0205, "num_input_tokens_seen": 98683616, "step": 45735 }, { "epoch": 7.4616639477977165, "grad_norm": 0.006343527231365442, "learning_rate": 0.0007895338182363343, "loss": 0.0654, "num_input_tokens_seen": 98692896, "step": 45740 }, { "epoch": 7.462479608482871, "grad_norm": 0.024261370301246643, "learning_rate": 0.0007894757838519999, "loss": 0.0222, "num_input_tokens_seen": 98702368, "step": 45745 }, { "epoch": 7.463295269168026, "grad_norm": 0.3922736346721649, "learning_rate": 0.0007894177436010716, "loss": 0.1473, "num_input_tokens_seen": 98712608, "step": 45750 }, { "epoch": 7.464110929853181, "grad_norm": 0.21313834190368652, "learning_rate": 0.0007893596974847255, "loss": 0.0383, "num_input_tokens_seen": 98723648, "step": 45755 }, { "epoch": 7.464926590538336, "grad_norm": 0.04094382002949715, "learning_rate": 0.000789301645504138, "loss": 0.0391, "num_input_tokens_seen": 98734624, "step": 45760 }, { "epoch": 7.465742251223491, "grad_norm": 0.11056195199489594, "learning_rate": 0.0007892435876604857, "loss": 0.1359, "num_input_tokens_seen": 98745888, "step": 45765 }, { "epoch": 7.466557911908646, "grad_norm": 0.48346689343452454, "learning_rate": 0.0007891855239549453, "loss": 0.1367, "num_input_tokens_seen": 98756768, "step": 45770 }, { "epoch": 7.467373572593801, "grad_norm": 0.05523720383644104, "learning_rate": 0.0007891274543886933, "loss": 0.0127, "num_input_tokens_seen": 98767552, "step": 45775 }, { "epoch": 7.468189233278956, "grad_norm": 0.287562757730484, "learning_rate": 0.0007890693789629064, "loss": 0.0829, "num_input_tokens_seen": 98778944, "step": 45780 }, { "epoch": 7.469004893964111, "grad_norm": 0.33080559968948364, "learning_rate": 0.0007890112976787621, "loss": 0.1871, "num_input_tokens_seen": 98789728, "step": 45785 }, { "epoch": 7.4698205546492655, "grad_norm": 0.06104956194758415, "learning_rate": 0.0007889532105374373, "loss": 0.0249, "num_input_tokens_seen": 98801408, "step": 45790 }, { "epoch": 7.470636215334421, "grad_norm": 0.013354141265153885, "learning_rate": 0.0007888951175401089, "loss": 0.0114, "num_input_tokens_seen": 98812480, "step": 45795 }, { "epoch": 7.471451876019576, "grad_norm": 0.11160764843225479, "learning_rate": 0.0007888370186879545, "loss": 0.0474, "num_input_tokens_seen": 98824256, "step": 45800 }, { "epoch": 7.472267536704731, "grad_norm": 0.6586228609085083, "learning_rate": 0.0007887789139821516, "loss": 0.1461, "num_input_tokens_seen": 98834752, "step": 45805 }, { "epoch": 7.473083197389886, "grad_norm": 0.051296211779117584, "learning_rate": 0.0007887208034238777, "loss": 0.0284, "num_input_tokens_seen": 98846400, "step": 45810 }, { "epoch": 7.4738988580750405, "grad_norm": 0.01640196330845356, "learning_rate": 0.0007886626870143103, "loss": 0.0223, "num_input_tokens_seen": 98855392, "step": 45815 }, { "epoch": 7.474714518760196, "grad_norm": 0.11553415656089783, "learning_rate": 0.0007886045647546274, "loss": 0.0145, "num_input_tokens_seen": 98866240, "step": 45820 }, { "epoch": 7.475530179445351, "grad_norm": 0.3310805559158325, "learning_rate": 0.0007885464366460069, "loss": 0.0637, "num_input_tokens_seen": 98877472, "step": 45825 }, { "epoch": 7.476345840130506, "grad_norm": 0.5880880355834961, "learning_rate": 0.0007884883026896268, "loss": 0.0954, "num_input_tokens_seen": 98887136, "step": 45830 }, { "epoch": 7.477161500815661, "grad_norm": 0.4071207046508789, "learning_rate": 0.0007884301628866652, "loss": 0.1433, "num_input_tokens_seen": 98898464, "step": 45835 }, { "epoch": 7.4779771615008155, "grad_norm": 0.025556376203894615, "learning_rate": 0.0007883720172383007, "loss": 0.0628, "num_input_tokens_seen": 98908544, "step": 45840 }, { "epoch": 7.47879282218597, "grad_norm": 0.26563364267349243, "learning_rate": 0.0007883138657457111, "loss": 0.0427, "num_input_tokens_seen": 98918240, "step": 45845 }, { "epoch": 7.479608482871126, "grad_norm": 0.010090251453220844, "learning_rate": 0.0007882557084100755, "loss": 0.0846, "num_input_tokens_seen": 98929024, "step": 45850 }, { "epoch": 7.480424143556281, "grad_norm": 0.1399764120578766, "learning_rate": 0.0007881975452325722, "loss": 0.0607, "num_input_tokens_seen": 98940384, "step": 45855 }, { "epoch": 7.481239804241436, "grad_norm": 0.17735078930854797, "learning_rate": 0.00078813937621438, "loss": 0.018, "num_input_tokens_seen": 98950624, "step": 45860 }, { "epoch": 7.4820554649265905, "grad_norm": 0.2618032991886139, "learning_rate": 0.000788081201356678, "loss": 0.0433, "num_input_tokens_seen": 98961728, "step": 45865 }, { "epoch": 7.482871125611745, "grad_norm": 0.10430657118558884, "learning_rate": 0.0007880230206606449, "loss": 0.0276, "num_input_tokens_seen": 98971680, "step": 45870 }, { "epoch": 7.4836867862969, "grad_norm": 0.12120870500802994, "learning_rate": 0.0007879648341274599, "loss": 0.0445, "num_input_tokens_seen": 98982912, "step": 45875 }, { "epoch": 7.484502446982056, "grad_norm": 0.1883370578289032, "learning_rate": 0.0007879066417583021, "loss": 0.091, "num_input_tokens_seen": 98993024, "step": 45880 }, { "epoch": 7.485318107667211, "grad_norm": 0.021503519266843796, "learning_rate": 0.0007878484435543511, "loss": 0.0159, "num_input_tokens_seen": 99003648, "step": 45885 }, { "epoch": 7.486133768352365, "grad_norm": 0.017988789826631546, "learning_rate": 0.0007877902395167862, "loss": 0.0827, "num_input_tokens_seen": 99014560, "step": 45890 }, { "epoch": 7.48694942903752, "grad_norm": 0.011251496151089668, "learning_rate": 0.000787732029646787, "loss": 0.0172, "num_input_tokens_seen": 99024864, "step": 45895 }, { "epoch": 7.487765089722675, "grad_norm": 0.8975038528442383, "learning_rate": 0.0007876738139455332, "loss": 0.0332, "num_input_tokens_seen": 99034816, "step": 45900 }, { "epoch": 7.488580750407831, "grad_norm": 0.0318521186709404, "learning_rate": 0.0007876155924142046, "loss": 0.075, "num_input_tokens_seen": 99044896, "step": 45905 }, { "epoch": 7.489396411092986, "grad_norm": 0.08386589586734772, "learning_rate": 0.0007875573650539811, "loss": 0.0154, "num_input_tokens_seen": 99055328, "step": 45910 }, { "epoch": 7.49021207177814, "grad_norm": 0.026162289083003998, "learning_rate": 0.0007874991318660429, "loss": 0.1089, "num_input_tokens_seen": 99065664, "step": 45915 }, { "epoch": 7.491027732463295, "grad_norm": 0.003726135939359665, "learning_rate": 0.0007874408928515702, "loss": 0.0252, "num_input_tokens_seen": 99075904, "step": 45920 }, { "epoch": 7.49184339314845, "grad_norm": 0.12071631848812103, "learning_rate": 0.000787382648011743, "loss": 0.0371, "num_input_tokens_seen": 99086016, "step": 45925 }, { "epoch": 7.492659053833605, "grad_norm": 0.01674828492105007, "learning_rate": 0.0007873243973477419, "loss": 0.1679, "num_input_tokens_seen": 99096416, "step": 45930 }, { "epoch": 7.493474714518761, "grad_norm": 0.7316375970840454, "learning_rate": 0.0007872661408607473, "loss": 0.1042, "num_input_tokens_seen": 99106368, "step": 45935 }, { "epoch": 7.494290375203915, "grad_norm": 0.036734648048877716, "learning_rate": 0.0007872078785519401, "loss": 0.076, "num_input_tokens_seen": 99115936, "step": 45940 }, { "epoch": 7.49510603588907, "grad_norm": 0.004907377064228058, "learning_rate": 0.0007871496104225007, "loss": 0.0063, "num_input_tokens_seen": 99126816, "step": 45945 }, { "epoch": 7.495921696574225, "grad_norm": 0.007799459621310234, "learning_rate": 0.0007870913364736103, "loss": 0.0275, "num_input_tokens_seen": 99136736, "step": 45950 }, { "epoch": 7.49673735725938, "grad_norm": 0.5735790729522705, "learning_rate": 0.0007870330567064499, "loss": 0.09, "num_input_tokens_seen": 99147552, "step": 45955 }, { "epoch": 7.497553017944535, "grad_norm": 0.015136484056711197, "learning_rate": 0.0007869747711222001, "loss": 0.1023, "num_input_tokens_seen": 99158944, "step": 45960 }, { "epoch": 7.49836867862969, "grad_norm": 0.031046533957123756, "learning_rate": 0.0007869164797220429, "loss": 0.026, "num_input_tokens_seen": 99169984, "step": 45965 }, { "epoch": 7.499184339314845, "grad_norm": 0.011330963112413883, "learning_rate": 0.000786858182507159, "loss": 0.023, "num_input_tokens_seen": 99180896, "step": 45970 }, { "epoch": 7.5, "grad_norm": 0.007748182397335768, "learning_rate": 0.0007867998794787303, "loss": 0.1328, "num_input_tokens_seen": 99191008, "step": 45975 }, { "epoch": 7.500815660685155, "grad_norm": 0.3179551661014557, "learning_rate": 0.0007867415706379381, "loss": 0.0643, "num_input_tokens_seen": 99201984, "step": 45980 }, { "epoch": 7.50163132137031, "grad_norm": 0.27452707290649414, "learning_rate": 0.0007866832559859642, "loss": 0.0427, "num_input_tokens_seen": 99213536, "step": 45985 }, { "epoch": 7.502446982055465, "grad_norm": 0.05044414848089218, "learning_rate": 0.0007866249355239905, "loss": 0.0194, "num_input_tokens_seen": 99224992, "step": 45990 }, { "epoch": 7.50326264274062, "grad_norm": 0.16619515419006348, "learning_rate": 0.0007865666092531989, "loss": 0.0475, "num_input_tokens_seen": 99235168, "step": 45995 }, { "epoch": 7.504078303425775, "grad_norm": 0.4527769982814789, "learning_rate": 0.0007865082771747713, "loss": 0.1626, "num_input_tokens_seen": 99246496, "step": 46000 }, { "epoch": 7.50489396411093, "grad_norm": 0.13338510692119598, "learning_rate": 0.00078644993928989, "loss": 0.064, "num_input_tokens_seen": 99256192, "step": 46005 }, { "epoch": 7.505709624796085, "grad_norm": 0.1280643194913864, "learning_rate": 0.0007863915955997374, "loss": 0.0772, "num_input_tokens_seen": 99266816, "step": 46010 }, { "epoch": 7.506525285481239, "grad_norm": 0.0636386051774025, "learning_rate": 0.0007863332461054957, "loss": 0.0483, "num_input_tokens_seen": 99277280, "step": 46015 }, { "epoch": 7.507340946166395, "grad_norm": 0.2832755744457245, "learning_rate": 0.0007862748908083477, "loss": 0.0931, "num_input_tokens_seen": 99288096, "step": 46020 }, { "epoch": 7.50815660685155, "grad_norm": 0.1493140608072281, "learning_rate": 0.0007862165297094758, "loss": 0.0409, "num_input_tokens_seen": 99298176, "step": 46025 }, { "epoch": 7.508972267536705, "grad_norm": 0.12753690779209137, "learning_rate": 0.0007861581628100628, "loss": 0.0201, "num_input_tokens_seen": 99308352, "step": 46030 }, { "epoch": 7.50978792822186, "grad_norm": 0.010356430895626545, "learning_rate": 0.0007860997901112917, "loss": 0.02, "num_input_tokens_seen": 99319584, "step": 46035 }, { "epoch": 7.510603588907014, "grad_norm": 0.05895746499300003, "learning_rate": 0.0007860414116143453, "loss": 0.1019, "num_input_tokens_seen": 99330080, "step": 46040 }, { "epoch": 7.511419249592169, "grad_norm": 0.010224574245512486, "learning_rate": 0.0007859830273204069, "loss": 0.0085, "num_input_tokens_seen": 99342304, "step": 46045 }, { "epoch": 7.512234910277325, "grad_norm": 0.1965058445930481, "learning_rate": 0.0007859246372306595, "loss": 0.1007, "num_input_tokens_seen": 99353888, "step": 46050 }, { "epoch": 7.51305057096248, "grad_norm": 0.45925799012184143, "learning_rate": 0.0007858662413462867, "loss": 0.0731, "num_input_tokens_seen": 99363904, "step": 46055 }, { "epoch": 7.513866231647635, "grad_norm": 0.20795680582523346, "learning_rate": 0.000785807839668472, "loss": 0.1313, "num_input_tokens_seen": 99375072, "step": 46060 }, { "epoch": 7.514681892332789, "grad_norm": 0.19454431533813477, "learning_rate": 0.0007857494321983987, "loss": 0.0585, "num_input_tokens_seen": 99385856, "step": 46065 }, { "epoch": 7.515497553017944, "grad_norm": 0.3405384421348572, "learning_rate": 0.0007856910189372506, "loss": 0.0295, "num_input_tokens_seen": 99394944, "step": 46070 }, { "epoch": 7.5163132137031, "grad_norm": 0.005011253524571657, "learning_rate": 0.0007856325998862118, "loss": 0.0586, "num_input_tokens_seen": 99406720, "step": 46075 }, { "epoch": 7.517128874388255, "grad_norm": 0.07828003168106079, "learning_rate": 0.0007855741750464658, "loss": 0.2639, "num_input_tokens_seen": 99416736, "step": 46080 }, { "epoch": 7.5179445350734095, "grad_norm": 0.10270196199417114, "learning_rate": 0.0007855157444191969, "loss": 0.0134, "num_input_tokens_seen": 99426912, "step": 46085 }, { "epoch": 7.518760195758564, "grad_norm": 0.21540626883506775, "learning_rate": 0.0007854573080055894, "loss": 0.1447, "num_input_tokens_seen": 99439104, "step": 46090 }, { "epoch": 7.519575856443719, "grad_norm": 0.1845274418592453, "learning_rate": 0.0007853988658068274, "loss": 0.03, "num_input_tokens_seen": 99449440, "step": 46095 }, { "epoch": 7.520391517128875, "grad_norm": 0.10276073962450027, "learning_rate": 0.000785340417824095, "loss": 0.0551, "num_input_tokens_seen": 99458496, "step": 46100 }, { "epoch": 7.52120717781403, "grad_norm": 0.018076535314321518, "learning_rate": 0.0007852819640585773, "loss": 0.0527, "num_input_tokens_seen": 99468928, "step": 46105 }, { "epoch": 7.5220228384991845, "grad_norm": 0.0030629539396613836, "learning_rate": 0.0007852235045114588, "loss": 0.0272, "num_input_tokens_seen": 99480640, "step": 46110 }, { "epoch": 7.522838499184339, "grad_norm": 0.007843377999961376, "learning_rate": 0.000785165039183924, "loss": 0.0955, "num_input_tokens_seen": 99490336, "step": 46115 }, { "epoch": 7.523654159869494, "grad_norm": 0.03852209448814392, "learning_rate": 0.0007851065680771581, "loss": 0.0258, "num_input_tokens_seen": 99500224, "step": 46120 }, { "epoch": 7.524469820554649, "grad_norm": 0.094424769282341, "learning_rate": 0.0007850480911923457, "loss": 0.1857, "num_input_tokens_seen": 99511552, "step": 46125 }, { "epoch": 7.525285481239804, "grad_norm": 0.26474109292030334, "learning_rate": 0.0007849896085306723, "loss": 0.1615, "num_input_tokens_seen": 99521728, "step": 46130 }, { "epoch": 7.5261011419249595, "grad_norm": 0.0061035193502902985, "learning_rate": 0.0007849311200933228, "loss": 0.0384, "num_input_tokens_seen": 99532544, "step": 46135 }, { "epoch": 7.526916802610114, "grad_norm": 0.09074371308088303, "learning_rate": 0.0007848726258814826, "loss": 0.0138, "num_input_tokens_seen": 99543360, "step": 46140 }, { "epoch": 7.527732463295269, "grad_norm": 0.06427381187677383, "learning_rate": 0.0007848141258963375, "loss": 0.0444, "num_input_tokens_seen": 99554432, "step": 46145 }, { "epoch": 7.528548123980424, "grad_norm": 0.07650013267993927, "learning_rate": 0.0007847556201390727, "loss": 0.0414, "num_input_tokens_seen": 99566464, "step": 46150 }, { "epoch": 7.529363784665579, "grad_norm": 0.04257727041840553, "learning_rate": 0.0007846971086108741, "loss": 0.0836, "num_input_tokens_seen": 99576384, "step": 46155 }, { "epoch": 7.5301794453507345, "grad_norm": 0.0072668371722102165, "learning_rate": 0.0007846385913129273, "loss": 0.201, "num_input_tokens_seen": 99586912, "step": 46160 }, { "epoch": 7.530995106035889, "grad_norm": 0.020547883585095406, "learning_rate": 0.0007845800682464185, "loss": 0.0079, "num_input_tokens_seen": 99597696, "step": 46165 }, { "epoch": 7.531810766721044, "grad_norm": 0.3281855285167694, "learning_rate": 0.0007845215394125336, "loss": 0.0713, "num_input_tokens_seen": 99607296, "step": 46170 }, { "epoch": 7.532626427406199, "grad_norm": 0.006211583036929369, "learning_rate": 0.0007844630048124586, "loss": 0.0499, "num_input_tokens_seen": 99617344, "step": 46175 }, { "epoch": 7.533442088091354, "grad_norm": 0.032929204404354095, "learning_rate": 0.00078440446444738, "loss": 0.0862, "num_input_tokens_seen": 99629248, "step": 46180 }, { "epoch": 7.5342577487765094, "grad_norm": 0.10998833179473877, "learning_rate": 0.0007843459183184843, "loss": 0.1037, "num_input_tokens_seen": 99640288, "step": 46185 }, { "epoch": 7.535073409461664, "grad_norm": 0.034989554435014725, "learning_rate": 0.0007842873664269576, "loss": 0.0197, "num_input_tokens_seen": 99652544, "step": 46190 }, { "epoch": 7.535889070146819, "grad_norm": 0.1933150589466095, "learning_rate": 0.0007842288087739868, "loss": 0.0248, "num_input_tokens_seen": 99662880, "step": 46195 }, { "epoch": 7.536704730831974, "grad_norm": 0.4281262457370758, "learning_rate": 0.0007841702453607589, "loss": 0.2265, "num_input_tokens_seen": 99672864, "step": 46200 }, { "epoch": 7.537520391517129, "grad_norm": 0.41623130440711975, "learning_rate": 0.0007841116761884601, "loss": 0.189, "num_input_tokens_seen": 99684192, "step": 46205 }, { "epoch": 7.5383360522022835, "grad_norm": 0.38763245940208435, "learning_rate": 0.000784053101258278, "loss": 0.1953, "num_input_tokens_seen": 99694432, "step": 46210 }, { "epoch": 7.539151712887438, "grad_norm": 0.21869000792503357, "learning_rate": 0.0007839945205713995, "loss": 0.1112, "num_input_tokens_seen": 99704608, "step": 46215 }, { "epoch": 7.539967373572594, "grad_norm": 0.013901392929255962, "learning_rate": 0.0007839359341290116, "loss": 0.1248, "num_input_tokens_seen": 99715616, "step": 46220 }, { "epoch": 7.540783034257749, "grad_norm": 0.058885861188173294, "learning_rate": 0.0007838773419323019, "loss": 0.0597, "num_input_tokens_seen": 99724832, "step": 46225 }, { "epoch": 7.541598694942904, "grad_norm": 0.012735147960484028, "learning_rate": 0.0007838187439824577, "loss": 0.0235, "num_input_tokens_seen": 99734976, "step": 46230 }, { "epoch": 7.5424143556280585, "grad_norm": 0.07323247194290161, "learning_rate": 0.0007837601402806666, "loss": 0.119, "num_input_tokens_seen": 99746208, "step": 46235 }, { "epoch": 7.543230016313213, "grad_norm": 0.016022782772779465, "learning_rate": 0.0007837015308281163, "loss": 0.0256, "num_input_tokens_seen": 99757632, "step": 46240 }, { "epoch": 7.544045676998369, "grad_norm": 0.0811619982123375, "learning_rate": 0.0007836429156259946, "loss": 0.0632, "num_input_tokens_seen": 99768032, "step": 46245 }, { "epoch": 7.544861337683524, "grad_norm": 0.4182024598121643, "learning_rate": 0.0007835842946754893, "loss": 0.1486, "num_input_tokens_seen": 99779136, "step": 46250 }, { "epoch": 7.545676998368679, "grad_norm": 0.34814903140068054, "learning_rate": 0.0007835256679777887, "loss": 0.1451, "num_input_tokens_seen": 99788576, "step": 46255 }, { "epoch": 7.5464926590538335, "grad_norm": 0.009438794106245041, "learning_rate": 0.0007834670355340805, "loss": 0.028, "num_input_tokens_seen": 99799040, "step": 46260 }, { "epoch": 7.547308319738988, "grad_norm": 0.12132467329502106, "learning_rate": 0.0007834083973455535, "loss": 0.132, "num_input_tokens_seen": 99809120, "step": 46265 }, { "epoch": 7.548123980424144, "grad_norm": 0.4579325020313263, "learning_rate": 0.0007833497534133955, "loss": 0.1226, "num_input_tokens_seen": 99821312, "step": 46270 }, { "epoch": 7.548939641109299, "grad_norm": 0.068083256483078, "learning_rate": 0.0007832911037387955, "loss": 0.0625, "num_input_tokens_seen": 99832032, "step": 46275 }, { "epoch": 7.549755301794454, "grad_norm": 0.20665737986564636, "learning_rate": 0.000783232448322942, "loss": 0.0803, "num_input_tokens_seen": 99843776, "step": 46280 }, { "epoch": 7.5505709624796085, "grad_norm": 0.06061796098947525, "learning_rate": 0.0007831737871670235, "loss": 0.1235, "num_input_tokens_seen": 99854208, "step": 46285 }, { "epoch": 7.551386623164763, "grad_norm": 0.010989098809659481, "learning_rate": 0.0007831151202722288, "loss": 0.0218, "num_input_tokens_seen": 99865280, "step": 46290 }, { "epoch": 7.552202283849918, "grad_norm": 0.019288230687379837, "learning_rate": 0.0007830564476397473, "loss": 0.0963, "num_input_tokens_seen": 99877440, "step": 46295 }, { "epoch": 7.553017944535073, "grad_norm": 0.2677210867404938, "learning_rate": 0.0007829977692707676, "loss": 0.0581, "num_input_tokens_seen": 99887488, "step": 46300 }, { "epoch": 7.553833605220229, "grad_norm": 0.23237259685993195, "learning_rate": 0.0007829390851664793, "loss": 0.1149, "num_input_tokens_seen": 99897856, "step": 46305 }, { "epoch": 7.554649265905383, "grad_norm": 0.10389093309640884, "learning_rate": 0.0007828803953280713, "loss": 0.0939, "num_input_tokens_seen": 99908768, "step": 46310 }, { "epoch": 7.555464926590538, "grad_norm": 0.1081535816192627, "learning_rate": 0.0007828216997567333, "loss": 0.022, "num_input_tokens_seen": 99920352, "step": 46315 }, { "epoch": 7.556280587275693, "grad_norm": 0.23186028003692627, "learning_rate": 0.0007827629984536548, "loss": 0.1178, "num_input_tokens_seen": 99930560, "step": 46320 }, { "epoch": 7.557096247960848, "grad_norm": 0.21652182936668396, "learning_rate": 0.0007827042914200254, "loss": 0.0323, "num_input_tokens_seen": 99940640, "step": 46325 }, { "epoch": 7.557911908646004, "grad_norm": 0.1966649442911148, "learning_rate": 0.000782645578657035, "loss": 0.0308, "num_input_tokens_seen": 99950880, "step": 46330 }, { "epoch": 7.558727569331158, "grad_norm": 0.32814842462539673, "learning_rate": 0.0007825868601658733, "loss": 0.1229, "num_input_tokens_seen": 99962688, "step": 46335 }, { "epoch": 7.559543230016313, "grad_norm": 0.07176913321018219, "learning_rate": 0.0007825281359477303, "loss": 0.0761, "num_input_tokens_seen": 99972736, "step": 46340 }, { "epoch": 7.560358890701468, "grad_norm": 0.06713598966598511, "learning_rate": 0.0007824694060037964, "loss": 0.0237, "num_input_tokens_seen": 99984096, "step": 46345 }, { "epoch": 7.561174551386623, "grad_norm": 0.03826012462377548, "learning_rate": 0.0007824106703352616, "loss": 0.0968, "num_input_tokens_seen": 99994048, "step": 46350 }, { "epoch": 7.561990212071779, "grad_norm": 0.012011098675429821, "learning_rate": 0.0007823519289433162, "loss": 0.0615, "num_input_tokens_seen": 100004928, "step": 46355 }, { "epoch": 7.562805872756933, "grad_norm": 0.023487579077482224, "learning_rate": 0.0007822931818291508, "loss": 0.0551, "num_input_tokens_seen": 100015520, "step": 46360 }, { "epoch": 7.563621533442088, "grad_norm": 0.020770564675331116, "learning_rate": 0.0007822344289939561, "loss": 0.1106, "num_input_tokens_seen": 100027776, "step": 46365 }, { "epoch": 7.564437194127243, "grad_norm": 0.05405575409531593, "learning_rate": 0.0007821756704389224, "loss": 0.0781, "num_input_tokens_seen": 100038656, "step": 46370 }, { "epoch": 7.565252854812398, "grad_norm": 0.03750795125961304, "learning_rate": 0.000782116906165241, "loss": 0.0835, "num_input_tokens_seen": 100048960, "step": 46375 }, { "epoch": 7.566068515497553, "grad_norm": 0.5017601847648621, "learning_rate": 0.0007820581361741025, "loss": 0.1731, "num_input_tokens_seen": 100059200, "step": 46380 }, { "epoch": 7.566884176182708, "grad_norm": 0.34836703538894653, "learning_rate": 0.0007819993604666982, "loss": 0.1753, "num_input_tokens_seen": 100070720, "step": 46385 }, { "epoch": 7.567699836867863, "grad_norm": 0.07578709721565247, "learning_rate": 0.0007819405790442189, "loss": 0.0609, "num_input_tokens_seen": 100081152, "step": 46390 }, { "epoch": 7.568515497553018, "grad_norm": 0.3667226731777191, "learning_rate": 0.0007818817919078562, "loss": 0.1054, "num_input_tokens_seen": 100092416, "step": 46395 }, { "epoch": 7.569331158238173, "grad_norm": 0.10475272685289383, "learning_rate": 0.0007818229990588013, "loss": 0.0613, "num_input_tokens_seen": 100102176, "step": 46400 }, { "epoch": 7.570146818923328, "grad_norm": 0.47740504145622253, "learning_rate": 0.000781764200498246, "loss": 0.2573, "num_input_tokens_seen": 100113216, "step": 46405 }, { "epoch": 7.5709624796084825, "grad_norm": 0.10070536285638809, "learning_rate": 0.0007817053962273817, "loss": 0.1274, "num_input_tokens_seen": 100122272, "step": 46410 }, { "epoch": 7.571778140293638, "grad_norm": 0.1765705645084381, "learning_rate": 0.0007816465862474, "loss": 0.1995, "num_input_tokens_seen": 100132064, "step": 46415 }, { "epoch": 7.572593800978793, "grad_norm": 0.36314818263053894, "learning_rate": 0.000781587770559493, "loss": 0.0926, "num_input_tokens_seen": 100141952, "step": 46420 }, { "epoch": 7.573409461663948, "grad_norm": 0.11624655872583389, "learning_rate": 0.0007815289491648527, "loss": 0.0752, "num_input_tokens_seen": 100153056, "step": 46425 }, { "epoch": 7.574225122349103, "grad_norm": 0.3514878451824188, "learning_rate": 0.000781470122064671, "loss": 0.1789, "num_input_tokens_seen": 100164416, "step": 46430 }, { "epoch": 7.575040783034257, "grad_norm": 0.26775190234184265, "learning_rate": 0.0007814112892601403, "loss": 0.0794, "num_input_tokens_seen": 100175424, "step": 46435 }, { "epoch": 7.575856443719413, "grad_norm": 0.6383694410324097, "learning_rate": 0.0007813524507524527, "loss": 0.1229, "num_input_tokens_seen": 100187168, "step": 46440 }, { "epoch": 7.576672104404568, "grad_norm": 0.03292429819703102, "learning_rate": 0.0007812936065428009, "loss": 0.1336, "num_input_tokens_seen": 100199488, "step": 46445 }, { "epoch": 7.577487765089723, "grad_norm": 0.011090737767517567, "learning_rate": 0.0007812347566323774, "loss": 0.0201, "num_input_tokens_seen": 100210560, "step": 46450 }, { "epoch": 7.578303425774878, "grad_norm": 0.022827552631497383, "learning_rate": 0.0007811759010223747, "loss": 0.1682, "num_input_tokens_seen": 100221088, "step": 46455 }, { "epoch": 7.579119086460032, "grad_norm": 0.4851168096065521, "learning_rate": 0.0007811170397139855, "loss": 0.0976, "num_input_tokens_seen": 100232640, "step": 46460 }, { "epoch": 7.579934747145187, "grad_norm": 0.22226828336715698, "learning_rate": 0.000781058172708403, "loss": 0.0737, "num_input_tokens_seen": 100244384, "step": 46465 }, { "epoch": 7.580750407830343, "grad_norm": 0.11311071366071701, "learning_rate": 0.00078099930000682, "loss": 0.097, "num_input_tokens_seen": 100255424, "step": 46470 }, { "epoch": 7.581566068515498, "grad_norm": 0.015537581406533718, "learning_rate": 0.0007809404216104299, "loss": 0.0125, "num_input_tokens_seen": 100266304, "step": 46475 }, { "epoch": 7.582381729200653, "grad_norm": 0.272030770778656, "learning_rate": 0.0007808815375204257, "loss": 0.0554, "num_input_tokens_seen": 100276768, "step": 46480 }, { "epoch": 7.583197389885807, "grad_norm": 0.15440763533115387, "learning_rate": 0.0007808226477380007, "loss": 0.0485, "num_input_tokens_seen": 100287264, "step": 46485 }, { "epoch": 7.584013050570962, "grad_norm": 0.03730657324194908, "learning_rate": 0.0007807637522643484, "loss": 0.0304, "num_input_tokens_seen": 100297856, "step": 46490 }, { "epoch": 7.584828711256117, "grad_norm": 0.009463442489504814, "learning_rate": 0.0007807048511006628, "loss": 0.0515, "num_input_tokens_seen": 100309632, "step": 46495 }, { "epoch": 7.585644371941273, "grad_norm": 0.32142892479896545, "learning_rate": 0.0007806459442481372, "loss": 0.0464, "num_input_tokens_seen": 100319808, "step": 46500 }, { "epoch": 7.5864600326264275, "grad_norm": 0.46607813239097595, "learning_rate": 0.0007805870317079654, "loss": 0.2082, "num_input_tokens_seen": 100329920, "step": 46505 }, { "epoch": 7.587275693311582, "grad_norm": 0.44509458541870117, "learning_rate": 0.0007805281134813416, "loss": 0.104, "num_input_tokens_seen": 100340704, "step": 46510 }, { "epoch": 7.588091353996737, "grad_norm": 0.07136111706495285, "learning_rate": 0.0007804691895694595, "loss": 0.0296, "num_input_tokens_seen": 100351744, "step": 46515 }, { "epoch": 7.588907014681892, "grad_norm": 0.041778646409511566, "learning_rate": 0.0007804102599735137, "loss": 0.1071, "num_input_tokens_seen": 100362880, "step": 46520 }, { "epoch": 7.589722675367048, "grad_norm": 0.20553387701511383, "learning_rate": 0.0007803513246946981, "loss": 0.1778, "num_input_tokens_seen": 100373984, "step": 46525 }, { "epoch": 7.5905383360522025, "grad_norm": 0.03869929164648056, "learning_rate": 0.0007802923837342072, "loss": 0.0146, "num_input_tokens_seen": 100385120, "step": 46530 }, { "epoch": 7.591353996737357, "grad_norm": 0.03399762883782387, "learning_rate": 0.0007802334370932357, "loss": 0.0689, "num_input_tokens_seen": 100394432, "step": 46535 }, { "epoch": 7.592169657422512, "grad_norm": 0.41761869192123413, "learning_rate": 0.0007801744847729781, "loss": 0.2001, "num_input_tokens_seen": 100405120, "step": 46540 }, { "epoch": 7.592985318107667, "grad_norm": 0.07752034813165665, "learning_rate": 0.0007801155267746291, "loss": 0.1413, "num_input_tokens_seen": 100416544, "step": 46545 }, { "epoch": 7.593800978792823, "grad_norm": 0.015127328224480152, "learning_rate": 0.0007800565630993834, "loss": 0.052, "num_input_tokens_seen": 100428352, "step": 46550 }, { "epoch": 7.5946166394779775, "grad_norm": 0.06631854176521301, "learning_rate": 0.0007799975937484365, "loss": 0.0397, "num_input_tokens_seen": 100439392, "step": 46555 }, { "epoch": 7.595432300163132, "grad_norm": 0.022747881710529327, "learning_rate": 0.000779938618722983, "loss": 0.136, "num_input_tokens_seen": 100449344, "step": 46560 }, { "epoch": 7.596247960848287, "grad_norm": 0.09240453690290451, "learning_rate": 0.0007798796380242183, "loss": 0.2199, "num_input_tokens_seen": 100461152, "step": 46565 }, { "epoch": 7.597063621533442, "grad_norm": 0.13927426934242249, "learning_rate": 0.0007798206516533377, "loss": 0.101, "num_input_tokens_seen": 100471776, "step": 46570 }, { "epoch": 7.597879282218597, "grad_norm": 0.06104928255081177, "learning_rate": 0.0007797616596115365, "loss": 0.0719, "num_input_tokens_seen": 100483904, "step": 46575 }, { "epoch": 7.598694942903752, "grad_norm": 0.05355094373226166, "learning_rate": 0.0007797026619000105, "loss": 0.0151, "num_input_tokens_seen": 100495264, "step": 46580 }, { "epoch": 7.599510603588907, "grad_norm": 0.011258815415203571, "learning_rate": 0.0007796436585199553, "loss": 0.0965, "num_input_tokens_seen": 100505248, "step": 46585 }, { "epoch": 7.600326264274062, "grad_norm": 0.014729495160281658, "learning_rate": 0.0007795846494725665, "loss": 0.0398, "num_input_tokens_seen": 100515360, "step": 46590 }, { "epoch": 7.601141924959217, "grad_norm": 0.3814273476600647, "learning_rate": 0.00077952563475904, "loss": 0.0654, "num_input_tokens_seen": 100526144, "step": 46595 }, { "epoch": 7.601957585644372, "grad_norm": 0.17271380126476288, "learning_rate": 0.000779466614380572, "loss": 0.0717, "num_input_tokens_seen": 100537184, "step": 46600 }, { "epoch": 7.602773246329527, "grad_norm": 0.016881173476576805, "learning_rate": 0.0007794075883383586, "loss": 0.0585, "num_input_tokens_seen": 100547840, "step": 46605 }, { "epoch": 7.603588907014682, "grad_norm": 0.008525685407221317, "learning_rate": 0.0007793485566335958, "loss": 0.034, "num_input_tokens_seen": 100557728, "step": 46610 }, { "epoch": 7.604404567699837, "grad_norm": 0.012353586032986641, "learning_rate": 0.0007792895192674802, "loss": 0.0378, "num_input_tokens_seen": 100569472, "step": 46615 }, { "epoch": 7.605220228384992, "grad_norm": 0.13547487556934357, "learning_rate": 0.0007792304762412084, "loss": 0.0542, "num_input_tokens_seen": 100580224, "step": 46620 }, { "epoch": 7.606035889070147, "grad_norm": 0.2483649104833603, "learning_rate": 0.0007791714275559765, "loss": 0.0562, "num_input_tokens_seen": 100591168, "step": 46625 }, { "epoch": 7.6068515497553015, "grad_norm": 0.06343470513820648, "learning_rate": 0.0007791123732129815, "loss": 0.0121, "num_input_tokens_seen": 100602688, "step": 46630 }, { "epoch": 7.607667210440457, "grad_norm": 0.4349709749221802, "learning_rate": 0.0007790533132134201, "loss": 0.164, "num_input_tokens_seen": 100614368, "step": 46635 }, { "epoch": 7.608482871125612, "grad_norm": 0.04472370073199272, "learning_rate": 0.0007789942475584894, "loss": 0.0267, "num_input_tokens_seen": 100625472, "step": 46640 }, { "epoch": 7.609298531810767, "grad_norm": 0.05435958877205849, "learning_rate": 0.0007789351762493865, "loss": 0.0888, "num_input_tokens_seen": 100634656, "step": 46645 }, { "epoch": 7.610114192495922, "grad_norm": 0.00428013363853097, "learning_rate": 0.0007788760992873083, "loss": 0.0134, "num_input_tokens_seen": 100643712, "step": 46650 }, { "epoch": 7.6109298531810765, "grad_norm": 0.7785499095916748, "learning_rate": 0.000778817016673452, "loss": 0.1868, "num_input_tokens_seen": 100653440, "step": 46655 }, { "epoch": 7.611745513866231, "grad_norm": 0.09897690266370773, "learning_rate": 0.0007787579284090154, "loss": 0.0262, "num_input_tokens_seen": 100664928, "step": 46660 }, { "epoch": 7.612561174551386, "grad_norm": 0.2544030249118805, "learning_rate": 0.0007786988344951956, "loss": 0.0521, "num_input_tokens_seen": 100675744, "step": 46665 }, { "epoch": 7.613376835236542, "grad_norm": 0.022761745378375053, "learning_rate": 0.0007786397349331904, "loss": 0.0737, "num_input_tokens_seen": 100686528, "step": 46670 }, { "epoch": 7.614192495921697, "grad_norm": 0.02007404901087284, "learning_rate": 0.0007785806297241976, "loss": 0.0652, "num_input_tokens_seen": 100697632, "step": 46675 }, { "epoch": 7.6150081566068515, "grad_norm": 0.14318294823169708, "learning_rate": 0.0007785215188694148, "loss": 0.1277, "num_input_tokens_seen": 100708864, "step": 46680 }, { "epoch": 7.615823817292006, "grad_norm": 0.07681535929441452, "learning_rate": 0.0007784624023700402, "loss": 0.1168, "num_input_tokens_seen": 100720096, "step": 46685 }, { "epoch": 7.616639477977161, "grad_norm": 0.06324903666973114, "learning_rate": 0.0007784032802272716, "loss": 0.0276, "num_input_tokens_seen": 100731392, "step": 46690 }, { "epoch": 7.617455138662317, "grad_norm": 0.009777977131307125, "learning_rate": 0.0007783441524423074, "loss": 0.1731, "num_input_tokens_seen": 100741344, "step": 46695 }, { "epoch": 7.618270799347472, "grad_norm": 0.43527597188949585, "learning_rate": 0.0007782850190163459, "loss": 0.0893, "num_input_tokens_seen": 100752704, "step": 46700 }, { "epoch": 7.6190864600326265, "grad_norm": 0.28343409299850464, "learning_rate": 0.0007782258799505855, "loss": 0.0635, "num_input_tokens_seen": 100764160, "step": 46705 }, { "epoch": 7.619902120717781, "grad_norm": 0.005726078990846872, "learning_rate": 0.0007781667352462245, "loss": 0.043, "num_input_tokens_seen": 100776224, "step": 46710 }, { "epoch": 7.620717781402936, "grad_norm": 0.5450302362442017, "learning_rate": 0.0007781075849044619, "loss": 0.2017, "num_input_tokens_seen": 100785888, "step": 46715 }, { "epoch": 7.621533442088092, "grad_norm": 0.00949555542320013, "learning_rate": 0.0007780484289264961, "loss": 0.2403, "num_input_tokens_seen": 100795552, "step": 46720 }, { "epoch": 7.622349102773247, "grad_norm": 0.6577156186103821, "learning_rate": 0.0007779892673135264, "loss": 0.1112, "num_input_tokens_seen": 100806208, "step": 46725 }, { "epoch": 7.623164763458401, "grad_norm": 0.006318395026028156, "learning_rate": 0.0007779301000667516, "loss": 0.1037, "num_input_tokens_seen": 100819232, "step": 46730 }, { "epoch": 7.623980424143556, "grad_norm": 0.19697102904319763, "learning_rate": 0.0007778709271873706, "loss": 0.0875, "num_input_tokens_seen": 100830176, "step": 46735 }, { "epoch": 7.624796084828711, "grad_norm": 0.05637812986969948, "learning_rate": 0.0007778117486765825, "loss": 0.0127, "num_input_tokens_seen": 100840384, "step": 46740 }, { "epoch": 7.625611745513866, "grad_norm": 0.017125973477959633, "learning_rate": 0.0007777525645355872, "loss": 0.1129, "num_input_tokens_seen": 100850624, "step": 46745 }, { "epoch": 7.626427406199021, "grad_norm": 0.20981204509735107, "learning_rate": 0.0007776933747655838, "loss": 0.0985, "num_input_tokens_seen": 100861792, "step": 46750 }, { "epoch": 7.627243066884176, "grad_norm": 0.09351412951946259, "learning_rate": 0.0007776341793677719, "loss": 0.0615, "num_input_tokens_seen": 100872736, "step": 46755 }, { "epoch": 7.628058727569331, "grad_norm": 0.21914109587669373, "learning_rate": 0.000777574978343351, "loss": 0.0739, "num_input_tokens_seen": 100881856, "step": 46760 }, { "epoch": 7.628874388254486, "grad_norm": 0.1745602935552597, "learning_rate": 0.000777515771693521, "loss": 0.0705, "num_input_tokens_seen": 100892480, "step": 46765 }, { "epoch": 7.629690048939641, "grad_norm": 0.027226507663726807, "learning_rate": 0.0007774565594194821, "loss": 0.0257, "num_input_tokens_seen": 100902048, "step": 46770 }, { "epoch": 7.630505709624796, "grad_norm": 0.0521843284368515, "learning_rate": 0.0007773973415224339, "loss": 0.0346, "num_input_tokens_seen": 100913472, "step": 46775 }, { "epoch": 7.631321370309951, "grad_norm": 0.006723497994244099, "learning_rate": 0.0007773381180035766, "loss": 0.1417, "num_input_tokens_seen": 100925248, "step": 46780 }, { "epoch": 7.632137030995106, "grad_norm": 0.011966727674007416, "learning_rate": 0.0007772788888641107, "loss": 0.0496, "num_input_tokens_seen": 100936064, "step": 46785 }, { "epoch": 7.632952691680261, "grad_norm": 0.3021274507045746, "learning_rate": 0.0007772196541052361, "loss": 0.0513, "num_input_tokens_seen": 100945920, "step": 46790 }, { "epoch": 7.633768352365416, "grad_norm": 0.1939060539007187, "learning_rate": 0.0007771604137281538, "loss": 0.1103, "num_input_tokens_seen": 100957344, "step": 46795 }, { "epoch": 7.634584013050571, "grad_norm": 0.02499699778854847, "learning_rate": 0.0007771011677340639, "loss": 0.0964, "num_input_tokens_seen": 100967328, "step": 46800 }, { "epoch": 7.635399673735726, "grad_norm": 0.06668586283922195, "learning_rate": 0.0007770419161241675, "loss": 0.0266, "num_input_tokens_seen": 100979040, "step": 46805 }, { "epoch": 7.636215334420881, "grad_norm": 0.22992080450057983, "learning_rate": 0.0007769826588996651, "loss": 0.1976, "num_input_tokens_seen": 100990272, "step": 46810 }, { "epoch": 7.637030995106036, "grad_norm": 0.01042146421968937, "learning_rate": 0.0007769233960617576, "loss": 0.1344, "num_input_tokens_seen": 101001792, "step": 46815 }, { "epoch": 7.637846655791191, "grad_norm": 0.037998393177986145, "learning_rate": 0.0007768641276116465, "loss": 0.2305, "num_input_tokens_seen": 101012800, "step": 46820 }, { "epoch": 7.638662316476346, "grad_norm": 0.10095963627099991, "learning_rate": 0.0007768048535505324, "loss": 0.0639, "num_input_tokens_seen": 101023936, "step": 46825 }, { "epoch": 7.6394779771615005, "grad_norm": 0.21788924932479858, "learning_rate": 0.0007767455738796169, "loss": 0.1821, "num_input_tokens_seen": 101035104, "step": 46830 }, { "epoch": 7.640293637846656, "grad_norm": 0.016058048233389854, "learning_rate": 0.0007766862886001011, "loss": 0.12, "num_input_tokens_seen": 101047520, "step": 46835 }, { "epoch": 7.641109298531811, "grad_norm": 0.04772372916340828, "learning_rate": 0.0007766269977131868, "loss": 0.0431, "num_input_tokens_seen": 101058336, "step": 46840 }, { "epoch": 7.641924959216966, "grad_norm": 0.1734350472688675, "learning_rate": 0.0007765677012200753, "loss": 0.1866, "num_input_tokens_seen": 101069536, "step": 46845 }, { "epoch": 7.642740619902121, "grad_norm": 0.04860033467411995, "learning_rate": 0.0007765083991219688, "loss": 0.149, "num_input_tokens_seen": 101081056, "step": 46850 }, { "epoch": 7.643556280587275, "grad_norm": 0.11690030992031097, "learning_rate": 0.0007764490914200686, "loss": 0.053, "num_input_tokens_seen": 101092448, "step": 46855 }, { "epoch": 7.64437194127243, "grad_norm": 0.022356640547513962, "learning_rate": 0.0007763897781155769, "loss": 0.0461, "num_input_tokens_seen": 101101888, "step": 46860 }, { "epoch": 7.645187601957586, "grad_norm": 0.09663122892379761, "learning_rate": 0.0007763304592096956, "loss": 0.0545, "num_input_tokens_seen": 101112160, "step": 46865 }, { "epoch": 7.646003262642741, "grad_norm": 0.12149572372436523, "learning_rate": 0.0007762711347036273, "loss": 0.0627, "num_input_tokens_seen": 101123072, "step": 46870 }, { "epoch": 7.646818923327896, "grad_norm": 0.1723361313343048, "learning_rate": 0.0007762118045985738, "loss": 0.0349, "num_input_tokens_seen": 101133056, "step": 46875 }, { "epoch": 7.64763458401305, "grad_norm": 0.1721801608800888, "learning_rate": 0.0007761524688957377, "loss": 0.09, "num_input_tokens_seen": 101144928, "step": 46880 }, { "epoch": 7.648450244698205, "grad_norm": 0.3012996315956116, "learning_rate": 0.0007760931275963215, "loss": 0.1163, "num_input_tokens_seen": 101155808, "step": 46885 }, { "epoch": 7.649265905383361, "grad_norm": 0.4031534194946289, "learning_rate": 0.0007760337807015276, "loss": 0.1555, "num_input_tokens_seen": 101166496, "step": 46890 }, { "epoch": 7.650081566068516, "grad_norm": 0.07599641382694244, "learning_rate": 0.0007759744282125593, "loss": 0.0279, "num_input_tokens_seen": 101176544, "step": 46895 }, { "epoch": 7.650897226753671, "grad_norm": 0.025272149592638016, "learning_rate": 0.000775915070130619, "loss": 0.0345, "num_input_tokens_seen": 101186912, "step": 46900 }, { "epoch": 7.651712887438825, "grad_norm": 0.027811110019683838, "learning_rate": 0.0007758557064569096, "loss": 0.1065, "num_input_tokens_seen": 101197408, "step": 46905 }, { "epoch": 7.65252854812398, "grad_norm": 0.057040099054574966, "learning_rate": 0.0007757963371926346, "loss": 0.0845, "num_input_tokens_seen": 101208768, "step": 46910 }, { "epoch": 7.653344208809135, "grad_norm": 0.11778818815946579, "learning_rate": 0.000775736962338997, "loss": 0.0857, "num_input_tokens_seen": 101220768, "step": 46915 }, { "epoch": 7.654159869494291, "grad_norm": 0.38019445538520813, "learning_rate": 0.0007756775818971998, "loss": 0.068, "num_input_tokens_seen": 101231552, "step": 46920 }, { "epoch": 7.6549755301794455, "grad_norm": 0.461713582277298, "learning_rate": 0.0007756181958684467, "loss": 0.0417, "num_input_tokens_seen": 101242752, "step": 46925 }, { "epoch": 7.6557911908646, "grad_norm": 0.01572607085108757, "learning_rate": 0.0007755588042539414, "loss": 0.0484, "num_input_tokens_seen": 101252832, "step": 46930 }, { "epoch": 7.656606851549755, "grad_norm": 0.7450742125511169, "learning_rate": 0.0007754994070548873, "loss": 0.1095, "num_input_tokens_seen": 101264704, "step": 46935 }, { "epoch": 7.65742251223491, "grad_norm": 0.32260194420814514, "learning_rate": 0.0007754400042724881, "loss": 0.1815, "num_input_tokens_seen": 101276256, "step": 46940 }, { "epoch": 7.658238172920065, "grad_norm": 0.5089951753616333, "learning_rate": 0.0007753805959079481, "loss": 0.0504, "num_input_tokens_seen": 101287136, "step": 46945 }, { "epoch": 7.6590538336052205, "grad_norm": 0.19943107664585114, "learning_rate": 0.0007753211819624706, "loss": 0.0299, "num_input_tokens_seen": 101297664, "step": 46950 }, { "epoch": 7.659869494290375, "grad_norm": 0.006044904701411724, "learning_rate": 0.0007752617624372602, "loss": 0.0822, "num_input_tokens_seen": 101309024, "step": 46955 }, { "epoch": 7.66068515497553, "grad_norm": 0.29377782344818115, "learning_rate": 0.000775202337333521, "loss": 0.0779, "num_input_tokens_seen": 101319808, "step": 46960 }, { "epoch": 7.661500815660685, "grad_norm": 0.3126506805419922, "learning_rate": 0.0007751429066524575, "loss": 0.1549, "num_input_tokens_seen": 101330496, "step": 46965 }, { "epoch": 7.66231647634584, "grad_norm": 0.1197192594408989, "learning_rate": 0.0007750834703952738, "loss": 0.0806, "num_input_tokens_seen": 101341856, "step": 46970 }, { "epoch": 7.6631321370309955, "grad_norm": 0.10566963255405426, "learning_rate": 0.0007750240285631745, "loss": 0.092, "num_input_tokens_seen": 101353024, "step": 46975 }, { "epoch": 7.66394779771615, "grad_norm": 0.08385958522558212, "learning_rate": 0.0007749645811573646, "loss": 0.161, "num_input_tokens_seen": 101364256, "step": 46980 }, { "epoch": 7.664763458401305, "grad_norm": 0.17057965695858002, "learning_rate": 0.0007749051281790484, "loss": 0.1226, "num_input_tokens_seen": 101374496, "step": 46985 }, { "epoch": 7.66557911908646, "grad_norm": 0.42451468110084534, "learning_rate": 0.0007748456696294312, "loss": 0.0516, "num_input_tokens_seen": 101385024, "step": 46990 }, { "epoch": 7.666394779771615, "grad_norm": 0.12065618485212326, "learning_rate": 0.0007747862055097179, "loss": 0.0402, "num_input_tokens_seen": 101394944, "step": 46995 }, { "epoch": 7.6672104404567705, "grad_norm": 0.03171085938811302, "learning_rate": 0.0007747267358211135, "loss": 0.0431, "num_input_tokens_seen": 101405760, "step": 47000 }, { "epoch": 7.668026101141925, "grad_norm": 0.0206484105437994, "learning_rate": 0.0007746672605648231, "loss": 0.0583, "num_input_tokens_seen": 101417088, "step": 47005 }, { "epoch": 7.66884176182708, "grad_norm": 0.17164865136146545, "learning_rate": 0.0007746077797420524, "loss": 0.051, "num_input_tokens_seen": 101427232, "step": 47010 }, { "epoch": 7.669657422512235, "grad_norm": 0.010221361182630062, "learning_rate": 0.0007745482933540067, "loss": 0.1174, "num_input_tokens_seen": 101437536, "step": 47015 }, { "epoch": 7.67047308319739, "grad_norm": 0.09456347674131393, "learning_rate": 0.0007744888014018914, "loss": 0.2015, "num_input_tokens_seen": 101448256, "step": 47020 }, { "epoch": 7.671288743882545, "grad_norm": 0.143135666847229, "learning_rate": 0.0007744293038869125, "loss": 0.1064, "num_input_tokens_seen": 101459424, "step": 47025 }, { "epoch": 7.672104404567699, "grad_norm": 0.3407108187675476, "learning_rate": 0.0007743698008102755, "loss": 0.2028, "num_input_tokens_seen": 101469920, "step": 47030 }, { "epoch": 7.672920065252855, "grad_norm": 0.3655891418457031, "learning_rate": 0.0007743102921731864, "loss": 0.2511, "num_input_tokens_seen": 101480608, "step": 47035 }, { "epoch": 7.67373572593801, "grad_norm": 0.04168061167001724, "learning_rate": 0.0007742507779768513, "loss": 0.1244, "num_input_tokens_seen": 101491744, "step": 47040 }, { "epoch": 7.674551386623165, "grad_norm": 0.0300362017005682, "learning_rate": 0.0007741912582224764, "loss": 0.1565, "num_input_tokens_seen": 101502528, "step": 47045 }, { "epoch": 7.6753670473083195, "grad_norm": 0.027343744412064552, "learning_rate": 0.0007741317329112675, "loss": 0.0859, "num_input_tokens_seen": 101512992, "step": 47050 }, { "epoch": 7.676182707993474, "grad_norm": 0.21833541989326477, "learning_rate": 0.0007740722020444315, "loss": 0.0506, "num_input_tokens_seen": 101523488, "step": 47055 }, { "epoch": 7.67699836867863, "grad_norm": 0.09772036224603653, "learning_rate": 0.0007740126656231746, "loss": 0.1421, "num_input_tokens_seen": 101533856, "step": 47060 }, { "epoch": 7.677814029363785, "grad_norm": 0.2808668613433838, "learning_rate": 0.0007739531236487034, "loss": 0.0411, "num_input_tokens_seen": 101544992, "step": 47065 }, { "epoch": 7.67862969004894, "grad_norm": 0.15837228298187256, "learning_rate": 0.0007738935761222247, "loss": 0.1391, "num_input_tokens_seen": 101554720, "step": 47070 }, { "epoch": 7.6794453507340945, "grad_norm": 0.37423115968704224, "learning_rate": 0.0007738340230449451, "loss": 0.1226, "num_input_tokens_seen": 101566080, "step": 47075 }, { "epoch": 7.680261011419249, "grad_norm": 0.09489347785711288, "learning_rate": 0.0007737744644180718, "loss": 0.0851, "num_input_tokens_seen": 101577920, "step": 47080 }, { "epoch": 7.681076672104405, "grad_norm": 0.038735806941986084, "learning_rate": 0.0007737149002428114, "loss": 0.0667, "num_input_tokens_seen": 101589280, "step": 47085 }, { "epoch": 7.68189233278956, "grad_norm": 0.034815721213817596, "learning_rate": 0.0007736553305203715, "loss": 0.0745, "num_input_tokens_seen": 101598912, "step": 47090 }, { "epoch": 7.682707993474715, "grad_norm": 0.11853384971618652, "learning_rate": 0.0007735957552519592, "loss": 0.0449, "num_input_tokens_seen": 101609792, "step": 47095 }, { "epoch": 7.6835236541598695, "grad_norm": 0.33795544505119324, "learning_rate": 0.0007735361744387818, "loss": 0.1337, "num_input_tokens_seen": 101619328, "step": 47100 }, { "epoch": 7.684339314845024, "grad_norm": 0.025622496381402016, "learning_rate": 0.0007734765880820468, "loss": 0.0879, "num_input_tokens_seen": 101628896, "step": 47105 }, { "epoch": 7.685154975530179, "grad_norm": 0.03628996014595032, "learning_rate": 0.0007734169961829618, "loss": 0.1587, "num_input_tokens_seen": 101640352, "step": 47110 }, { "epoch": 7.685970636215334, "grad_norm": 0.06671030074357986, "learning_rate": 0.0007733573987427346, "loss": 0.1377, "num_input_tokens_seen": 101650240, "step": 47115 }, { "epoch": 7.68678629690049, "grad_norm": 0.19347085058689117, "learning_rate": 0.0007732977957625729, "loss": 0.0366, "num_input_tokens_seen": 101660928, "step": 47120 }, { "epoch": 7.6876019575856445, "grad_norm": 0.296563059091568, "learning_rate": 0.0007732381872436846, "loss": 0.1826, "num_input_tokens_seen": 101672064, "step": 47125 }, { "epoch": 7.688417618270799, "grad_norm": 0.07601918280124664, "learning_rate": 0.0007731785731872778, "loss": 0.0562, "num_input_tokens_seen": 101684032, "step": 47130 }, { "epoch": 7.689233278955954, "grad_norm": 0.013964531011879444, "learning_rate": 0.0007731189535945609, "loss": 0.0285, "num_input_tokens_seen": 101693568, "step": 47135 }, { "epoch": 7.690048939641109, "grad_norm": 0.01385793648660183, "learning_rate": 0.0007730593284667416, "loss": 0.0476, "num_input_tokens_seen": 101705440, "step": 47140 }, { "epoch": 7.690864600326265, "grad_norm": 0.7446133494377136, "learning_rate": 0.0007729996978050287, "loss": 0.1533, "num_input_tokens_seen": 101716192, "step": 47145 }, { "epoch": 7.691680261011419, "grad_norm": 0.46772652864456177, "learning_rate": 0.0007729400616106308, "loss": 0.1845, "num_input_tokens_seen": 101727488, "step": 47150 }, { "epoch": 7.692495921696574, "grad_norm": 0.13377557694911957, "learning_rate": 0.0007728804198847561, "loss": 0.1294, "num_input_tokens_seen": 101738592, "step": 47155 }, { "epoch": 7.693311582381729, "grad_norm": 0.024528253823518753, "learning_rate": 0.0007728207726286136, "loss": 0.0417, "num_input_tokens_seen": 101750432, "step": 47160 }, { "epoch": 7.694127243066884, "grad_norm": 0.1424509435892105, "learning_rate": 0.000772761119843412, "loss": 0.0328, "num_input_tokens_seen": 101760064, "step": 47165 }, { "epoch": 7.69494290375204, "grad_norm": 0.33052077889442444, "learning_rate": 0.0007727014615303602, "loss": 0.1293, "num_input_tokens_seen": 101770304, "step": 47170 }, { "epoch": 7.695758564437194, "grad_norm": 0.04105739668011665, "learning_rate": 0.0007726417976906674, "loss": 0.1102, "num_input_tokens_seen": 101781216, "step": 47175 }, { "epoch": 7.696574225122349, "grad_norm": 0.3748859763145447, "learning_rate": 0.0007725821283255427, "loss": 0.0703, "num_input_tokens_seen": 101792256, "step": 47180 }, { "epoch": 7.697389885807504, "grad_norm": 0.05112401023507118, "learning_rate": 0.0007725224534361955, "loss": 0.0304, "num_input_tokens_seen": 101803296, "step": 47185 }, { "epoch": 7.698205546492659, "grad_norm": 0.0364927276968956, "learning_rate": 0.000772462773023835, "loss": 0.0724, "num_input_tokens_seen": 101813984, "step": 47190 }, { "epoch": 7.699021207177814, "grad_norm": 0.1940586417913437, "learning_rate": 0.0007724030870896707, "loss": 0.0448, "num_input_tokens_seen": 101824864, "step": 47195 }, { "epoch": 7.699836867862969, "grad_norm": 0.11837819963693619, "learning_rate": 0.0007723433956349123, "loss": 0.1345, "num_input_tokens_seen": 101834144, "step": 47200 }, { "epoch": 7.700652528548124, "grad_norm": 0.10171327739953995, "learning_rate": 0.0007722836986607696, "loss": 0.0754, "num_input_tokens_seen": 101844096, "step": 47205 }, { "epoch": 7.701468189233279, "grad_norm": 0.1458754539489746, "learning_rate": 0.000772223996168452, "loss": 0.0333, "num_input_tokens_seen": 101854240, "step": 47210 }, { "epoch": 7.702283849918434, "grad_norm": 0.21040485799312592, "learning_rate": 0.0007721642881591701, "loss": 0.0487, "num_input_tokens_seen": 101864256, "step": 47215 }, { "epoch": 7.703099510603589, "grad_norm": 0.39518624544143677, "learning_rate": 0.0007721045746341335, "loss": 0.0898, "num_input_tokens_seen": 101875392, "step": 47220 }, { "epoch": 7.7039151712887435, "grad_norm": 0.25650304555892944, "learning_rate": 0.0007720448555945527, "loss": 0.2285, "num_input_tokens_seen": 101886848, "step": 47225 }, { "epoch": 7.704730831973899, "grad_norm": 0.07399705797433853, "learning_rate": 0.0007719851310416376, "loss": 0.0294, "num_input_tokens_seen": 101896896, "step": 47230 }, { "epoch": 7.705546492659054, "grad_norm": 0.031412750482559204, "learning_rate": 0.0007719254009765988, "loss": 0.1448, "num_input_tokens_seen": 101909920, "step": 47235 }, { "epoch": 7.706362153344209, "grad_norm": 0.023176098242402077, "learning_rate": 0.0007718656654006469, "loss": 0.0513, "num_input_tokens_seen": 101920800, "step": 47240 }, { "epoch": 7.707177814029364, "grad_norm": 0.12271186709403992, "learning_rate": 0.0007718059243149921, "loss": 0.0286, "num_input_tokens_seen": 101930688, "step": 47245 }, { "epoch": 7.7079934747145185, "grad_norm": 0.07300354540348053, "learning_rate": 0.0007717461777208458, "loss": 0.0486, "num_input_tokens_seen": 101942144, "step": 47250 }, { "epoch": 7.708809135399674, "grad_norm": 0.07152844220399857, "learning_rate": 0.0007716864256194182, "loss": 0.1154, "num_input_tokens_seen": 101952320, "step": 47255 }, { "epoch": 7.709624796084829, "grad_norm": 0.056217651814222336, "learning_rate": 0.0007716266680119207, "loss": 0.1459, "num_input_tokens_seen": 101963008, "step": 47260 }, { "epoch": 7.710440456769984, "grad_norm": 0.2466568946838379, "learning_rate": 0.0007715669048995641, "loss": 0.0772, "num_input_tokens_seen": 101973152, "step": 47265 }, { "epoch": 7.711256117455139, "grad_norm": 0.5311647653579712, "learning_rate": 0.0007715071362835597, "loss": 0.0632, "num_input_tokens_seen": 101983808, "step": 47270 }, { "epoch": 7.712071778140293, "grad_norm": 0.026713168248534203, "learning_rate": 0.0007714473621651188, "loss": 0.0666, "num_input_tokens_seen": 101995072, "step": 47275 }, { "epoch": 7.712887438825448, "grad_norm": 0.015445607714354992, "learning_rate": 0.0007713875825454526, "loss": 0.0326, "num_input_tokens_seen": 102005056, "step": 47280 }, { "epoch": 7.713703099510604, "grad_norm": 0.015405201353132725, "learning_rate": 0.0007713277974257729, "loss": 0.0928, "num_input_tokens_seen": 102015488, "step": 47285 }, { "epoch": 7.714518760195759, "grad_norm": 0.3714098632335663, "learning_rate": 0.0007712680068072911, "loss": 0.1681, "num_input_tokens_seen": 102027072, "step": 47290 }, { "epoch": 7.715334420880914, "grad_norm": 0.012258387170732021, "learning_rate": 0.000771208210691219, "loss": 0.0584, "num_input_tokens_seen": 102035840, "step": 47295 }, { "epoch": 7.716150081566068, "grad_norm": 0.06253540515899658, "learning_rate": 0.0007711484090787686, "loss": 0.0813, "num_input_tokens_seen": 102046624, "step": 47300 }, { "epoch": 7.716965742251223, "grad_norm": 0.43035510182380676, "learning_rate": 0.0007710886019711516, "loss": 0.1969, "num_input_tokens_seen": 102058272, "step": 47305 }, { "epoch": 7.717781402936378, "grad_norm": 0.11183225363492966, "learning_rate": 0.0007710287893695803, "loss": 0.1048, "num_input_tokens_seen": 102069632, "step": 47310 }, { "epoch": 7.718597063621534, "grad_norm": 0.019517144188284874, "learning_rate": 0.0007709689712752666, "loss": 0.1171, "num_input_tokens_seen": 102081152, "step": 47315 }, { "epoch": 7.719412724306689, "grad_norm": 0.2773378789424896, "learning_rate": 0.000770909147689423, "loss": 0.0584, "num_input_tokens_seen": 102091328, "step": 47320 }, { "epoch": 7.720228384991843, "grad_norm": 0.6093724966049194, "learning_rate": 0.000770849318613262, "loss": 0.1519, "num_input_tokens_seen": 102102144, "step": 47325 }, { "epoch": 7.721044045676998, "grad_norm": 0.054318930953741074, "learning_rate": 0.0007707894840479957, "loss": 0.0146, "num_input_tokens_seen": 102111616, "step": 47330 }, { "epoch": 7.721859706362153, "grad_norm": 0.01822144165635109, "learning_rate": 0.0007707296439948372, "loss": 0.1106, "num_input_tokens_seen": 102122784, "step": 47335 }, { "epoch": 7.722675367047309, "grad_norm": 0.31291964650154114, "learning_rate": 0.0007706697984549988, "loss": 0.1095, "num_input_tokens_seen": 102134464, "step": 47340 }, { "epoch": 7.7234910277324635, "grad_norm": 0.5163543224334717, "learning_rate": 0.0007706099474296938, "loss": 0.3724, "num_input_tokens_seen": 102146240, "step": 47345 }, { "epoch": 7.724306688417618, "grad_norm": 0.030911464244127274, "learning_rate": 0.0007705500909201349, "loss": 0.023, "num_input_tokens_seen": 102156448, "step": 47350 }, { "epoch": 7.725122349102773, "grad_norm": 0.15409010648727417, "learning_rate": 0.0007704902289275351, "loss": 0.1949, "num_input_tokens_seen": 102165792, "step": 47355 }, { "epoch": 7.725938009787928, "grad_norm": 0.05173174664378166, "learning_rate": 0.0007704303614531076, "loss": 0.0578, "num_input_tokens_seen": 102175520, "step": 47360 }, { "epoch": 7.726753670473083, "grad_norm": 0.1199563592672348, "learning_rate": 0.0007703704884980659, "loss": 0.0457, "num_input_tokens_seen": 102186016, "step": 47365 }, { "epoch": 7.7275693311582385, "grad_norm": 0.10026361793279648, "learning_rate": 0.0007703106100636233, "loss": 0.1612, "num_input_tokens_seen": 102196864, "step": 47370 }, { "epoch": 7.728384991843393, "grad_norm": 0.022796250879764557, "learning_rate": 0.0007702507261509932, "loss": 0.0407, "num_input_tokens_seen": 102207232, "step": 47375 }, { "epoch": 7.729200652528548, "grad_norm": 0.45962539315223694, "learning_rate": 0.000770190836761389, "loss": 0.1316, "num_input_tokens_seen": 102216768, "step": 47380 }, { "epoch": 7.730016313213703, "grad_norm": 0.09081903845071793, "learning_rate": 0.0007701309418960252, "loss": 0.0374, "num_input_tokens_seen": 102227808, "step": 47385 }, { "epoch": 7.730831973898858, "grad_norm": 0.065775066614151, "learning_rate": 0.000770071041556115, "loss": 0.0456, "num_input_tokens_seen": 102238464, "step": 47390 }, { "epoch": 7.731647634584013, "grad_norm": 0.11152444779872894, "learning_rate": 0.0007700111357428724, "loss": 0.078, "num_input_tokens_seen": 102249088, "step": 47395 }, { "epoch": 7.732463295269168, "grad_norm": 0.5276990532875061, "learning_rate": 0.0007699512244575118, "loss": 0.1529, "num_input_tokens_seen": 102260672, "step": 47400 }, { "epoch": 7.733278955954323, "grad_norm": 0.03335936740040779, "learning_rate": 0.0007698913077012471, "loss": 0.0403, "num_input_tokens_seen": 102271040, "step": 47405 }, { "epoch": 7.734094616639478, "grad_norm": 0.0168902650475502, "learning_rate": 0.0007698313854752925, "loss": 0.0903, "num_input_tokens_seen": 102281536, "step": 47410 }, { "epoch": 7.734910277324633, "grad_norm": 0.05835627391934395, "learning_rate": 0.0007697714577808627, "loss": 0.0794, "num_input_tokens_seen": 102292832, "step": 47415 }, { "epoch": 7.735725938009788, "grad_norm": 0.3234997093677521, "learning_rate": 0.0007697115246191723, "loss": 0.1273, "num_input_tokens_seen": 102303616, "step": 47420 }, { "epoch": 7.736541598694943, "grad_norm": 0.24532905220985413, "learning_rate": 0.0007696515859914355, "loss": 0.1266, "num_input_tokens_seen": 102313312, "step": 47425 }, { "epoch": 7.737357259380098, "grad_norm": 0.06934057921171188, "learning_rate": 0.0007695916418988672, "loss": 0.0593, "num_input_tokens_seen": 102324160, "step": 47430 }, { "epoch": 7.738172920065253, "grad_norm": 0.2691959738731384, "learning_rate": 0.0007695316923426823, "loss": 0.0622, "num_input_tokens_seen": 102336480, "step": 47435 }, { "epoch": 7.738988580750408, "grad_norm": 0.31702467799186707, "learning_rate": 0.0007694717373240957, "loss": 0.0822, "num_input_tokens_seen": 102346656, "step": 47440 }, { "epoch": 7.739804241435563, "grad_norm": 0.17968790233135223, "learning_rate": 0.0007694117768443225, "loss": 0.0793, "num_input_tokens_seen": 102357472, "step": 47445 }, { "epoch": 7.740619902120718, "grad_norm": 0.29459330439567566, "learning_rate": 0.0007693518109045779, "loss": 0.0529, "num_input_tokens_seen": 102368128, "step": 47450 }, { "epoch": 7.741435562805873, "grad_norm": 0.008404044434428215, "learning_rate": 0.0007692918395060772, "loss": 0.0677, "num_input_tokens_seen": 102378560, "step": 47455 }, { "epoch": 7.742251223491028, "grad_norm": 0.5222642421722412, "learning_rate": 0.0007692318626500357, "loss": 0.1182, "num_input_tokens_seen": 102390272, "step": 47460 }, { "epoch": 7.743066884176183, "grad_norm": 0.2248661071062088, "learning_rate": 0.000769171880337669, "loss": 0.1292, "num_input_tokens_seen": 102401504, "step": 47465 }, { "epoch": 7.7438825448613375, "grad_norm": 0.17313404381275177, "learning_rate": 0.0007691118925701927, "loss": 0.0591, "num_input_tokens_seen": 102411424, "step": 47470 }, { "epoch": 7.744698205546492, "grad_norm": 0.05929055064916611, "learning_rate": 0.0007690518993488225, "loss": 0.2708, "num_input_tokens_seen": 102422080, "step": 47475 }, { "epoch": 7.745513866231647, "grad_norm": 0.27385184168815613, "learning_rate": 0.0007689919006747741, "loss": 0.0374, "num_input_tokens_seen": 102432160, "step": 47480 }, { "epoch": 7.746329526916803, "grad_norm": 0.22686569392681122, "learning_rate": 0.0007689318965492637, "loss": 0.0695, "num_input_tokens_seen": 102441344, "step": 47485 }, { "epoch": 7.747145187601958, "grad_norm": 0.09095137566328049, "learning_rate": 0.0007688718869735072, "loss": 0.041, "num_input_tokens_seen": 102452192, "step": 47490 }, { "epoch": 7.7479608482871125, "grad_norm": 0.04118858277797699, "learning_rate": 0.0007688118719487209, "loss": 0.1067, "num_input_tokens_seen": 102463136, "step": 47495 }, { "epoch": 7.748776508972267, "grad_norm": 0.022132527083158493, "learning_rate": 0.000768751851476121, "loss": 0.096, "num_input_tokens_seen": 102473088, "step": 47500 }, { "epoch": 7.749592169657422, "grad_norm": 0.08129636198282242, "learning_rate": 0.0007686918255569238, "loss": 0.1464, "num_input_tokens_seen": 102483872, "step": 47505 }, { "epoch": 7.750407830342578, "grad_norm": 0.055905357003211975, "learning_rate": 0.000768631794192346, "loss": 0.1445, "num_input_tokens_seen": 102494176, "step": 47510 }, { "epoch": 7.751223491027733, "grad_norm": 0.03258087858557701, "learning_rate": 0.0007685717573836041, "loss": 0.0445, "num_input_tokens_seen": 102505568, "step": 47515 }, { "epoch": 7.7520391517128875, "grad_norm": 0.23776917159557343, "learning_rate": 0.0007685117151319148, "loss": 0.034, "num_input_tokens_seen": 102516320, "step": 47520 }, { "epoch": 7.752854812398042, "grad_norm": 0.04038042947649956, "learning_rate": 0.000768451667438495, "loss": 0.0592, "num_input_tokens_seen": 102527424, "step": 47525 }, { "epoch": 7.753670473083197, "grad_norm": 0.010090802796185017, "learning_rate": 0.0007683916143045615, "loss": 0.0138, "num_input_tokens_seen": 102537504, "step": 47530 }, { "epoch": 7.754486133768353, "grad_norm": 0.4297090172767639, "learning_rate": 0.0007683315557313315, "loss": 0.2149, "num_input_tokens_seen": 102549088, "step": 47535 }, { "epoch": 7.755301794453508, "grad_norm": 0.06765374541282654, "learning_rate": 0.0007682714917200222, "loss": 0.1198, "num_input_tokens_seen": 102560544, "step": 47540 }, { "epoch": 7.7561174551386625, "grad_norm": 0.07721754163503647, "learning_rate": 0.0007682114222718507, "loss": 0.0304, "num_input_tokens_seen": 102571680, "step": 47545 }, { "epoch": 7.756933115823817, "grad_norm": 0.42118704319000244, "learning_rate": 0.0007681513473880345, "loss": 0.1594, "num_input_tokens_seen": 102583136, "step": 47550 }, { "epoch": 7.757748776508972, "grad_norm": 0.0051977066323161125, "learning_rate": 0.000768091267069791, "loss": 0.0355, "num_input_tokens_seen": 102593440, "step": 47555 }, { "epoch": 7.758564437194127, "grad_norm": 0.058405227959156036, "learning_rate": 0.000768031181318338, "loss": 0.0558, "num_input_tokens_seen": 102603680, "step": 47560 }, { "epoch": 7.759380097879282, "grad_norm": 0.18608562648296356, "learning_rate": 0.000767971090134893, "loss": 0.0652, "num_input_tokens_seen": 102615008, "step": 47565 }, { "epoch": 7.760195758564437, "grad_norm": 0.008816501125693321, "learning_rate": 0.0007679109935206741, "loss": 0.0722, "num_input_tokens_seen": 102626432, "step": 47570 }, { "epoch": 7.761011419249592, "grad_norm": 0.08861344307661057, "learning_rate": 0.0007678508914768989, "loss": 0.0584, "num_input_tokens_seen": 102637056, "step": 47575 }, { "epoch": 7.761827079934747, "grad_norm": 0.021900072693824768, "learning_rate": 0.0007677907840047855, "loss": 0.1134, "num_input_tokens_seen": 102646720, "step": 47580 }, { "epoch": 7.762642740619902, "grad_norm": 0.10574264079332352, "learning_rate": 0.0007677306711055523, "loss": 0.0704, "num_input_tokens_seen": 102657120, "step": 47585 }, { "epoch": 7.763458401305057, "grad_norm": 0.0327899195253849, "learning_rate": 0.0007676705527804173, "loss": 0.2082, "num_input_tokens_seen": 102669024, "step": 47590 }, { "epoch": 7.764274061990212, "grad_norm": 0.0055746715515851974, "learning_rate": 0.000767610429030599, "loss": 0.0198, "num_input_tokens_seen": 102680768, "step": 47595 }, { "epoch": 7.765089722675367, "grad_norm": 0.21290093660354614, "learning_rate": 0.0007675502998573159, "loss": 0.1657, "num_input_tokens_seen": 102690976, "step": 47600 }, { "epoch": 7.765905383360522, "grad_norm": 0.06461843103170395, "learning_rate": 0.0007674901652617865, "loss": 0.0532, "num_input_tokens_seen": 102701312, "step": 47605 }, { "epoch": 7.766721044045677, "grad_norm": 0.06886417418718338, "learning_rate": 0.0007674300252452297, "loss": 0.1215, "num_input_tokens_seen": 102713536, "step": 47610 }, { "epoch": 7.767536704730832, "grad_norm": 0.13266102969646454, "learning_rate": 0.000767369879808864, "loss": 0.0318, "num_input_tokens_seen": 102725760, "step": 47615 }, { "epoch": 7.768352365415987, "grad_norm": 0.018810324370861053, "learning_rate": 0.0007673097289539086, "loss": 0.0288, "num_input_tokens_seen": 102736992, "step": 47620 }, { "epoch": 7.769168026101142, "grad_norm": 0.3510235846042633, "learning_rate": 0.0007672495726815825, "loss": 0.1036, "num_input_tokens_seen": 102747392, "step": 47625 }, { "epoch": 7.769983686786297, "grad_norm": 0.015238435007631779, "learning_rate": 0.0007671894109931048, "loss": 0.0574, "num_input_tokens_seen": 102758816, "step": 47630 }, { "epoch": 7.770799347471452, "grad_norm": 0.2805767357349396, "learning_rate": 0.0007671292438896946, "loss": 0.0716, "num_input_tokens_seen": 102770112, "step": 47635 }, { "epoch": 7.771615008156607, "grad_norm": 0.139348566532135, "learning_rate": 0.0007670690713725715, "loss": 0.0552, "num_input_tokens_seen": 102780960, "step": 47640 }, { "epoch": 7.7724306688417615, "grad_norm": 0.16774505376815796, "learning_rate": 0.0007670088934429548, "loss": 0.0672, "num_input_tokens_seen": 102791488, "step": 47645 }, { "epoch": 7.773246329526917, "grad_norm": 0.2608852684497833, "learning_rate": 0.0007669487101020642, "loss": 0.1077, "num_input_tokens_seen": 102802272, "step": 47650 }, { "epoch": 7.774061990212072, "grad_norm": 0.27258777618408203, "learning_rate": 0.0007668885213511193, "loss": 0.0389, "num_input_tokens_seen": 102813504, "step": 47655 }, { "epoch": 7.774877650897227, "grad_norm": 0.3451569974422455, "learning_rate": 0.0007668283271913399, "loss": 0.0962, "num_input_tokens_seen": 102823328, "step": 47660 }, { "epoch": 7.775693311582382, "grad_norm": 0.22725844383239746, "learning_rate": 0.000766768127623946, "loss": 0.1016, "num_input_tokens_seen": 102834336, "step": 47665 }, { "epoch": 7.7765089722675365, "grad_norm": 0.07195267081260681, "learning_rate": 0.0007667079226501576, "loss": 0.0436, "num_input_tokens_seen": 102846496, "step": 47670 }, { "epoch": 7.777324632952691, "grad_norm": 0.08462546020746231, "learning_rate": 0.0007666477122711948, "loss": 0.0949, "num_input_tokens_seen": 102856128, "step": 47675 }, { "epoch": 7.778140293637847, "grad_norm": 0.009504844434559345, "learning_rate": 0.000766587496488278, "loss": 0.0237, "num_input_tokens_seen": 102867616, "step": 47680 }, { "epoch": 7.778955954323002, "grad_norm": 0.38907328248023987, "learning_rate": 0.0007665272753026271, "loss": 0.1675, "num_input_tokens_seen": 102878016, "step": 47685 }, { "epoch": 7.779771615008157, "grad_norm": 0.017110228538513184, "learning_rate": 0.000766467048715463, "loss": 0.1416, "num_input_tokens_seen": 102889664, "step": 47690 }, { "epoch": 7.780587275693311, "grad_norm": 0.005684800911694765, "learning_rate": 0.000766406816728006, "loss": 0.0502, "num_input_tokens_seen": 102900256, "step": 47695 }, { "epoch": 7.781402936378466, "grad_norm": 0.6504929661750793, "learning_rate": 0.000766346579341477, "loss": 0.1075, "num_input_tokens_seen": 102910624, "step": 47700 }, { "epoch": 7.782218597063622, "grad_norm": 0.11250746250152588, "learning_rate": 0.0007662863365570967, "loss": 0.0572, "num_input_tokens_seen": 102920224, "step": 47705 }, { "epoch": 7.783034257748777, "grad_norm": 0.12152735888957977, "learning_rate": 0.000766226088376086, "loss": 0.0438, "num_input_tokens_seen": 102931552, "step": 47710 }, { "epoch": 7.783849918433932, "grad_norm": 0.004328442271798849, "learning_rate": 0.0007661658347996659, "loss": 0.0163, "num_input_tokens_seen": 102942176, "step": 47715 }, { "epoch": 7.784665579119086, "grad_norm": 0.01648569479584694, "learning_rate": 0.0007661055758290574, "loss": 0.0446, "num_input_tokens_seen": 102952800, "step": 47720 }, { "epoch": 7.785481239804241, "grad_norm": 0.010411852039396763, "learning_rate": 0.0007660453114654819, "loss": 0.058, "num_input_tokens_seen": 102963456, "step": 47725 }, { "epoch": 7.786296900489396, "grad_norm": 0.1307300478219986, "learning_rate": 0.0007659850417101606, "loss": 0.0682, "num_input_tokens_seen": 102974368, "step": 47730 }, { "epoch": 7.787112561174552, "grad_norm": 0.4529654085636139, "learning_rate": 0.0007659247665643151, "loss": 0.0545, "num_input_tokens_seen": 102984704, "step": 47735 }, { "epoch": 7.787928221859707, "grad_norm": 0.16411900520324707, "learning_rate": 0.0007658644860291668, "loss": 0.0764, "num_input_tokens_seen": 102994560, "step": 47740 }, { "epoch": 7.788743882544861, "grad_norm": 0.06640923023223877, "learning_rate": 0.0007658042001059373, "loss": 0.056, "num_input_tokens_seen": 103005376, "step": 47745 }, { "epoch": 7.789559543230016, "grad_norm": 0.5506373047828674, "learning_rate": 0.0007657439087958486, "loss": 0.1762, "num_input_tokens_seen": 103017056, "step": 47750 }, { "epoch": 7.790375203915171, "grad_norm": 0.017594967037439346, "learning_rate": 0.0007656836121001225, "loss": 0.0654, "num_input_tokens_seen": 103026368, "step": 47755 }, { "epoch": 7.791190864600326, "grad_norm": 0.017976531758904457, "learning_rate": 0.0007656233100199809, "loss": 0.0665, "num_input_tokens_seen": 103037408, "step": 47760 }, { "epoch": 7.7920065252854815, "grad_norm": 0.0827372744679451, "learning_rate": 0.000765563002556646, "loss": 0.134, "num_input_tokens_seen": 103047136, "step": 47765 }, { "epoch": 7.792822185970636, "grad_norm": 0.01857643388211727, "learning_rate": 0.00076550268971134, "loss": 0.0538, "num_input_tokens_seen": 103057248, "step": 47770 }, { "epoch": 7.793637846655791, "grad_norm": 0.04456664249300957, "learning_rate": 0.0007654423714852852, "loss": 0.2582, "num_input_tokens_seen": 103067552, "step": 47775 }, { "epoch": 7.794453507340946, "grad_norm": 0.03645290061831474, "learning_rate": 0.0007653820478797038, "loss": 0.1227, "num_input_tokens_seen": 103079392, "step": 47780 }, { "epoch": 7.795269168026101, "grad_norm": 0.28056466579437256, "learning_rate": 0.0007653217188958188, "loss": 0.0248, "num_input_tokens_seen": 103089888, "step": 47785 }, { "epoch": 7.7960848287112565, "grad_norm": 0.28104403614997864, "learning_rate": 0.0007652613845348524, "loss": 0.0568, "num_input_tokens_seen": 103099584, "step": 47790 }, { "epoch": 7.796900489396411, "grad_norm": 0.25869059562683105, "learning_rate": 0.0007652010447980276, "loss": 0.1021, "num_input_tokens_seen": 103110848, "step": 47795 }, { "epoch": 7.797716150081566, "grad_norm": 0.09836013615131378, "learning_rate": 0.0007651406996865672, "loss": 0.1006, "num_input_tokens_seen": 103121696, "step": 47800 }, { "epoch": 7.798531810766721, "grad_norm": 0.03654855862259865, "learning_rate": 0.000765080349201694, "loss": 0.0127, "num_input_tokens_seen": 103132448, "step": 47805 }, { "epoch": 7.799347471451876, "grad_norm": 0.28264299035072327, "learning_rate": 0.0007650199933446314, "loss": 0.1014, "num_input_tokens_seen": 103143424, "step": 47810 }, { "epoch": 7.800163132137031, "grad_norm": 0.2561463415622711, "learning_rate": 0.0007649596321166025, "loss": 0.0667, "num_input_tokens_seen": 103154752, "step": 47815 }, { "epoch": 7.800978792822186, "grad_norm": 0.2974696159362793, "learning_rate": 0.0007648992655188305, "loss": 0.031, "num_input_tokens_seen": 103166176, "step": 47820 }, { "epoch": 7.801794453507341, "grad_norm": 0.18018274009227753, "learning_rate": 0.0007648388935525388, "loss": 0.0345, "num_input_tokens_seen": 103176288, "step": 47825 }, { "epoch": 7.802610114192496, "grad_norm": 0.08334909379482269, "learning_rate": 0.0007647785162189509, "loss": 0.1139, "num_input_tokens_seen": 103187552, "step": 47830 }, { "epoch": 7.803425774877651, "grad_norm": 0.11148272454738617, "learning_rate": 0.0007647181335192905, "loss": 0.039, "num_input_tokens_seen": 103198368, "step": 47835 }, { "epoch": 7.804241435562806, "grad_norm": 0.28908711671829224, "learning_rate": 0.0007646577454547814, "loss": 0.1862, "num_input_tokens_seen": 103209120, "step": 47840 }, { "epoch": 7.80505709624796, "grad_norm": 0.12933997809886932, "learning_rate": 0.0007645973520266472, "loss": 0.0396, "num_input_tokens_seen": 103221088, "step": 47845 }, { "epoch": 7.805872756933116, "grad_norm": 0.6591330766677856, "learning_rate": 0.000764536953236112, "loss": 0.1263, "num_input_tokens_seen": 103231456, "step": 47850 }, { "epoch": 7.806688417618271, "grad_norm": 0.3834850788116455, "learning_rate": 0.0007644765490844, "loss": 0.0588, "num_input_tokens_seen": 103242304, "step": 47855 }, { "epoch": 7.807504078303426, "grad_norm": 0.4123549163341522, "learning_rate": 0.0007644161395727352, "loss": 0.1145, "num_input_tokens_seen": 103253536, "step": 47860 }, { "epoch": 7.808319738988581, "grad_norm": 0.06551089882850647, "learning_rate": 0.0007643557247023418, "loss": 0.0328, "num_input_tokens_seen": 103262784, "step": 47865 }, { "epoch": 7.809135399673735, "grad_norm": 0.0437370240688324, "learning_rate": 0.0007642953044744443, "loss": 0.1591, "num_input_tokens_seen": 103272736, "step": 47870 }, { "epoch": 7.809951060358891, "grad_norm": 0.47758248448371887, "learning_rate": 0.0007642348788902672, "loss": 0.1903, "num_input_tokens_seen": 103283904, "step": 47875 }, { "epoch": 7.810766721044046, "grad_norm": 0.3410838842391968, "learning_rate": 0.000764174447951035, "loss": 0.0722, "num_input_tokens_seen": 103294944, "step": 47880 }, { "epoch": 7.811582381729201, "grad_norm": 0.28547340631484985, "learning_rate": 0.0007641140116579725, "loss": 0.0466, "num_input_tokens_seen": 103306016, "step": 47885 }, { "epoch": 7.8123980424143555, "grad_norm": 0.03195732831954956, "learning_rate": 0.0007640535700123047, "loss": 0.0498, "num_input_tokens_seen": 103316096, "step": 47890 }, { "epoch": 7.81321370309951, "grad_norm": 0.008194796741008759, "learning_rate": 0.000763993123015256, "loss": 0.0946, "num_input_tokens_seen": 103327520, "step": 47895 }, { "epoch": 7.814029363784666, "grad_norm": 0.4613550305366516, "learning_rate": 0.0007639326706680521, "loss": 0.1349, "num_input_tokens_seen": 103338560, "step": 47900 }, { "epoch": 7.814845024469821, "grad_norm": 0.014604778029024601, "learning_rate": 0.0007638722129719175, "loss": 0.1623, "num_input_tokens_seen": 103349088, "step": 47905 }, { "epoch": 7.815660685154976, "grad_norm": 0.007735392544418573, "learning_rate": 0.0007638117499280778, "loss": 0.0706, "num_input_tokens_seen": 103358176, "step": 47910 }, { "epoch": 7.8164763458401305, "grad_norm": 0.04469853639602661, "learning_rate": 0.0007637512815377585, "loss": 0.017, "num_input_tokens_seen": 103368992, "step": 47915 }, { "epoch": 7.817292006525285, "grad_norm": 0.015718691051006317, "learning_rate": 0.0007636908078021848, "loss": 0.1426, "num_input_tokens_seen": 103380128, "step": 47920 }, { "epoch": 7.81810766721044, "grad_norm": 0.4076104164123535, "learning_rate": 0.0007636303287225823, "loss": 0.1579, "num_input_tokens_seen": 103391776, "step": 47925 }, { "epoch": 7.818923327895595, "grad_norm": 0.021534664556384087, "learning_rate": 0.0007635698443001768, "loss": 0.0247, "num_input_tokens_seen": 103403232, "step": 47930 }, { "epoch": 7.819738988580751, "grad_norm": 0.09878955036401749, "learning_rate": 0.0007635093545361942, "loss": 0.1013, "num_input_tokens_seen": 103414560, "step": 47935 }, { "epoch": 7.8205546492659055, "grad_norm": 0.019348828122019768, "learning_rate": 0.00076344885943186, "loss": 0.1308, "num_input_tokens_seen": 103426144, "step": 47940 }, { "epoch": 7.82137030995106, "grad_norm": 0.016258427873253822, "learning_rate": 0.0007633883589884007, "loss": 0.0462, "num_input_tokens_seen": 103436960, "step": 47945 }, { "epoch": 7.822185970636215, "grad_norm": 0.38299328088760376, "learning_rate": 0.000763327853207042, "loss": 0.1159, "num_input_tokens_seen": 103447840, "step": 47950 }, { "epoch": 7.82300163132137, "grad_norm": 0.07456308603286743, "learning_rate": 0.0007632673420890104, "loss": 0.0932, "num_input_tokens_seen": 103457600, "step": 47955 }, { "epoch": 7.823817292006526, "grad_norm": 0.06688476353883743, "learning_rate": 0.000763206825635532, "loss": 0.0759, "num_input_tokens_seen": 103468160, "step": 47960 }, { "epoch": 7.8246329526916805, "grad_norm": 0.13584576547145844, "learning_rate": 0.0007631463038478334, "loss": 0.0296, "num_input_tokens_seen": 103478944, "step": 47965 }, { "epoch": 7.825448613376835, "grad_norm": 0.4117397665977478, "learning_rate": 0.0007630857767271413, "loss": 0.1763, "num_input_tokens_seen": 103490080, "step": 47970 }, { "epoch": 7.82626427406199, "grad_norm": 0.02553015761077404, "learning_rate": 0.000763025244274682, "loss": 0.0271, "num_input_tokens_seen": 103500736, "step": 47975 }, { "epoch": 7.827079934747145, "grad_norm": 0.04163774475455284, "learning_rate": 0.0007629647064916825, "loss": 0.1203, "num_input_tokens_seen": 103510240, "step": 47980 }, { "epoch": 7.827895595432301, "grad_norm": 0.005472294986248016, "learning_rate": 0.0007629041633793696, "loss": 0.0234, "num_input_tokens_seen": 103520512, "step": 47985 }, { "epoch": 7.828711256117455, "grad_norm": 0.45654305815696716, "learning_rate": 0.0007628436149389703, "loss": 0.1511, "num_input_tokens_seen": 103532032, "step": 47990 }, { "epoch": 7.82952691680261, "grad_norm": 0.015453352592885494, "learning_rate": 0.000762783061171712, "loss": 0.0433, "num_input_tokens_seen": 103542528, "step": 47995 }, { "epoch": 7.830342577487765, "grad_norm": 0.0946039929986, "learning_rate": 0.0007627225020788213, "loss": 0.0737, "num_input_tokens_seen": 103552704, "step": 48000 }, { "epoch": 7.83115823817292, "grad_norm": 0.0692768543958664, "learning_rate": 0.0007626619376615258, "loss": 0.189, "num_input_tokens_seen": 103563072, "step": 48005 }, { "epoch": 7.831973898858075, "grad_norm": 0.2812081575393677, "learning_rate": 0.000762601367921053, "loss": 0.0954, "num_input_tokens_seen": 103574624, "step": 48010 }, { "epoch": 7.8327895595432295, "grad_norm": 0.21065741777420044, "learning_rate": 0.0007625407928586303, "loss": 0.0725, "num_input_tokens_seen": 103584864, "step": 48015 }, { "epoch": 7.833605220228385, "grad_norm": 0.03276003524661064, "learning_rate": 0.0007624802124754855, "loss": 0.0147, "num_input_tokens_seen": 103595296, "step": 48020 }, { "epoch": 7.83442088091354, "grad_norm": 0.3056166470050812, "learning_rate": 0.000762419626772846, "loss": 0.1489, "num_input_tokens_seen": 103605696, "step": 48025 }, { "epoch": 7.835236541598695, "grad_norm": 0.02437729947268963, "learning_rate": 0.0007623590357519401, "loss": 0.0285, "num_input_tokens_seen": 103616064, "step": 48030 }, { "epoch": 7.83605220228385, "grad_norm": 0.3373369872570038, "learning_rate": 0.0007622984394139953, "loss": 0.0885, "num_input_tokens_seen": 103626496, "step": 48035 }, { "epoch": 7.8368678629690045, "grad_norm": 0.011673593893647194, "learning_rate": 0.00076223783776024, "loss": 0.038, "num_input_tokens_seen": 103636832, "step": 48040 }, { "epoch": 7.83768352365416, "grad_norm": 0.6916818618774414, "learning_rate": 0.0007621772307919022, "loss": 0.142, "num_input_tokens_seen": 103647232, "step": 48045 }, { "epoch": 7.838499184339315, "grad_norm": 0.5041036605834961, "learning_rate": 0.0007621166185102104, "loss": 0.081, "num_input_tokens_seen": 103659040, "step": 48050 }, { "epoch": 7.83931484502447, "grad_norm": 0.01314466167241335, "learning_rate": 0.0007620560009163926, "loss": 0.0088, "num_input_tokens_seen": 103670752, "step": 48055 }, { "epoch": 7.840130505709625, "grad_norm": 0.4181535840034485, "learning_rate": 0.0007619953780116775, "loss": 0.1403, "num_input_tokens_seen": 103681760, "step": 48060 }, { "epoch": 7.8409461663947795, "grad_norm": 0.15186119079589844, "learning_rate": 0.0007619347497972937, "loss": 0.2014, "num_input_tokens_seen": 103692864, "step": 48065 }, { "epoch": 7.841761827079935, "grad_norm": 0.26776066422462463, "learning_rate": 0.00076187411627447, "loss": 0.0436, "num_input_tokens_seen": 103702080, "step": 48070 }, { "epoch": 7.84257748776509, "grad_norm": 0.43096962571144104, "learning_rate": 0.0007618134774444351, "loss": 0.0404, "num_input_tokens_seen": 103712736, "step": 48075 }, { "epoch": 7.843393148450245, "grad_norm": 0.097377710044384, "learning_rate": 0.0007617528333084178, "loss": 0.0836, "num_input_tokens_seen": 103724000, "step": 48080 }, { "epoch": 7.8442088091354, "grad_norm": 0.3449857831001282, "learning_rate": 0.0007616921838676475, "loss": 0.0703, "num_input_tokens_seen": 103733920, "step": 48085 }, { "epoch": 7.8450244698205545, "grad_norm": 0.4668895900249481, "learning_rate": 0.0007616315291233531, "loss": 0.0923, "num_input_tokens_seen": 103744032, "step": 48090 }, { "epoch": 7.845840130505709, "grad_norm": 0.15616139769554138, "learning_rate": 0.0007615708690767637, "loss": 0.0855, "num_input_tokens_seen": 103754688, "step": 48095 }, { "epoch": 7.846655791190865, "grad_norm": 0.9473476409912109, "learning_rate": 0.0007615102037291089, "loss": 0.1412, "num_input_tokens_seen": 103766080, "step": 48100 }, { "epoch": 7.84747145187602, "grad_norm": 0.015311360359191895, "learning_rate": 0.000761449533081618, "loss": 0.0566, "num_input_tokens_seen": 103777440, "step": 48105 }, { "epoch": 7.848287112561175, "grad_norm": 0.18195249140262604, "learning_rate": 0.0007613888571355208, "loss": 0.2879, "num_input_tokens_seen": 103788192, "step": 48110 }, { "epoch": 7.849102773246329, "grad_norm": 0.2336111217737198, "learning_rate": 0.0007613281758920467, "loss": 0.1915, "num_input_tokens_seen": 103799744, "step": 48115 }, { "epoch": 7.849918433931484, "grad_norm": 0.03789116069674492, "learning_rate": 0.0007612674893524256, "loss": 0.1568, "num_input_tokens_seen": 103810464, "step": 48120 }, { "epoch": 7.850734094616639, "grad_norm": 0.04415895789861679, "learning_rate": 0.0007612067975178874, "loss": 0.0583, "num_input_tokens_seen": 103820864, "step": 48125 }, { "epoch": 7.851549755301795, "grad_norm": 0.015726245939731598, "learning_rate": 0.0007611461003896621, "loss": 0.1228, "num_input_tokens_seen": 103831168, "step": 48130 }, { "epoch": 7.85236541598695, "grad_norm": 0.14303886890411377, "learning_rate": 0.0007610853979689797, "loss": 0.0835, "num_input_tokens_seen": 103842016, "step": 48135 }, { "epoch": 7.853181076672104, "grad_norm": 0.2702622711658478, "learning_rate": 0.0007610246902570706, "loss": 0.0744, "num_input_tokens_seen": 103852576, "step": 48140 }, { "epoch": 7.853996737357259, "grad_norm": 0.037327930331230164, "learning_rate": 0.000760963977255165, "loss": 0.108, "num_input_tokens_seen": 103862912, "step": 48145 }, { "epoch": 7.854812398042414, "grad_norm": 0.668152391910553, "learning_rate": 0.0007609032589644934, "loss": 0.1177, "num_input_tokens_seen": 103873152, "step": 48150 }, { "epoch": 7.85562805872757, "grad_norm": 0.009313046932220459, "learning_rate": 0.0007608425353862863, "loss": 0.0305, "num_input_tokens_seen": 103883744, "step": 48155 }, { "epoch": 7.856443719412725, "grad_norm": 0.086497001349926, "learning_rate": 0.000760781806521774, "loss": 0.0777, "num_input_tokens_seen": 103894624, "step": 48160 }, { "epoch": 7.857259380097879, "grad_norm": 0.2987935245037079, "learning_rate": 0.0007607210723721879, "loss": 0.0837, "num_input_tokens_seen": 103905728, "step": 48165 }, { "epoch": 7.858075040783034, "grad_norm": 0.3277336359024048, "learning_rate": 0.0007606603329387585, "loss": 0.0469, "num_input_tokens_seen": 103916320, "step": 48170 }, { "epoch": 7.858890701468189, "grad_norm": 0.017325852066278458, "learning_rate": 0.0007605995882227166, "loss": 0.0185, "num_input_tokens_seen": 103926336, "step": 48175 }, { "epoch": 7.859706362153344, "grad_norm": 0.2982833981513977, "learning_rate": 0.0007605388382252936, "loss": 0.2151, "num_input_tokens_seen": 103936704, "step": 48180 }, { "epoch": 7.8605220228384995, "grad_norm": 0.04742131009697914, "learning_rate": 0.0007604780829477205, "loss": 0.112, "num_input_tokens_seen": 103948160, "step": 48185 }, { "epoch": 7.861337683523654, "grad_norm": 0.06433898210525513, "learning_rate": 0.0007604173223912285, "loss": 0.0374, "num_input_tokens_seen": 103959776, "step": 48190 }, { "epoch": 7.862153344208809, "grad_norm": 0.3845127522945404, "learning_rate": 0.0007603565565570493, "loss": 0.412, "num_input_tokens_seen": 103970272, "step": 48195 }, { "epoch": 7.862969004893964, "grad_norm": 0.28359919786453247, "learning_rate": 0.0007602957854464141, "loss": 0.1789, "num_input_tokens_seen": 103980384, "step": 48200 }, { "epoch": 7.863784665579119, "grad_norm": 0.4829026758670807, "learning_rate": 0.0007602350090605546, "loss": 0.1273, "num_input_tokens_seen": 103991200, "step": 48205 }, { "epoch": 7.864600326264274, "grad_norm": 0.008966047316789627, "learning_rate": 0.0007601742274007023, "loss": 0.1233, "num_input_tokens_seen": 104002912, "step": 48210 }, { "epoch": 7.865415986949429, "grad_norm": 0.08171498030424118, "learning_rate": 0.0007601134404680894, "loss": 0.0636, "num_input_tokens_seen": 104013056, "step": 48215 }, { "epoch": 7.866231647634584, "grad_norm": 0.01833738200366497, "learning_rate": 0.0007600526482639477, "loss": 0.1003, "num_input_tokens_seen": 104023648, "step": 48220 }, { "epoch": 7.867047308319739, "grad_norm": 0.08116874098777771, "learning_rate": 0.0007599918507895092, "loss": 0.093, "num_input_tokens_seen": 104033504, "step": 48225 }, { "epoch": 7.867862969004894, "grad_norm": 0.34233608841896057, "learning_rate": 0.000759931048046006, "loss": 0.0691, "num_input_tokens_seen": 104044352, "step": 48230 }, { "epoch": 7.868678629690049, "grad_norm": 0.2711058259010315, "learning_rate": 0.0007598702400346703, "loss": 0.0692, "num_input_tokens_seen": 104054752, "step": 48235 }, { "epoch": 7.869494290375204, "grad_norm": 0.016251573339104652, "learning_rate": 0.0007598094267567345, "loss": 0.0375, "num_input_tokens_seen": 104065024, "step": 48240 }, { "epoch": 7.870309951060359, "grad_norm": 0.022611765190958977, "learning_rate": 0.0007597486082134311, "loss": 0.0984, "num_input_tokens_seen": 104075584, "step": 48245 }, { "epoch": 7.871125611745514, "grad_norm": 0.251004159450531, "learning_rate": 0.0007596877844059926, "loss": 0.0459, "num_input_tokens_seen": 104087936, "step": 48250 }, { "epoch": 7.871941272430669, "grad_norm": 0.11969710886478424, "learning_rate": 0.0007596269553356518, "loss": 0.0344, "num_input_tokens_seen": 104097440, "step": 48255 }, { "epoch": 7.872756933115824, "grad_norm": 0.39873629808425903, "learning_rate": 0.0007595661210036414, "loss": 0.0672, "num_input_tokens_seen": 104109120, "step": 48260 }, { "epoch": 7.873572593800979, "grad_norm": 0.07275792956352234, "learning_rate": 0.0007595052814111942, "loss": 0.05, "num_input_tokens_seen": 104120160, "step": 48265 }, { "epoch": 7.874388254486134, "grad_norm": 0.11027942597866058, "learning_rate": 0.0007594444365595435, "loss": 0.0496, "num_input_tokens_seen": 104132480, "step": 48270 }, { "epoch": 7.875203915171289, "grad_norm": 0.013370302505791187, "learning_rate": 0.0007593835864499219, "loss": 0.0936, "num_input_tokens_seen": 104142912, "step": 48275 }, { "epoch": 7.876019575856444, "grad_norm": 0.33727899193763733, "learning_rate": 0.0007593227310835629, "loss": 0.0424, "num_input_tokens_seen": 104153504, "step": 48280 }, { "epoch": 7.876835236541599, "grad_norm": 0.0633564442396164, "learning_rate": 0.0007592618704616998, "loss": 0.0216, "num_input_tokens_seen": 104162688, "step": 48285 }, { "epoch": 7.877650897226753, "grad_norm": 0.004596862476319075, "learning_rate": 0.0007592010045855662, "loss": 0.156, "num_input_tokens_seen": 104173920, "step": 48290 }, { "epoch": 7.878466557911908, "grad_norm": 0.02165268361568451, "learning_rate": 0.0007591401334563952, "loss": 0.0273, "num_input_tokens_seen": 104184672, "step": 48295 }, { "epoch": 7.879282218597064, "grad_norm": 0.01483437791466713, "learning_rate": 0.0007590792570754207, "loss": 0.0633, "num_input_tokens_seen": 104194848, "step": 48300 }, { "epoch": 7.880097879282219, "grad_norm": 0.38728073239326477, "learning_rate": 0.0007590183754438764, "loss": 0.2092, "num_input_tokens_seen": 104206112, "step": 48305 }, { "epoch": 7.8809135399673735, "grad_norm": 0.004772756714373827, "learning_rate": 0.0007589574885629961, "loss": 0.0135, "num_input_tokens_seen": 104216960, "step": 48310 }, { "epoch": 7.881729200652528, "grad_norm": 0.47414860129356384, "learning_rate": 0.0007588965964340137, "loss": 0.1637, "num_input_tokens_seen": 104227296, "step": 48315 }, { "epoch": 7.882544861337683, "grad_norm": 0.4101828634738922, "learning_rate": 0.0007588356990581635, "loss": 0.1631, "num_input_tokens_seen": 104237472, "step": 48320 }, { "epoch": 7.883360522022839, "grad_norm": 0.2916763126850128, "learning_rate": 0.0007587747964366796, "loss": 0.0942, "num_input_tokens_seen": 104248416, "step": 48325 }, { "epoch": 7.884176182707994, "grad_norm": 0.1623440533876419, "learning_rate": 0.0007587138885707959, "loss": 0.1514, "num_input_tokens_seen": 104258112, "step": 48330 }, { "epoch": 7.8849918433931485, "grad_norm": 0.35090339183807373, "learning_rate": 0.000758652975461747, "loss": 0.149, "num_input_tokens_seen": 104268576, "step": 48335 }, { "epoch": 7.885807504078303, "grad_norm": 0.08142123371362686, "learning_rate": 0.0007585920571107677, "loss": 0.0556, "num_input_tokens_seen": 104279808, "step": 48340 }, { "epoch": 7.886623164763458, "grad_norm": 0.18787047266960144, "learning_rate": 0.0007585311335190923, "loss": 0.0242, "num_input_tokens_seen": 104291104, "step": 48345 }, { "epoch": 7.887438825448614, "grad_norm": 0.06933962553739548, "learning_rate": 0.0007584702046879554, "loss": 0.0761, "num_input_tokens_seen": 104302432, "step": 48350 }, { "epoch": 7.888254486133769, "grad_norm": 0.26296475529670715, "learning_rate": 0.0007584092706185919, "loss": 0.1184, "num_input_tokens_seen": 104313408, "step": 48355 }, { "epoch": 7.8890701468189235, "grad_norm": 0.5503690242767334, "learning_rate": 0.0007583483313122368, "loss": 0.1711, "num_input_tokens_seen": 104324640, "step": 48360 }, { "epoch": 7.889885807504078, "grad_norm": 0.4123271107673645, "learning_rate": 0.000758287386770125, "loss": 0.0744, "num_input_tokens_seen": 104336608, "step": 48365 }, { "epoch": 7.890701468189233, "grad_norm": 0.185148686170578, "learning_rate": 0.0007582264369934915, "loss": 0.053, "num_input_tokens_seen": 104348480, "step": 48370 }, { "epoch": 7.891517128874388, "grad_norm": 0.094192273914814, "learning_rate": 0.0007581654819835717, "loss": 0.0179, "num_input_tokens_seen": 104358400, "step": 48375 }, { "epoch": 7.892332789559543, "grad_norm": 0.009980947710573673, "learning_rate": 0.0007581045217416011, "loss": 0.0449, "num_input_tokens_seen": 104368736, "step": 48380 }, { "epoch": 7.8931484502446985, "grad_norm": 0.0041212355718016624, "learning_rate": 0.0007580435562688148, "loss": 0.3121, "num_input_tokens_seen": 104379840, "step": 48385 }, { "epoch": 7.893964110929853, "grad_norm": 0.2282155305147171, "learning_rate": 0.0007579825855664486, "loss": 0.0493, "num_input_tokens_seen": 104391168, "step": 48390 }, { "epoch": 7.894779771615008, "grad_norm": 0.035077404230833054, "learning_rate": 0.0007579216096357378, "loss": 0.0123, "num_input_tokens_seen": 104401600, "step": 48395 }, { "epoch": 7.895595432300163, "grad_norm": 0.08312935382127762, "learning_rate": 0.0007578606284779185, "loss": 0.0983, "num_input_tokens_seen": 104411424, "step": 48400 }, { "epoch": 7.896411092985318, "grad_norm": 0.7784950733184814, "learning_rate": 0.0007577996420942266, "loss": 0.0555, "num_input_tokens_seen": 104422144, "step": 48405 }, { "epoch": 7.897226753670473, "grad_norm": 0.010715791955590248, "learning_rate": 0.0007577386504858978, "loss": 0.0146, "num_input_tokens_seen": 104433216, "step": 48410 }, { "epoch": 7.898042414355628, "grad_norm": 0.011442198418080807, "learning_rate": 0.0007576776536541682, "loss": 0.0688, "num_input_tokens_seen": 104443712, "step": 48415 }, { "epoch": 7.898858075040783, "grad_norm": 0.19817012548446655, "learning_rate": 0.0007576166516002741, "loss": 0.0302, "num_input_tokens_seen": 104455296, "step": 48420 }, { "epoch": 7.899673735725938, "grad_norm": 0.046595633029937744, "learning_rate": 0.0007575556443254518, "loss": 0.0137, "num_input_tokens_seen": 104465696, "step": 48425 }, { "epoch": 7.900489396411093, "grad_norm": 0.6229962706565857, "learning_rate": 0.0007574946318309376, "loss": 0.0848, "num_input_tokens_seen": 104476768, "step": 48430 }, { "epoch": 7.901305057096248, "grad_norm": 0.0069332364946603775, "learning_rate": 0.000757433614117968, "loss": 0.1257, "num_input_tokens_seen": 104488384, "step": 48435 }, { "epoch": 7.902120717781403, "grad_norm": 0.07671057432889938, "learning_rate": 0.0007573725911877797, "loss": 0.0162, "num_input_tokens_seen": 104498784, "step": 48440 }, { "epoch": 7.902936378466558, "grad_norm": 0.008773133158683777, "learning_rate": 0.0007573115630416092, "loss": 0.0377, "num_input_tokens_seen": 104509184, "step": 48445 }, { "epoch": 7.903752039151713, "grad_norm": 0.08682745695114136, "learning_rate": 0.0007572505296806935, "loss": 0.0349, "num_input_tokens_seen": 104519744, "step": 48450 }, { "epoch": 7.904567699836868, "grad_norm": 0.005094946827739477, "learning_rate": 0.0007571894911062696, "loss": 0.0158, "num_input_tokens_seen": 104530368, "step": 48455 }, { "epoch": 7.9053833605220225, "grad_norm": 0.08960643410682678, "learning_rate": 0.0007571284473195743, "loss": 0.0213, "num_input_tokens_seen": 104540640, "step": 48460 }, { "epoch": 7.906199021207177, "grad_norm": 0.03995351493358612, "learning_rate": 0.0007570673983218448, "loss": 0.044, "num_input_tokens_seen": 104552480, "step": 48465 }, { "epoch": 7.907014681892333, "grad_norm": 0.38220691680908203, "learning_rate": 0.0007570063441143185, "loss": 0.1095, "num_input_tokens_seen": 104562592, "step": 48470 }, { "epoch": 7.907830342577488, "grad_norm": 0.18686965107917786, "learning_rate": 0.0007569452846982325, "loss": 0.0903, "num_input_tokens_seen": 104573888, "step": 48475 }, { "epoch": 7.908646003262643, "grad_norm": 0.2732616364955902, "learning_rate": 0.0007568842200748243, "loss": 0.0897, "num_input_tokens_seen": 104584000, "step": 48480 }, { "epoch": 7.9094616639477975, "grad_norm": 0.42170387506484985, "learning_rate": 0.0007568231502453317, "loss": 0.0783, "num_input_tokens_seen": 104594528, "step": 48485 }, { "epoch": 7.910277324632952, "grad_norm": 0.016445474699139595, "learning_rate": 0.000756762075210992, "loss": 0.1221, "num_input_tokens_seen": 104604800, "step": 48490 }, { "epoch": 7.911092985318108, "grad_norm": 0.1903805136680603, "learning_rate": 0.0007567009949730431, "loss": 0.108, "num_input_tokens_seen": 104615328, "step": 48495 }, { "epoch": 7.911908646003263, "grad_norm": 0.5338291525840759, "learning_rate": 0.000756639909532723, "loss": 0.0807, "num_input_tokens_seen": 104625408, "step": 48500 }, { "epoch": 7.912724306688418, "grad_norm": 0.057819683104753494, "learning_rate": 0.0007565788188912694, "loss": 0.0555, "num_input_tokens_seen": 104636960, "step": 48505 }, { "epoch": 7.9135399673735725, "grad_norm": 0.030563849955797195, "learning_rate": 0.0007565177230499206, "loss": 0.0239, "num_input_tokens_seen": 104647744, "step": 48510 }, { "epoch": 7.914355628058727, "grad_norm": 0.05450010299682617, "learning_rate": 0.0007564566220099147, "loss": 0.1055, "num_input_tokens_seen": 104659648, "step": 48515 }, { "epoch": 7.915171288743883, "grad_norm": 0.24479885399341583, "learning_rate": 0.00075639551577249, "loss": 0.0441, "num_input_tokens_seen": 104670848, "step": 48520 }, { "epoch": 7.915986949429038, "grad_norm": 0.01527437474578619, "learning_rate": 0.0007563344043388851, "loss": 0.013, "num_input_tokens_seen": 104681824, "step": 48525 }, { "epoch": 7.916802610114193, "grad_norm": 0.019100485369563103, "learning_rate": 0.0007562732877103382, "loss": 0.1606, "num_input_tokens_seen": 104692992, "step": 48530 }, { "epoch": 7.917618270799347, "grad_norm": 0.37950432300567627, "learning_rate": 0.000756212165888088, "loss": 0.0531, "num_input_tokens_seen": 104703840, "step": 48535 }, { "epoch": 7.918433931484502, "grad_norm": 0.06310606002807617, "learning_rate": 0.0007561510388733732, "loss": 0.0811, "num_input_tokens_seen": 104714176, "step": 48540 }, { "epoch": 7.919249592169657, "grad_norm": 0.07106725126504898, "learning_rate": 0.0007560899066674327, "loss": 0.0206, "num_input_tokens_seen": 104724416, "step": 48545 }, { "epoch": 7.920065252854813, "grad_norm": 0.21406258642673492, "learning_rate": 0.0007560287692715053, "loss": 0.0338, "num_input_tokens_seen": 104735968, "step": 48550 }, { "epoch": 7.920880913539968, "grad_norm": 0.26113736629486084, "learning_rate": 0.0007559676266868302, "loss": 0.0897, "num_input_tokens_seen": 104747488, "step": 48555 }, { "epoch": 7.921696574225122, "grad_norm": 0.3318088948726654, "learning_rate": 0.0007559064789146464, "loss": 0.0474, "num_input_tokens_seen": 104757472, "step": 48560 }, { "epoch": 7.922512234910277, "grad_norm": 0.049718886613845825, "learning_rate": 0.000755845325956193, "loss": 0.0274, "num_input_tokens_seen": 104767968, "step": 48565 }, { "epoch": 7.923327895595432, "grad_norm": 0.24499699473381042, "learning_rate": 0.0007557841678127097, "loss": 0.1092, "num_input_tokens_seen": 104778336, "step": 48570 }, { "epoch": 7.924143556280587, "grad_norm": 0.13392864167690277, "learning_rate": 0.0007557230044854357, "loss": 0.059, "num_input_tokens_seen": 104789120, "step": 48575 }, { "epoch": 7.924959216965743, "grad_norm": 0.1269339621067047, "learning_rate": 0.0007556618359756107, "loss": 0.0255, "num_input_tokens_seen": 104799648, "step": 48580 }, { "epoch": 7.925774877650897, "grad_norm": 0.21478500962257385, "learning_rate": 0.0007556006622844742, "loss": 0.0615, "num_input_tokens_seen": 104809792, "step": 48585 }, { "epoch": 7.926590538336052, "grad_norm": 0.637736976146698, "learning_rate": 0.000755539483413266, "loss": 0.0827, "num_input_tokens_seen": 104821408, "step": 48590 }, { "epoch": 7.927406199021207, "grad_norm": 0.6781191229820251, "learning_rate": 0.0007554782993632259, "loss": 0.077, "num_input_tokens_seen": 104831040, "step": 48595 }, { "epoch": 7.928221859706362, "grad_norm": 0.016890328377485275, "learning_rate": 0.0007554171101355941, "loss": 0.0976, "num_input_tokens_seen": 104840800, "step": 48600 }, { "epoch": 7.9290375203915175, "grad_norm": 0.03135719522833824, "learning_rate": 0.0007553559157316105, "loss": 0.0501, "num_input_tokens_seen": 104851712, "step": 48605 }, { "epoch": 7.929853181076672, "grad_norm": 0.021081894636154175, "learning_rate": 0.0007552947161525153, "loss": 0.0965, "num_input_tokens_seen": 104863552, "step": 48610 }, { "epoch": 7.930668841761827, "grad_norm": 0.11639349907636642, "learning_rate": 0.0007552335113995489, "loss": 0.1129, "num_input_tokens_seen": 104874176, "step": 48615 }, { "epoch": 7.931484502446982, "grad_norm": 0.00949505902826786, "learning_rate": 0.0007551723014739515, "loss": 0.149, "num_input_tokens_seen": 104885056, "step": 48620 }, { "epoch": 7.932300163132137, "grad_norm": 0.1120639517903328, "learning_rate": 0.0007551110863769638, "loss": 0.0605, "num_input_tokens_seen": 104897312, "step": 48625 }, { "epoch": 7.933115823817292, "grad_norm": 0.11907103657722473, "learning_rate": 0.0007550498661098263, "loss": 0.0975, "num_input_tokens_seen": 104908416, "step": 48630 }, { "epoch": 7.933931484502447, "grad_norm": 0.38088110089302063, "learning_rate": 0.0007549886406737796, "loss": 0.1691, "num_input_tokens_seen": 104918656, "step": 48635 }, { "epoch": 7.934747145187602, "grad_norm": 0.43547603487968445, "learning_rate": 0.0007549274100700647, "loss": 0.1868, "num_input_tokens_seen": 104930304, "step": 48640 }, { "epoch": 7.935562805872757, "grad_norm": 0.03152475133538246, "learning_rate": 0.0007548661742999225, "loss": 0.0268, "num_input_tokens_seen": 104940800, "step": 48645 }, { "epoch": 7.936378466557912, "grad_norm": 0.07971568405628204, "learning_rate": 0.0007548049333645939, "loss": 0.0866, "num_input_tokens_seen": 104952000, "step": 48650 }, { "epoch": 7.937194127243067, "grad_norm": 0.31623032689094543, "learning_rate": 0.00075474368726532, "loss": 0.1679, "num_input_tokens_seen": 104962976, "step": 48655 }, { "epoch": 7.938009787928221, "grad_norm": 0.04478880390524864, "learning_rate": 0.0007546824360033421, "loss": 0.3218, "num_input_tokens_seen": 104972704, "step": 48660 }, { "epoch": 7.938825448613377, "grad_norm": 0.3739233911037445, "learning_rate": 0.0007546211795799016, "loss": 0.1017, "num_input_tokens_seen": 104982592, "step": 48665 }, { "epoch": 7.939641109298532, "grad_norm": 0.16571593284606934, "learning_rate": 0.0007545599179962399, "loss": 0.1365, "num_input_tokens_seen": 104994624, "step": 48670 }, { "epoch": 7.940456769983687, "grad_norm": 0.03601468354463577, "learning_rate": 0.0007544986512535985, "loss": 0.0152, "num_input_tokens_seen": 105004896, "step": 48675 }, { "epoch": 7.941272430668842, "grad_norm": 0.28209686279296875, "learning_rate": 0.0007544373793532191, "loss": 0.1031, "num_input_tokens_seen": 105015680, "step": 48680 }, { "epoch": 7.942088091353996, "grad_norm": 0.28403207659721375, "learning_rate": 0.0007543761022963436, "loss": 0.1546, "num_input_tokens_seen": 105026848, "step": 48685 }, { "epoch": 7.942903752039152, "grad_norm": 0.1927800178527832, "learning_rate": 0.0007543148200842134, "loss": 0.0504, "num_input_tokens_seen": 105036288, "step": 48690 }, { "epoch": 7.943719412724307, "grad_norm": 0.1469605565071106, "learning_rate": 0.0007542535327180708, "loss": 0.0669, "num_input_tokens_seen": 105047104, "step": 48695 }, { "epoch": 7.944535073409462, "grad_norm": 0.10133697092533112, "learning_rate": 0.0007541922401991579, "loss": 0.2811, "num_input_tokens_seen": 105057888, "step": 48700 }, { "epoch": 7.945350734094617, "grad_norm": 0.22228077054023743, "learning_rate": 0.0007541309425287168, "loss": 0.1711, "num_input_tokens_seen": 105068768, "step": 48705 }, { "epoch": 7.946166394779771, "grad_norm": 0.07218156009912491, "learning_rate": 0.0007540696397079898, "loss": 0.0562, "num_input_tokens_seen": 105079360, "step": 48710 }, { "epoch": 7.946982055464927, "grad_norm": 0.5002050995826721, "learning_rate": 0.0007540083317382192, "loss": 0.1112, "num_input_tokens_seen": 105089888, "step": 48715 }, { "epoch": 7.947797716150082, "grad_norm": 0.02348877303302288, "learning_rate": 0.0007539470186206474, "loss": 0.0422, "num_input_tokens_seen": 105100320, "step": 48720 }, { "epoch": 7.948613376835237, "grad_norm": 0.07796557247638702, "learning_rate": 0.0007538857003565174, "loss": 0.0612, "num_input_tokens_seen": 105112448, "step": 48725 }, { "epoch": 7.9494290375203915, "grad_norm": 0.18601307272911072, "learning_rate": 0.0007538243769470714, "loss": 0.0951, "num_input_tokens_seen": 105123840, "step": 48730 }, { "epoch": 7.950244698205546, "grad_norm": 0.02430122159421444, "learning_rate": 0.0007537630483935524, "loss": 0.1539, "num_input_tokens_seen": 105133984, "step": 48735 }, { "epoch": 7.951060358890701, "grad_norm": 0.03999203070998192, "learning_rate": 0.0007537017146972033, "loss": 0.1346, "num_input_tokens_seen": 105144576, "step": 48740 }, { "epoch": 7.951876019575856, "grad_norm": 0.12233507633209229, "learning_rate": 0.0007536403758592672, "loss": 0.1297, "num_input_tokens_seen": 105155136, "step": 48745 }, { "epoch": 7.952691680261012, "grad_norm": 0.016184618696570396, "learning_rate": 0.000753579031880987, "loss": 0.2132, "num_input_tokens_seen": 105166816, "step": 48750 }, { "epoch": 7.9535073409461665, "grad_norm": 0.04933517053723335, "learning_rate": 0.0007535176827636061, "loss": 0.0506, "num_input_tokens_seen": 105177344, "step": 48755 }, { "epoch": 7.954323001631321, "grad_norm": 0.6290183067321777, "learning_rate": 0.0007534563285083678, "loss": 0.1202, "num_input_tokens_seen": 105189056, "step": 48760 }, { "epoch": 7.955138662316476, "grad_norm": 0.01619282178580761, "learning_rate": 0.0007533949691165152, "loss": 0.113, "num_input_tokens_seen": 105198816, "step": 48765 }, { "epoch": 7.955954323001631, "grad_norm": 0.03779361769556999, "learning_rate": 0.0007533336045892925, "loss": 0.0568, "num_input_tokens_seen": 105208416, "step": 48770 }, { "epoch": 7.956769983686787, "grad_norm": 0.03582243248820305, "learning_rate": 0.0007532722349279426, "loss": 0.0522, "num_input_tokens_seen": 105219712, "step": 48775 }, { "epoch": 7.9575856443719415, "grad_norm": 0.05312197655439377, "learning_rate": 0.0007532108601337097, "loss": 0.0536, "num_input_tokens_seen": 105230336, "step": 48780 }, { "epoch": 7.958401305057096, "grad_norm": 0.06593223661184311, "learning_rate": 0.0007531494802078376, "loss": 0.0405, "num_input_tokens_seen": 105241024, "step": 48785 }, { "epoch": 7.959216965742251, "grad_norm": 0.3619539737701416, "learning_rate": 0.00075308809515157, "loss": 0.0417, "num_input_tokens_seen": 105251232, "step": 48790 }, { "epoch": 7.960032626427406, "grad_norm": 0.02389264851808548, "learning_rate": 0.0007530267049661511, "loss": 0.0639, "num_input_tokens_seen": 105262048, "step": 48795 }, { "epoch": 7.960848287112562, "grad_norm": 0.04761756211519241, "learning_rate": 0.000752965309652825, "loss": 0.2018, "num_input_tokens_seen": 105273184, "step": 48800 }, { "epoch": 7.9616639477977165, "grad_norm": 0.02395232953131199, "learning_rate": 0.0007529039092128361, "loss": 0.0745, "num_input_tokens_seen": 105284832, "step": 48805 }, { "epoch": 7.962479608482871, "grad_norm": 0.013701491057872772, "learning_rate": 0.0007528425036474287, "loss": 0.0316, "num_input_tokens_seen": 105296416, "step": 48810 }, { "epoch": 7.963295269168026, "grad_norm": 0.007370386738330126, "learning_rate": 0.000752781092957847, "loss": 0.0782, "num_input_tokens_seen": 105307264, "step": 48815 }, { "epoch": 7.964110929853181, "grad_norm": 0.03072693943977356, "learning_rate": 0.000752719677145336, "loss": 0.0666, "num_input_tokens_seen": 105317568, "step": 48820 }, { "epoch": 7.964926590538336, "grad_norm": 0.09162832796573639, "learning_rate": 0.0007526582562111399, "loss": 0.0409, "num_input_tokens_seen": 105328928, "step": 48825 }, { "epoch": 7.9657422512234906, "grad_norm": 0.050932787358760834, "learning_rate": 0.0007525968301565038, "loss": 0.0903, "num_input_tokens_seen": 105339648, "step": 48830 }, { "epoch": 7.966557911908646, "grad_norm": 0.03485019505023956, "learning_rate": 0.0007525353989826726, "loss": 0.06, "num_input_tokens_seen": 105351360, "step": 48835 }, { "epoch": 7.967373572593801, "grad_norm": 0.024860871955752373, "learning_rate": 0.000752473962690891, "loss": 0.0935, "num_input_tokens_seen": 105361952, "step": 48840 }, { "epoch": 7.968189233278956, "grad_norm": 0.42854198813438416, "learning_rate": 0.0007524125212824044, "loss": 0.1888, "num_input_tokens_seen": 105372704, "step": 48845 }, { "epoch": 7.969004893964111, "grad_norm": 0.0599367693066597, "learning_rate": 0.0007523510747584578, "loss": 0.0855, "num_input_tokens_seen": 105382400, "step": 48850 }, { "epoch": 7.9698205546492655, "grad_norm": 0.08904532343149185, "learning_rate": 0.0007522896231202967, "loss": 0.1124, "num_input_tokens_seen": 105394080, "step": 48855 }, { "epoch": 7.970636215334421, "grad_norm": 0.023718256503343582, "learning_rate": 0.0007522281663691661, "loss": 0.1193, "num_input_tokens_seen": 105404960, "step": 48860 }, { "epoch": 7.971451876019576, "grad_norm": 0.5391967296600342, "learning_rate": 0.0007521667045063119, "loss": 0.147, "num_input_tokens_seen": 105416224, "step": 48865 }, { "epoch": 7.972267536704731, "grad_norm": 0.016287386417388916, "learning_rate": 0.0007521052375329793, "loss": 0.0919, "num_input_tokens_seen": 105426688, "step": 48870 }, { "epoch": 7.973083197389886, "grad_norm": 0.04502877593040466, "learning_rate": 0.0007520437654504144, "loss": 0.0589, "num_input_tokens_seen": 105437888, "step": 48875 }, { "epoch": 7.9738988580750405, "grad_norm": 0.43209370970726013, "learning_rate": 0.0007519822882598629, "loss": 0.1453, "num_input_tokens_seen": 105448832, "step": 48880 }, { "epoch": 7.974714518760196, "grad_norm": 0.340436726808548, "learning_rate": 0.0007519208059625707, "loss": 0.2143, "num_input_tokens_seen": 105459392, "step": 48885 }, { "epoch": 7.975530179445351, "grad_norm": 0.032445669174194336, "learning_rate": 0.0007518593185597837, "loss": 0.0709, "num_input_tokens_seen": 105470464, "step": 48890 }, { "epoch": 7.976345840130506, "grad_norm": 0.05766851827502251, "learning_rate": 0.000751797826052748, "loss": 0.0855, "num_input_tokens_seen": 105481952, "step": 48895 }, { "epoch": 7.977161500815661, "grad_norm": 0.07345040887594223, "learning_rate": 0.0007517363284427101, "loss": 0.1777, "num_input_tokens_seen": 105492640, "step": 48900 }, { "epoch": 7.9779771615008155, "grad_norm": 0.2643451988697052, "learning_rate": 0.0007516748257309162, "loss": 0.0553, "num_input_tokens_seen": 105503328, "step": 48905 }, { "epoch": 7.97879282218597, "grad_norm": 0.07445618510246277, "learning_rate": 0.0007516133179186125, "loss": 0.0502, "num_input_tokens_seen": 105513632, "step": 48910 }, { "epoch": 7.979608482871125, "grad_norm": 0.029151596128940582, "learning_rate": 0.0007515518050070458, "loss": 0.0212, "num_input_tokens_seen": 105525760, "step": 48915 }, { "epoch": 7.980424143556281, "grad_norm": 0.02449207566678524, "learning_rate": 0.0007514902869974627, "loss": 0.0261, "num_input_tokens_seen": 105537472, "step": 48920 }, { "epoch": 7.981239804241436, "grad_norm": 0.5087122321128845, "learning_rate": 0.0007514287638911099, "loss": 0.1151, "num_input_tokens_seen": 105547904, "step": 48925 }, { "epoch": 7.9820554649265905, "grad_norm": 0.771680474281311, "learning_rate": 0.0007513672356892342, "loss": 0.1745, "num_input_tokens_seen": 105558048, "step": 48930 }, { "epoch": 7.982871125611745, "grad_norm": 0.3765924870967865, "learning_rate": 0.0007513057023930825, "loss": 0.0696, "num_input_tokens_seen": 105569280, "step": 48935 }, { "epoch": 7.9836867862969, "grad_norm": 0.016787433996796608, "learning_rate": 0.000751244164003902, "loss": 0.0366, "num_input_tokens_seen": 105580416, "step": 48940 }, { "epoch": 7.984502446982056, "grad_norm": 0.06901337951421738, "learning_rate": 0.00075118262052294, "loss": 0.04, "num_input_tokens_seen": 105590592, "step": 48945 }, { "epoch": 7.985318107667211, "grad_norm": 0.036270394921302795, "learning_rate": 0.0007511210719514432, "loss": 0.1111, "num_input_tokens_seen": 105599936, "step": 48950 }, { "epoch": 7.986133768352365, "grad_norm": 0.0717955008149147, "learning_rate": 0.0007510595182906595, "loss": 0.0578, "num_input_tokens_seen": 105610656, "step": 48955 }, { "epoch": 7.98694942903752, "grad_norm": 0.14659738540649414, "learning_rate": 0.0007509979595418362, "loss": 0.0993, "num_input_tokens_seen": 105621984, "step": 48960 }, { "epoch": 7.987765089722675, "grad_norm": 0.8492498397827148, "learning_rate": 0.0007509363957062207, "loss": 0.1591, "num_input_tokens_seen": 105632256, "step": 48965 }, { "epoch": 7.988580750407831, "grad_norm": 0.0375811941921711, "learning_rate": 0.0007508748267850609, "loss": 0.3269, "num_input_tokens_seen": 105643104, "step": 48970 }, { "epoch": 7.989396411092986, "grad_norm": 0.14074194431304932, "learning_rate": 0.0007508132527796043, "loss": 0.0418, "num_input_tokens_seen": 105654912, "step": 48975 }, { "epoch": 7.99021207177814, "grad_norm": 0.011132019571959972, "learning_rate": 0.0007507516736910992, "loss": 0.0573, "num_input_tokens_seen": 105664800, "step": 48980 }, { "epoch": 7.991027732463295, "grad_norm": 0.15481600165367126, "learning_rate": 0.0007506900895207932, "loss": 0.1, "num_input_tokens_seen": 105675872, "step": 48985 }, { "epoch": 7.99184339314845, "grad_norm": 0.01749015413224697, "learning_rate": 0.0007506285002699346, "loss": 0.0653, "num_input_tokens_seen": 105686176, "step": 48990 }, { "epoch": 7.992659053833605, "grad_norm": 0.022090595215559006, "learning_rate": 0.0007505669059397715, "loss": 0.1019, "num_input_tokens_seen": 105697216, "step": 48995 }, { "epoch": 7.993474714518761, "grad_norm": 0.07972252368927002, "learning_rate": 0.0007505053065315521, "loss": 0.0722, "num_input_tokens_seen": 105708224, "step": 49000 }, { "epoch": 7.994290375203915, "grad_norm": 0.034679919481277466, "learning_rate": 0.0007504437020465248, "loss": 0.0785, "num_input_tokens_seen": 105719040, "step": 49005 }, { "epoch": 7.99510603588907, "grad_norm": 0.07879279553890228, "learning_rate": 0.0007503820924859382, "loss": 0.1392, "num_input_tokens_seen": 105729504, "step": 49010 }, { "epoch": 7.995921696574225, "grad_norm": 0.054000161588191986, "learning_rate": 0.000750320477851041, "loss": 0.0313, "num_input_tokens_seen": 105739296, "step": 49015 }, { "epoch": 7.99673735725938, "grad_norm": 0.05378521606326103, "learning_rate": 0.0007502588581430817, "loss": 0.0397, "num_input_tokens_seen": 105750336, "step": 49020 }, { "epoch": 7.997553017944535, "grad_norm": 0.07242117077112198, "learning_rate": 0.0007501972333633091, "loss": 0.0546, "num_input_tokens_seen": 105761984, "step": 49025 }, { "epoch": 7.99836867862969, "grad_norm": 0.07152658700942993, "learning_rate": 0.0007501356035129723, "loss": 0.0441, "num_input_tokens_seen": 105771520, "step": 49030 }, { "epoch": 7.999184339314845, "grad_norm": 0.36083605885505676, "learning_rate": 0.0007500739685933201, "loss": 0.2204, "num_input_tokens_seen": 105780480, "step": 49035 }, { "epoch": 8.0, "grad_norm": 0.031406648457050323, "learning_rate": 0.0007500123286056018, "loss": 0.1785, "num_input_tokens_seen": 105788704, "step": 49040 }, { "epoch": 8.0, "eval_loss": 0.1272326409816742, "eval_runtime": 104.6752, "eval_samples_per_second": 26.033, "eval_steps_per_second": 6.515, "num_input_tokens_seen": 105788704, "step": 49040 }, { "epoch": 8.000815660685156, "grad_norm": 0.05562405288219452, "learning_rate": 0.0007499506835510663, "loss": 0.0197, "num_input_tokens_seen": 105799296, "step": 49045 }, { "epoch": 8.00163132137031, "grad_norm": 0.025818029418587685, "learning_rate": 0.0007498890334309633, "loss": 0.1138, "num_input_tokens_seen": 105809216, "step": 49050 }, { "epoch": 8.002446982055465, "grad_norm": 0.14336830377578735, "learning_rate": 0.000749827378246542, "loss": 0.1016, "num_input_tokens_seen": 105821184, "step": 49055 }, { "epoch": 8.00326264274062, "grad_norm": 0.030630996450781822, "learning_rate": 0.0007497657179990518, "loss": 0.0572, "num_input_tokens_seen": 105831968, "step": 49060 }, { "epoch": 8.004078303425775, "grad_norm": 0.08194147050380707, "learning_rate": 0.0007497040526897426, "loss": 0.0515, "num_input_tokens_seen": 105843136, "step": 49065 }, { "epoch": 8.00489396411093, "grad_norm": 0.2786090075969696, "learning_rate": 0.0007496423823198639, "loss": 0.0347, "num_input_tokens_seen": 105854560, "step": 49070 }, { "epoch": 8.005709624796085, "grad_norm": 0.06659670919179916, "learning_rate": 0.0007495807068906657, "loss": 0.1709, "num_input_tokens_seen": 105865856, "step": 49075 }, { "epoch": 8.00652528548124, "grad_norm": 0.32486703991889954, "learning_rate": 0.0007495190264033978, "loss": 0.1122, "num_input_tokens_seen": 105876640, "step": 49080 }, { "epoch": 8.007340946166394, "grad_norm": 0.18374553322792053, "learning_rate": 0.0007494573408593103, "loss": 0.0385, "num_input_tokens_seen": 105886592, "step": 49085 }, { "epoch": 8.00815660685155, "grad_norm": 0.007273446302860975, "learning_rate": 0.0007493956502596533, "loss": 0.0595, "num_input_tokens_seen": 105897760, "step": 49090 }, { "epoch": 8.008972267536704, "grad_norm": 0.1251687854528427, "learning_rate": 0.0007493339546056772, "loss": 0.178, "num_input_tokens_seen": 105908480, "step": 49095 }, { "epoch": 8.00978792822186, "grad_norm": 0.09872283041477203, "learning_rate": 0.0007492722538986321, "loss": 0.0469, "num_input_tokens_seen": 105919936, "step": 49100 }, { "epoch": 8.010603588907015, "grad_norm": 0.004216793924570084, "learning_rate": 0.0007492105481397686, "loss": 0.0629, "num_input_tokens_seen": 105931552, "step": 49105 }, { "epoch": 8.01141924959217, "grad_norm": 0.21793438494205475, "learning_rate": 0.0007491488373303373, "loss": 0.0243, "num_input_tokens_seen": 105943040, "step": 49110 }, { "epoch": 8.012234910277325, "grad_norm": 0.18698358535766602, "learning_rate": 0.0007490871214715885, "loss": 0.0349, "num_input_tokens_seen": 105954304, "step": 49115 }, { "epoch": 8.013050570962479, "grad_norm": 0.03246983140707016, "learning_rate": 0.0007490254005647735, "loss": 0.0168, "num_input_tokens_seen": 105964896, "step": 49120 }, { "epoch": 8.013866231647635, "grad_norm": 0.01469373982399702, "learning_rate": 0.0007489636746111426, "loss": 0.0364, "num_input_tokens_seen": 105975264, "step": 49125 }, { "epoch": 8.01468189233279, "grad_norm": 0.04631610959768295, "learning_rate": 0.0007489019436119471, "loss": 0.0434, "num_input_tokens_seen": 105987136, "step": 49130 }, { "epoch": 8.015497553017944, "grad_norm": 0.026004917919635773, "learning_rate": 0.0007488402075684379, "loss": 0.055, "num_input_tokens_seen": 105998304, "step": 49135 }, { "epoch": 8.0163132137031, "grad_norm": 0.052532654255628586, "learning_rate": 0.0007487784664818662, "loss": 0.0275, "num_input_tokens_seen": 106009344, "step": 49140 }, { "epoch": 8.017128874388254, "grad_norm": 0.017775893211364746, "learning_rate": 0.0007487167203534834, "loss": 0.1236, "num_input_tokens_seen": 106018976, "step": 49145 }, { "epoch": 8.01794453507341, "grad_norm": 0.4063442647457123, "learning_rate": 0.0007486549691845405, "loss": 0.0967, "num_input_tokens_seen": 106029792, "step": 49150 }, { "epoch": 8.018760195758565, "grad_norm": 0.0904417484998703, "learning_rate": 0.0007485932129762895, "loss": 0.0167, "num_input_tokens_seen": 106041056, "step": 49155 }, { "epoch": 8.01957585644372, "grad_norm": 0.14919592440128326, "learning_rate": 0.0007485314517299815, "loss": 0.0231, "num_input_tokens_seen": 106052640, "step": 49160 }, { "epoch": 8.020391517128875, "grad_norm": 0.0036316467449069023, "learning_rate": 0.0007484696854468684, "loss": 0.0372, "num_input_tokens_seen": 106063360, "step": 49165 }, { "epoch": 8.021207177814029, "grad_norm": 0.09754778444766998, "learning_rate": 0.0007484079141282018, "loss": 0.0376, "num_input_tokens_seen": 106073760, "step": 49170 }, { "epoch": 8.022022838499185, "grad_norm": 0.06557095795869827, "learning_rate": 0.0007483461377752339, "loss": 0.0727, "num_input_tokens_seen": 106084640, "step": 49175 }, { "epoch": 8.022838499184338, "grad_norm": 0.03193598985671997, "learning_rate": 0.0007482843563892164, "loss": 0.0545, "num_input_tokens_seen": 106096352, "step": 49180 }, { "epoch": 8.023654159869494, "grad_norm": 0.014172831550240517, "learning_rate": 0.0007482225699714014, "loss": 0.059, "num_input_tokens_seen": 106106752, "step": 49185 }, { "epoch": 8.02446982055465, "grad_norm": 0.046596135944128036, "learning_rate": 0.0007481607785230411, "loss": 0.047, "num_input_tokens_seen": 106117856, "step": 49190 }, { "epoch": 8.025285481239804, "grad_norm": 0.024187371134757996, "learning_rate": 0.0007480989820453878, "loss": 0.0987, "num_input_tokens_seen": 106127648, "step": 49195 }, { "epoch": 8.02610114192496, "grad_norm": 0.0038718467112630606, "learning_rate": 0.0007480371805396941, "loss": 0.0356, "num_input_tokens_seen": 106138688, "step": 49200 }, { "epoch": 8.026916802610113, "grad_norm": 0.37950098514556885, "learning_rate": 0.0007479753740072121, "loss": 0.1163, "num_input_tokens_seen": 106148672, "step": 49205 }, { "epoch": 8.02773246329527, "grad_norm": 0.02516249008476734, "learning_rate": 0.0007479135624491946, "loss": 0.0108, "num_input_tokens_seen": 106158624, "step": 49210 }, { "epoch": 8.028548123980425, "grad_norm": 0.004129387903958559, "learning_rate": 0.0007478517458668943, "loss": 0.1184, "num_input_tokens_seen": 106169088, "step": 49215 }, { "epoch": 8.029363784665579, "grad_norm": 0.13105081021785736, "learning_rate": 0.0007477899242615639, "loss": 0.0293, "num_input_tokens_seen": 106180160, "step": 49220 }, { "epoch": 8.030179445350734, "grad_norm": 0.012096775695681572, "learning_rate": 0.0007477280976344563, "loss": 0.0939, "num_input_tokens_seen": 106191360, "step": 49225 }, { "epoch": 8.030995106035888, "grad_norm": 0.08228478580713272, "learning_rate": 0.0007476662659868246, "loss": 0.0115, "num_input_tokens_seen": 106201920, "step": 49230 }, { "epoch": 8.031810766721044, "grad_norm": 0.19451302289962769, "learning_rate": 0.0007476044293199218, "loss": 0.0295, "num_input_tokens_seen": 106213600, "step": 49235 }, { "epoch": 8.0326264274062, "grad_norm": 0.0745706856250763, "learning_rate": 0.0007475425876350011, "loss": 0.0415, "num_input_tokens_seen": 106224960, "step": 49240 }, { "epoch": 8.033442088091354, "grad_norm": 0.5527396202087402, "learning_rate": 0.000747480740933316, "loss": 0.0402, "num_input_tokens_seen": 106236672, "step": 49245 }, { "epoch": 8.03425774877651, "grad_norm": 0.24227845668792725, "learning_rate": 0.0007474188892161196, "loss": 0.1153, "num_input_tokens_seen": 106248384, "step": 49250 }, { "epoch": 8.035073409461663, "grad_norm": 0.007642609532922506, "learning_rate": 0.0007473570324846656, "loss": 0.0506, "num_input_tokens_seen": 106259392, "step": 49255 }, { "epoch": 8.035889070146819, "grad_norm": 0.009419330395758152, "learning_rate": 0.0007472951707402074, "loss": 0.0127, "num_input_tokens_seen": 106270016, "step": 49260 }, { "epoch": 8.036704730831975, "grad_norm": 0.3859071433544159, "learning_rate": 0.0007472333039839989, "loss": 0.1081, "num_input_tokens_seen": 106280480, "step": 49265 }, { "epoch": 8.037520391517129, "grad_norm": 0.01060212031006813, "learning_rate": 0.000747171432217294, "loss": 0.0197, "num_input_tokens_seen": 106290368, "step": 49270 }, { "epoch": 8.038336052202284, "grad_norm": 0.08629363030195236, "learning_rate": 0.0007471095554413463, "loss": 0.0552, "num_input_tokens_seen": 106300864, "step": 49275 }, { "epoch": 8.039151712887438, "grad_norm": 0.023045966401696205, "learning_rate": 0.0007470476736574102, "loss": 0.089, "num_input_tokens_seen": 106311200, "step": 49280 }, { "epoch": 8.039967373572594, "grad_norm": 0.041177909821271896, "learning_rate": 0.0007469857868667393, "loss": 0.07, "num_input_tokens_seen": 106321664, "step": 49285 }, { "epoch": 8.040783034257748, "grad_norm": 0.028013113886117935, "learning_rate": 0.0007469238950705883, "loss": 0.1591, "num_input_tokens_seen": 106332992, "step": 49290 }, { "epoch": 8.041598694942904, "grad_norm": 0.09774285554885864, "learning_rate": 0.0007468619982702112, "loss": 0.0339, "num_input_tokens_seen": 106344704, "step": 49295 }, { "epoch": 8.04241435562806, "grad_norm": 0.27080145478248596, "learning_rate": 0.0007468000964668625, "loss": 0.0334, "num_input_tokens_seen": 106356128, "step": 49300 }, { "epoch": 8.043230016313213, "grad_norm": 0.2473781555891037, "learning_rate": 0.0007467381896617968, "loss": 0.0286, "num_input_tokens_seen": 106365984, "step": 49305 }, { "epoch": 8.044045676998369, "grad_norm": 0.042729392647743225, "learning_rate": 0.0007466762778562687, "loss": 0.0308, "num_input_tokens_seen": 106377312, "step": 49310 }, { "epoch": 8.044861337683523, "grad_norm": 0.11403258144855499, "learning_rate": 0.000746614361051533, "loss": 0.1323, "num_input_tokens_seen": 106388064, "step": 49315 }, { "epoch": 8.045676998368679, "grad_norm": 0.09523624926805496, "learning_rate": 0.0007465524392488443, "loss": 0.1972, "num_input_tokens_seen": 106398848, "step": 49320 }, { "epoch": 8.046492659053834, "grad_norm": 0.018042881041765213, "learning_rate": 0.0007464905124494578, "loss": 0.0259, "num_input_tokens_seen": 106409440, "step": 49325 }, { "epoch": 8.047308319738988, "grad_norm": 0.5523084402084351, "learning_rate": 0.0007464285806546283, "loss": 0.0975, "num_input_tokens_seen": 106420160, "step": 49330 }, { "epoch": 8.048123980424144, "grad_norm": 0.019978735595941544, "learning_rate": 0.0007463666438656109, "loss": 0.1141, "num_input_tokens_seen": 106431904, "step": 49335 }, { "epoch": 8.048939641109298, "grad_norm": 0.008768611587584019, "learning_rate": 0.000746304702083661, "loss": 0.1173, "num_input_tokens_seen": 106443040, "step": 49340 }, { "epoch": 8.049755301794454, "grad_norm": 0.08687015622854233, "learning_rate": 0.0007462427553100339, "loss": 0.1408, "num_input_tokens_seen": 106454336, "step": 49345 }, { "epoch": 8.05057096247961, "grad_norm": 0.0457877553999424, "learning_rate": 0.0007461808035459848, "loss": 0.2646, "num_input_tokens_seen": 106465472, "step": 49350 }, { "epoch": 8.051386623164763, "grad_norm": 0.006969032343477011, "learning_rate": 0.0007461188467927695, "loss": 0.0344, "num_input_tokens_seen": 106476640, "step": 49355 }, { "epoch": 8.052202283849919, "grad_norm": 0.23481422662734985, "learning_rate": 0.0007460568850516436, "loss": 0.0402, "num_input_tokens_seen": 106486912, "step": 49360 }, { "epoch": 8.053017944535073, "grad_norm": 0.00975036807358265, "learning_rate": 0.0007459949183238627, "loss": 0.0311, "num_input_tokens_seen": 106497216, "step": 49365 }, { "epoch": 8.053833605220229, "grad_norm": 0.07172878831624985, "learning_rate": 0.0007459329466106829, "loss": 0.0491, "num_input_tokens_seen": 106507584, "step": 49370 }, { "epoch": 8.054649265905383, "grad_norm": 0.029521724209189415, "learning_rate": 0.0007458709699133597, "loss": 0.0364, "num_input_tokens_seen": 106519168, "step": 49375 }, { "epoch": 8.055464926590538, "grad_norm": 0.05464666709303856, "learning_rate": 0.0007458089882331495, "loss": 0.1033, "num_input_tokens_seen": 106530144, "step": 49380 }, { "epoch": 8.056280587275694, "grad_norm": 0.4695340692996979, "learning_rate": 0.0007457470015713085, "loss": 0.1863, "num_input_tokens_seen": 106540512, "step": 49385 }, { "epoch": 8.057096247960848, "grad_norm": 0.1266806721687317, "learning_rate": 0.0007456850099290927, "loss": 0.0315, "num_input_tokens_seen": 106552512, "step": 49390 }, { "epoch": 8.057911908646004, "grad_norm": 0.1220078319311142, "learning_rate": 0.0007456230133077583, "loss": 0.0588, "num_input_tokens_seen": 106563072, "step": 49395 }, { "epoch": 8.058727569331158, "grad_norm": 0.013576229102909565, "learning_rate": 0.0007455610117085618, "loss": 0.041, "num_input_tokens_seen": 106573952, "step": 49400 }, { "epoch": 8.059543230016313, "grad_norm": 0.08890219777822495, "learning_rate": 0.0007454990051327602, "loss": 0.0645, "num_input_tokens_seen": 106584608, "step": 49405 }, { "epoch": 8.060358890701469, "grad_norm": 0.20600968599319458, "learning_rate": 0.0007454369935816098, "loss": 0.0457, "num_input_tokens_seen": 106595296, "step": 49410 }, { "epoch": 8.061174551386623, "grad_norm": 0.08471699804067612, "learning_rate": 0.0007453749770563673, "loss": 0.0203, "num_input_tokens_seen": 106605792, "step": 49415 }, { "epoch": 8.061990212071779, "grad_norm": 0.0063440860249102116, "learning_rate": 0.0007453129555582896, "loss": 0.0542, "num_input_tokens_seen": 106616192, "step": 49420 }, { "epoch": 8.062805872756933, "grad_norm": 0.044900983572006226, "learning_rate": 0.0007452509290886336, "loss": 0.034, "num_input_tokens_seen": 106626656, "step": 49425 }, { "epoch": 8.063621533442088, "grad_norm": 0.021317848935723305, "learning_rate": 0.0007451888976486565, "loss": 0.0222, "num_input_tokens_seen": 106637632, "step": 49430 }, { "epoch": 8.064437194127244, "grad_norm": 0.011992895975708961, "learning_rate": 0.0007451268612396154, "loss": 0.0414, "num_input_tokens_seen": 106647936, "step": 49435 }, { "epoch": 8.065252854812398, "grad_norm": 0.006767969578504562, "learning_rate": 0.0007450648198627673, "loss": 0.0248, "num_input_tokens_seen": 106659360, "step": 49440 }, { "epoch": 8.066068515497554, "grad_norm": 0.30891647934913635, "learning_rate": 0.0007450027735193699, "loss": 0.0644, "num_input_tokens_seen": 106670240, "step": 49445 }, { "epoch": 8.066884176182707, "grad_norm": 0.13579487800598145, "learning_rate": 0.0007449407222106804, "loss": 0.1519, "num_input_tokens_seen": 106681600, "step": 49450 }, { "epoch": 8.067699836867863, "grad_norm": 0.019034622237086296, "learning_rate": 0.0007448786659379565, "loss": 0.022, "num_input_tokens_seen": 106692320, "step": 49455 }, { "epoch": 8.068515497553017, "grad_norm": 0.08954381197690964, "learning_rate": 0.0007448166047024556, "loss": 0.0122, "num_input_tokens_seen": 106703168, "step": 49460 }, { "epoch": 8.069331158238173, "grad_norm": 0.6258130669593811, "learning_rate": 0.0007447545385054358, "loss": 0.1219, "num_input_tokens_seen": 106713344, "step": 49465 }, { "epoch": 8.070146818923329, "grad_norm": 0.0238290186971426, "learning_rate": 0.0007446924673481548, "loss": 0.1651, "num_input_tokens_seen": 106723648, "step": 49470 }, { "epoch": 8.070962479608482, "grad_norm": 0.08919946849346161, "learning_rate": 0.0007446303912318705, "loss": 0.073, "num_input_tokens_seen": 106734240, "step": 49475 }, { "epoch": 8.071778140293638, "grad_norm": 0.01028291042894125, "learning_rate": 0.000744568310157841, "loss": 0.0159, "num_input_tokens_seen": 106744960, "step": 49480 }, { "epoch": 8.072593800978792, "grad_norm": 0.13486161828041077, "learning_rate": 0.0007445062241273244, "loss": 0.0215, "num_input_tokens_seen": 106756320, "step": 49485 }, { "epoch": 8.073409461663948, "grad_norm": 0.08335603028535843, "learning_rate": 0.000744444133141579, "loss": 0.0691, "num_input_tokens_seen": 106767072, "step": 49490 }, { "epoch": 8.074225122349104, "grad_norm": 0.02786366455256939, "learning_rate": 0.0007443820372018631, "loss": 0.0283, "num_input_tokens_seen": 106777888, "step": 49495 }, { "epoch": 8.075040783034257, "grad_norm": 0.4507330656051636, "learning_rate": 0.0007443199363094353, "loss": 0.116, "num_input_tokens_seen": 106789280, "step": 49500 }, { "epoch": 8.075856443719413, "grad_norm": 0.21954458951950073, "learning_rate": 0.0007442578304655541, "loss": 0.1244, "num_input_tokens_seen": 106800992, "step": 49505 }, { "epoch": 8.076672104404567, "grad_norm": 0.11994319409132004, "learning_rate": 0.0007441957196714778, "loss": 0.1696, "num_input_tokens_seen": 106812992, "step": 49510 }, { "epoch": 8.077487765089723, "grad_norm": 0.1749015748500824, "learning_rate": 0.0007441336039284656, "loss": 0.1866, "num_input_tokens_seen": 106824224, "step": 49515 }, { "epoch": 8.078303425774878, "grad_norm": 0.11079803109169006, "learning_rate": 0.0007440714832377764, "loss": 0.0292, "num_input_tokens_seen": 106834816, "step": 49520 }, { "epoch": 8.079119086460032, "grad_norm": 0.0724577084183693, "learning_rate": 0.0007440093576006688, "loss": 0.0448, "num_input_tokens_seen": 106844832, "step": 49525 }, { "epoch": 8.079934747145188, "grad_norm": 0.01546991802752018, "learning_rate": 0.000743947227018402, "loss": 0.0902, "num_input_tokens_seen": 106856096, "step": 49530 }, { "epoch": 8.080750407830342, "grad_norm": 0.07417545467615128, "learning_rate": 0.0007438850914922352, "loss": 0.063, "num_input_tokens_seen": 106865152, "step": 49535 }, { "epoch": 8.081566068515498, "grad_norm": 0.0071200099773705006, "learning_rate": 0.0007438229510234278, "loss": 0.2374, "num_input_tokens_seen": 106875232, "step": 49540 }, { "epoch": 8.082381729200652, "grad_norm": 0.007329224608838558, "learning_rate": 0.0007437608056132388, "loss": 0.0404, "num_input_tokens_seen": 106884352, "step": 49545 }, { "epoch": 8.083197389885807, "grad_norm": 0.32732099294662476, "learning_rate": 0.0007436986552629279, "loss": 0.0705, "num_input_tokens_seen": 106895456, "step": 49550 }, { "epoch": 8.084013050570963, "grad_norm": 0.08654429763555527, "learning_rate": 0.0007436364999737546, "loss": 0.1609, "num_input_tokens_seen": 106905920, "step": 49555 }, { "epoch": 8.084828711256117, "grad_norm": 0.09946247190237045, "learning_rate": 0.0007435743397469785, "loss": 0.0731, "num_input_tokens_seen": 106917312, "step": 49560 }, { "epoch": 8.085644371941273, "grad_norm": 0.002146139508113265, "learning_rate": 0.0007435121745838595, "loss": 0.1055, "num_input_tokens_seen": 106928320, "step": 49565 }, { "epoch": 8.086460032626427, "grad_norm": 0.018939005210995674, "learning_rate": 0.0007434500044856574, "loss": 0.0633, "num_input_tokens_seen": 106938272, "step": 49570 }, { "epoch": 8.087275693311582, "grad_norm": 0.4300839304924011, "learning_rate": 0.000743387829453632, "loss": 0.1596, "num_input_tokens_seen": 106949376, "step": 49575 }, { "epoch": 8.088091353996738, "grad_norm": 0.0654028132557869, "learning_rate": 0.0007433256494890435, "loss": 0.087, "num_input_tokens_seen": 106959648, "step": 49580 }, { "epoch": 8.088907014681892, "grad_norm": 0.023333145305514336, "learning_rate": 0.000743263464593152, "loss": 0.0649, "num_input_tokens_seen": 106969440, "step": 49585 }, { "epoch": 8.089722675367048, "grad_norm": 0.335827499628067, "learning_rate": 0.0007432012747672179, "loss": 0.073, "num_input_tokens_seen": 106979072, "step": 49590 }, { "epoch": 8.090538336052202, "grad_norm": 0.5987522006034851, "learning_rate": 0.0007431390800125013, "loss": 0.0829, "num_input_tokens_seen": 106991456, "step": 49595 }, { "epoch": 8.091353996737357, "grad_norm": 0.38535013794898987, "learning_rate": 0.0007430768803302629, "loss": 0.0769, "num_input_tokens_seen": 107002368, "step": 49600 }, { "epoch": 8.092169657422513, "grad_norm": 0.09133922308683395, "learning_rate": 0.0007430146757217631, "loss": 0.1293, "num_input_tokens_seen": 107013504, "step": 49605 }, { "epoch": 8.092985318107667, "grad_norm": 0.017097201198339462, "learning_rate": 0.0007429524661882626, "loss": 0.0436, "num_input_tokens_seen": 107024576, "step": 49610 }, { "epoch": 8.093800978792823, "grad_norm": 0.14418570697307587, "learning_rate": 0.0007428902517310222, "loss": 0.1431, "num_input_tokens_seen": 107035648, "step": 49615 }, { "epoch": 8.094616639477977, "grad_norm": 0.2599688470363617, "learning_rate": 0.0007428280323513028, "loss": 0.0591, "num_input_tokens_seen": 107045664, "step": 49620 }, { "epoch": 8.095432300163132, "grad_norm": 0.010762886144220829, "learning_rate": 0.0007427658080503652, "loss": 0.032, "num_input_tokens_seen": 107056960, "step": 49625 }, { "epoch": 8.096247960848286, "grad_norm": 0.2744116485118866, "learning_rate": 0.0007427035788294704, "loss": 0.0452, "num_input_tokens_seen": 107067872, "step": 49630 }, { "epoch": 8.097063621533442, "grad_norm": 0.0024410535115748644, "learning_rate": 0.0007426413446898799, "loss": 0.0186, "num_input_tokens_seen": 107078784, "step": 49635 }, { "epoch": 8.097879282218598, "grad_norm": 0.39112716913223267, "learning_rate": 0.0007425791056328546, "loss": 0.1896, "num_input_tokens_seen": 107089536, "step": 49640 }, { "epoch": 8.098694942903752, "grad_norm": 0.11201483756303787, "learning_rate": 0.0007425168616596561, "loss": 0.1497, "num_input_tokens_seen": 107100768, "step": 49645 }, { "epoch": 8.099510603588907, "grad_norm": 0.06533048301935196, "learning_rate": 0.0007424546127715456, "loss": 0.1199, "num_input_tokens_seen": 107112064, "step": 49650 }, { "epoch": 8.100326264274061, "grad_norm": 0.03158574178814888, "learning_rate": 0.0007423923589697849, "loss": 0.0344, "num_input_tokens_seen": 107123328, "step": 49655 }, { "epoch": 8.101141924959217, "grad_norm": 0.015326308086514473, "learning_rate": 0.0007423301002556355, "loss": 0.0336, "num_input_tokens_seen": 107135424, "step": 49660 }, { "epoch": 8.101957585644373, "grad_norm": 0.12226692587137222, "learning_rate": 0.0007422678366303592, "loss": 0.1156, "num_input_tokens_seen": 107146112, "step": 49665 }, { "epoch": 8.102773246329527, "grad_norm": 0.017345814034342766, "learning_rate": 0.000742205568095218, "loss": 0.1082, "num_input_tokens_seen": 107156576, "step": 49670 }, { "epoch": 8.103588907014682, "grad_norm": 0.101743184030056, "learning_rate": 0.0007421432946514736, "loss": 0.136, "num_input_tokens_seen": 107169664, "step": 49675 }, { "epoch": 8.104404567699836, "grad_norm": 0.3616216778755188, "learning_rate": 0.0007420810163003881, "loss": 0.1649, "num_input_tokens_seen": 107180640, "step": 49680 }, { "epoch": 8.105220228384992, "grad_norm": 0.01888289488852024, "learning_rate": 0.0007420187330432238, "loss": 0.0652, "num_input_tokens_seen": 107191904, "step": 49685 }, { "epoch": 8.106035889070148, "grad_norm": 0.025666696950793266, "learning_rate": 0.0007419564448812428, "loss": 0.0315, "num_input_tokens_seen": 107202464, "step": 49690 }, { "epoch": 8.106851549755302, "grad_norm": 0.03992503136396408, "learning_rate": 0.0007418941518157075, "loss": 0.0275, "num_input_tokens_seen": 107213568, "step": 49695 }, { "epoch": 8.107667210440457, "grad_norm": 0.025879623368382454, "learning_rate": 0.0007418318538478803, "loss": 0.1054, "num_input_tokens_seen": 107225024, "step": 49700 }, { "epoch": 8.108482871125611, "grad_norm": 0.3681522309780121, "learning_rate": 0.0007417695509790239, "loss": 0.1426, "num_input_tokens_seen": 107233536, "step": 49705 }, { "epoch": 8.109298531810767, "grad_norm": 0.2877691686153412, "learning_rate": 0.0007417072432104007, "loss": 0.0657, "num_input_tokens_seen": 107243712, "step": 49710 }, { "epoch": 8.11011419249592, "grad_norm": 0.2142242044210434, "learning_rate": 0.0007416449305432738, "loss": 0.0553, "num_input_tokens_seen": 107255136, "step": 49715 }, { "epoch": 8.110929853181077, "grad_norm": 0.025479895994067192, "learning_rate": 0.0007415826129789057, "loss": 0.0107, "num_input_tokens_seen": 107267456, "step": 49720 }, { "epoch": 8.111745513866232, "grad_norm": 0.0669064074754715, "learning_rate": 0.0007415202905185594, "loss": 0.03, "num_input_tokens_seen": 107278496, "step": 49725 }, { "epoch": 8.112561174551386, "grad_norm": 0.08186621218919754, "learning_rate": 0.0007414579631634981, "loss": 0.0363, "num_input_tokens_seen": 107291072, "step": 49730 }, { "epoch": 8.113376835236542, "grad_norm": 0.05036059394478798, "learning_rate": 0.0007413956309149848, "loss": 0.0561, "num_input_tokens_seen": 107301088, "step": 49735 }, { "epoch": 8.114192495921696, "grad_norm": 0.18325304985046387, "learning_rate": 0.000741333293774283, "loss": 0.0482, "num_input_tokens_seen": 107311936, "step": 49740 }, { "epoch": 8.115008156606851, "grad_norm": 0.12949243187904358, "learning_rate": 0.0007412709517426556, "loss": 0.0185, "num_input_tokens_seen": 107322528, "step": 49745 }, { "epoch": 8.115823817292007, "grad_norm": 0.23704536259174347, "learning_rate": 0.0007412086048213665, "loss": 0.0769, "num_input_tokens_seen": 107332256, "step": 49750 }, { "epoch": 8.116639477977161, "grad_norm": 0.10696770995855331, "learning_rate": 0.000741146253011679, "loss": 0.0415, "num_input_tokens_seen": 107343232, "step": 49755 }, { "epoch": 8.117455138662317, "grad_norm": 0.02270204946398735, "learning_rate": 0.0007410838963148568, "loss": 0.012, "num_input_tokens_seen": 107353056, "step": 49760 }, { "epoch": 8.11827079934747, "grad_norm": 0.040559329092502594, "learning_rate": 0.0007410215347321634, "loss": 0.0271, "num_input_tokens_seen": 107363584, "step": 49765 }, { "epoch": 8.119086460032626, "grad_norm": 0.11489695310592651, "learning_rate": 0.000740959168264863, "loss": 0.0584, "num_input_tokens_seen": 107373248, "step": 49770 }, { "epoch": 8.119902120717782, "grad_norm": 0.26011204719543457, "learning_rate": 0.0007408967969142193, "loss": 0.1218, "num_input_tokens_seen": 107385088, "step": 49775 }, { "epoch": 8.120717781402936, "grad_norm": 0.18486042320728302, "learning_rate": 0.0007408344206814965, "loss": 0.0415, "num_input_tokens_seen": 107395968, "step": 49780 }, { "epoch": 8.121533442088092, "grad_norm": 0.5513808131217957, "learning_rate": 0.0007407720395679585, "loss": 0.0767, "num_input_tokens_seen": 107405824, "step": 49785 }, { "epoch": 8.122349102773246, "grad_norm": 0.29454702138900757, "learning_rate": 0.0007407096535748698, "loss": 0.0852, "num_input_tokens_seen": 107415744, "step": 49790 }, { "epoch": 8.123164763458401, "grad_norm": 0.09023239463567734, "learning_rate": 0.0007406472627034946, "loss": 0.0107, "num_input_tokens_seen": 107426688, "step": 49795 }, { "epoch": 8.123980424143557, "grad_norm": 0.0033560402225703, "learning_rate": 0.0007405848669550973, "loss": 0.1001, "num_input_tokens_seen": 107438272, "step": 49800 }, { "epoch": 8.124796084828711, "grad_norm": 0.05456848815083504, "learning_rate": 0.0007405224663309425, "loss": 0.0161, "num_input_tokens_seen": 107448064, "step": 49805 }, { "epoch": 8.125611745513867, "grad_norm": 0.038041841238737106, "learning_rate": 0.0007404600608322948, "loss": 0.0109, "num_input_tokens_seen": 107458080, "step": 49810 }, { "epoch": 8.12642740619902, "grad_norm": 0.6141024827957153, "learning_rate": 0.0007403976504604189, "loss": 0.0896, "num_input_tokens_seen": 107468832, "step": 49815 }, { "epoch": 8.127243066884176, "grad_norm": 0.2184603363275528, "learning_rate": 0.0007403352352165797, "loss": 0.0299, "num_input_tokens_seen": 107480896, "step": 49820 }, { "epoch": 8.12805872756933, "grad_norm": 0.02148313820362091, "learning_rate": 0.0007402728151020419, "loss": 0.0465, "num_input_tokens_seen": 107492768, "step": 49825 }, { "epoch": 8.128874388254486, "grad_norm": 0.01422592718154192, "learning_rate": 0.0007402103901180708, "loss": 0.0318, "num_input_tokens_seen": 107504448, "step": 49830 }, { "epoch": 8.129690048939642, "grad_norm": 0.10204093158245087, "learning_rate": 0.0007401479602659315, "loss": 0.1322, "num_input_tokens_seen": 107515488, "step": 49835 }, { "epoch": 8.130505709624796, "grad_norm": 0.5433957576751709, "learning_rate": 0.000740085525546889, "loss": 0.0575, "num_input_tokens_seen": 107527584, "step": 49840 }, { "epoch": 8.131321370309951, "grad_norm": 0.011276071891188622, "learning_rate": 0.0007400230859622088, "loss": 0.0457, "num_input_tokens_seen": 107537344, "step": 49845 }, { "epoch": 8.132137030995105, "grad_norm": 0.03547811135649681, "learning_rate": 0.0007399606415131563, "loss": 0.0334, "num_input_tokens_seen": 107546848, "step": 49850 }, { "epoch": 8.132952691680261, "grad_norm": 0.1079113706946373, "learning_rate": 0.0007398981922009971, "loss": 0.0249, "num_input_tokens_seen": 107556704, "step": 49855 }, { "epoch": 8.133768352365417, "grad_norm": 0.004153805784881115, "learning_rate": 0.0007398357380269966, "loss": 0.0284, "num_input_tokens_seen": 107566528, "step": 49860 }, { "epoch": 8.13458401305057, "grad_norm": 0.26552143692970276, "learning_rate": 0.0007397732789924205, "loss": 0.0289, "num_input_tokens_seen": 107575776, "step": 49865 }, { "epoch": 8.135399673735726, "grad_norm": 0.34985512495040894, "learning_rate": 0.0007397108150985349, "loss": 0.1262, "num_input_tokens_seen": 107586112, "step": 49870 }, { "epoch": 8.13621533442088, "grad_norm": 0.050913307815790176, "learning_rate": 0.0007396483463466055, "loss": 0.2069, "num_input_tokens_seen": 107596576, "step": 49875 }, { "epoch": 8.137030995106036, "grad_norm": 0.03466951474547386, "learning_rate": 0.0007395858727378982, "loss": 0.0902, "num_input_tokens_seen": 107607328, "step": 49880 }, { "epoch": 8.137846655791192, "grad_norm": 0.027927404269576073, "learning_rate": 0.0007395233942736794, "loss": 0.0042, "num_input_tokens_seen": 107618560, "step": 49885 }, { "epoch": 8.138662316476346, "grad_norm": 0.05232899263501167, "learning_rate": 0.0007394609109552152, "loss": 0.0111, "num_input_tokens_seen": 107627296, "step": 49890 }, { "epoch": 8.139477977161501, "grad_norm": 0.006739957258105278, "learning_rate": 0.0007393984227837718, "loss": 0.0041, "num_input_tokens_seen": 107636736, "step": 49895 }, { "epoch": 8.140293637846655, "grad_norm": 0.005522626917809248, "learning_rate": 0.0007393359297606155, "loss": 0.064, "num_input_tokens_seen": 107646368, "step": 49900 }, { "epoch": 8.141109298531811, "grad_norm": 0.04193035885691643, "learning_rate": 0.0007392734318870133, "loss": 0.0623, "num_input_tokens_seen": 107656576, "step": 49905 }, { "epoch": 8.141924959216965, "grad_norm": 0.0401594340801239, "learning_rate": 0.0007392109291642311, "loss": 0.0809, "num_input_tokens_seen": 107666592, "step": 49910 }, { "epoch": 8.14274061990212, "grad_norm": 0.4051806330680847, "learning_rate": 0.0007391484215935363, "loss": 0.1379, "num_input_tokens_seen": 107677280, "step": 49915 }, { "epoch": 8.143556280587276, "grad_norm": 0.018833141773939133, "learning_rate": 0.000739085909176195, "loss": 0.006, "num_input_tokens_seen": 107686752, "step": 49920 }, { "epoch": 8.14437194127243, "grad_norm": 0.31956547498703003, "learning_rate": 0.0007390233919134747, "loss": 0.0801, "num_input_tokens_seen": 107698368, "step": 49925 }, { "epoch": 8.145187601957586, "grad_norm": 0.08044224977493286, "learning_rate": 0.0007389608698066422, "loss": 0.1192, "num_input_tokens_seen": 107708800, "step": 49930 }, { "epoch": 8.14600326264274, "grad_norm": 0.06322523206472397, "learning_rate": 0.0007388983428569643, "loss": 0.1975, "num_input_tokens_seen": 107719456, "step": 49935 }, { "epoch": 8.146818923327896, "grad_norm": 0.013761820271611214, "learning_rate": 0.0007388358110657085, "loss": 0.0226, "num_input_tokens_seen": 107730848, "step": 49940 }, { "epoch": 8.147634584013051, "grad_norm": 0.6720054745674133, "learning_rate": 0.000738773274434142, "loss": 0.1802, "num_input_tokens_seen": 107741856, "step": 49945 }, { "epoch": 8.148450244698205, "grad_norm": 0.25962674617767334, "learning_rate": 0.0007387107329635322, "loss": 0.0216, "num_input_tokens_seen": 107752512, "step": 49950 }, { "epoch": 8.149265905383361, "grad_norm": 0.07553454488515854, "learning_rate": 0.0007386481866551466, "loss": 0.0257, "num_input_tokens_seen": 107763104, "step": 49955 }, { "epoch": 8.150081566068515, "grad_norm": 0.007566601969301701, "learning_rate": 0.0007385856355102528, "loss": 0.1131, "num_input_tokens_seen": 107774016, "step": 49960 }, { "epoch": 8.15089722675367, "grad_norm": 0.27591443061828613, "learning_rate": 0.0007385230795301183, "loss": 0.0472, "num_input_tokens_seen": 107785024, "step": 49965 }, { "epoch": 8.151712887438826, "grad_norm": 0.0794505923986435, "learning_rate": 0.000738460518716011, "loss": 0.0348, "num_input_tokens_seen": 107796256, "step": 49970 }, { "epoch": 8.15252854812398, "grad_norm": 0.024514488875865936, "learning_rate": 0.0007383979530691989, "loss": 0.0133, "num_input_tokens_seen": 107807168, "step": 49975 }, { "epoch": 8.153344208809136, "grad_norm": 0.018452299758791924, "learning_rate": 0.0007383353825909498, "loss": 0.0339, "num_input_tokens_seen": 107818592, "step": 49980 }, { "epoch": 8.15415986949429, "grad_norm": 0.10709256678819656, "learning_rate": 0.0007382728072825318, "loss": 0.1723, "num_input_tokens_seen": 107828096, "step": 49985 }, { "epoch": 8.154975530179446, "grad_norm": 0.015050127170979977, "learning_rate": 0.0007382102271452132, "loss": 0.0077, "num_input_tokens_seen": 107838560, "step": 49990 }, { "epoch": 8.1557911908646, "grad_norm": 0.00515691889449954, "learning_rate": 0.0007381476421802621, "loss": 0.0442, "num_input_tokens_seen": 107849568, "step": 49995 }, { "epoch": 8.156606851549755, "grad_norm": 0.5890952944755554, "learning_rate": 0.0007380850523889469, "loss": 0.1493, "num_input_tokens_seen": 107860768, "step": 50000 }, { "epoch": 8.15742251223491, "grad_norm": 0.016232166439294815, "learning_rate": 0.0007380224577725361, "loss": 0.0465, "num_input_tokens_seen": 107871392, "step": 50005 }, { "epoch": 8.158238172920065, "grad_norm": 0.009946675039827824, "learning_rate": 0.0007379598583322982, "loss": 0.07, "num_input_tokens_seen": 107881984, "step": 50010 }, { "epoch": 8.15905383360522, "grad_norm": 0.7673853635787964, "learning_rate": 0.0007378972540695019, "loss": 0.0389, "num_input_tokens_seen": 107892992, "step": 50015 }, { "epoch": 8.159869494290374, "grad_norm": 0.036095134913921356, "learning_rate": 0.0007378346449854159, "loss": 0.1429, "num_input_tokens_seen": 107904928, "step": 50020 }, { "epoch": 8.16068515497553, "grad_norm": 0.010661177337169647, "learning_rate": 0.0007377720310813092, "loss": 0.1092, "num_input_tokens_seen": 107914496, "step": 50025 }, { "epoch": 8.161500815660686, "grad_norm": 0.4742550551891327, "learning_rate": 0.0007377094123584507, "loss": 0.2107, "num_input_tokens_seen": 107924608, "step": 50030 }, { "epoch": 8.16231647634584, "grad_norm": 0.24059396982192993, "learning_rate": 0.0007376467888181094, "loss": 0.1135, "num_input_tokens_seen": 107935008, "step": 50035 }, { "epoch": 8.163132137030995, "grad_norm": 0.8788039684295654, "learning_rate": 0.0007375841604615542, "loss": 0.1337, "num_input_tokens_seen": 107944672, "step": 50040 }, { "epoch": 8.16394779771615, "grad_norm": 0.07411758601665497, "learning_rate": 0.0007375215272900548, "loss": 0.0619, "num_input_tokens_seen": 107955776, "step": 50045 }, { "epoch": 8.164763458401305, "grad_norm": 0.31156018376350403, "learning_rate": 0.0007374588893048803, "loss": 0.045, "num_input_tokens_seen": 107966784, "step": 50050 }, { "epoch": 8.16557911908646, "grad_norm": 0.009732937440276146, "learning_rate": 0.0007373962465073002, "loss": 0.0316, "num_input_tokens_seen": 107977792, "step": 50055 }, { "epoch": 8.166394779771615, "grad_norm": 0.04900631681084633, "learning_rate": 0.0007373335988985839, "loss": 0.0067, "num_input_tokens_seen": 107989376, "step": 50060 }, { "epoch": 8.16721044045677, "grad_norm": 0.010533085092902184, "learning_rate": 0.0007372709464800013, "loss": 0.0385, "num_input_tokens_seen": 107999840, "step": 50065 }, { "epoch": 8.168026101141924, "grad_norm": 0.1689562201499939, "learning_rate": 0.0007372082892528218, "loss": 0.0879, "num_input_tokens_seen": 108010688, "step": 50070 }, { "epoch": 8.16884176182708, "grad_norm": 0.022172749042510986, "learning_rate": 0.0007371456272183156, "loss": 0.0226, "num_input_tokens_seen": 108021920, "step": 50075 }, { "epoch": 8.169657422512234, "grad_norm": 1.1432584524154663, "learning_rate": 0.0007370829603777523, "loss": 0.245, "num_input_tokens_seen": 108031680, "step": 50080 }, { "epoch": 8.17047308319739, "grad_norm": 0.18533046543598175, "learning_rate": 0.000737020288732402, "loss": 0.0355, "num_input_tokens_seen": 108043072, "step": 50085 }, { "epoch": 8.171288743882545, "grad_norm": 0.07516685873270035, "learning_rate": 0.0007369576122835349, "loss": 0.1629, "num_input_tokens_seen": 108053856, "step": 50090 }, { "epoch": 8.1721044045677, "grad_norm": 0.3980889320373535, "learning_rate": 0.0007368949310324211, "loss": 0.0964, "num_input_tokens_seen": 108063648, "step": 50095 }, { "epoch": 8.172920065252855, "grad_norm": 0.7919608354568481, "learning_rate": 0.0007368322449803311, "loss": 0.1821, "num_input_tokens_seen": 108074496, "step": 50100 }, { "epoch": 8.173735725938009, "grad_norm": 0.034267790615558624, "learning_rate": 0.0007367695541285353, "loss": 0.062, "num_input_tokens_seen": 108085440, "step": 50105 }, { "epoch": 8.174551386623165, "grad_norm": 0.412604421377182, "learning_rate": 0.0007367068584783041, "loss": 0.0513, "num_input_tokens_seen": 108095968, "step": 50110 }, { "epoch": 8.17536704730832, "grad_norm": 0.006883690133690834, "learning_rate": 0.000736644158030908, "loss": 0.0272, "num_input_tokens_seen": 108106304, "step": 50115 }, { "epoch": 8.176182707993474, "grad_norm": 0.09755579382181168, "learning_rate": 0.0007365814527876179, "loss": 0.1308, "num_input_tokens_seen": 108117600, "step": 50120 }, { "epoch": 8.17699836867863, "grad_norm": 0.55540931224823, "learning_rate": 0.0007365187427497045, "loss": 0.0554, "num_input_tokens_seen": 108127584, "step": 50125 }, { "epoch": 8.177814029363784, "grad_norm": 0.06628274917602539, "learning_rate": 0.0007364560279184387, "loss": 0.0171, "num_input_tokens_seen": 108138912, "step": 50130 }, { "epoch": 8.17862969004894, "grad_norm": 0.1109669953584671, "learning_rate": 0.0007363933082950917, "loss": 0.2083, "num_input_tokens_seen": 108149632, "step": 50135 }, { "epoch": 8.179445350734095, "grad_norm": 0.016871700063347816, "learning_rate": 0.0007363305838809344, "loss": 0.0195, "num_input_tokens_seen": 108161248, "step": 50140 }, { "epoch": 8.18026101141925, "grad_norm": 0.018982835114002228, "learning_rate": 0.0007362678546772379, "loss": 0.0312, "num_input_tokens_seen": 108172736, "step": 50145 }, { "epoch": 8.181076672104405, "grad_norm": 0.02116461470723152, "learning_rate": 0.0007362051206852736, "loss": 0.017, "num_input_tokens_seen": 108184160, "step": 50150 }, { "epoch": 8.181892332789559, "grad_norm": 0.025736715644598007, "learning_rate": 0.0007361423819063128, "loss": 0.0632, "num_input_tokens_seen": 108194560, "step": 50155 }, { "epoch": 8.182707993474715, "grad_norm": 0.03843200206756592, "learning_rate": 0.0007360796383416273, "loss": 0.0384, "num_input_tokens_seen": 108204928, "step": 50160 }, { "epoch": 8.18352365415987, "grad_norm": 0.023746982216835022, "learning_rate": 0.0007360168899924883, "loss": 0.0097, "num_input_tokens_seen": 108215680, "step": 50165 }, { "epoch": 8.184339314845024, "grad_norm": 0.01167546771466732, "learning_rate": 0.0007359541368601675, "loss": 0.0113, "num_input_tokens_seen": 108225888, "step": 50170 }, { "epoch": 8.18515497553018, "grad_norm": 0.027317887172102928, "learning_rate": 0.0007358913789459369, "loss": 0.0277, "num_input_tokens_seen": 108235584, "step": 50175 }, { "epoch": 8.185970636215334, "grad_norm": 0.12168390303850174, "learning_rate": 0.0007358286162510683, "loss": 0.0335, "num_input_tokens_seen": 108246720, "step": 50180 }, { "epoch": 8.18678629690049, "grad_norm": 0.10593614727258682, "learning_rate": 0.0007357658487768337, "loss": 0.1331, "num_input_tokens_seen": 108257952, "step": 50185 }, { "epoch": 8.187601957585644, "grad_norm": 0.22239764034748077, "learning_rate": 0.0007357030765245049, "loss": 0.0367, "num_input_tokens_seen": 108268800, "step": 50190 }, { "epoch": 8.1884176182708, "grad_norm": 0.501271665096283, "learning_rate": 0.0007356402994953544, "loss": 0.0432, "num_input_tokens_seen": 108278624, "step": 50195 }, { "epoch": 8.189233278955955, "grad_norm": 0.5714296102523804, "learning_rate": 0.0007355775176906543, "loss": 0.1206, "num_input_tokens_seen": 108288000, "step": 50200 }, { "epoch": 8.190048939641109, "grad_norm": 0.13616710901260376, "learning_rate": 0.0007355147311116768, "loss": 0.0923, "num_input_tokens_seen": 108298368, "step": 50205 }, { "epoch": 8.190864600326265, "grad_norm": 0.4116344153881073, "learning_rate": 0.0007354519397596946, "loss": 0.3281, "num_input_tokens_seen": 108309600, "step": 50210 }, { "epoch": 8.191680261011419, "grad_norm": 0.00943823903799057, "learning_rate": 0.0007353891436359801, "loss": 0.0188, "num_input_tokens_seen": 108321024, "step": 50215 }, { "epoch": 8.192495921696574, "grad_norm": 0.5498570203781128, "learning_rate": 0.000735326342741806, "loss": 0.2123, "num_input_tokens_seen": 108333408, "step": 50220 }, { "epoch": 8.19331158238173, "grad_norm": 0.07691401988267899, "learning_rate": 0.0007352635370784451, "loss": 0.1867, "num_input_tokens_seen": 108343840, "step": 50225 }, { "epoch": 8.194127243066884, "grad_norm": 0.7422526478767395, "learning_rate": 0.00073520072664717, "loss": 0.0915, "num_input_tokens_seen": 108355264, "step": 50230 }, { "epoch": 8.19494290375204, "grad_norm": 0.1376253366470337, "learning_rate": 0.000735137911449254, "loss": 0.1108, "num_input_tokens_seen": 108364928, "step": 50235 }, { "epoch": 8.195758564437194, "grad_norm": 0.0758839100599289, "learning_rate": 0.0007350750914859698, "loss": 0.0331, "num_input_tokens_seen": 108375168, "step": 50240 }, { "epoch": 8.19657422512235, "grad_norm": 0.37059351801872253, "learning_rate": 0.0007350122667585908, "loss": 0.1698, "num_input_tokens_seen": 108384608, "step": 50245 }, { "epoch": 8.197389885807505, "grad_norm": 0.039826974272727966, "learning_rate": 0.0007349494372683899, "loss": 0.119, "num_input_tokens_seen": 108394944, "step": 50250 }, { "epoch": 8.198205546492659, "grad_norm": 0.04359278455376625, "learning_rate": 0.0007348866030166407, "loss": 0.0561, "num_input_tokens_seen": 108406912, "step": 50255 }, { "epoch": 8.199021207177815, "grad_norm": 0.008550113067030907, "learning_rate": 0.0007348237640046165, "loss": 0.017, "num_input_tokens_seen": 108417120, "step": 50260 }, { "epoch": 8.199836867862969, "grad_norm": 0.36433014273643494, "learning_rate": 0.0007347609202335907, "loss": 0.0633, "num_input_tokens_seen": 108427744, "step": 50265 }, { "epoch": 8.200652528548124, "grad_norm": 0.16374653577804565, "learning_rate": 0.0007346980717048373, "loss": 0.11, "num_input_tokens_seen": 108439264, "step": 50270 }, { "epoch": 8.201468189233278, "grad_norm": 0.018815038725733757, "learning_rate": 0.0007346352184196296, "loss": 0.0806, "num_input_tokens_seen": 108450368, "step": 50275 }, { "epoch": 8.202283849918434, "grad_norm": 0.012595677748322487, "learning_rate": 0.0007345723603792415, "loss": 0.0471, "num_input_tokens_seen": 108460736, "step": 50280 }, { "epoch": 8.20309951060359, "grad_norm": 0.13198766112327576, "learning_rate": 0.000734509497584947, "loss": 0.1043, "num_input_tokens_seen": 108471840, "step": 50285 }, { "epoch": 8.203915171288743, "grad_norm": 0.0469261035323143, "learning_rate": 0.0007344466300380201, "loss": 0.023, "num_input_tokens_seen": 108482112, "step": 50290 }, { "epoch": 8.2047308319739, "grad_norm": 0.11499030888080597, "learning_rate": 0.0007343837577397347, "loss": 0.0643, "num_input_tokens_seen": 108492384, "step": 50295 }, { "epoch": 8.205546492659053, "grad_norm": 0.016630306839942932, "learning_rate": 0.0007343208806913651, "loss": 0.1171, "num_input_tokens_seen": 108503040, "step": 50300 }, { "epoch": 8.206362153344209, "grad_norm": 0.4379960000514984, "learning_rate": 0.0007342579988941858, "loss": 0.1286, "num_input_tokens_seen": 108513120, "step": 50305 }, { "epoch": 8.207177814029365, "grad_norm": 0.2995723485946655, "learning_rate": 0.0007341951123494708, "loss": 0.2013, "num_input_tokens_seen": 108524032, "step": 50310 }, { "epoch": 8.207993474714518, "grad_norm": 0.01344800554215908, "learning_rate": 0.0007341322210584947, "loss": 0.0704, "num_input_tokens_seen": 108534240, "step": 50315 }, { "epoch": 8.208809135399674, "grad_norm": 0.08376513421535492, "learning_rate": 0.0007340693250225322, "loss": 0.0672, "num_input_tokens_seen": 108545664, "step": 50320 }, { "epoch": 8.209624796084828, "grad_norm": 0.007541070692241192, "learning_rate": 0.0007340064242428579, "loss": 0.1271, "num_input_tokens_seen": 108555232, "step": 50325 }, { "epoch": 8.210440456769984, "grad_norm": 0.15333358943462372, "learning_rate": 0.0007339435187207466, "loss": 0.1546, "num_input_tokens_seen": 108566624, "step": 50330 }, { "epoch": 8.21125611745514, "grad_norm": 0.045886050909757614, "learning_rate": 0.0007338806084574731, "loss": 0.0298, "num_input_tokens_seen": 108576096, "step": 50335 }, { "epoch": 8.212071778140293, "grad_norm": 0.1812276691198349, "learning_rate": 0.0007338176934543124, "loss": 0.0449, "num_input_tokens_seen": 108587136, "step": 50340 }, { "epoch": 8.21288743882545, "grad_norm": 0.05384465306997299, "learning_rate": 0.0007337547737125394, "loss": 0.0731, "num_input_tokens_seen": 108598272, "step": 50345 }, { "epoch": 8.213703099510603, "grad_norm": 0.017519094049930573, "learning_rate": 0.0007336918492334294, "loss": 0.011, "num_input_tokens_seen": 108609792, "step": 50350 }, { "epoch": 8.214518760195759, "grad_norm": 0.6977828741073608, "learning_rate": 0.0007336289200182576, "loss": 0.0305, "num_input_tokens_seen": 108620832, "step": 50355 }, { "epoch": 8.215334420880913, "grad_norm": 0.0749376192688942, "learning_rate": 0.0007335659860682994, "loss": 0.0383, "num_input_tokens_seen": 108632384, "step": 50360 }, { "epoch": 8.216150081566068, "grad_norm": 0.33974960446357727, "learning_rate": 0.0007335030473848302, "loss": 0.0448, "num_input_tokens_seen": 108642496, "step": 50365 }, { "epoch": 8.216965742251224, "grad_norm": 0.23961451649665833, "learning_rate": 0.0007334401039691255, "loss": 0.0419, "num_input_tokens_seen": 108653952, "step": 50370 }, { "epoch": 8.217781402936378, "grad_norm": 0.007589209359139204, "learning_rate": 0.000733377155822461, "loss": 0.0164, "num_input_tokens_seen": 108665152, "step": 50375 }, { "epoch": 8.218597063621534, "grad_norm": 0.10964424163103104, "learning_rate": 0.0007333142029461124, "loss": 0.0663, "num_input_tokens_seen": 108675744, "step": 50380 }, { "epoch": 8.219412724306688, "grad_norm": 0.006165620405226946, "learning_rate": 0.0007332512453413555, "loss": 0.0142, "num_input_tokens_seen": 108686880, "step": 50385 }, { "epoch": 8.220228384991843, "grad_norm": 0.49109211564064026, "learning_rate": 0.0007331882830094661, "loss": 0.0456, "num_input_tokens_seen": 108695808, "step": 50390 }, { "epoch": 8.221044045676999, "grad_norm": 0.04432032257318497, "learning_rate": 0.0007331253159517204, "loss": 0.1469, "num_input_tokens_seen": 108707520, "step": 50395 }, { "epoch": 8.221859706362153, "grad_norm": 0.2862972617149353, "learning_rate": 0.0007330623441693944, "loss": 0.2855, "num_input_tokens_seen": 108718240, "step": 50400 }, { "epoch": 8.222675367047309, "grad_norm": 0.23254846036434174, "learning_rate": 0.0007329993676637643, "loss": 0.1385, "num_input_tokens_seen": 108729408, "step": 50405 }, { "epoch": 8.223491027732463, "grad_norm": 0.33261796832084656, "learning_rate": 0.0007329363864361065, "loss": 0.0255, "num_input_tokens_seen": 108740416, "step": 50410 }, { "epoch": 8.224306688417618, "grad_norm": 0.3530348837375641, "learning_rate": 0.0007328734004876974, "loss": 0.0586, "num_input_tokens_seen": 108751296, "step": 50415 }, { "epoch": 8.225122349102774, "grad_norm": 0.45861154794692993, "learning_rate": 0.0007328104098198131, "loss": 0.1158, "num_input_tokens_seen": 108762656, "step": 50420 }, { "epoch": 8.225938009787928, "grad_norm": 0.04200742393732071, "learning_rate": 0.000732747414433731, "loss": 0.1054, "num_input_tokens_seen": 108773504, "step": 50425 }, { "epoch": 8.226753670473084, "grad_norm": 0.5670138597488403, "learning_rate": 0.000732684414330727, "loss": 0.0669, "num_input_tokens_seen": 108784512, "step": 50430 }, { "epoch": 8.227569331158238, "grad_norm": 0.007214317563921213, "learning_rate": 0.0007326214095120781, "loss": 0.0179, "num_input_tokens_seen": 108793952, "step": 50435 }, { "epoch": 8.228384991843393, "grad_norm": 0.07553485035896301, "learning_rate": 0.0007325583999790613, "loss": 0.0504, "num_input_tokens_seen": 108803744, "step": 50440 }, { "epoch": 8.229200652528547, "grad_norm": 0.5017794370651245, "learning_rate": 0.0007324953857329535, "loss": 0.0549, "num_input_tokens_seen": 108814624, "step": 50445 }, { "epoch": 8.230016313213703, "grad_norm": 0.03471095487475395, "learning_rate": 0.0007324323667750319, "loss": 0.0479, "num_input_tokens_seen": 108825632, "step": 50450 }, { "epoch": 8.230831973898859, "grad_norm": 0.04200306907296181, "learning_rate": 0.0007323693431065734, "loss": 0.0732, "num_input_tokens_seen": 108835648, "step": 50455 }, { "epoch": 8.231647634584013, "grad_norm": 0.006238288711756468, "learning_rate": 0.0007323063147288553, "loss": 0.0126, "num_input_tokens_seen": 108847360, "step": 50460 }, { "epoch": 8.232463295269168, "grad_norm": 0.04576272517442703, "learning_rate": 0.0007322432816431551, "loss": 0.0209, "num_input_tokens_seen": 108858880, "step": 50465 }, { "epoch": 8.233278955954322, "grad_norm": 0.02991821989417076, "learning_rate": 0.0007321802438507502, "loss": 0.0887, "num_input_tokens_seen": 108869216, "step": 50470 }, { "epoch": 8.234094616639478, "grad_norm": 0.03408213332295418, "learning_rate": 0.0007321172013529182, "loss": 0.0797, "num_input_tokens_seen": 108878848, "step": 50475 }, { "epoch": 8.234910277324634, "grad_norm": 0.007532401476055384, "learning_rate": 0.0007320541541509366, "loss": 0.0129, "num_input_tokens_seen": 108889728, "step": 50480 }, { "epoch": 8.235725938009788, "grad_norm": 0.008567597717046738, "learning_rate": 0.0007319911022460831, "loss": 0.0537, "num_input_tokens_seen": 108899872, "step": 50485 }, { "epoch": 8.236541598694943, "grad_norm": 0.5470032691955566, "learning_rate": 0.0007319280456396357, "loss": 0.0722, "num_input_tokens_seen": 108910816, "step": 50490 }, { "epoch": 8.237357259380097, "grad_norm": 0.012514223344624043, "learning_rate": 0.0007318649843328722, "loss": 0.0977, "num_input_tokens_seen": 108920640, "step": 50495 }, { "epoch": 8.238172920065253, "grad_norm": 0.005304648540914059, "learning_rate": 0.0007318019183270707, "loss": 0.0591, "num_input_tokens_seen": 108932096, "step": 50500 }, { "epoch": 8.238988580750409, "grad_norm": 0.6024613380432129, "learning_rate": 0.0007317388476235091, "loss": 0.1251, "num_input_tokens_seen": 108943232, "step": 50505 }, { "epoch": 8.239804241435563, "grad_norm": 0.03080102987587452, "learning_rate": 0.0007316757722234659, "loss": 0.1289, "num_input_tokens_seen": 108954304, "step": 50510 }, { "epoch": 8.240619902120718, "grad_norm": 0.11405860632658005, "learning_rate": 0.0007316126921282193, "loss": 0.2297, "num_input_tokens_seen": 108964416, "step": 50515 }, { "epoch": 8.241435562805872, "grad_norm": 0.4650290012359619, "learning_rate": 0.0007315496073390477, "loss": 0.1551, "num_input_tokens_seen": 108974400, "step": 50520 }, { "epoch": 8.242251223491028, "grad_norm": 0.43233850598335266, "learning_rate": 0.0007314865178572295, "loss": 0.1345, "num_input_tokens_seen": 108985824, "step": 50525 }, { "epoch": 8.243066884176184, "grad_norm": 0.00965120829641819, "learning_rate": 0.0007314234236840434, "loss": 0.0107, "num_input_tokens_seen": 108996448, "step": 50530 }, { "epoch": 8.243882544861338, "grad_norm": 0.47202590107917786, "learning_rate": 0.000731360324820768, "loss": 0.0772, "num_input_tokens_seen": 109007968, "step": 50535 }, { "epoch": 8.244698205546493, "grad_norm": 0.3299475610256195, "learning_rate": 0.000731297221268682, "loss": 0.2343, "num_input_tokens_seen": 109018784, "step": 50540 }, { "epoch": 8.245513866231647, "grad_norm": 0.015838848426938057, "learning_rate": 0.0007312341130290645, "loss": 0.0477, "num_input_tokens_seen": 109028416, "step": 50545 }, { "epoch": 8.246329526916803, "grad_norm": 0.026836447417736053, "learning_rate": 0.0007311710001031943, "loss": 0.0294, "num_input_tokens_seen": 109039360, "step": 50550 }, { "epoch": 8.247145187601957, "grad_norm": 0.29086488485336304, "learning_rate": 0.0007311078824923506, "loss": 0.0406, "num_input_tokens_seen": 109049632, "step": 50555 }, { "epoch": 8.247960848287113, "grad_norm": 0.025218788534402847, "learning_rate": 0.0007310447601978125, "loss": 0.1305, "num_input_tokens_seen": 109060352, "step": 50560 }, { "epoch": 8.248776508972268, "grad_norm": 0.01048362348228693, "learning_rate": 0.0007309816332208592, "loss": 0.0586, "num_input_tokens_seen": 109071616, "step": 50565 }, { "epoch": 8.249592169657422, "grad_norm": 0.17068620026111603, "learning_rate": 0.00073091850156277, "loss": 0.0832, "num_input_tokens_seen": 109082432, "step": 50570 }, { "epoch": 8.250407830342578, "grad_norm": 0.019193200394511223, "learning_rate": 0.0007308553652248244, "loss": 0.0161, "num_input_tokens_seen": 109092736, "step": 50575 }, { "epoch": 8.251223491027732, "grad_norm": 0.35629740357398987, "learning_rate": 0.0007307922242083022, "loss": 0.0248, "num_input_tokens_seen": 109104544, "step": 50580 }, { "epoch": 8.252039151712887, "grad_norm": 0.3925999701023102, "learning_rate": 0.0007307290785144826, "loss": 0.1263, "num_input_tokens_seen": 109115552, "step": 50585 }, { "epoch": 8.252854812398043, "grad_norm": 0.010240672156214714, "learning_rate": 0.0007306659281446456, "loss": 0.0209, "num_input_tokens_seen": 109124960, "step": 50590 }, { "epoch": 8.253670473083197, "grad_norm": 0.4661763608455658, "learning_rate": 0.000730602773100071, "loss": 0.1983, "num_input_tokens_seen": 109136704, "step": 50595 }, { "epoch": 8.254486133768353, "grad_norm": 0.02378825657069683, "learning_rate": 0.0007305396133820385, "loss": 0.1614, "num_input_tokens_seen": 109147904, "step": 50600 }, { "epoch": 8.255301794453507, "grad_norm": 0.41588687896728516, "learning_rate": 0.0007304764489918284, "loss": 0.073, "num_input_tokens_seen": 109159232, "step": 50605 }, { "epoch": 8.256117455138662, "grad_norm": 0.006960693281143904, "learning_rate": 0.0007304132799307206, "loss": 0.0921, "num_input_tokens_seen": 109170112, "step": 50610 }, { "epoch": 8.256933115823816, "grad_norm": 0.2565586268901825, "learning_rate": 0.0007303501061999956, "loss": 0.078, "num_input_tokens_seen": 109180128, "step": 50615 }, { "epoch": 8.257748776508972, "grad_norm": 0.21065716445446014, "learning_rate": 0.0007302869278009332, "loss": 0.0648, "num_input_tokens_seen": 109190912, "step": 50620 }, { "epoch": 8.258564437194128, "grad_norm": 0.009265861473977566, "learning_rate": 0.0007302237447348141, "loss": 0.0928, "num_input_tokens_seen": 109202080, "step": 50625 }, { "epoch": 8.259380097879282, "grad_norm": 0.5156923532485962, "learning_rate": 0.0007301605570029189, "loss": 0.0406, "num_input_tokens_seen": 109212960, "step": 50630 }, { "epoch": 8.260195758564437, "grad_norm": 0.47369855642318726, "learning_rate": 0.000730097364606528, "loss": 0.1195, "num_input_tokens_seen": 109224608, "step": 50635 }, { "epoch": 8.261011419249591, "grad_norm": 0.27438226342201233, "learning_rate": 0.000730034167546922, "loss": 0.0464, "num_input_tokens_seen": 109234784, "step": 50640 }, { "epoch": 8.261827079934747, "grad_norm": 0.2882070243358612, "learning_rate": 0.0007299709658253819, "loss": 0.037, "num_input_tokens_seen": 109246304, "step": 50645 }, { "epoch": 8.262642740619903, "grad_norm": 0.017386503517627716, "learning_rate": 0.0007299077594431885, "loss": 0.022, "num_input_tokens_seen": 109255648, "step": 50650 }, { "epoch": 8.263458401305057, "grad_norm": 0.08384004235267639, "learning_rate": 0.0007298445484016225, "loss": 0.058, "num_input_tokens_seen": 109267200, "step": 50655 }, { "epoch": 8.264274061990212, "grad_norm": 0.06605534255504608, "learning_rate": 0.0007297813327019652, "loss": 0.1775, "num_input_tokens_seen": 109277408, "step": 50660 }, { "epoch": 8.265089722675366, "grad_norm": 0.10462336242198944, "learning_rate": 0.0007297181123454977, "loss": 0.0264, "num_input_tokens_seen": 109288256, "step": 50665 }, { "epoch": 8.265905383360522, "grad_norm": 0.21534991264343262, "learning_rate": 0.0007296548873335013, "loss": 0.0723, "num_input_tokens_seen": 109299168, "step": 50670 }, { "epoch": 8.266721044045678, "grad_norm": 0.010324773378670216, "learning_rate": 0.0007295916576672572, "loss": 0.183, "num_input_tokens_seen": 109309696, "step": 50675 }, { "epoch": 8.267536704730832, "grad_norm": 0.5436800122261047, "learning_rate": 0.0007295284233480468, "loss": 0.07, "num_input_tokens_seen": 109319776, "step": 50680 }, { "epoch": 8.268352365415987, "grad_norm": 0.39998385310173035, "learning_rate": 0.0007294651843771519, "loss": 0.0363, "num_input_tokens_seen": 109330208, "step": 50685 }, { "epoch": 8.269168026101141, "grad_norm": 0.08828753978013992, "learning_rate": 0.0007294019407558538, "loss": 0.0951, "num_input_tokens_seen": 109341664, "step": 50690 }, { "epoch": 8.269983686786297, "grad_norm": 0.18262991309165955, "learning_rate": 0.0007293386924854346, "loss": 0.1294, "num_input_tokens_seen": 109352192, "step": 50695 }, { "epoch": 8.270799347471453, "grad_norm": 0.2386414110660553, "learning_rate": 0.0007292754395671757, "loss": 0.0803, "num_input_tokens_seen": 109362688, "step": 50700 }, { "epoch": 8.271615008156607, "grad_norm": 0.029415108263492584, "learning_rate": 0.0007292121820023592, "loss": 0.0201, "num_input_tokens_seen": 109372896, "step": 50705 }, { "epoch": 8.272430668841762, "grad_norm": 0.013915937393903732, "learning_rate": 0.000729148919792267, "loss": 0.2763, "num_input_tokens_seen": 109383104, "step": 50710 }, { "epoch": 8.273246329526916, "grad_norm": 0.021518917754292488, "learning_rate": 0.000729085652938181, "loss": 0.1515, "num_input_tokens_seen": 109394208, "step": 50715 }, { "epoch": 8.274061990212072, "grad_norm": 0.029454736039042473, "learning_rate": 0.0007290223814413841, "loss": 0.1356, "num_input_tokens_seen": 109404864, "step": 50720 }, { "epoch": 8.274877650897226, "grad_norm": 0.029201842844486237, "learning_rate": 0.0007289591053031578, "loss": 0.0104, "num_input_tokens_seen": 109416064, "step": 50725 }, { "epoch": 8.275693311582382, "grad_norm": 0.6946372389793396, "learning_rate": 0.000728895824524785, "loss": 0.1181, "num_input_tokens_seen": 109426720, "step": 50730 }, { "epoch": 8.276508972267537, "grad_norm": 0.4854262173175812, "learning_rate": 0.0007288325391075478, "loss": 0.0679, "num_input_tokens_seen": 109437952, "step": 50735 }, { "epoch": 8.277324632952691, "grad_norm": 0.11141788959503174, "learning_rate": 0.000728769249052729, "loss": 0.0554, "num_input_tokens_seen": 109447808, "step": 50740 }, { "epoch": 8.278140293637847, "grad_norm": 0.08954327553510666, "learning_rate": 0.000728705954361611, "loss": 0.1318, "num_input_tokens_seen": 109458368, "step": 50745 }, { "epoch": 8.278955954323001, "grad_norm": 0.17944641411304474, "learning_rate": 0.0007286426550354768, "loss": 0.0335, "num_input_tokens_seen": 109468992, "step": 50750 }, { "epoch": 8.279771615008157, "grad_norm": 0.200909823179245, "learning_rate": 0.000728579351075609, "loss": 0.0496, "num_input_tokens_seen": 109480192, "step": 50755 }, { "epoch": 8.280587275693312, "grad_norm": 0.278914213180542, "learning_rate": 0.0007285160424832909, "loss": 0.119, "num_input_tokens_seen": 109489632, "step": 50760 }, { "epoch": 8.281402936378466, "grad_norm": 0.7490116357803345, "learning_rate": 0.0007284527292598051, "loss": 0.0603, "num_input_tokens_seen": 109501216, "step": 50765 }, { "epoch": 8.282218597063622, "grad_norm": 0.013474891893565655, "learning_rate": 0.0007283894114064351, "loss": 0.1666, "num_input_tokens_seen": 109512672, "step": 50770 }, { "epoch": 8.283034257748776, "grad_norm": 0.17690134048461914, "learning_rate": 0.0007283260889244639, "loss": 0.2202, "num_input_tokens_seen": 109523968, "step": 50775 }, { "epoch": 8.283849918433932, "grad_norm": 0.3860192894935608, "learning_rate": 0.0007282627618151747, "loss": 0.0793, "num_input_tokens_seen": 109534592, "step": 50780 }, { "epoch": 8.284665579119087, "grad_norm": 0.28460368514060974, "learning_rate": 0.0007281994300798511, "loss": 0.051, "num_input_tokens_seen": 109545152, "step": 50785 }, { "epoch": 8.285481239804241, "grad_norm": 0.013401616364717484, "learning_rate": 0.0007281360937197767, "loss": 0.0288, "num_input_tokens_seen": 109555616, "step": 50790 }, { "epoch": 8.286296900489397, "grad_norm": 0.006492276675999165, "learning_rate": 0.0007280727527362349, "loss": 0.0323, "num_input_tokens_seen": 109566720, "step": 50795 }, { "epoch": 8.28711256117455, "grad_norm": 0.02813137136399746, "learning_rate": 0.0007280094071305095, "loss": 0.0197, "num_input_tokens_seen": 109577728, "step": 50800 }, { "epoch": 8.287928221859707, "grad_norm": 0.045574672520160675, "learning_rate": 0.0007279460569038841, "loss": 0.0692, "num_input_tokens_seen": 109587840, "step": 50805 }, { "epoch": 8.28874388254486, "grad_norm": 0.1804543286561966, "learning_rate": 0.0007278827020576427, "loss": 0.0374, "num_input_tokens_seen": 109597696, "step": 50810 }, { "epoch": 8.289559543230016, "grad_norm": 0.2930324971675873, "learning_rate": 0.0007278193425930692, "loss": 0.223, "num_input_tokens_seen": 109608096, "step": 50815 }, { "epoch": 8.290375203915172, "grad_norm": 0.22046451270580292, "learning_rate": 0.0007277559785114478, "loss": 0.0372, "num_input_tokens_seen": 109618752, "step": 50820 }, { "epoch": 8.291190864600326, "grad_norm": 0.04654459282755852, "learning_rate": 0.0007276926098140626, "loss": 0.2485, "num_input_tokens_seen": 109629952, "step": 50825 }, { "epoch": 8.292006525285482, "grad_norm": 0.06827251613140106, "learning_rate": 0.0007276292365021979, "loss": 0.0222, "num_input_tokens_seen": 109639232, "step": 50830 }, { "epoch": 8.292822185970635, "grad_norm": 0.01363430730998516, "learning_rate": 0.0007275658585771378, "loss": 0.0912, "num_input_tokens_seen": 109649472, "step": 50835 }, { "epoch": 8.293637846655791, "grad_norm": 0.027167387306690216, "learning_rate": 0.0007275024760401668, "loss": 0.0846, "num_input_tokens_seen": 109659328, "step": 50840 }, { "epoch": 8.294453507340947, "grad_norm": 0.5709592700004578, "learning_rate": 0.0007274390888925697, "loss": 0.1535, "num_input_tokens_seen": 109670752, "step": 50845 }, { "epoch": 8.2952691680261, "grad_norm": 0.057625219225883484, "learning_rate": 0.0007273756971356308, "loss": 0.0366, "num_input_tokens_seen": 109681504, "step": 50850 }, { "epoch": 8.296084828711257, "grad_norm": 0.01994991861283779, "learning_rate": 0.000727312300770635, "loss": 0.0416, "num_input_tokens_seen": 109690272, "step": 50855 }, { "epoch": 8.29690048939641, "grad_norm": 0.6278749704360962, "learning_rate": 0.0007272488997988671, "loss": 0.1389, "num_input_tokens_seen": 109702048, "step": 50860 }, { "epoch": 8.297716150081566, "grad_norm": 0.044214554131031036, "learning_rate": 0.000727185494221612, "loss": 0.1218, "num_input_tokens_seen": 109712704, "step": 50865 }, { "epoch": 8.298531810766722, "grad_norm": 0.08822567760944366, "learning_rate": 0.0007271220840401546, "loss": 0.0426, "num_input_tokens_seen": 109724480, "step": 50870 }, { "epoch": 8.299347471451876, "grad_norm": 0.029616599902510643, "learning_rate": 0.0007270586692557799, "loss": 0.0551, "num_input_tokens_seen": 109735872, "step": 50875 }, { "epoch": 8.300163132137031, "grad_norm": 0.04738328605890274, "learning_rate": 0.0007269952498697733, "loss": 0.0257, "num_input_tokens_seen": 109746880, "step": 50880 }, { "epoch": 8.300978792822185, "grad_norm": 0.21511492133140564, "learning_rate": 0.0007269318258834202, "loss": 0.2079, "num_input_tokens_seen": 109757792, "step": 50885 }, { "epoch": 8.301794453507341, "grad_norm": 0.0892384722828865, "learning_rate": 0.0007268683972980056, "loss": 0.0814, "num_input_tokens_seen": 109770656, "step": 50890 }, { "epoch": 8.302610114192497, "grad_norm": 0.2008020132780075, "learning_rate": 0.0007268049641148152, "loss": 0.0527, "num_input_tokens_seen": 109780832, "step": 50895 }, { "epoch": 8.30342577487765, "grad_norm": 0.024666346609592438, "learning_rate": 0.0007267415263351343, "loss": 0.0331, "num_input_tokens_seen": 109792160, "step": 50900 }, { "epoch": 8.304241435562806, "grad_norm": 0.10605191439390182, "learning_rate": 0.0007266780839602488, "loss": 0.0459, "num_input_tokens_seen": 109804256, "step": 50905 }, { "epoch": 8.30505709624796, "grad_norm": 0.11223665624856949, "learning_rate": 0.0007266146369914445, "loss": 0.0252, "num_input_tokens_seen": 109815392, "step": 50910 }, { "epoch": 8.305872756933116, "grad_norm": 0.007813964039087296, "learning_rate": 0.0007265511854300069, "loss": 0.029, "num_input_tokens_seen": 109828096, "step": 50915 }, { "epoch": 8.30668841761827, "grad_norm": 0.09181029349565506, "learning_rate": 0.0007264877292772223, "loss": 0.0881, "num_input_tokens_seen": 109839232, "step": 50920 }, { "epoch": 8.307504078303426, "grad_norm": 0.021090298891067505, "learning_rate": 0.0007264242685343765, "loss": 0.0381, "num_input_tokens_seen": 109849792, "step": 50925 }, { "epoch": 8.308319738988581, "grad_norm": 0.006968356668949127, "learning_rate": 0.0007263608032027557, "loss": 0.0109, "num_input_tokens_seen": 109860896, "step": 50930 }, { "epoch": 8.309135399673735, "grad_norm": 0.017777597531676292, "learning_rate": 0.000726297333283646, "loss": 0.0828, "num_input_tokens_seen": 109871648, "step": 50935 }, { "epoch": 8.309951060358891, "grad_norm": 0.5401907563209534, "learning_rate": 0.0007262338587783338, "loss": 0.0693, "num_input_tokens_seen": 109882272, "step": 50940 }, { "epoch": 8.310766721044045, "grad_norm": 1.2843950986862183, "learning_rate": 0.0007261703796881054, "loss": 0.2279, "num_input_tokens_seen": 109893024, "step": 50945 }, { "epoch": 8.3115823817292, "grad_norm": 0.3990388810634613, "learning_rate": 0.0007261068960142474, "loss": 0.2202, "num_input_tokens_seen": 109904864, "step": 50950 }, { "epoch": 8.312398042414356, "grad_norm": 0.4096055328845978, "learning_rate": 0.0007260434077580463, "loss": 0.0658, "num_input_tokens_seen": 109915008, "step": 50955 }, { "epoch": 8.31321370309951, "grad_norm": 0.13297200202941895, "learning_rate": 0.0007259799149207887, "loss": 0.0894, "num_input_tokens_seen": 109925152, "step": 50960 }, { "epoch": 8.314029363784666, "grad_norm": 0.47274479269981384, "learning_rate": 0.0007259164175037616, "loss": 0.1827, "num_input_tokens_seen": 109935552, "step": 50965 }, { "epoch": 8.31484502446982, "grad_norm": 0.018033968284726143, "learning_rate": 0.0007258529155082516, "loss": 0.0093, "num_input_tokens_seen": 109947008, "step": 50970 }, { "epoch": 8.315660685154976, "grad_norm": 0.5141571760177612, "learning_rate": 0.0007257894089355458, "loss": 0.3334, "num_input_tokens_seen": 109957760, "step": 50975 }, { "epoch": 8.31647634584013, "grad_norm": 0.010217422619462013, "learning_rate": 0.0007257258977869313, "loss": 0.1524, "num_input_tokens_seen": 109968096, "step": 50980 }, { "epoch": 8.317292006525285, "grad_norm": 0.20990988612174988, "learning_rate": 0.000725662382063695, "loss": 0.0998, "num_input_tokens_seen": 109979712, "step": 50985 }, { "epoch": 8.318107667210441, "grad_norm": 0.4176332652568817, "learning_rate": 0.0007255988617671241, "loss": 0.1151, "num_input_tokens_seen": 109990624, "step": 50990 }, { "epoch": 8.318923327895595, "grad_norm": 0.03250904753804207, "learning_rate": 0.0007255353368985063, "loss": 0.2409, "num_input_tokens_seen": 110002368, "step": 50995 }, { "epoch": 8.31973898858075, "grad_norm": 0.1251508593559265, "learning_rate": 0.0007254718074591285, "loss": 0.0331, "num_input_tokens_seen": 110012704, "step": 51000 }, { "epoch": 8.320554649265905, "grad_norm": 0.11290530860424042, "learning_rate": 0.0007254082734502788, "loss": 0.0464, "num_input_tokens_seen": 110024608, "step": 51005 }, { "epoch": 8.32137030995106, "grad_norm": 0.388776570558548, "learning_rate": 0.0007253447348732443, "loss": 0.0709, "num_input_tokens_seen": 110034912, "step": 51010 }, { "epoch": 8.322185970636216, "grad_norm": 0.02710309810936451, "learning_rate": 0.000725281191729313, "loss": 0.0386, "num_input_tokens_seen": 110045856, "step": 51015 }, { "epoch": 8.32300163132137, "grad_norm": 0.045396558940410614, "learning_rate": 0.0007252176440197726, "loss": 0.0963, "num_input_tokens_seen": 110056032, "step": 51020 }, { "epoch": 8.323817292006526, "grad_norm": 0.33237507939338684, "learning_rate": 0.0007251540917459109, "loss": 0.1598, "num_input_tokens_seen": 110067296, "step": 51025 }, { "epoch": 8.32463295269168, "grad_norm": 0.0607922226190567, "learning_rate": 0.0007250905349090158, "loss": 0.0923, "num_input_tokens_seen": 110078528, "step": 51030 }, { "epoch": 8.325448613376835, "grad_norm": 0.16640405356884003, "learning_rate": 0.0007250269735103754, "loss": 0.0525, "num_input_tokens_seen": 110088608, "step": 51035 }, { "epoch": 8.326264274061991, "grad_norm": 0.01263394020497799, "learning_rate": 0.0007249634075512781, "loss": 0.0539, "num_input_tokens_seen": 110099872, "step": 51040 }, { "epoch": 8.327079934747145, "grad_norm": 0.06532914936542511, "learning_rate": 0.0007248998370330119, "loss": 0.0607, "num_input_tokens_seen": 110110496, "step": 51045 }, { "epoch": 8.3278955954323, "grad_norm": 0.2116706371307373, "learning_rate": 0.0007248362619568651, "loss": 0.1299, "num_input_tokens_seen": 110121568, "step": 51050 }, { "epoch": 8.328711256117455, "grad_norm": 0.008997824974358082, "learning_rate": 0.0007247726823241264, "loss": 0.0124, "num_input_tokens_seen": 110133280, "step": 51055 }, { "epoch": 8.32952691680261, "grad_norm": 0.45081767439842224, "learning_rate": 0.0007247090981360841, "loss": 0.0557, "num_input_tokens_seen": 110143584, "step": 51060 }, { "epoch": 8.330342577487766, "grad_norm": 0.07525065541267395, "learning_rate": 0.0007246455093940268, "loss": 0.0437, "num_input_tokens_seen": 110152992, "step": 51065 }, { "epoch": 8.33115823817292, "grad_norm": 0.06993364542722702, "learning_rate": 0.0007245819160992434, "loss": 0.0241, "num_input_tokens_seen": 110162656, "step": 51070 }, { "epoch": 8.331973898858076, "grad_norm": 0.1539648026227951, "learning_rate": 0.0007245183182530224, "loss": 0.0623, "num_input_tokens_seen": 110173280, "step": 51075 }, { "epoch": 8.33278955954323, "grad_norm": 0.287077933549881, "learning_rate": 0.0007244547158566531, "loss": 0.0762, "num_input_tokens_seen": 110182368, "step": 51080 }, { "epoch": 8.333605220228385, "grad_norm": 0.17505909502506256, "learning_rate": 0.0007243911089114239, "loss": 0.0184, "num_input_tokens_seen": 110194464, "step": 51085 }, { "epoch": 8.33442088091354, "grad_norm": 0.11994560062885284, "learning_rate": 0.0007243274974186245, "loss": 0.0536, "num_input_tokens_seen": 110205504, "step": 51090 }, { "epoch": 8.335236541598695, "grad_norm": 0.10068147629499435, "learning_rate": 0.0007242638813795437, "loss": 0.1425, "num_input_tokens_seen": 110216576, "step": 51095 }, { "epoch": 8.33605220228385, "grad_norm": 0.01714715175330639, "learning_rate": 0.0007242002607954708, "loss": 0.0987, "num_input_tokens_seen": 110227360, "step": 51100 }, { "epoch": 8.336867862969005, "grad_norm": 0.4225645363330841, "learning_rate": 0.000724136635667695, "loss": 0.2755, "num_input_tokens_seen": 110238240, "step": 51105 }, { "epoch": 8.33768352365416, "grad_norm": 0.14015738666057587, "learning_rate": 0.0007240730059975063, "loss": 0.2427, "num_input_tokens_seen": 110249952, "step": 51110 }, { "epoch": 8.338499184339314, "grad_norm": 0.3045877516269684, "learning_rate": 0.0007240093717861937, "loss": 0.1597, "num_input_tokens_seen": 110261248, "step": 51115 }, { "epoch": 8.33931484502447, "grad_norm": 0.03816094622015953, "learning_rate": 0.000723945733035047, "loss": 0.0168, "num_input_tokens_seen": 110270976, "step": 51120 }, { "epoch": 8.340130505709626, "grad_norm": 0.03267329931259155, "learning_rate": 0.0007238820897453559, "loss": 0.0253, "num_input_tokens_seen": 110281248, "step": 51125 }, { "epoch": 8.34094616639478, "grad_norm": 0.010226571932435036, "learning_rate": 0.0007238184419184104, "loss": 0.0149, "num_input_tokens_seen": 110292320, "step": 51130 }, { "epoch": 8.341761827079935, "grad_norm": 0.34732887148857117, "learning_rate": 0.0007237547895555001, "loss": 0.0719, "num_input_tokens_seen": 110302752, "step": 51135 }, { "epoch": 8.34257748776509, "grad_norm": 0.0355033203959465, "learning_rate": 0.0007236911326579152, "loss": 0.041, "num_input_tokens_seen": 110314080, "step": 51140 }, { "epoch": 8.343393148450245, "grad_norm": 0.2825056314468384, "learning_rate": 0.0007236274712269457, "loss": 0.0458, "num_input_tokens_seen": 110325248, "step": 51145 }, { "epoch": 8.3442088091354, "grad_norm": 0.3452303409576416, "learning_rate": 0.0007235638052638819, "loss": 0.1749, "num_input_tokens_seen": 110336096, "step": 51150 }, { "epoch": 8.345024469820554, "grad_norm": 0.17844438552856445, "learning_rate": 0.0007235001347700139, "loss": 0.1778, "num_input_tokens_seen": 110346944, "step": 51155 }, { "epoch": 8.34584013050571, "grad_norm": 0.035053033381700516, "learning_rate": 0.0007234364597466321, "loss": 0.0378, "num_input_tokens_seen": 110358464, "step": 51160 }, { "epoch": 8.346655791190864, "grad_norm": 0.02319471724331379, "learning_rate": 0.000723372780195027, "loss": 0.0116, "num_input_tokens_seen": 110368864, "step": 51165 }, { "epoch": 8.34747145187602, "grad_norm": 0.06733471900224686, "learning_rate": 0.0007233090961164892, "loss": 0.0223, "num_input_tokens_seen": 110380064, "step": 51170 }, { "epoch": 8.348287112561174, "grad_norm": 0.3718867897987366, "learning_rate": 0.000723245407512309, "loss": 0.1584, "num_input_tokens_seen": 110391232, "step": 51175 }, { "epoch": 8.34910277324633, "grad_norm": 0.22289599478244781, "learning_rate": 0.0007231817143837778, "loss": 0.1747, "num_input_tokens_seen": 110402432, "step": 51180 }, { "epoch": 8.349918433931485, "grad_norm": 0.13756129145622253, "learning_rate": 0.0007231180167321858, "loss": 0.0691, "num_input_tokens_seen": 110412576, "step": 51185 }, { "epoch": 8.350734094616639, "grad_norm": 0.21738028526306152, "learning_rate": 0.0007230543145588242, "loss": 0.0583, "num_input_tokens_seen": 110423040, "step": 51190 }, { "epoch": 8.351549755301795, "grad_norm": 0.04028356447815895, "learning_rate": 0.000722990607864984, "loss": 0.3543, "num_input_tokens_seen": 110432640, "step": 51195 }, { "epoch": 8.352365415986949, "grad_norm": 0.013157119043171406, "learning_rate": 0.0007229268966519562, "loss": 0.1033, "num_input_tokens_seen": 110444128, "step": 51200 }, { "epoch": 8.353181076672104, "grad_norm": 0.024720164015889168, "learning_rate": 0.0007228631809210321, "loss": 0.0378, "num_input_tokens_seen": 110454528, "step": 51205 }, { "epoch": 8.35399673735726, "grad_norm": 0.023486243560910225, "learning_rate": 0.0007227994606735029, "loss": 0.047, "num_input_tokens_seen": 110465632, "step": 51210 }, { "epoch": 8.354812398042414, "grad_norm": 0.14289453625679016, "learning_rate": 0.0007227357359106598, "loss": 0.0392, "num_input_tokens_seen": 110476224, "step": 51215 }, { "epoch": 8.35562805872757, "grad_norm": 0.030731141567230225, "learning_rate": 0.0007226720066337946, "loss": 0.0242, "num_input_tokens_seen": 110487360, "step": 51220 }, { "epoch": 8.356443719412724, "grad_norm": 0.06942770630121231, "learning_rate": 0.0007226082728441989, "loss": 0.0232, "num_input_tokens_seen": 110498688, "step": 51225 }, { "epoch": 8.35725938009788, "grad_norm": 0.09936600923538208, "learning_rate": 0.0007225445345431638, "loss": 0.0427, "num_input_tokens_seen": 110509728, "step": 51230 }, { "epoch": 8.358075040783035, "grad_norm": 0.017896434292197227, "learning_rate": 0.0007224807917319817, "loss": 0.0569, "num_input_tokens_seen": 110520352, "step": 51235 }, { "epoch": 8.358890701468189, "grad_norm": 0.10662972182035446, "learning_rate": 0.000722417044411944, "loss": 0.0248, "num_input_tokens_seen": 110530784, "step": 51240 }, { "epoch": 8.359706362153345, "grad_norm": 0.008738738484680653, "learning_rate": 0.0007223532925843427, "loss": 0.0749, "num_input_tokens_seen": 110542304, "step": 51245 }, { "epoch": 8.360522022838499, "grad_norm": 0.08951480686664581, "learning_rate": 0.0007222895362504698, "loss": 0.03, "num_input_tokens_seen": 110552544, "step": 51250 }, { "epoch": 8.361337683523654, "grad_norm": 0.0961562916636467, "learning_rate": 0.0007222257754116176, "loss": 0.0634, "num_input_tokens_seen": 110563104, "step": 51255 }, { "epoch": 8.362153344208808, "grad_norm": 0.8401079177856445, "learning_rate": 0.000722162010069078, "loss": 0.2565, "num_input_tokens_seen": 110572352, "step": 51260 }, { "epoch": 8.362969004893964, "grad_norm": 0.2447149157524109, "learning_rate": 0.0007220982402241436, "loss": 0.1039, "num_input_tokens_seen": 110581792, "step": 51265 }, { "epoch": 8.36378466557912, "grad_norm": 0.12326212227344513, "learning_rate": 0.0007220344658781065, "loss": 0.0939, "num_input_tokens_seen": 110593824, "step": 51270 }, { "epoch": 8.364600326264274, "grad_norm": 0.0050844973884522915, "learning_rate": 0.0007219706870322594, "loss": 0.1521, "num_input_tokens_seen": 110604992, "step": 51275 }, { "epoch": 8.36541598694943, "grad_norm": 0.08339108526706696, "learning_rate": 0.0007219069036878945, "loss": 0.1019, "num_input_tokens_seen": 110616448, "step": 51280 }, { "epoch": 8.366231647634583, "grad_norm": 0.15203113853931427, "learning_rate": 0.0007218431158463048, "loss": 0.0559, "num_input_tokens_seen": 110626624, "step": 51285 }, { "epoch": 8.367047308319739, "grad_norm": 0.43035805225372314, "learning_rate": 0.000721779323508783, "loss": 0.0835, "num_input_tokens_seen": 110637088, "step": 51290 }, { "epoch": 8.367862969004895, "grad_norm": 0.24240756034851074, "learning_rate": 0.0007217155266766217, "loss": 0.0869, "num_input_tokens_seen": 110648256, "step": 51295 }, { "epoch": 8.368678629690049, "grad_norm": 0.03179782256484032, "learning_rate": 0.0007216517253511143, "loss": 0.0488, "num_input_tokens_seen": 110659424, "step": 51300 }, { "epoch": 8.369494290375204, "grad_norm": 0.0883818119764328, "learning_rate": 0.0007215879195335531, "loss": 0.1341, "num_input_tokens_seen": 110671104, "step": 51305 }, { "epoch": 8.370309951060358, "grad_norm": 0.01965748891234398, "learning_rate": 0.0007215241092252319, "loss": 0.0589, "num_input_tokens_seen": 110683008, "step": 51310 }, { "epoch": 8.371125611745514, "grad_norm": 0.2372993677854538, "learning_rate": 0.0007214602944274435, "loss": 0.0686, "num_input_tokens_seen": 110692544, "step": 51315 }, { "epoch": 8.37194127243067, "grad_norm": 0.035493750125169754, "learning_rate": 0.0007213964751414812, "loss": 0.0601, "num_input_tokens_seen": 110702848, "step": 51320 }, { "epoch": 8.372756933115824, "grad_norm": 0.04357893764972687, "learning_rate": 0.0007213326513686386, "loss": 0.0256, "num_input_tokens_seen": 110713920, "step": 51325 }, { "epoch": 8.37357259380098, "grad_norm": 0.37469497323036194, "learning_rate": 0.0007212688231102091, "loss": 0.1052, "num_input_tokens_seen": 110724928, "step": 51330 }, { "epoch": 8.374388254486133, "grad_norm": 0.06512996554374695, "learning_rate": 0.000721204990367486, "loss": 0.0205, "num_input_tokens_seen": 110735456, "step": 51335 }, { "epoch": 8.375203915171289, "grad_norm": 0.0673840120434761, "learning_rate": 0.0007211411531417633, "loss": 0.0684, "num_input_tokens_seen": 110747232, "step": 51340 }, { "epoch": 8.376019575856443, "grad_norm": 0.463696151971817, "learning_rate": 0.0007210773114343345, "loss": 0.1663, "num_input_tokens_seen": 110757408, "step": 51345 }, { "epoch": 8.376835236541599, "grad_norm": 0.05973052233457565, "learning_rate": 0.0007210134652464935, "loss": 0.1337, "num_input_tokens_seen": 110768288, "step": 51350 }, { "epoch": 8.377650897226754, "grad_norm": 0.42217713594436646, "learning_rate": 0.0007209496145795343, "loss": 0.0716, "num_input_tokens_seen": 110778304, "step": 51355 }, { "epoch": 8.378466557911908, "grad_norm": 0.045719366520643234, "learning_rate": 0.000720885759434751, "loss": 0.0217, "num_input_tokens_seen": 110788448, "step": 51360 }, { "epoch": 8.379282218597064, "grad_norm": 0.019831620156764984, "learning_rate": 0.0007208218998134375, "loss": 0.0403, "num_input_tokens_seen": 110799136, "step": 51365 }, { "epoch": 8.380097879282218, "grad_norm": 0.005734425038099289, "learning_rate": 0.000720758035716888, "loss": 0.0141, "num_input_tokens_seen": 110811552, "step": 51370 }, { "epoch": 8.380913539967374, "grad_norm": 0.04387452453374863, "learning_rate": 0.0007206941671463969, "loss": 0.0539, "num_input_tokens_seen": 110823584, "step": 51375 }, { "epoch": 8.38172920065253, "grad_norm": 0.4235420227050781, "learning_rate": 0.0007206302941032586, "loss": 0.1127, "num_input_tokens_seen": 110834112, "step": 51380 }, { "epoch": 8.382544861337683, "grad_norm": 0.0086395014077425, "learning_rate": 0.0007205664165887673, "loss": 0.0662, "num_input_tokens_seen": 110844480, "step": 51385 }, { "epoch": 8.383360522022839, "grad_norm": 0.5018084049224854, "learning_rate": 0.000720502534604218, "loss": 0.0651, "num_input_tokens_seen": 110854816, "step": 51390 }, { "epoch": 8.384176182707993, "grad_norm": 0.004374734591692686, "learning_rate": 0.0007204386481509049, "loss": 0.0389, "num_input_tokens_seen": 110865920, "step": 51395 }, { "epoch": 8.384991843393149, "grad_norm": 0.1379576176404953, "learning_rate": 0.0007203747572301231, "loss": 0.0076, "num_input_tokens_seen": 110877024, "step": 51400 }, { "epoch": 8.385807504078304, "grad_norm": 0.014870010316371918, "learning_rate": 0.0007203108618431672, "loss": 0.102, "num_input_tokens_seen": 110888960, "step": 51405 }, { "epoch": 8.386623164763458, "grad_norm": 0.008077307604253292, "learning_rate": 0.0007202469619913322, "loss": 0.0232, "num_input_tokens_seen": 110900864, "step": 51410 }, { "epoch": 8.387438825448614, "grad_norm": 0.03390329331159592, "learning_rate": 0.0007201830576759132, "loss": 0.3012, "num_input_tokens_seen": 110910656, "step": 51415 }, { "epoch": 8.388254486133768, "grad_norm": 0.6233205795288086, "learning_rate": 0.0007201191488982051, "loss": 0.0807, "num_input_tokens_seen": 110922464, "step": 51420 }, { "epoch": 8.389070146818923, "grad_norm": 0.3525693118572235, "learning_rate": 0.0007200552356595031, "loss": 0.0769, "num_input_tokens_seen": 110933088, "step": 51425 }, { "epoch": 8.38988580750408, "grad_norm": 0.10931502282619476, "learning_rate": 0.0007199913179611029, "loss": 0.1028, "num_input_tokens_seen": 110941472, "step": 51430 }, { "epoch": 8.390701468189233, "grad_norm": 0.017395833507180214, "learning_rate": 0.0007199273958042994, "loss": 0.0965, "num_input_tokens_seen": 110952352, "step": 51435 }, { "epoch": 8.391517128874389, "grad_norm": 0.016400756314396858, "learning_rate": 0.0007198634691903882, "loss": 0.1275, "num_input_tokens_seen": 110962304, "step": 51440 }, { "epoch": 8.392332789559543, "grad_norm": 0.06746278703212738, "learning_rate": 0.0007197995381206649, "loss": 0.0618, "num_input_tokens_seen": 110973536, "step": 51445 }, { "epoch": 8.393148450244698, "grad_norm": 0.23783336579799652, "learning_rate": 0.0007197356025964252, "loss": 0.1529, "num_input_tokens_seen": 110985792, "step": 51450 }, { "epoch": 8.393964110929852, "grad_norm": 0.4625706374645233, "learning_rate": 0.0007196716626189646, "loss": 0.0866, "num_input_tokens_seen": 110996544, "step": 51455 }, { "epoch": 8.394779771615008, "grad_norm": 0.0465809628367424, "learning_rate": 0.0007196077181895792, "loss": 0.1388, "num_input_tokens_seen": 111007840, "step": 51460 }, { "epoch": 8.395595432300164, "grad_norm": 0.16916170716285706, "learning_rate": 0.0007195437693095647, "loss": 0.0795, "num_input_tokens_seen": 111018688, "step": 51465 }, { "epoch": 8.396411092985318, "grad_norm": 0.07622670382261276, "learning_rate": 0.0007194798159802174, "loss": 0.1388, "num_input_tokens_seen": 111029408, "step": 51470 }, { "epoch": 8.397226753670473, "grad_norm": 0.08947384357452393, "learning_rate": 0.0007194158582028332, "loss": 0.0648, "num_input_tokens_seen": 111040256, "step": 51475 }, { "epoch": 8.398042414355627, "grad_norm": 0.07268799841403961, "learning_rate": 0.0007193518959787081, "loss": 0.0506, "num_input_tokens_seen": 111050656, "step": 51480 }, { "epoch": 8.398858075040783, "grad_norm": 0.013356252573430538, "learning_rate": 0.0007192879293091386, "loss": 0.0287, "num_input_tokens_seen": 111060896, "step": 51485 }, { "epoch": 8.399673735725939, "grad_norm": 0.26962223649024963, "learning_rate": 0.000719223958195421, "loss": 0.087, "num_input_tokens_seen": 111071392, "step": 51490 }, { "epoch": 8.400489396411093, "grad_norm": 0.05497454106807709, "learning_rate": 0.0007191599826388518, "loss": 0.0554, "num_input_tokens_seen": 111081248, "step": 51495 }, { "epoch": 8.401305057096248, "grad_norm": 0.18138113617897034, "learning_rate": 0.0007190960026407276, "loss": 0.113, "num_input_tokens_seen": 111091456, "step": 51500 }, { "epoch": 8.402120717781402, "grad_norm": 0.014315619133412838, "learning_rate": 0.0007190320182023449, "loss": 0.0654, "num_input_tokens_seen": 111102528, "step": 51505 }, { "epoch": 8.402936378466558, "grad_norm": 0.03323863446712494, "learning_rate": 0.0007189680293250005, "loss": 0.0431, "num_input_tokens_seen": 111113984, "step": 51510 }, { "epoch": 8.403752039151712, "grad_norm": 0.0913081169128418, "learning_rate": 0.0007189040360099913, "loss": 0.0786, "num_input_tokens_seen": 111125184, "step": 51515 }, { "epoch": 8.404567699836868, "grad_norm": 0.021273715421557426, "learning_rate": 0.000718840038258614, "loss": 0.0903, "num_input_tokens_seen": 111136416, "step": 51520 }, { "epoch": 8.405383360522023, "grad_norm": 0.06742476671934128, "learning_rate": 0.0007187760360721658, "loss": 0.2297, "num_input_tokens_seen": 111146752, "step": 51525 }, { "epoch": 8.406199021207177, "grad_norm": 0.18190132081508636, "learning_rate": 0.0007187120294519434, "loss": 0.1169, "num_input_tokens_seen": 111157920, "step": 51530 }, { "epoch": 8.407014681892333, "grad_norm": 0.011462472379207611, "learning_rate": 0.0007186480183992446, "loss": 0.045, "num_input_tokens_seen": 111169312, "step": 51535 }, { "epoch": 8.407830342577487, "grad_norm": 0.00840445514768362, "learning_rate": 0.0007185840029153663, "loss": 0.1244, "num_input_tokens_seen": 111181088, "step": 51540 }, { "epoch": 8.408646003262643, "grad_norm": 0.35941943526268005, "learning_rate": 0.0007185199830016058, "loss": 0.2834, "num_input_tokens_seen": 111191392, "step": 51545 }, { "epoch": 8.409461663947798, "grad_norm": 0.3591446578502655, "learning_rate": 0.0007184559586592606, "loss": 0.1527, "num_input_tokens_seen": 111201568, "step": 51550 }, { "epoch": 8.410277324632952, "grad_norm": 0.44410571455955505, "learning_rate": 0.0007183919298896283, "loss": 0.2306, "num_input_tokens_seen": 111212992, "step": 51555 }, { "epoch": 8.411092985318108, "grad_norm": 0.00844508595764637, "learning_rate": 0.0007183278966940065, "loss": 0.0706, "num_input_tokens_seen": 111224352, "step": 51560 }, { "epoch": 8.411908646003262, "grad_norm": 0.01503937877714634, "learning_rate": 0.000718263859073693, "loss": 0.0308, "num_input_tokens_seen": 111235264, "step": 51565 }, { "epoch": 8.412724306688418, "grad_norm": 0.3758123517036438, "learning_rate": 0.0007181998170299854, "loss": 0.0632, "num_input_tokens_seen": 111244384, "step": 51570 }, { "epoch": 8.413539967373573, "grad_norm": 0.4277532994747162, "learning_rate": 0.0007181357705641818, "loss": 0.0757, "num_input_tokens_seen": 111255232, "step": 51575 }, { "epoch": 8.414355628058727, "grad_norm": 0.03630569949746132, "learning_rate": 0.0007180717196775799, "loss": 0.0147, "num_input_tokens_seen": 111266496, "step": 51580 }, { "epoch": 8.415171288743883, "grad_norm": 0.020137974992394447, "learning_rate": 0.0007180076643714781, "loss": 0.078, "num_input_tokens_seen": 111277120, "step": 51585 }, { "epoch": 8.415986949429037, "grad_norm": 0.07960548251867294, "learning_rate": 0.0007179436046471743, "loss": 0.155, "num_input_tokens_seen": 111286912, "step": 51590 }, { "epoch": 8.416802610114193, "grad_norm": 0.10560011863708496, "learning_rate": 0.0007178795405059671, "loss": 0.0306, "num_input_tokens_seen": 111298144, "step": 51595 }, { "epoch": 8.417618270799348, "grad_norm": 0.03849923238158226, "learning_rate": 0.0007178154719491545, "loss": 0.0663, "num_input_tokens_seen": 111306976, "step": 51600 }, { "epoch": 8.418433931484502, "grad_norm": 0.5101104974746704, "learning_rate": 0.0007177513989780349, "loss": 0.0563, "num_input_tokens_seen": 111316896, "step": 51605 }, { "epoch": 8.419249592169658, "grad_norm": 0.09940233081579208, "learning_rate": 0.0007176873215939072, "loss": 0.0401, "num_input_tokens_seen": 111327808, "step": 51610 }, { "epoch": 8.420065252854812, "grad_norm": 0.18149471282958984, "learning_rate": 0.0007176232397980696, "loss": 0.0773, "num_input_tokens_seen": 111339008, "step": 51615 }, { "epoch": 8.420880913539968, "grad_norm": 0.050501253455877304, "learning_rate": 0.000717559153591821, "loss": 0.0433, "num_input_tokens_seen": 111349408, "step": 51620 }, { "epoch": 8.421696574225122, "grad_norm": 0.015429974533617496, "learning_rate": 0.0007174950629764602, "loss": 0.0414, "num_input_tokens_seen": 111360896, "step": 51625 }, { "epoch": 8.422512234910277, "grad_norm": 0.04846658930182457, "learning_rate": 0.0007174309679532859, "loss": 0.1062, "num_input_tokens_seen": 111371552, "step": 51630 }, { "epoch": 8.423327895595433, "grad_norm": 0.16041985154151917, "learning_rate": 0.0007173668685235973, "loss": 0.0529, "num_input_tokens_seen": 111382944, "step": 51635 }, { "epoch": 8.424143556280587, "grad_norm": 0.08502721786499023, "learning_rate": 0.0007173027646886934, "loss": 0.0467, "num_input_tokens_seen": 111394240, "step": 51640 }, { "epoch": 8.424959216965743, "grad_norm": 0.32133081555366516, "learning_rate": 0.0007172386564498733, "loss": 0.0236, "num_input_tokens_seen": 111404224, "step": 51645 }, { "epoch": 8.425774877650896, "grad_norm": 0.06941556185483932, "learning_rate": 0.0007171745438084362, "loss": 0.0783, "num_input_tokens_seen": 111414848, "step": 51650 }, { "epoch": 8.426590538336052, "grad_norm": 0.12235123664140701, "learning_rate": 0.0007171104267656814, "loss": 0.1508, "num_input_tokens_seen": 111424960, "step": 51655 }, { "epoch": 8.427406199021208, "grad_norm": 0.09398604184389114, "learning_rate": 0.0007170463053229085, "loss": 0.0718, "num_input_tokens_seen": 111435104, "step": 51660 }, { "epoch": 8.428221859706362, "grad_norm": 0.012065643444657326, "learning_rate": 0.0007169821794814168, "loss": 0.0252, "num_input_tokens_seen": 111445312, "step": 51665 }, { "epoch": 8.429037520391518, "grad_norm": 1.0378928184509277, "learning_rate": 0.000716918049242506, "loss": 0.0997, "num_input_tokens_seen": 111456800, "step": 51670 }, { "epoch": 8.429853181076671, "grad_norm": 0.07212609797716141, "learning_rate": 0.0007168539146074757, "loss": 0.184, "num_input_tokens_seen": 111468416, "step": 51675 }, { "epoch": 8.430668841761827, "grad_norm": 0.11962103098630905, "learning_rate": 0.0007167897755776258, "loss": 0.172, "num_input_tokens_seen": 111478944, "step": 51680 }, { "epoch": 8.431484502446983, "grad_norm": 0.010223951190710068, "learning_rate": 0.0007167256321542561, "loss": 0.0183, "num_input_tokens_seen": 111490528, "step": 51685 }, { "epoch": 8.432300163132137, "grad_norm": 0.3899267017841339, "learning_rate": 0.0007166614843386666, "loss": 0.0837, "num_input_tokens_seen": 111501600, "step": 51690 }, { "epoch": 8.433115823817293, "grad_norm": 0.02706688456237316, "learning_rate": 0.0007165973321321571, "loss": 0.0166, "num_input_tokens_seen": 111511264, "step": 51695 }, { "epoch": 8.433931484502446, "grad_norm": 0.42511776089668274, "learning_rate": 0.0007165331755360281, "loss": 0.096, "num_input_tokens_seen": 111523328, "step": 51700 }, { "epoch": 8.434747145187602, "grad_norm": 0.19688576459884644, "learning_rate": 0.0007164690145515793, "loss": 0.0724, "num_input_tokens_seen": 111534144, "step": 51705 }, { "epoch": 8.435562805872756, "grad_norm": 0.027763258665800095, "learning_rate": 0.0007164048491801116, "loss": 0.0362, "num_input_tokens_seen": 111544288, "step": 51710 }, { "epoch": 8.436378466557912, "grad_norm": 0.05267176404595375, "learning_rate": 0.0007163406794229249, "loss": 0.0252, "num_input_tokens_seen": 111555104, "step": 51715 }, { "epoch": 8.437194127243067, "grad_norm": 0.01811242289841175, "learning_rate": 0.0007162765052813199, "loss": 0.1211, "num_input_tokens_seen": 111565344, "step": 51720 }, { "epoch": 8.438009787928221, "grad_norm": 0.03151806816458702, "learning_rate": 0.0007162123267565972, "loss": 0.0214, "num_input_tokens_seen": 111574880, "step": 51725 }, { "epoch": 8.438825448613377, "grad_norm": 0.009406226687133312, "learning_rate": 0.0007161481438500574, "loss": 0.0163, "num_input_tokens_seen": 111585184, "step": 51730 }, { "epoch": 8.439641109298531, "grad_norm": 0.025325605645775795, "learning_rate": 0.0007160839565630014, "loss": 0.1674, "num_input_tokens_seen": 111595040, "step": 51735 }, { "epoch": 8.440456769983687, "grad_norm": 0.03474397957324982, "learning_rate": 0.0007160197648967298, "loss": 0.0507, "num_input_tokens_seen": 111605152, "step": 51740 }, { "epoch": 8.441272430668842, "grad_norm": 0.019671516492962837, "learning_rate": 0.0007159555688525434, "loss": 0.0995, "num_input_tokens_seen": 111615840, "step": 51745 }, { "epoch": 8.442088091353996, "grad_norm": 0.2522837221622467, "learning_rate": 0.0007158913684317437, "loss": 0.1472, "num_input_tokens_seen": 111626592, "step": 51750 }, { "epoch": 8.442903752039152, "grad_norm": 0.07300834357738495, "learning_rate": 0.0007158271636356315, "loss": 0.0165, "num_input_tokens_seen": 111636896, "step": 51755 }, { "epoch": 8.443719412724306, "grad_norm": 0.060791414231061935, "learning_rate": 0.000715762954465508, "loss": 0.0244, "num_input_tokens_seen": 111647232, "step": 51760 }, { "epoch": 8.444535073409462, "grad_norm": 0.1320922076702118, "learning_rate": 0.0007156987409226745, "loss": 0.0786, "num_input_tokens_seen": 111658048, "step": 51765 }, { "epoch": 8.445350734094617, "grad_norm": 0.38156402111053467, "learning_rate": 0.0007156345230084325, "loss": 0.1279, "num_input_tokens_seen": 111669632, "step": 51770 }, { "epoch": 8.446166394779771, "grad_norm": 0.6001725196838379, "learning_rate": 0.0007155703007240832, "loss": 0.097, "num_input_tokens_seen": 111682400, "step": 51775 }, { "epoch": 8.446982055464927, "grad_norm": 0.05568667873740196, "learning_rate": 0.0007155060740709284, "loss": 0.0154, "num_input_tokens_seen": 111693056, "step": 51780 }, { "epoch": 8.447797716150081, "grad_norm": 0.5822984576225281, "learning_rate": 0.0007154418430502696, "loss": 0.0219, "num_input_tokens_seen": 111703680, "step": 51785 }, { "epoch": 8.448613376835237, "grad_norm": 0.12925441563129425, "learning_rate": 0.0007153776076634084, "loss": 0.0449, "num_input_tokens_seen": 111714400, "step": 51790 }, { "epoch": 8.449429037520392, "grad_norm": 0.5694994330406189, "learning_rate": 0.0007153133679116469, "loss": 0.2423, "num_input_tokens_seen": 111724992, "step": 51795 }, { "epoch": 8.450244698205546, "grad_norm": 0.23656250536441803, "learning_rate": 0.0007152491237962867, "loss": 0.113, "num_input_tokens_seen": 111736896, "step": 51800 }, { "epoch": 8.451060358890702, "grad_norm": 0.39868900179862976, "learning_rate": 0.0007151848753186301, "loss": 0.1372, "num_input_tokens_seen": 111747584, "step": 51805 }, { "epoch": 8.451876019575856, "grad_norm": 0.09132198989391327, "learning_rate": 0.000715120622479979, "loss": 0.0353, "num_input_tokens_seen": 111756704, "step": 51810 }, { "epoch": 8.452691680261012, "grad_norm": 0.09871755540370941, "learning_rate": 0.0007150563652816355, "loss": 0.0546, "num_input_tokens_seen": 111767072, "step": 51815 }, { "epoch": 8.453507340946166, "grad_norm": 0.14659786224365234, "learning_rate": 0.0007149921037249021, "loss": 0.0408, "num_input_tokens_seen": 111778080, "step": 51820 }, { "epoch": 8.454323001631321, "grad_norm": 0.32462412118911743, "learning_rate": 0.0007149278378110808, "loss": 0.0264, "num_input_tokens_seen": 111790080, "step": 51825 }, { "epoch": 8.455138662316477, "grad_norm": 0.10096310079097748, "learning_rate": 0.0007148635675414743, "loss": 0.0712, "num_input_tokens_seen": 111800096, "step": 51830 }, { "epoch": 8.455954323001631, "grad_norm": 0.1815778762102127, "learning_rate": 0.000714799292917385, "loss": 0.0479, "num_input_tokens_seen": 111811136, "step": 51835 }, { "epoch": 8.456769983686787, "grad_norm": 0.443448930978775, "learning_rate": 0.0007147350139401156, "loss": 0.1267, "num_input_tokens_seen": 111821184, "step": 51840 }, { "epoch": 8.45758564437194, "grad_norm": 0.19018666446208954, "learning_rate": 0.0007146707306109687, "loss": 0.1295, "num_input_tokens_seen": 111832288, "step": 51845 }, { "epoch": 8.458401305057096, "grad_norm": 0.11084506660699844, "learning_rate": 0.000714606442931247, "loss": 0.0548, "num_input_tokens_seen": 111843680, "step": 51850 }, { "epoch": 8.459216965742252, "grad_norm": 0.34757664799690247, "learning_rate": 0.0007145421509022536, "loss": 0.0948, "num_input_tokens_seen": 111853760, "step": 51855 }, { "epoch": 8.460032626427406, "grad_norm": 0.029664140194654465, "learning_rate": 0.0007144778545252914, "loss": 0.0207, "num_input_tokens_seen": 111864320, "step": 51860 }, { "epoch": 8.460848287112562, "grad_norm": 0.3778526782989502, "learning_rate": 0.0007144135538016633, "loss": 0.0756, "num_input_tokens_seen": 111874048, "step": 51865 }, { "epoch": 8.461663947797716, "grad_norm": 0.18764834105968475, "learning_rate": 0.0007143492487326726, "loss": 0.0152, "num_input_tokens_seen": 111885312, "step": 51870 }, { "epoch": 8.462479608482871, "grad_norm": 0.04225245863199234, "learning_rate": 0.0007142849393196223, "loss": 0.0322, "num_input_tokens_seen": 111895904, "step": 51875 }, { "epoch": 8.463295269168025, "grad_norm": 0.3991396725177765, "learning_rate": 0.000714220625563816, "loss": 0.2454, "num_input_tokens_seen": 111906592, "step": 51880 }, { "epoch": 8.464110929853181, "grad_norm": 0.10484295338392258, "learning_rate": 0.0007141563074665571, "loss": 0.1722, "num_input_tokens_seen": 111917248, "step": 51885 }, { "epoch": 8.464926590538337, "grad_norm": 0.15298965573310852, "learning_rate": 0.0007140919850291488, "loss": 0.036, "num_input_tokens_seen": 111928768, "step": 51890 }, { "epoch": 8.46574225122349, "grad_norm": 0.19607166945934296, "learning_rate": 0.0007140276582528947, "loss": 0.0351, "num_input_tokens_seen": 111939680, "step": 51895 }, { "epoch": 8.466557911908646, "grad_norm": 0.01544447522610426, "learning_rate": 0.0007139633271390988, "loss": 0.1162, "num_input_tokens_seen": 111951104, "step": 51900 }, { "epoch": 8.4673735725938, "grad_norm": 0.03359499201178551, "learning_rate": 0.0007138989916890644, "loss": 0.0808, "num_input_tokens_seen": 111962432, "step": 51905 }, { "epoch": 8.468189233278956, "grad_norm": 0.03891899436712265, "learning_rate": 0.0007138346519040959, "loss": 0.0163, "num_input_tokens_seen": 111971776, "step": 51910 }, { "epoch": 8.469004893964112, "grad_norm": 0.09015488624572754, "learning_rate": 0.0007137703077854967, "loss": 0.0408, "num_input_tokens_seen": 111983552, "step": 51915 }, { "epoch": 8.469820554649266, "grad_norm": 0.031660813838243484, "learning_rate": 0.0007137059593345711, "loss": 0.0372, "num_input_tokens_seen": 111995328, "step": 51920 }, { "epoch": 8.470636215334421, "grad_norm": 0.014169171452522278, "learning_rate": 0.0007136416065526231, "loss": 0.0829, "num_input_tokens_seen": 112005984, "step": 51925 }, { "epoch": 8.471451876019575, "grad_norm": 0.4222790598869324, "learning_rate": 0.0007135772494409569, "loss": 0.1289, "num_input_tokens_seen": 112016864, "step": 51930 }, { "epoch": 8.47226753670473, "grad_norm": 0.5411301851272583, "learning_rate": 0.0007135128880008768, "loss": 0.0966, "num_input_tokens_seen": 112027456, "step": 51935 }, { "epoch": 8.473083197389887, "grad_norm": 0.0198789332062006, "learning_rate": 0.0007134485222336873, "loss": 0.0414, "num_input_tokens_seen": 112037696, "step": 51940 }, { "epoch": 8.47389885807504, "grad_norm": 0.2534342408180237, "learning_rate": 0.0007133841521406925, "loss": 0.0152, "num_input_tokens_seen": 112049344, "step": 51945 }, { "epoch": 8.474714518760196, "grad_norm": 0.00835504662245512, "learning_rate": 0.0007133197777231973, "loss": 0.0136, "num_input_tokens_seen": 112060032, "step": 51950 }, { "epoch": 8.47553017944535, "grad_norm": 0.021618641912937164, "learning_rate": 0.0007132553989825061, "loss": 0.0793, "num_input_tokens_seen": 112071520, "step": 51955 }, { "epoch": 8.476345840130506, "grad_norm": 0.009793249890208244, "learning_rate": 0.0007131910159199238, "loss": 0.0295, "num_input_tokens_seen": 112083136, "step": 51960 }, { "epoch": 8.477161500815662, "grad_norm": 0.7443006634712219, "learning_rate": 0.000713126628536755, "loss": 0.1506, "num_input_tokens_seen": 112093312, "step": 51965 }, { "epoch": 8.477977161500815, "grad_norm": 0.1741093397140503, "learning_rate": 0.0007130622368343048, "loss": 0.0148, "num_input_tokens_seen": 112103232, "step": 51970 }, { "epoch": 8.478792822185971, "grad_norm": 0.15808257460594177, "learning_rate": 0.000712997840813878, "loss": 0.0581, "num_input_tokens_seen": 112114752, "step": 51975 }, { "epoch": 8.479608482871125, "grad_norm": 0.02974502556025982, "learning_rate": 0.0007129334404767797, "loss": 0.0077, "num_input_tokens_seen": 112125664, "step": 51980 }, { "epoch": 8.48042414355628, "grad_norm": 0.007966126315295696, "learning_rate": 0.0007128690358243153, "loss": 0.1214, "num_input_tokens_seen": 112136544, "step": 51985 }, { "epoch": 8.481239804241435, "grad_norm": 0.4449196755886078, "learning_rate": 0.0007128046268577898, "loss": 0.0711, "num_input_tokens_seen": 112147008, "step": 51990 }, { "epoch": 8.48205546492659, "grad_norm": 0.28420713543891907, "learning_rate": 0.0007127402135785086, "loss": 0.0612, "num_input_tokens_seen": 112158720, "step": 51995 }, { "epoch": 8.482871125611746, "grad_norm": 0.3455711007118225, "learning_rate": 0.000712675795987777, "loss": 0.0524, "num_input_tokens_seen": 112168544, "step": 52000 }, { "epoch": 8.4836867862969, "grad_norm": 0.01829277165234089, "learning_rate": 0.0007126113740869006, "loss": 0.0209, "num_input_tokens_seen": 112178144, "step": 52005 }, { "epoch": 8.484502446982056, "grad_norm": 0.007040528580546379, "learning_rate": 0.000712546947877185, "loss": 0.1855, "num_input_tokens_seen": 112188320, "step": 52010 }, { "epoch": 8.48531810766721, "grad_norm": 0.11675887554883957, "learning_rate": 0.0007124825173599359, "loss": 0.0949, "num_input_tokens_seen": 112198112, "step": 52015 }, { "epoch": 8.486133768352365, "grad_norm": 0.015561007894575596, "learning_rate": 0.000712418082536459, "loss": 0.0282, "num_input_tokens_seen": 112209024, "step": 52020 }, { "epoch": 8.486949429037521, "grad_norm": 0.22562304139137268, "learning_rate": 0.0007123536434080602, "loss": 0.0285, "num_input_tokens_seen": 112219744, "step": 52025 }, { "epoch": 8.487765089722675, "grad_norm": 0.04059227555990219, "learning_rate": 0.0007122891999760454, "loss": 0.0624, "num_input_tokens_seen": 112230688, "step": 52030 }, { "epoch": 8.48858075040783, "grad_norm": 0.006049167364835739, "learning_rate": 0.0007122247522417206, "loss": 0.1055, "num_input_tokens_seen": 112242144, "step": 52035 }, { "epoch": 8.489396411092985, "grad_norm": 0.022350842133164406, "learning_rate": 0.0007121603002063921, "loss": 0.0163, "num_input_tokens_seen": 112253728, "step": 52040 }, { "epoch": 8.49021207177814, "grad_norm": 0.050821445882320404, "learning_rate": 0.000712095843871366, "loss": 0.0405, "num_input_tokens_seen": 112264608, "step": 52045 }, { "epoch": 8.491027732463296, "grad_norm": 0.01740807667374611, "learning_rate": 0.0007120313832379483, "loss": 0.0171, "num_input_tokens_seen": 112275072, "step": 52050 }, { "epoch": 8.49184339314845, "grad_norm": 0.03135994076728821, "learning_rate": 0.000711966918307446, "loss": 0.0367, "num_input_tokens_seen": 112285536, "step": 52055 }, { "epoch": 8.492659053833606, "grad_norm": 0.12760677933692932, "learning_rate": 0.000711902449081165, "loss": 0.0227, "num_input_tokens_seen": 112296384, "step": 52060 }, { "epoch": 8.49347471451876, "grad_norm": 0.13042232394218445, "learning_rate": 0.000711837975560412, "loss": 0.0622, "num_input_tokens_seen": 112306784, "step": 52065 }, { "epoch": 8.494290375203915, "grad_norm": 0.04808565229177475, "learning_rate": 0.0007117734977464937, "loss": 0.0261, "num_input_tokens_seen": 112317056, "step": 52070 }, { "epoch": 8.49510603588907, "grad_norm": 0.2043689787387848, "learning_rate": 0.0007117090156407168, "loss": 0.0858, "num_input_tokens_seen": 112327424, "step": 52075 }, { "epoch": 8.495921696574225, "grad_norm": 0.07318547368049622, "learning_rate": 0.0007116445292443883, "loss": 0.1577, "num_input_tokens_seen": 112339328, "step": 52080 }, { "epoch": 8.49673735725938, "grad_norm": 0.02601948380470276, "learning_rate": 0.0007115800385588148, "loss": 0.0125, "num_input_tokens_seen": 112348672, "step": 52085 }, { "epoch": 8.497553017944535, "grad_norm": 0.005327170714735985, "learning_rate": 0.0007115155435853034, "loss": 0.034, "num_input_tokens_seen": 112360064, "step": 52090 }, { "epoch": 8.49836867862969, "grad_norm": 0.0798022523522377, "learning_rate": 0.0007114510443251613, "loss": 0.0384, "num_input_tokens_seen": 112371424, "step": 52095 }, { "epoch": 8.499184339314844, "grad_norm": 0.013463851064443588, "learning_rate": 0.0007113865407796955, "loss": 0.0685, "num_input_tokens_seen": 112382304, "step": 52100 }, { "epoch": 8.5, "grad_norm": 0.007057413458824158, "learning_rate": 0.0007113220329502131, "loss": 0.0979, "num_input_tokens_seen": 112393248, "step": 52105 }, { "epoch": 8.500815660685156, "grad_norm": 0.3989030420780182, "learning_rate": 0.0007112575208380219, "loss": 0.0526, "num_input_tokens_seen": 112404192, "step": 52110 }, { "epoch": 8.50163132137031, "grad_norm": 0.03188792243599892, "learning_rate": 0.0007111930044444288, "loss": 0.0286, "num_input_tokens_seen": 112415936, "step": 52115 }, { "epoch": 8.502446982055465, "grad_norm": 0.678286075592041, "learning_rate": 0.0007111284837707416, "loss": 0.1584, "num_input_tokens_seen": 112426784, "step": 52120 }, { "epoch": 8.50326264274062, "grad_norm": 0.011275465600192547, "learning_rate": 0.0007110639588182679, "loss": 0.0614, "num_input_tokens_seen": 112438624, "step": 52125 }, { "epoch": 8.504078303425775, "grad_norm": 0.8181564807891846, "learning_rate": 0.0007109994295883154, "loss": 0.1683, "num_input_tokens_seen": 112449344, "step": 52130 }, { "epoch": 8.50489396411093, "grad_norm": 0.7194790840148926, "learning_rate": 0.0007109348960821916, "loss": 0.1785, "num_input_tokens_seen": 112460288, "step": 52135 }, { "epoch": 8.505709624796085, "grad_norm": 0.13549011945724487, "learning_rate": 0.0007108703583012047, "loss": 0.0507, "num_input_tokens_seen": 112472000, "step": 52140 }, { "epoch": 8.50652528548124, "grad_norm": 0.006778481882065535, "learning_rate": 0.0007108058162466624, "loss": 0.0058, "num_input_tokens_seen": 112481280, "step": 52145 }, { "epoch": 8.507340946166394, "grad_norm": 0.19302892684936523, "learning_rate": 0.0007107412699198729, "loss": 0.0595, "num_input_tokens_seen": 112491456, "step": 52150 }, { "epoch": 8.50815660685155, "grad_norm": 0.01080802921205759, "learning_rate": 0.0007106767193221442, "loss": 0.4004, "num_input_tokens_seen": 112501440, "step": 52155 }, { "epoch": 8.508972267536706, "grad_norm": 0.041307300329208374, "learning_rate": 0.0007106121644547844, "loss": 0.0584, "num_input_tokens_seen": 112511488, "step": 52160 }, { "epoch": 8.50978792822186, "grad_norm": 0.0161303598433733, "learning_rate": 0.000710547605319102, "loss": 0.0349, "num_input_tokens_seen": 112522848, "step": 52165 }, { "epoch": 8.510603588907015, "grad_norm": 0.007766826543956995, "learning_rate": 0.0007104830419164052, "loss": 0.2071, "num_input_tokens_seen": 112534560, "step": 52170 }, { "epoch": 8.51141924959217, "grad_norm": 0.010690944269299507, "learning_rate": 0.0007104184742480025, "loss": 0.1507, "num_input_tokens_seen": 112544384, "step": 52175 }, { "epoch": 8.512234910277325, "grad_norm": 0.25651654601097107, "learning_rate": 0.0007103539023152025, "loss": 0.0658, "num_input_tokens_seen": 112555200, "step": 52180 }, { "epoch": 8.513050570962479, "grad_norm": 0.02715766802430153, "learning_rate": 0.0007102893261193141, "loss": 0.0377, "num_input_tokens_seen": 112565600, "step": 52185 }, { "epoch": 8.513866231647635, "grad_norm": 0.14624623954296112, "learning_rate": 0.0007102247456616456, "loss": 0.0715, "num_input_tokens_seen": 112575424, "step": 52190 }, { "epoch": 8.51468189233279, "grad_norm": 0.2052011489868164, "learning_rate": 0.0007101601609435057, "loss": 0.0341, "num_input_tokens_seen": 112585184, "step": 52195 }, { "epoch": 8.515497553017944, "grad_norm": 0.013489839620888233, "learning_rate": 0.0007100955719662038, "loss": 0.0262, "num_input_tokens_seen": 112596032, "step": 52200 }, { "epoch": 8.5163132137031, "grad_norm": 0.6606424450874329, "learning_rate": 0.0007100309787310485, "loss": 0.2503, "num_input_tokens_seen": 112606048, "step": 52205 }, { "epoch": 8.517128874388254, "grad_norm": 0.1653893142938614, "learning_rate": 0.0007099663812393489, "loss": 0.1174, "num_input_tokens_seen": 112616800, "step": 52210 }, { "epoch": 8.51794453507341, "grad_norm": 0.013789049349725246, "learning_rate": 0.0007099017794924144, "loss": 0.0454, "num_input_tokens_seen": 112626912, "step": 52215 }, { "epoch": 8.518760195758565, "grad_norm": 0.04234495013952255, "learning_rate": 0.000709837173491554, "loss": 0.0833, "num_input_tokens_seen": 112638592, "step": 52220 }, { "epoch": 8.51957585644372, "grad_norm": 0.014766390435397625, "learning_rate": 0.0007097725632380771, "loss": 0.0396, "num_input_tokens_seen": 112650432, "step": 52225 }, { "epoch": 8.520391517128875, "grad_norm": 0.21195147931575775, "learning_rate": 0.0007097079487332931, "loss": 0.0375, "num_input_tokens_seen": 112661408, "step": 52230 }, { "epoch": 8.521207177814029, "grad_norm": 0.3564627170562744, "learning_rate": 0.0007096433299785113, "loss": 0.1221, "num_input_tokens_seen": 112671968, "step": 52235 }, { "epoch": 8.522022838499185, "grad_norm": 0.28007712960243225, "learning_rate": 0.0007095787069750416, "loss": 0.0444, "num_input_tokens_seen": 112683040, "step": 52240 }, { "epoch": 8.522838499184338, "grad_norm": 0.05765927955508232, "learning_rate": 0.0007095140797241936, "loss": 0.0322, "num_input_tokens_seen": 112694080, "step": 52245 }, { "epoch": 8.523654159869494, "grad_norm": 0.4238038957118988, "learning_rate": 0.0007094494482272768, "loss": 0.1305, "num_input_tokens_seen": 112704288, "step": 52250 }, { "epoch": 8.52446982055465, "grad_norm": 0.018903449177742004, "learning_rate": 0.0007093848124856014, "loss": 0.0242, "num_input_tokens_seen": 112715008, "step": 52255 }, { "epoch": 8.525285481239804, "grad_norm": 0.271589070558548, "learning_rate": 0.000709320172500477, "loss": 0.1587, "num_input_tokens_seen": 112726784, "step": 52260 }, { "epoch": 8.52610114192496, "grad_norm": 0.13706177473068237, "learning_rate": 0.0007092555282732139, "loss": 0.0395, "num_input_tokens_seen": 112738400, "step": 52265 }, { "epoch": 8.526916802610113, "grad_norm": 0.3041245639324188, "learning_rate": 0.000709190879805122, "loss": 0.0865, "num_input_tokens_seen": 112749664, "step": 52270 }, { "epoch": 8.52773246329527, "grad_norm": 0.09652306139469147, "learning_rate": 0.0007091262270975116, "loss": 0.0135, "num_input_tokens_seen": 112761952, "step": 52275 }, { "epoch": 8.528548123980425, "grad_norm": 0.01655682548880577, "learning_rate": 0.0007090615701516929, "loss": 0.1087, "num_input_tokens_seen": 112771968, "step": 52280 }, { "epoch": 8.529363784665579, "grad_norm": 0.11326311528682709, "learning_rate": 0.0007089969089689761, "loss": 0.2109, "num_input_tokens_seen": 112781664, "step": 52285 }, { "epoch": 8.530179445350734, "grad_norm": 0.012405349873006344, "learning_rate": 0.0007089322435506719, "loss": 0.005, "num_input_tokens_seen": 112792416, "step": 52290 }, { "epoch": 8.530995106035888, "grad_norm": 0.01709955371916294, "learning_rate": 0.0007088675738980909, "loss": 0.0377, "num_input_tokens_seen": 112802400, "step": 52295 }, { "epoch": 8.531810766721044, "grad_norm": 0.03971458971500397, "learning_rate": 0.0007088029000125435, "loss": 0.0348, "num_input_tokens_seen": 112813376, "step": 52300 }, { "epoch": 8.5326264274062, "grad_norm": 0.01963002048432827, "learning_rate": 0.0007087382218953403, "loss": 0.0168, "num_input_tokens_seen": 112823488, "step": 52305 }, { "epoch": 8.533442088091354, "grad_norm": 0.2701168358325958, "learning_rate": 0.0007086735395477923, "loss": 0.1896, "num_input_tokens_seen": 112833632, "step": 52310 }, { "epoch": 8.53425774877651, "grad_norm": 0.023985065519809723, "learning_rate": 0.0007086088529712103, "loss": 0.0597, "num_input_tokens_seen": 112844768, "step": 52315 }, { "epoch": 8.535073409461663, "grad_norm": 0.003129492746666074, "learning_rate": 0.0007085441621669053, "loss": 0.0497, "num_input_tokens_seen": 112855456, "step": 52320 }, { "epoch": 8.535889070146819, "grad_norm": 0.01596365123987198, "learning_rate": 0.0007084794671361883, "loss": 0.056, "num_input_tokens_seen": 112867200, "step": 52325 }, { "epoch": 8.536704730831975, "grad_norm": 0.02165941894054413, "learning_rate": 0.0007084147678803703, "loss": 0.103, "num_input_tokens_seen": 112878528, "step": 52330 }, { "epoch": 8.537520391517129, "grad_norm": 0.04197744280099869, "learning_rate": 0.0007083500644007628, "loss": 0.0299, "num_input_tokens_seen": 112888672, "step": 52335 }, { "epoch": 8.538336052202284, "grad_norm": 0.026097755879163742, "learning_rate": 0.0007082853566986769, "loss": 0.0347, "num_input_tokens_seen": 112899392, "step": 52340 }, { "epoch": 8.539151712887438, "grad_norm": 0.007676062639802694, "learning_rate": 0.0007082206447754239, "loss": 0.0154, "num_input_tokens_seen": 112910080, "step": 52345 }, { "epoch": 8.539967373572594, "grad_norm": 0.010974233038723469, "learning_rate": 0.0007081559286323155, "loss": 0.0173, "num_input_tokens_seen": 112920736, "step": 52350 }, { "epoch": 8.540783034257748, "grad_norm": 0.17029424011707306, "learning_rate": 0.0007080912082706631, "loss": 0.0237, "num_input_tokens_seen": 112931552, "step": 52355 }, { "epoch": 8.541598694942904, "grad_norm": 0.09603752195835114, "learning_rate": 0.0007080264836917783, "loss": 0.085, "num_input_tokens_seen": 112943648, "step": 52360 }, { "epoch": 8.54241435562806, "grad_norm": 0.09903179109096527, "learning_rate": 0.000707961754896973, "loss": 0.0136, "num_input_tokens_seen": 112955296, "step": 52365 }, { "epoch": 8.543230016313213, "grad_norm": 0.06685255467891693, "learning_rate": 0.0007078970218875589, "loss": 0.0498, "num_input_tokens_seen": 112966272, "step": 52370 }, { "epoch": 8.544045676998369, "grad_norm": 0.04724392294883728, "learning_rate": 0.0007078322846648479, "loss": 0.0548, "num_input_tokens_seen": 112978912, "step": 52375 }, { "epoch": 8.544861337683523, "grad_norm": 0.06616072356700897, "learning_rate": 0.0007077675432301521, "loss": 0.1936, "num_input_tokens_seen": 112989792, "step": 52380 }, { "epoch": 8.545676998368679, "grad_norm": 0.03849957510828972, "learning_rate": 0.0007077027975847833, "loss": 0.1169, "num_input_tokens_seen": 112999200, "step": 52385 }, { "epoch": 8.546492659053834, "grad_norm": 0.007321841083467007, "learning_rate": 0.0007076380477300539, "loss": 0.1015, "num_input_tokens_seen": 113009504, "step": 52390 }, { "epoch": 8.547308319738988, "grad_norm": 0.013756643049418926, "learning_rate": 0.0007075732936672761, "loss": 0.0161, "num_input_tokens_seen": 113019488, "step": 52395 }, { "epoch": 8.548123980424144, "grad_norm": 0.31894347071647644, "learning_rate": 0.0007075085353977622, "loss": 0.0561, "num_input_tokens_seen": 113030016, "step": 52400 }, { "epoch": 8.548939641109298, "grad_norm": 0.7395493984222412, "learning_rate": 0.0007074437729228245, "loss": 0.1784, "num_input_tokens_seen": 113041856, "step": 52405 }, { "epoch": 8.549755301794454, "grad_norm": 0.29210326075553894, "learning_rate": 0.0007073790062437755, "loss": 0.1926, "num_input_tokens_seen": 113052608, "step": 52410 }, { "epoch": 8.550570962479608, "grad_norm": 0.04046386852860451, "learning_rate": 0.000707314235361928, "loss": 0.1731, "num_input_tokens_seen": 113063104, "step": 52415 }, { "epoch": 8.551386623164763, "grad_norm": 0.46161264181137085, "learning_rate": 0.0007072494602785945, "loss": 0.1115, "num_input_tokens_seen": 113075200, "step": 52420 }, { "epoch": 8.552202283849919, "grad_norm": 0.04034951329231262, "learning_rate": 0.0007071846809950878, "loss": 0.0952, "num_input_tokens_seen": 113087392, "step": 52425 }, { "epoch": 8.553017944535073, "grad_norm": 0.045092228800058365, "learning_rate": 0.0007071198975127206, "loss": 0.0631, "num_input_tokens_seen": 113098784, "step": 52430 }, { "epoch": 8.553833605220229, "grad_norm": 0.00812490377575159, "learning_rate": 0.000707055109832806, "loss": 0.0788, "num_input_tokens_seen": 113110048, "step": 52435 }, { "epoch": 8.554649265905383, "grad_norm": 0.00961544830352068, "learning_rate": 0.0007069903179566569, "loss": 0.0285, "num_input_tokens_seen": 113119968, "step": 52440 }, { "epoch": 8.555464926590538, "grad_norm": 0.1951313465833664, "learning_rate": 0.0007069255218855865, "loss": 0.0297, "num_input_tokens_seen": 113132256, "step": 52445 }, { "epoch": 8.556280587275694, "grad_norm": 0.03383425995707512, "learning_rate": 0.0007068607216209078, "loss": 0.0133, "num_input_tokens_seen": 113142144, "step": 52450 }, { "epoch": 8.557096247960848, "grad_norm": 0.499152272939682, "learning_rate": 0.0007067959171639342, "loss": 0.2001, "num_input_tokens_seen": 113152960, "step": 52455 }, { "epoch": 8.557911908646004, "grad_norm": 0.47092071175575256, "learning_rate": 0.000706731108515979, "loss": 0.0501, "num_input_tokens_seen": 113164480, "step": 52460 }, { "epoch": 8.558727569331158, "grad_norm": 0.20113173127174377, "learning_rate": 0.0007066662956783556, "loss": 0.0497, "num_input_tokens_seen": 113175488, "step": 52465 }, { "epoch": 8.559543230016313, "grad_norm": 0.0661533772945404, "learning_rate": 0.0007066014786523776, "loss": 0.0516, "num_input_tokens_seen": 113186240, "step": 52470 }, { "epoch": 8.560358890701469, "grad_norm": 0.15611976385116577, "learning_rate": 0.0007065366574393585, "loss": 0.0807, "num_input_tokens_seen": 113196288, "step": 52475 }, { "epoch": 8.561174551386623, "grad_norm": 0.09287663549184799, "learning_rate": 0.000706471832040612, "loss": 0.0547, "num_input_tokens_seen": 113207168, "step": 52480 }, { "epoch": 8.561990212071779, "grad_norm": 0.027409210801124573, "learning_rate": 0.000706407002457452, "loss": 0.081, "num_input_tokens_seen": 113217920, "step": 52485 }, { "epoch": 8.562805872756933, "grad_norm": 0.010809613391757011, "learning_rate": 0.0007063421686911921, "loss": 0.0195, "num_input_tokens_seen": 113228768, "step": 52490 }, { "epoch": 8.563621533442088, "grad_norm": 1.0236653089523315, "learning_rate": 0.0007062773307431465, "loss": 0.1683, "num_input_tokens_seen": 113239168, "step": 52495 }, { "epoch": 8.564437194127244, "grad_norm": 0.44787463545799255, "learning_rate": 0.000706212488614629, "loss": 0.0827, "num_input_tokens_seen": 113249216, "step": 52500 }, { "epoch": 8.565252854812398, "grad_norm": 0.4116763770580292, "learning_rate": 0.0007061476423069539, "loss": 0.0458, "num_input_tokens_seen": 113260416, "step": 52505 }, { "epoch": 8.566068515497554, "grad_norm": 0.5867841243743896, "learning_rate": 0.0007060827918214353, "loss": 0.1769, "num_input_tokens_seen": 113270752, "step": 52510 }, { "epoch": 8.566884176182707, "grad_norm": 0.07104306668043137, "learning_rate": 0.0007060179371593876, "loss": 0.0215, "num_input_tokens_seen": 113282432, "step": 52515 }, { "epoch": 8.567699836867863, "grad_norm": 0.13301724195480347, "learning_rate": 0.0007059530783221249, "loss": 0.0718, "num_input_tokens_seen": 113293824, "step": 52520 }, { "epoch": 8.568515497553017, "grad_norm": 0.021473020315170288, "learning_rate": 0.0007058882153109618, "loss": 0.042, "num_input_tokens_seen": 113305824, "step": 52525 }, { "epoch": 8.569331158238173, "grad_norm": 0.5328531861305237, "learning_rate": 0.000705823348127213, "loss": 0.0946, "num_input_tokens_seen": 113317888, "step": 52530 }, { "epoch": 8.570146818923329, "grad_norm": 0.021104246377944946, "learning_rate": 0.0007057584767721927, "loss": 0.1193, "num_input_tokens_seen": 113329664, "step": 52535 }, { "epoch": 8.570962479608482, "grad_norm": 0.10279271006584167, "learning_rate": 0.000705693601247216, "loss": 0.0483, "num_input_tokens_seen": 113340000, "step": 52540 }, { "epoch": 8.571778140293638, "grad_norm": 0.15066039562225342, "learning_rate": 0.0007056287215535976, "loss": 0.124, "num_input_tokens_seen": 113351616, "step": 52545 }, { "epoch": 8.572593800978792, "grad_norm": 0.08485747873783112, "learning_rate": 0.0007055638376926522, "loss": 0.0902, "num_input_tokens_seen": 113362848, "step": 52550 }, { "epoch": 8.573409461663948, "grad_norm": 0.3261450529098511, "learning_rate": 0.0007054989496656949, "loss": 0.144, "num_input_tokens_seen": 113374400, "step": 52555 }, { "epoch": 8.574225122349104, "grad_norm": 0.025214551016688347, "learning_rate": 0.0007054340574740405, "loss": 0.0114, "num_input_tokens_seen": 113384000, "step": 52560 }, { "epoch": 8.575040783034257, "grad_norm": 0.22050581872463226, "learning_rate": 0.0007053691611190045, "loss": 0.0441, "num_input_tokens_seen": 113395872, "step": 52565 }, { "epoch": 8.575856443719413, "grad_norm": 0.16345635056495667, "learning_rate": 0.0007053042606019017, "loss": 0.0658, "num_input_tokens_seen": 113405408, "step": 52570 }, { "epoch": 8.576672104404567, "grad_norm": 0.21222253143787384, "learning_rate": 0.0007052393559240479, "loss": 0.0272, "num_input_tokens_seen": 113415840, "step": 52575 }, { "epoch": 8.577487765089723, "grad_norm": 0.2056702971458435, "learning_rate": 0.0007051744470867581, "loss": 0.0642, "num_input_tokens_seen": 113427424, "step": 52580 }, { "epoch": 8.578303425774878, "grad_norm": 0.02931940369307995, "learning_rate": 0.0007051095340913478, "loss": 0.0537, "num_input_tokens_seen": 113437344, "step": 52585 }, { "epoch": 8.579119086460032, "grad_norm": 0.5176214575767517, "learning_rate": 0.0007050446169391326, "loss": 0.0344, "num_input_tokens_seen": 113447968, "step": 52590 }, { "epoch": 8.579934747145188, "grad_norm": 0.4078221321105957, "learning_rate": 0.0007049796956314281, "loss": 0.1845, "num_input_tokens_seen": 113458688, "step": 52595 }, { "epoch": 8.580750407830342, "grad_norm": 0.2801363170146942, "learning_rate": 0.00070491477016955, "loss": 0.1005, "num_input_tokens_seen": 113469728, "step": 52600 }, { "epoch": 8.581566068515498, "grad_norm": 0.39343565702438354, "learning_rate": 0.0007048498405548142, "loss": 0.1467, "num_input_tokens_seen": 113481120, "step": 52605 }, { "epoch": 8.582381729200652, "grad_norm": 0.31688812375068665, "learning_rate": 0.0007047849067885366, "loss": 0.1064, "num_input_tokens_seen": 113490720, "step": 52610 }, { "epoch": 8.583197389885807, "grad_norm": 0.04376254975795746, "learning_rate": 0.000704719968872033, "loss": 0.0282, "num_input_tokens_seen": 113501856, "step": 52615 }, { "epoch": 8.584013050570963, "grad_norm": 0.16781193017959595, "learning_rate": 0.0007046550268066194, "loss": 0.0861, "num_input_tokens_seen": 113513856, "step": 52620 }, { "epoch": 8.584828711256117, "grad_norm": 0.10036098212003708, "learning_rate": 0.0007045900805936122, "loss": 0.0443, "num_input_tokens_seen": 113524256, "step": 52625 }, { "epoch": 8.585644371941273, "grad_norm": 0.018535228446125984, "learning_rate": 0.0007045251302343276, "loss": 0.1332, "num_input_tokens_seen": 113535232, "step": 52630 }, { "epoch": 8.586460032626427, "grad_norm": 0.25911739468574524, "learning_rate": 0.0007044601757300815, "loss": 0.1346, "num_input_tokens_seen": 113546560, "step": 52635 }, { "epoch": 8.587275693311582, "grad_norm": 0.021499942988157272, "learning_rate": 0.0007043952170821907, "loss": 0.047, "num_input_tokens_seen": 113558144, "step": 52640 }, { "epoch": 8.588091353996738, "grad_norm": 0.011928388848900795, "learning_rate": 0.0007043302542919715, "loss": 0.0658, "num_input_tokens_seen": 113568384, "step": 52645 }, { "epoch": 8.588907014681892, "grad_norm": 0.07271187752485275, "learning_rate": 0.0007042652873607405, "loss": 0.0232, "num_input_tokens_seen": 113578464, "step": 52650 }, { "epoch": 8.589722675367048, "grad_norm": 0.0456092394888401, "learning_rate": 0.0007042003162898143, "loss": 0.1687, "num_input_tokens_seen": 113588800, "step": 52655 }, { "epoch": 8.590538336052202, "grad_norm": 0.015398882329463959, "learning_rate": 0.0007041353410805097, "loss": 0.0805, "num_input_tokens_seen": 113600352, "step": 52660 }, { "epoch": 8.591353996737357, "grad_norm": 0.3370961546897888, "learning_rate": 0.0007040703617341434, "loss": 0.0814, "num_input_tokens_seen": 113610944, "step": 52665 }, { "epoch": 8.592169657422513, "grad_norm": 0.02650475688278675, "learning_rate": 0.0007040053782520324, "loss": 0.0382, "num_input_tokens_seen": 113620704, "step": 52670 }, { "epoch": 8.592985318107667, "grad_norm": 0.06864813715219498, "learning_rate": 0.0007039403906354936, "loss": 0.0868, "num_input_tokens_seen": 113630656, "step": 52675 }, { "epoch": 8.593800978792823, "grad_norm": 0.24346548318862915, "learning_rate": 0.0007038753988858439, "loss": 0.0945, "num_input_tokens_seen": 113641504, "step": 52680 }, { "epoch": 8.594616639477977, "grad_norm": 0.017839234322309494, "learning_rate": 0.0007038104030044008, "loss": 0.1455, "num_input_tokens_seen": 113650048, "step": 52685 }, { "epoch": 8.595432300163132, "grad_norm": 0.03571370989084244, "learning_rate": 0.0007037454029924814, "loss": 0.0319, "num_input_tokens_seen": 113660736, "step": 52690 }, { "epoch": 8.596247960848288, "grad_norm": 0.42271652817726135, "learning_rate": 0.0007036803988514028, "loss": 0.1804, "num_input_tokens_seen": 113672992, "step": 52695 }, { "epoch": 8.597063621533442, "grad_norm": 0.17758876085281372, "learning_rate": 0.0007036153905824825, "loss": 0.0974, "num_input_tokens_seen": 113683648, "step": 52700 }, { "epoch": 8.597879282218598, "grad_norm": 0.020917385816574097, "learning_rate": 0.0007035503781870379, "loss": 0.0353, "num_input_tokens_seen": 113694080, "step": 52705 }, { "epoch": 8.598694942903752, "grad_norm": 0.12035126984119415, "learning_rate": 0.0007034853616663868, "loss": 0.1629, "num_input_tokens_seen": 113705376, "step": 52710 }, { "epoch": 8.599510603588907, "grad_norm": 0.026858678087592125, "learning_rate": 0.0007034203410218467, "loss": 0.093, "num_input_tokens_seen": 113714880, "step": 52715 }, { "epoch": 8.600326264274061, "grad_norm": 0.25143852829933167, "learning_rate": 0.0007033553162547355, "loss": 0.1259, "num_input_tokens_seen": 113724000, "step": 52720 }, { "epoch": 8.601141924959217, "grad_norm": 0.0347890667617321, "learning_rate": 0.0007032902873663707, "loss": 0.1443, "num_input_tokens_seen": 113733088, "step": 52725 }, { "epoch": 8.601957585644373, "grad_norm": 0.034404464066028595, "learning_rate": 0.0007032252543580702, "loss": 0.0149, "num_input_tokens_seen": 113744384, "step": 52730 }, { "epoch": 8.602773246329527, "grad_norm": 0.045803848654031754, "learning_rate": 0.0007031602172311523, "loss": 0.0985, "num_input_tokens_seen": 113754592, "step": 52735 }, { "epoch": 8.603588907014682, "grad_norm": 0.023900087922811508, "learning_rate": 0.0007030951759869347, "loss": 0.0719, "num_input_tokens_seen": 113765280, "step": 52740 }, { "epoch": 8.604404567699836, "grad_norm": 0.054559480398893356, "learning_rate": 0.0007030301306267358, "loss": 0.0203, "num_input_tokens_seen": 113776800, "step": 52745 }, { "epoch": 8.605220228384992, "grad_norm": 0.031281813979148865, "learning_rate": 0.0007029650811518737, "loss": 0.0484, "num_input_tokens_seen": 113787872, "step": 52750 }, { "epoch": 8.606035889070148, "grad_norm": 0.00700868247076869, "learning_rate": 0.0007029000275636669, "loss": 0.0287, "num_input_tokens_seen": 113800800, "step": 52755 }, { "epoch": 8.606851549755302, "grad_norm": 0.02249310165643692, "learning_rate": 0.0007028349698634335, "loss": 0.0496, "num_input_tokens_seen": 113810592, "step": 52760 }, { "epoch": 8.607667210440457, "grad_norm": 0.5075094103813171, "learning_rate": 0.0007027699080524923, "loss": 0.1433, "num_input_tokens_seen": 113821728, "step": 52765 }, { "epoch": 8.608482871125611, "grad_norm": 0.2501092553138733, "learning_rate": 0.0007027048421321616, "loss": 0.0276, "num_input_tokens_seen": 113832800, "step": 52770 }, { "epoch": 8.609298531810767, "grad_norm": 0.017145207151770592, "learning_rate": 0.0007026397721037601, "loss": 0.0625, "num_input_tokens_seen": 113842720, "step": 52775 }, { "epoch": 8.61011419249592, "grad_norm": 0.589413046836853, "learning_rate": 0.0007025746979686065, "loss": 0.2585, "num_input_tokens_seen": 113853696, "step": 52780 }, { "epoch": 8.610929853181077, "grad_norm": 0.364375501871109, "learning_rate": 0.0007025096197280196, "loss": 0.2155, "num_input_tokens_seen": 113864096, "step": 52785 }, { "epoch": 8.611745513866232, "grad_norm": 0.030221717432141304, "learning_rate": 0.0007024445373833185, "loss": 0.065, "num_input_tokens_seen": 113874912, "step": 52790 }, { "epoch": 8.612561174551386, "grad_norm": 0.06842674314975739, "learning_rate": 0.000702379450935822, "loss": 0.0314, "num_input_tokens_seen": 113886336, "step": 52795 }, { "epoch": 8.613376835236542, "grad_norm": 0.08444344252347946, "learning_rate": 0.0007023143603868492, "loss": 0.0478, "num_input_tokens_seen": 113897184, "step": 52800 }, { "epoch": 8.614192495921696, "grad_norm": 0.018883051350712776, "learning_rate": 0.0007022492657377192, "loss": 0.09, "num_input_tokens_seen": 113907392, "step": 52805 }, { "epoch": 8.615008156606851, "grad_norm": 0.26636016368865967, "learning_rate": 0.0007021841669897511, "loss": 0.0531, "num_input_tokens_seen": 113918048, "step": 52810 }, { "epoch": 8.615823817292007, "grad_norm": 0.013662328012287617, "learning_rate": 0.0007021190641442645, "loss": 0.0171, "num_input_tokens_seen": 113929888, "step": 52815 }, { "epoch": 8.616639477977161, "grad_norm": 0.4433819651603699, "learning_rate": 0.0007020539572025788, "loss": 0.049, "num_input_tokens_seen": 113941504, "step": 52820 }, { "epoch": 8.617455138662317, "grad_norm": 0.3274267315864563, "learning_rate": 0.0007019888461660132, "loss": 0.0866, "num_input_tokens_seen": 113952960, "step": 52825 }, { "epoch": 8.61827079934747, "grad_norm": 0.020772969350218773, "learning_rate": 0.0007019237310358874, "loss": 0.0308, "num_input_tokens_seen": 113963328, "step": 52830 }, { "epoch": 8.619086460032626, "grad_norm": 0.09076014906167984, "learning_rate": 0.000701858611813521, "loss": 0.1357, "num_input_tokens_seen": 113974816, "step": 52835 }, { "epoch": 8.619902120717782, "grad_norm": 0.034512169659137726, "learning_rate": 0.0007017934885002339, "loss": 0.0599, "num_input_tokens_seen": 113985664, "step": 52840 }, { "epoch": 8.620717781402936, "grad_norm": 0.5324146151542664, "learning_rate": 0.0007017283610973456, "loss": 0.0997, "num_input_tokens_seen": 113996160, "step": 52845 }, { "epoch": 8.621533442088092, "grad_norm": 0.08560054749250412, "learning_rate": 0.0007016632296061762, "loss": 0.019, "num_input_tokens_seen": 114006368, "step": 52850 }, { "epoch": 8.622349102773246, "grad_norm": 0.2775586247444153, "learning_rate": 0.0007015980940280458, "loss": 0.0968, "num_input_tokens_seen": 114016704, "step": 52855 }, { "epoch": 8.623164763458401, "grad_norm": 0.3067305386066437, "learning_rate": 0.0007015329543642741, "loss": 0.1187, "num_input_tokens_seen": 114026272, "step": 52860 }, { "epoch": 8.623980424143557, "grad_norm": 0.8282463550567627, "learning_rate": 0.0007014678106161814, "loss": 0.2135, "num_input_tokens_seen": 114037600, "step": 52865 }, { "epoch": 8.624796084828711, "grad_norm": 0.03212658315896988, "learning_rate": 0.000701402662785088, "loss": 0.0663, "num_input_tokens_seen": 114047296, "step": 52870 }, { "epoch": 8.625611745513867, "grad_norm": 0.018620343878865242, "learning_rate": 0.0007013375108723141, "loss": 0.0746, "num_input_tokens_seen": 114057280, "step": 52875 }, { "epoch": 8.62642740619902, "grad_norm": 0.1164599135518074, "learning_rate": 0.0007012723548791802, "loss": 0.0474, "num_input_tokens_seen": 114067744, "step": 52880 }, { "epoch": 8.627243066884176, "grad_norm": 0.1523023396730423, "learning_rate": 0.0007012071948070065, "loss": 0.0528, "num_input_tokens_seen": 114077984, "step": 52885 }, { "epoch": 8.62805872756933, "grad_norm": 0.3663238286972046, "learning_rate": 0.0007011420306571139, "loss": 0.1971, "num_input_tokens_seen": 114087392, "step": 52890 }, { "epoch": 8.628874388254486, "grad_norm": 0.06196855381131172, "learning_rate": 0.0007010768624308228, "loss": 0.0896, "num_input_tokens_seen": 114097184, "step": 52895 }, { "epoch": 8.629690048939642, "grad_norm": 0.5704248547554016, "learning_rate": 0.0007010116901294541, "loss": 0.0767, "num_input_tokens_seen": 114107552, "step": 52900 }, { "epoch": 8.630505709624796, "grad_norm": 0.14818133413791656, "learning_rate": 0.0007009465137543285, "loss": 0.1233, "num_input_tokens_seen": 114118400, "step": 52905 }, { "epoch": 8.631321370309951, "grad_norm": 0.04436136409640312, "learning_rate": 0.0007008813333067668, "loss": 0.0394, "num_input_tokens_seen": 114129952, "step": 52910 }, { "epoch": 8.632137030995105, "grad_norm": 0.2840471565723419, "learning_rate": 0.00070081614878809, "loss": 0.0282, "num_input_tokens_seen": 114140192, "step": 52915 }, { "epoch": 8.632952691680261, "grad_norm": 0.022116808220744133, "learning_rate": 0.0007007509601996193, "loss": 0.0067, "num_input_tokens_seen": 114150240, "step": 52920 }, { "epoch": 8.633768352365417, "grad_norm": 0.4861741364002228, "learning_rate": 0.0007006857675426757, "loss": 0.1745, "num_input_tokens_seen": 114161376, "step": 52925 }, { "epoch": 8.63458401305057, "grad_norm": 0.13621555268764496, "learning_rate": 0.0007006205708185804, "loss": 0.066, "num_input_tokens_seen": 114172032, "step": 52930 }, { "epoch": 8.635399673735726, "grad_norm": 0.4763711392879486, "learning_rate": 0.0007005553700286549, "loss": 0.146, "num_input_tokens_seen": 114184192, "step": 52935 }, { "epoch": 8.63621533442088, "grad_norm": 0.1526675969362259, "learning_rate": 0.0007004901651742201, "loss": 0.0801, "num_input_tokens_seen": 114194208, "step": 52940 }, { "epoch": 8.637030995106036, "grad_norm": 0.028167076408863068, "learning_rate": 0.000700424956256598, "loss": 0.1126, "num_input_tokens_seen": 114205280, "step": 52945 }, { "epoch": 8.63784665579119, "grad_norm": 0.01191997155547142, "learning_rate": 0.0007003597432771098, "loss": 0.1192, "num_input_tokens_seen": 114215968, "step": 52950 }, { "epoch": 8.638662316476346, "grad_norm": 0.02824377827346325, "learning_rate": 0.0007002945262370773, "loss": 0.1166, "num_input_tokens_seen": 114227200, "step": 52955 }, { "epoch": 8.639477977161501, "grad_norm": 0.009903785772621632, "learning_rate": 0.0007002293051378221, "loss": 0.0423, "num_input_tokens_seen": 114237888, "step": 52960 }, { "epoch": 8.640293637846655, "grad_norm": 0.1200321614742279, "learning_rate": 0.0007001640799806662, "loss": 0.0519, "num_input_tokens_seen": 114248384, "step": 52965 }, { "epoch": 8.641109298531811, "grad_norm": 0.5369485020637512, "learning_rate": 0.000700098850766931, "loss": 0.1896, "num_input_tokens_seen": 114258464, "step": 52970 }, { "epoch": 8.641924959216965, "grad_norm": 0.3151225447654724, "learning_rate": 0.0007000336174979389, "loss": 0.2211, "num_input_tokens_seen": 114269952, "step": 52975 }, { "epoch": 8.64274061990212, "grad_norm": 0.15303009748458862, "learning_rate": 0.0006999683801750116, "loss": 0.0616, "num_input_tokens_seen": 114279680, "step": 52980 }, { "epoch": 8.643556280587276, "grad_norm": 0.24750499427318573, "learning_rate": 0.0006999031387994717, "loss": 0.1088, "num_input_tokens_seen": 114291648, "step": 52985 }, { "epoch": 8.64437194127243, "grad_norm": 0.20866602659225464, "learning_rate": 0.0006998378933726408, "loss": 0.0966, "num_input_tokens_seen": 114302464, "step": 52990 }, { "epoch": 8.645187601957586, "grad_norm": 0.022096488624811172, "learning_rate": 0.0006997726438958417, "loss": 0.0451, "num_input_tokens_seen": 114313248, "step": 52995 }, { "epoch": 8.64600326264274, "grad_norm": 0.043599165976047516, "learning_rate": 0.0006997073903703964, "loss": 0.0285, "num_input_tokens_seen": 114323552, "step": 53000 }, { "epoch": 8.646818923327896, "grad_norm": 0.33390122652053833, "learning_rate": 0.0006996421327976276, "loss": 0.0531, "num_input_tokens_seen": 114333760, "step": 53005 }, { "epoch": 8.647634584013051, "grad_norm": 0.016172289848327637, "learning_rate": 0.0006995768711788577, "loss": 0.1051, "num_input_tokens_seen": 114345568, "step": 53010 }, { "epoch": 8.648450244698205, "grad_norm": 0.11265739053487778, "learning_rate": 0.0006995116055154093, "loss": 0.0833, "num_input_tokens_seen": 114356288, "step": 53015 }, { "epoch": 8.649265905383361, "grad_norm": 0.3883618116378784, "learning_rate": 0.000699446335808605, "loss": 0.0242, "num_input_tokens_seen": 114368192, "step": 53020 }, { "epoch": 8.650081566068515, "grad_norm": 0.05005374178290367, "learning_rate": 0.0006993810620597677, "loss": 0.032, "num_input_tokens_seen": 114378208, "step": 53025 }, { "epoch": 8.65089722675367, "grad_norm": 0.6286240220069885, "learning_rate": 0.0006993157842702203, "loss": 0.1333, "num_input_tokens_seen": 114388960, "step": 53030 }, { "epoch": 8.651712887438826, "grad_norm": 0.24019049108028412, "learning_rate": 0.0006992505024412858, "loss": 0.1588, "num_input_tokens_seen": 114400032, "step": 53035 }, { "epoch": 8.65252854812398, "grad_norm": 0.16844017803668976, "learning_rate": 0.000699185216574287, "loss": 0.0991, "num_input_tokens_seen": 114410944, "step": 53040 }, { "epoch": 8.653344208809136, "grad_norm": 0.33781319856643677, "learning_rate": 0.0006991199266705472, "loss": 0.1147, "num_input_tokens_seen": 114422144, "step": 53045 }, { "epoch": 8.65415986949429, "grad_norm": 0.11361045390367508, "learning_rate": 0.0006990546327313894, "loss": 0.0434, "num_input_tokens_seen": 114433632, "step": 53050 }, { "epoch": 8.654975530179446, "grad_norm": 0.00820836890488863, "learning_rate": 0.0006989893347581368, "loss": 0.076, "num_input_tokens_seen": 114444928, "step": 53055 }, { "epoch": 8.655791190864601, "grad_norm": 0.020323364064097404, "learning_rate": 0.000698924032752113, "loss": 0.0323, "num_input_tokens_seen": 114455584, "step": 53060 }, { "epoch": 8.656606851549755, "grad_norm": 0.0179766658693552, "learning_rate": 0.0006988587267146414, "loss": 0.1407, "num_input_tokens_seen": 114466304, "step": 53065 }, { "epoch": 8.65742251223491, "grad_norm": 0.02667371742427349, "learning_rate": 0.0006987934166470454, "loss": 0.033, "num_input_tokens_seen": 114478336, "step": 53070 }, { "epoch": 8.658238172920065, "grad_norm": 0.0719609260559082, "learning_rate": 0.0006987281025506487, "loss": 0.1176, "num_input_tokens_seen": 114488896, "step": 53075 }, { "epoch": 8.65905383360522, "grad_norm": 0.18208950757980347, "learning_rate": 0.0006986627844267748, "loss": 0.1138, "num_input_tokens_seen": 114500384, "step": 53080 }, { "epoch": 8.659869494290374, "grad_norm": 0.008797693066298962, "learning_rate": 0.0006985974622767475, "loss": 0.0146, "num_input_tokens_seen": 114510336, "step": 53085 }, { "epoch": 8.66068515497553, "grad_norm": 0.31581729650497437, "learning_rate": 0.0006985321361018908, "loss": 0.0589, "num_input_tokens_seen": 114521024, "step": 53090 }, { "epoch": 8.661500815660686, "grad_norm": 0.02033611573278904, "learning_rate": 0.0006984668059035284, "loss": 0.0949, "num_input_tokens_seen": 114532288, "step": 53095 }, { "epoch": 8.66231647634584, "grad_norm": 0.2879686653614044, "learning_rate": 0.0006984014716829845, "loss": 0.0888, "num_input_tokens_seen": 114542016, "step": 53100 }, { "epoch": 8.663132137030995, "grad_norm": 0.49453026056289673, "learning_rate": 0.0006983361334415831, "loss": 0.047, "num_input_tokens_seen": 114552288, "step": 53105 }, { "epoch": 8.66394779771615, "grad_norm": 0.47048115730285645, "learning_rate": 0.0006982707911806483, "loss": 0.0586, "num_input_tokens_seen": 114562976, "step": 53110 }, { "epoch": 8.664763458401305, "grad_norm": 0.31761083006858826, "learning_rate": 0.0006982054449015044, "loss": 0.1502, "num_input_tokens_seen": 114574144, "step": 53115 }, { "epoch": 8.66557911908646, "grad_norm": 0.03569081425666809, "learning_rate": 0.0006981400946054758, "loss": 0.0289, "num_input_tokens_seen": 114586080, "step": 53120 }, { "epoch": 8.666394779771615, "grad_norm": 0.4546973407268524, "learning_rate": 0.0006980747402938868, "loss": 0.0542, "num_input_tokens_seen": 114596096, "step": 53125 }, { "epoch": 8.66721044045677, "grad_norm": 0.06431930512189865, "learning_rate": 0.0006980093819680616, "loss": 0.083, "num_input_tokens_seen": 114608544, "step": 53130 }, { "epoch": 8.668026101141924, "grad_norm": 0.10850845277309418, "learning_rate": 0.0006979440196293254, "loss": 0.054, "num_input_tokens_seen": 114619840, "step": 53135 }, { "epoch": 8.66884176182708, "grad_norm": 0.4214705228805542, "learning_rate": 0.0006978786532790025, "loss": 0.0452, "num_input_tokens_seen": 114631008, "step": 53140 }, { "epoch": 8.669657422512234, "grad_norm": 0.015672873705625534, "learning_rate": 0.0006978132829184176, "loss": 0.0283, "num_input_tokens_seen": 114641664, "step": 53145 }, { "epoch": 8.67047308319739, "grad_norm": 0.016845721751451492, "learning_rate": 0.0006977479085488956, "loss": 0.0418, "num_input_tokens_seen": 114650976, "step": 53150 }, { "epoch": 8.671288743882545, "grad_norm": 0.17436188459396362, "learning_rate": 0.0006976825301717615, "loss": 0.025, "num_input_tokens_seen": 114663360, "step": 53155 }, { "epoch": 8.6721044045677, "grad_norm": 0.021396413445472717, "learning_rate": 0.0006976171477883399, "loss": 0.086, "num_input_tokens_seen": 114674144, "step": 53160 }, { "epoch": 8.672920065252855, "grad_norm": 0.19203506410121918, "learning_rate": 0.0006975517613999562, "loss": 0.1002, "num_input_tokens_seen": 114684320, "step": 53165 }, { "epoch": 8.673735725938009, "grad_norm": 0.23389990627765656, "learning_rate": 0.0006974863710079355, "loss": 0.1249, "num_input_tokens_seen": 114695008, "step": 53170 }, { "epoch": 8.674551386623165, "grad_norm": 0.01268321555107832, "learning_rate": 0.0006974209766136031, "loss": 0.0195, "num_input_tokens_seen": 114706624, "step": 53175 }, { "epoch": 8.67536704730832, "grad_norm": 0.15128210186958313, "learning_rate": 0.0006973555782182839, "loss": 0.1421, "num_input_tokens_seen": 114718048, "step": 53180 }, { "epoch": 8.676182707993474, "grad_norm": 0.16064926981925964, "learning_rate": 0.0006972901758233037, "loss": 0.0622, "num_input_tokens_seen": 114728384, "step": 53185 }, { "epoch": 8.67699836867863, "grad_norm": 0.18036328256130219, "learning_rate": 0.0006972247694299877, "loss": 0.0124, "num_input_tokens_seen": 114739200, "step": 53190 }, { "epoch": 8.677814029363784, "grad_norm": 0.2084040492773056, "learning_rate": 0.0006971593590396616, "loss": 0.0121, "num_input_tokens_seen": 114749888, "step": 53195 }, { "epoch": 8.67862969004894, "grad_norm": 0.010485968552529812, "learning_rate": 0.000697093944653651, "loss": 0.0662, "num_input_tokens_seen": 114761440, "step": 53200 }, { "epoch": 8.679445350734095, "grad_norm": 0.003406255040317774, "learning_rate": 0.0006970285262732815, "loss": 0.1865, "num_input_tokens_seen": 114772096, "step": 53205 }, { "epoch": 8.68026101141925, "grad_norm": 0.05442534387111664, "learning_rate": 0.000696963103899879, "loss": 0.0734, "num_input_tokens_seen": 114783264, "step": 53210 }, { "epoch": 8.681076672104405, "grad_norm": 0.09310620278120041, "learning_rate": 0.0006968976775347694, "loss": 0.0121, "num_input_tokens_seen": 114794208, "step": 53215 }, { "epoch": 8.681892332789559, "grad_norm": 0.0036744459066540003, "learning_rate": 0.0006968322471792785, "loss": 0.1505, "num_input_tokens_seen": 114803392, "step": 53220 }, { "epoch": 8.682707993474715, "grad_norm": 0.04883972555398941, "learning_rate": 0.0006967668128347324, "loss": 0.039, "num_input_tokens_seen": 114814432, "step": 53225 }, { "epoch": 8.68352365415987, "grad_norm": 0.29169610142707825, "learning_rate": 0.0006967013745024573, "loss": 0.0531, "num_input_tokens_seen": 114824608, "step": 53230 }, { "epoch": 8.684339314845024, "grad_norm": 0.016991205513477325, "learning_rate": 0.0006966359321837792, "loss": 0.0593, "num_input_tokens_seen": 114834784, "step": 53235 }, { "epoch": 8.68515497553018, "grad_norm": 0.04017460346221924, "learning_rate": 0.0006965704858800246, "loss": 0.022, "num_input_tokens_seen": 114845568, "step": 53240 }, { "epoch": 8.685970636215334, "grad_norm": 0.056792955845594406, "learning_rate": 0.0006965050355925197, "loss": 0.199, "num_input_tokens_seen": 114856672, "step": 53245 }, { "epoch": 8.68678629690049, "grad_norm": 0.01601191610097885, "learning_rate": 0.000696439581322591, "loss": 0.0978, "num_input_tokens_seen": 114866720, "step": 53250 }, { "epoch": 8.687601957585644, "grad_norm": 0.014594516716897488, "learning_rate": 0.000696374123071565, "loss": 0.014, "num_input_tokens_seen": 114877408, "step": 53255 }, { "epoch": 8.6884176182708, "grad_norm": 0.22053614258766174, "learning_rate": 0.0006963086608407683, "loss": 0.0599, "num_input_tokens_seen": 114888992, "step": 53260 }, { "epoch": 8.689233278955955, "grad_norm": 0.04673745110630989, "learning_rate": 0.0006962431946315274, "loss": 0.0794, "num_input_tokens_seen": 114899360, "step": 53265 }, { "epoch": 8.690048939641109, "grad_norm": 0.4070226848125458, "learning_rate": 0.0006961777244451694, "loss": 0.2165, "num_input_tokens_seen": 114910784, "step": 53270 }, { "epoch": 8.690864600326265, "grad_norm": 0.6344642043113708, "learning_rate": 0.0006961122502830208, "loss": 0.1234, "num_input_tokens_seen": 114920512, "step": 53275 }, { "epoch": 8.691680261011419, "grad_norm": 0.019308947026729584, "learning_rate": 0.0006960467721464086, "loss": 0.0222, "num_input_tokens_seen": 114931616, "step": 53280 }, { "epoch": 8.692495921696574, "grad_norm": 0.07603778690099716, "learning_rate": 0.00069598129003666, "loss": 0.0659, "num_input_tokens_seen": 114941504, "step": 53285 }, { "epoch": 8.69331158238173, "grad_norm": 0.1761578619480133, "learning_rate": 0.0006959158039551019, "loss": 0.0316, "num_input_tokens_seen": 114951936, "step": 53290 }, { "epoch": 8.694127243066884, "grad_norm": 0.03502597287297249, "learning_rate": 0.0006958503139030616, "loss": 0.0212, "num_input_tokens_seen": 114963168, "step": 53295 }, { "epoch": 8.69494290375204, "grad_norm": 0.17464032769203186, "learning_rate": 0.0006957848198818661, "loss": 0.0414, "num_input_tokens_seen": 114972960, "step": 53300 }, { "epoch": 8.695758564437194, "grad_norm": 0.41044381260871887, "learning_rate": 0.0006957193218928429, "loss": 0.048, "num_input_tokens_seen": 114983072, "step": 53305 }, { "epoch": 8.69657422512235, "grad_norm": 0.5138563513755798, "learning_rate": 0.0006956538199373194, "loss": 0.261, "num_input_tokens_seen": 114993024, "step": 53310 }, { "epoch": 8.697389885807503, "grad_norm": 0.015174128115177155, "learning_rate": 0.000695588314016623, "loss": 0.1371, "num_input_tokens_seen": 115003104, "step": 53315 }, { "epoch": 8.698205546492659, "grad_norm": 0.1455385982990265, "learning_rate": 0.0006955228041320811, "loss": 0.036, "num_input_tokens_seen": 115013312, "step": 53320 }, { "epoch": 8.699021207177815, "grad_norm": 0.3805324137210846, "learning_rate": 0.0006954572902850218, "loss": 0.0439, "num_input_tokens_seen": 115023840, "step": 53325 }, { "epoch": 8.699836867862969, "grad_norm": 0.025569431483745575, "learning_rate": 0.0006953917724767724, "loss": 0.0249, "num_input_tokens_seen": 115033824, "step": 53330 }, { "epoch": 8.700652528548124, "grad_norm": 0.45007285475730896, "learning_rate": 0.0006953262507086611, "loss": 0.0709, "num_input_tokens_seen": 115044832, "step": 53335 }, { "epoch": 8.701468189233278, "grad_norm": 0.03266146779060364, "learning_rate": 0.0006952607249820153, "loss": 0.0276, "num_input_tokens_seen": 115055072, "step": 53340 }, { "epoch": 8.702283849918434, "grad_norm": 0.06985446810722351, "learning_rate": 0.0006951951952981631, "loss": 0.1652, "num_input_tokens_seen": 115065600, "step": 53345 }, { "epoch": 8.70309951060359, "grad_norm": 0.5989344120025635, "learning_rate": 0.0006951296616584329, "loss": 0.0515, "num_input_tokens_seen": 115075232, "step": 53350 }, { "epoch": 8.703915171288743, "grad_norm": 0.21277424693107605, "learning_rate": 0.0006950641240641524, "loss": 0.0214, "num_input_tokens_seen": 115085952, "step": 53355 }, { "epoch": 8.7047308319739, "grad_norm": 0.48762229084968567, "learning_rate": 0.0006949985825166501, "loss": 0.1235, "num_input_tokens_seen": 115096896, "step": 53360 }, { "epoch": 8.705546492659053, "grad_norm": 0.2878133952617645, "learning_rate": 0.0006949330370172541, "loss": 0.0294, "num_input_tokens_seen": 115107776, "step": 53365 }, { "epoch": 8.706362153344209, "grad_norm": 0.07085592299699783, "learning_rate": 0.0006948674875672927, "loss": 0.0489, "num_input_tokens_seen": 115118496, "step": 53370 }, { "epoch": 8.707177814029365, "grad_norm": 0.01315888948738575, "learning_rate": 0.0006948019341680945, "loss": 0.0064, "num_input_tokens_seen": 115129536, "step": 53375 }, { "epoch": 8.707993474714518, "grad_norm": 0.06919736415147781, "learning_rate": 0.0006947363768209882, "loss": 0.1846, "num_input_tokens_seen": 115139360, "step": 53380 }, { "epoch": 8.708809135399674, "grad_norm": 0.00579241756349802, "learning_rate": 0.000694670815527302, "loss": 0.1102, "num_input_tokens_seen": 115147712, "step": 53385 }, { "epoch": 8.709624796084828, "grad_norm": 0.2415034919977188, "learning_rate": 0.0006946052502883648, "loss": 0.0604, "num_input_tokens_seen": 115156832, "step": 53390 }, { "epoch": 8.710440456769984, "grad_norm": 0.03303147479891777, "learning_rate": 0.0006945396811055053, "loss": 0.0608, "num_input_tokens_seen": 115167136, "step": 53395 }, { "epoch": 8.71125611745514, "grad_norm": 0.03103630244731903, "learning_rate": 0.0006944741079800525, "loss": 0.0219, "num_input_tokens_seen": 115178336, "step": 53400 }, { "epoch": 8.712071778140293, "grad_norm": 0.10189773142337799, "learning_rate": 0.000694408530913335, "loss": 0.0291, "num_input_tokens_seen": 115189664, "step": 53405 }, { "epoch": 8.71288743882545, "grad_norm": 0.38640961050987244, "learning_rate": 0.0006943429499066821, "loss": 0.1545, "num_input_tokens_seen": 115201408, "step": 53410 }, { "epoch": 8.713703099510603, "grad_norm": 0.035593431442976, "learning_rate": 0.0006942773649614228, "loss": 0.063, "num_input_tokens_seen": 115211072, "step": 53415 }, { "epoch": 8.714518760195759, "grad_norm": 0.12581591308116913, "learning_rate": 0.0006942117760788862, "loss": 0.1615, "num_input_tokens_seen": 115221472, "step": 53420 }, { "epoch": 8.715334420880914, "grad_norm": 0.02180134505033493, "learning_rate": 0.0006941461832604017, "loss": 0.0563, "num_input_tokens_seen": 115232256, "step": 53425 }, { "epoch": 8.716150081566068, "grad_norm": 0.03803763911128044, "learning_rate": 0.0006940805865072984, "loss": 0.0094, "num_input_tokens_seen": 115242944, "step": 53430 }, { "epoch": 8.716965742251224, "grad_norm": 0.02931883931159973, "learning_rate": 0.0006940149858209058, "loss": 0.0487, "num_input_tokens_seen": 115254464, "step": 53435 }, { "epoch": 8.717781402936378, "grad_norm": 0.02903362177312374, "learning_rate": 0.0006939493812025534, "loss": 0.0416, "num_input_tokens_seen": 115264864, "step": 53440 }, { "epoch": 8.718597063621534, "grad_norm": 0.015029530972242355, "learning_rate": 0.0006938837726535707, "loss": 0.153, "num_input_tokens_seen": 115275680, "step": 53445 }, { "epoch": 8.719412724306688, "grad_norm": 0.41206926107406616, "learning_rate": 0.0006938181601752873, "loss": 0.248, "num_input_tokens_seen": 115285760, "step": 53450 }, { "epoch": 8.720228384991843, "grad_norm": 0.10325730592012405, "learning_rate": 0.0006937525437690332, "loss": 0.2416, "num_input_tokens_seen": 115297408, "step": 53455 }, { "epoch": 8.721044045676999, "grad_norm": 0.040243711322546005, "learning_rate": 0.0006936869234361379, "loss": 0.0599, "num_input_tokens_seen": 115306752, "step": 53460 }, { "epoch": 8.721859706362153, "grad_norm": 0.05241856724023819, "learning_rate": 0.0006936212991779314, "loss": 0.1813, "num_input_tokens_seen": 115316928, "step": 53465 }, { "epoch": 8.722675367047309, "grad_norm": 0.16963286697864532, "learning_rate": 0.0006935556709957437, "loss": 0.0686, "num_input_tokens_seen": 115326784, "step": 53470 }, { "epoch": 8.723491027732463, "grad_norm": 0.1333635002374649, "learning_rate": 0.0006934900388909048, "loss": 0.0887, "num_input_tokens_seen": 115337856, "step": 53475 }, { "epoch": 8.724306688417618, "grad_norm": 0.12847691774368286, "learning_rate": 0.0006934244028647447, "loss": 0.0993, "num_input_tokens_seen": 115348704, "step": 53480 }, { "epoch": 8.725122349102774, "grad_norm": 0.15227638185024261, "learning_rate": 0.0006933587629185938, "loss": 0.0394, "num_input_tokens_seen": 115357792, "step": 53485 }, { "epoch": 8.725938009787928, "grad_norm": 0.03610370680689812, "learning_rate": 0.0006932931190537822, "loss": 0.033, "num_input_tokens_seen": 115369056, "step": 53490 }, { "epoch": 8.726753670473084, "grad_norm": 0.04167995601892471, "learning_rate": 0.0006932274712716405, "loss": 0.1152, "num_input_tokens_seen": 115378624, "step": 53495 }, { "epoch": 8.727569331158238, "grad_norm": 0.37678787112236023, "learning_rate": 0.0006931618195734988, "loss": 0.0855, "num_input_tokens_seen": 115389920, "step": 53500 }, { "epoch": 8.728384991843393, "grad_norm": 1.0431709289550781, "learning_rate": 0.0006930961639606878, "loss": 0.2584, "num_input_tokens_seen": 115400864, "step": 53505 }, { "epoch": 8.729200652528547, "grad_norm": 0.479232519865036, "learning_rate": 0.0006930305044345381, "loss": 0.0565, "num_input_tokens_seen": 115412224, "step": 53510 }, { "epoch": 8.730016313213703, "grad_norm": 0.049810491502285004, "learning_rate": 0.0006929648409963802, "loss": 0.0365, "num_input_tokens_seen": 115423808, "step": 53515 }, { "epoch": 8.730831973898859, "grad_norm": 0.04052180051803589, "learning_rate": 0.0006928991736475452, "loss": 0.0694, "num_input_tokens_seen": 115433632, "step": 53520 }, { "epoch": 8.731647634584013, "grad_norm": 0.12301499396562576, "learning_rate": 0.0006928335023893637, "loss": 0.052, "num_input_tokens_seen": 115445088, "step": 53525 }, { "epoch": 8.732463295269168, "grad_norm": 0.05882667750120163, "learning_rate": 0.0006927678272231667, "loss": 0.0539, "num_input_tokens_seen": 115455840, "step": 53530 }, { "epoch": 8.733278955954322, "grad_norm": 0.00851437821984291, "learning_rate": 0.0006927021481502851, "loss": 0.0229, "num_input_tokens_seen": 115465856, "step": 53535 }, { "epoch": 8.734094616639478, "grad_norm": 0.12066182494163513, "learning_rate": 0.0006926364651720499, "loss": 0.0155, "num_input_tokens_seen": 115475296, "step": 53540 }, { "epoch": 8.734910277324634, "grad_norm": 0.030864359810948372, "learning_rate": 0.0006925707782897925, "loss": 0.0822, "num_input_tokens_seen": 115486144, "step": 53545 }, { "epoch": 8.735725938009788, "grad_norm": 0.03537926450371742, "learning_rate": 0.000692505087504844, "loss": 0.0746, "num_input_tokens_seen": 115496640, "step": 53550 }, { "epoch": 8.736541598694943, "grad_norm": 0.2773245871067047, "learning_rate": 0.0006924393928185354, "loss": 0.1362, "num_input_tokens_seen": 115508064, "step": 53555 }, { "epoch": 8.737357259380097, "grad_norm": 0.5599880814552307, "learning_rate": 0.0006923736942321987, "loss": 0.0859, "num_input_tokens_seen": 115519392, "step": 53560 }, { "epoch": 8.738172920065253, "grad_norm": 0.33625146746635437, "learning_rate": 0.0006923079917471648, "loss": 0.1477, "num_input_tokens_seen": 115529696, "step": 53565 }, { "epoch": 8.738988580750409, "grad_norm": 0.015536236576735973, "learning_rate": 0.0006922422853647656, "loss": 0.08, "num_input_tokens_seen": 115541312, "step": 53570 }, { "epoch": 8.739804241435563, "grad_norm": 0.009478003717958927, "learning_rate": 0.0006921765750863327, "loss": 0.0113, "num_input_tokens_seen": 115552096, "step": 53575 }, { "epoch": 8.740619902120718, "grad_norm": 0.05003843829035759, "learning_rate": 0.0006921108609131976, "loss": 0.0492, "num_input_tokens_seen": 115562464, "step": 53580 }, { "epoch": 8.741435562805872, "grad_norm": 0.08236910402774811, "learning_rate": 0.0006920451428466923, "loss": 0.0176, "num_input_tokens_seen": 115573984, "step": 53585 }, { "epoch": 8.742251223491028, "grad_norm": 0.2935020625591278, "learning_rate": 0.0006919794208881486, "loss": 0.0357, "num_input_tokens_seen": 115585664, "step": 53590 }, { "epoch": 8.743066884176184, "grad_norm": 0.5583141446113586, "learning_rate": 0.0006919136950388982, "loss": 0.2475, "num_input_tokens_seen": 115596736, "step": 53595 }, { "epoch": 8.743882544861338, "grad_norm": 0.13295137882232666, "learning_rate": 0.0006918479653002734, "loss": 0.0555, "num_input_tokens_seen": 115607136, "step": 53600 }, { "epoch": 8.744698205546493, "grad_norm": 0.022684739902615547, "learning_rate": 0.0006917822316736062, "loss": 0.0687, "num_input_tokens_seen": 115618400, "step": 53605 }, { "epoch": 8.745513866231647, "grad_norm": 0.05544504523277283, "learning_rate": 0.0006917164941602289, "loss": 0.0866, "num_input_tokens_seen": 115629632, "step": 53610 }, { "epoch": 8.746329526916803, "grad_norm": 0.26641178131103516, "learning_rate": 0.0006916507527614735, "loss": 0.0971, "num_input_tokens_seen": 115640864, "step": 53615 }, { "epoch": 8.747145187601957, "grad_norm": 0.049183379858732224, "learning_rate": 0.0006915850074786725, "loss": 0.0502, "num_input_tokens_seen": 115652192, "step": 53620 }, { "epoch": 8.747960848287113, "grad_norm": 0.41419583559036255, "learning_rate": 0.0006915192583131582, "loss": 0.2398, "num_input_tokens_seen": 115662784, "step": 53625 }, { "epoch": 8.748776508972268, "grad_norm": 0.12176747620105743, "learning_rate": 0.0006914535052662633, "loss": 0.0698, "num_input_tokens_seen": 115674080, "step": 53630 }, { "epoch": 8.749592169657422, "grad_norm": 0.21555472910404205, "learning_rate": 0.0006913877483393202, "loss": 0.0299, "num_input_tokens_seen": 115685440, "step": 53635 }, { "epoch": 8.750407830342578, "grad_norm": 0.009912700392305851, "learning_rate": 0.0006913219875336616, "loss": 0.0474, "num_input_tokens_seen": 115697696, "step": 53640 }, { "epoch": 8.751223491027732, "grad_norm": 0.009432855062186718, "learning_rate": 0.0006912562228506201, "loss": 0.0645, "num_input_tokens_seen": 115709056, "step": 53645 }, { "epoch": 8.752039151712887, "grad_norm": 0.06638861447572708, "learning_rate": 0.0006911904542915288, "loss": 0.0609, "num_input_tokens_seen": 115720416, "step": 53650 }, { "epoch": 8.752854812398043, "grad_norm": 0.18245218694210052, "learning_rate": 0.0006911246818577201, "loss": 0.1115, "num_input_tokens_seen": 115730560, "step": 53655 }, { "epoch": 8.753670473083197, "grad_norm": 0.13418616354465485, "learning_rate": 0.0006910589055505275, "loss": 0.0283, "num_input_tokens_seen": 115740448, "step": 53660 }, { "epoch": 8.754486133768353, "grad_norm": 0.08909186720848083, "learning_rate": 0.0006909931253712838, "loss": 0.1283, "num_input_tokens_seen": 115751488, "step": 53665 }, { "epoch": 8.755301794453507, "grad_norm": 0.3907860517501831, "learning_rate": 0.0006909273413213222, "loss": 0.0571, "num_input_tokens_seen": 115762176, "step": 53670 }, { "epoch": 8.756117455138662, "grad_norm": 0.01374961156398058, "learning_rate": 0.0006908615534019757, "loss": 0.0049, "num_input_tokens_seen": 115771008, "step": 53675 }, { "epoch": 8.756933115823816, "grad_norm": 0.0160199161618948, "learning_rate": 0.0006907957616145777, "loss": 0.1317, "num_input_tokens_seen": 115782144, "step": 53680 }, { "epoch": 8.757748776508972, "grad_norm": 0.3237563371658325, "learning_rate": 0.0006907299659604613, "loss": 0.0647, "num_input_tokens_seen": 115792736, "step": 53685 }, { "epoch": 8.758564437194128, "grad_norm": 0.004840110894292593, "learning_rate": 0.0006906641664409605, "loss": 0.0121, "num_input_tokens_seen": 115802624, "step": 53690 }, { "epoch": 8.759380097879282, "grad_norm": 0.137078195810318, "learning_rate": 0.0006905983630574084, "loss": 0.137, "num_input_tokens_seen": 115814368, "step": 53695 }, { "epoch": 8.760195758564437, "grad_norm": 0.01143583469092846, "learning_rate": 0.0006905325558111389, "loss": 0.0292, "num_input_tokens_seen": 115825088, "step": 53700 }, { "epoch": 8.761011419249591, "grad_norm": 0.4225313663482666, "learning_rate": 0.0006904667447034851, "loss": 0.1204, "num_input_tokens_seen": 115836160, "step": 53705 }, { "epoch": 8.761827079934747, "grad_norm": 0.22437088191509247, "learning_rate": 0.0006904009297357814, "loss": 0.0353, "num_input_tokens_seen": 115846176, "step": 53710 }, { "epoch": 8.762642740619903, "grad_norm": 0.31809714436531067, "learning_rate": 0.000690335110909361, "loss": 0.0737, "num_input_tokens_seen": 115857440, "step": 53715 }, { "epoch": 8.763458401305057, "grad_norm": 0.1302795708179474, "learning_rate": 0.0006902692882255583, "loss": 0.0542, "num_input_tokens_seen": 115867616, "step": 53720 }, { "epoch": 8.764274061990212, "grad_norm": 0.02206779457628727, "learning_rate": 0.0006902034616857073, "loss": 0.0354, "num_input_tokens_seen": 115878272, "step": 53725 }, { "epoch": 8.765089722675366, "grad_norm": 0.020523015409708023, "learning_rate": 0.0006901376312911416, "loss": 0.0704, "num_input_tokens_seen": 115888608, "step": 53730 }, { "epoch": 8.765905383360522, "grad_norm": 0.4321870505809784, "learning_rate": 0.0006900717970431956, "loss": 0.1668, "num_input_tokens_seen": 115899232, "step": 53735 }, { "epoch": 8.766721044045678, "grad_norm": 0.025819502770900726, "learning_rate": 0.0006900059589432036, "loss": 0.0864, "num_input_tokens_seen": 115909920, "step": 53740 }, { "epoch": 8.767536704730832, "grad_norm": 0.13181297481060028, "learning_rate": 0.0006899401169924997, "loss": 0.0353, "num_input_tokens_seen": 115919744, "step": 53745 }, { "epoch": 8.768352365415987, "grad_norm": 0.03512445464730263, "learning_rate": 0.0006898742711924185, "loss": 0.113, "num_input_tokens_seen": 115930560, "step": 53750 }, { "epoch": 8.769168026101141, "grad_norm": 0.03561723604798317, "learning_rate": 0.0006898084215442942, "loss": 0.0885, "num_input_tokens_seen": 115941536, "step": 53755 }, { "epoch": 8.769983686786297, "grad_norm": 0.1160869300365448, "learning_rate": 0.0006897425680494616, "loss": 0.098, "num_input_tokens_seen": 115951744, "step": 53760 }, { "epoch": 8.770799347471453, "grad_norm": 0.05161687359213829, "learning_rate": 0.000689676710709255, "loss": 0.2218, "num_input_tokens_seen": 115962720, "step": 53765 }, { "epoch": 8.771615008156607, "grad_norm": 0.8680769205093384, "learning_rate": 0.0006896108495250092, "loss": 0.0887, "num_input_tokens_seen": 115973152, "step": 53770 }, { "epoch": 8.772430668841762, "grad_norm": 0.3734970688819885, "learning_rate": 0.0006895449844980592, "loss": 0.0857, "num_input_tokens_seen": 115983872, "step": 53775 }, { "epoch": 8.773246329526916, "grad_norm": 0.3729572892189026, "learning_rate": 0.0006894791156297394, "loss": 0.135, "num_input_tokens_seen": 115994560, "step": 53780 }, { "epoch": 8.774061990212072, "grad_norm": 0.3487239480018616, "learning_rate": 0.0006894132429213851, "loss": 0.1471, "num_input_tokens_seen": 116006176, "step": 53785 }, { "epoch": 8.774877650897226, "grad_norm": 0.4764491617679596, "learning_rate": 0.0006893473663743311, "loss": 0.1484, "num_input_tokens_seen": 116017152, "step": 53790 }, { "epoch": 8.775693311582382, "grad_norm": 0.43597739934921265, "learning_rate": 0.0006892814859899126, "loss": 0.0821, "num_input_tokens_seen": 116027840, "step": 53795 }, { "epoch": 8.776508972267537, "grad_norm": 0.09039237350225449, "learning_rate": 0.0006892156017694646, "loss": 0.0182, "num_input_tokens_seen": 116038848, "step": 53800 }, { "epoch": 8.777324632952691, "grad_norm": 0.3221157193183899, "learning_rate": 0.0006891497137143224, "loss": 0.1677, "num_input_tokens_seen": 116048192, "step": 53805 }, { "epoch": 8.778140293637847, "grad_norm": 0.09714799374341965, "learning_rate": 0.0006890838218258213, "loss": 0.0425, "num_input_tokens_seen": 116059552, "step": 53810 }, { "epoch": 8.778955954323001, "grad_norm": 0.010539236478507519, "learning_rate": 0.0006890179261052967, "loss": 0.031, "num_input_tokens_seen": 116069568, "step": 53815 }, { "epoch": 8.779771615008157, "grad_norm": 0.4359527826309204, "learning_rate": 0.000688952026554084, "loss": 0.0601, "num_input_tokens_seen": 116080864, "step": 53820 }, { "epoch": 8.780587275693312, "grad_norm": 0.05504335090517998, "learning_rate": 0.0006888861231735186, "loss": 0.0323, "num_input_tokens_seen": 116091008, "step": 53825 }, { "epoch": 8.781402936378466, "grad_norm": 0.05727994441986084, "learning_rate": 0.0006888202159649366, "loss": 0.0258, "num_input_tokens_seen": 116102720, "step": 53830 }, { "epoch": 8.782218597063622, "grad_norm": 0.15397650003433228, "learning_rate": 0.0006887543049296733, "loss": 0.1458, "num_input_tokens_seen": 116113824, "step": 53835 }, { "epoch": 8.783034257748776, "grad_norm": 0.23483791947364807, "learning_rate": 0.0006886883900690645, "loss": 0.0716, "num_input_tokens_seen": 116125184, "step": 53840 }, { "epoch": 8.783849918433932, "grad_norm": 0.053877513855695724, "learning_rate": 0.0006886224713844461, "loss": 0.1056, "num_input_tokens_seen": 116136160, "step": 53845 }, { "epoch": 8.784665579119086, "grad_norm": 0.02352530136704445, "learning_rate": 0.0006885565488771541, "loss": 0.0118, "num_input_tokens_seen": 116145888, "step": 53850 }, { "epoch": 8.785481239804241, "grad_norm": 0.026939259842038155, "learning_rate": 0.0006884906225485245, "loss": 0.0937, "num_input_tokens_seen": 116155936, "step": 53855 }, { "epoch": 8.786296900489397, "grad_norm": 0.5072387456893921, "learning_rate": 0.0006884246923998932, "loss": 0.0977, "num_input_tokens_seen": 116166688, "step": 53860 }, { "epoch": 8.78711256117455, "grad_norm": 0.27622395753860474, "learning_rate": 0.0006883587584325965, "loss": 0.0854, "num_input_tokens_seen": 116178272, "step": 53865 }, { "epoch": 8.787928221859707, "grad_norm": 0.7445651888847351, "learning_rate": 0.0006882928206479707, "loss": 0.1588, "num_input_tokens_seen": 116189120, "step": 53870 }, { "epoch": 8.78874388254486, "grad_norm": 0.30124449729919434, "learning_rate": 0.0006882268790473517, "loss": 0.0203, "num_input_tokens_seen": 116200128, "step": 53875 }, { "epoch": 8.789559543230016, "grad_norm": 0.24563272297382355, "learning_rate": 0.0006881609336320764, "loss": 0.0344, "num_input_tokens_seen": 116211776, "step": 53880 }, { "epoch": 8.790375203915172, "grad_norm": 0.1782751828432083, "learning_rate": 0.0006880949844034811, "loss": 0.07, "num_input_tokens_seen": 116222208, "step": 53885 }, { "epoch": 8.791190864600326, "grad_norm": 0.29298514127731323, "learning_rate": 0.0006880290313629026, "loss": 0.1519, "num_input_tokens_seen": 116232864, "step": 53890 }, { "epoch": 8.792006525285482, "grad_norm": 0.28120049834251404, "learning_rate": 0.0006879630745116769, "loss": 0.0641, "num_input_tokens_seen": 116242976, "step": 53895 }, { "epoch": 8.792822185970635, "grad_norm": 0.09896563738584518, "learning_rate": 0.0006878971138511412, "loss": 0.0813, "num_input_tokens_seen": 116255456, "step": 53900 }, { "epoch": 8.793637846655791, "grad_norm": 0.03434130549430847, "learning_rate": 0.000687831149382632, "loss": 0.0524, "num_input_tokens_seen": 116266080, "step": 53905 }, { "epoch": 8.794453507340947, "grad_norm": 0.18517626821994781, "learning_rate": 0.0006877651811074863, "loss": 0.0761, "num_input_tokens_seen": 116276128, "step": 53910 }, { "epoch": 8.7952691680261, "grad_norm": 0.16261640191078186, "learning_rate": 0.0006876992090270411, "loss": 0.0285, "num_input_tokens_seen": 116286208, "step": 53915 }, { "epoch": 8.796084828711257, "grad_norm": 0.2285573035478592, "learning_rate": 0.0006876332331426332, "loss": 0.2496, "num_input_tokens_seen": 116297408, "step": 53920 }, { "epoch": 8.79690048939641, "grad_norm": 0.08167286217212677, "learning_rate": 0.0006875672534556, "loss": 0.1672, "num_input_tokens_seen": 116308224, "step": 53925 }, { "epoch": 8.797716150081566, "grad_norm": 0.48806145787239075, "learning_rate": 0.0006875012699672783, "loss": 0.0989, "num_input_tokens_seen": 116319328, "step": 53930 }, { "epoch": 8.798531810766722, "grad_norm": 0.022313492372632027, "learning_rate": 0.0006874352826790055, "loss": 0.0611, "num_input_tokens_seen": 116330400, "step": 53935 }, { "epoch": 8.799347471451876, "grad_norm": 0.1614833027124405, "learning_rate": 0.000687369291592119, "loss": 0.0241, "num_input_tokens_seen": 116341088, "step": 53940 }, { "epoch": 8.800163132137031, "grad_norm": 0.15085767209529877, "learning_rate": 0.0006873032967079561, "loss": 0.0465, "num_input_tokens_seen": 116352256, "step": 53945 }, { "epoch": 8.800978792822185, "grad_norm": 0.048762086778879166, "learning_rate": 0.0006872372980278543, "loss": 0.0303, "num_input_tokens_seen": 116362912, "step": 53950 }, { "epoch": 8.801794453507341, "grad_norm": 0.40989142656326294, "learning_rate": 0.0006871712955531511, "loss": 0.0998, "num_input_tokens_seen": 116372736, "step": 53955 }, { "epoch": 8.802610114192497, "grad_norm": 0.023159367963671684, "learning_rate": 0.0006871052892851842, "loss": 0.0664, "num_input_tokens_seen": 116384832, "step": 53960 }, { "epoch": 8.80342577487765, "grad_norm": 0.11821544170379639, "learning_rate": 0.0006870392792252911, "loss": 0.0568, "num_input_tokens_seen": 116396000, "step": 53965 }, { "epoch": 8.804241435562806, "grad_norm": 0.005723128095269203, "learning_rate": 0.0006869732653748096, "loss": 0.019, "num_input_tokens_seen": 116407648, "step": 53970 }, { "epoch": 8.80505709624796, "grad_norm": 0.0199685450643301, "learning_rate": 0.000686907247735078, "loss": 0.0212, "num_input_tokens_seen": 116418720, "step": 53975 }, { "epoch": 8.805872756933116, "grad_norm": 0.028145885095000267, "learning_rate": 0.0006868412263074337, "loss": 0.1438, "num_input_tokens_seen": 116430208, "step": 53980 }, { "epoch": 8.80668841761827, "grad_norm": 0.06647484004497528, "learning_rate": 0.0006867752010932151, "loss": 0.1624, "num_input_tokens_seen": 116439424, "step": 53985 }, { "epoch": 8.807504078303426, "grad_norm": 0.09728353470563889, "learning_rate": 0.00068670917209376, "loss": 0.2522, "num_input_tokens_seen": 116449856, "step": 53990 }, { "epoch": 8.808319738988581, "grad_norm": 0.012160678394138813, "learning_rate": 0.0006866431393104067, "loss": 0.0287, "num_input_tokens_seen": 116461216, "step": 53995 }, { "epoch": 8.809135399673735, "grad_norm": 0.12803897261619568, "learning_rate": 0.0006865771027444933, "loss": 0.0558, "num_input_tokens_seen": 116472992, "step": 54000 }, { "epoch": 8.809951060358891, "grad_norm": 0.10555374622344971, "learning_rate": 0.0006865110623973585, "loss": 0.0243, "num_input_tokens_seen": 116484480, "step": 54005 }, { "epoch": 8.810766721044045, "grad_norm": 0.1349884420633316, "learning_rate": 0.0006864450182703403, "loss": 0.0306, "num_input_tokens_seen": 116496224, "step": 54010 }, { "epoch": 8.8115823817292, "grad_norm": 0.03510262817144394, "learning_rate": 0.0006863789703647771, "loss": 0.153, "num_input_tokens_seen": 116506816, "step": 54015 }, { "epoch": 8.812398042414356, "grad_norm": 0.026403389871120453, "learning_rate": 0.0006863129186820079, "loss": 0.0154, "num_input_tokens_seen": 116518080, "step": 54020 }, { "epoch": 8.81321370309951, "grad_norm": 0.040965914726257324, "learning_rate": 0.0006862468632233709, "loss": 0.1592, "num_input_tokens_seen": 116528608, "step": 54025 }, { "epoch": 8.814029363784666, "grad_norm": 0.15670448541641235, "learning_rate": 0.000686180803990205, "loss": 0.0828, "num_input_tokens_seen": 116540416, "step": 54030 }, { "epoch": 8.81484502446982, "grad_norm": 0.008520507253706455, "learning_rate": 0.0006861147409838489, "loss": 0.0336, "num_input_tokens_seen": 116550656, "step": 54035 }, { "epoch": 8.815660685154976, "grad_norm": 0.003911648876965046, "learning_rate": 0.0006860486742056415, "loss": 0.0718, "num_input_tokens_seen": 116563104, "step": 54040 }, { "epoch": 8.81647634584013, "grad_norm": 0.02751264162361622, "learning_rate": 0.0006859826036569216, "loss": 0.089, "num_input_tokens_seen": 116573728, "step": 54045 }, { "epoch": 8.817292006525285, "grad_norm": 0.5491066575050354, "learning_rate": 0.0006859165293390284, "loss": 0.0715, "num_input_tokens_seen": 116585344, "step": 54050 }, { "epoch": 8.818107667210441, "grad_norm": 0.04623859375715256, "learning_rate": 0.0006858504512533008, "loss": 0.0282, "num_input_tokens_seen": 116594624, "step": 54055 }, { "epoch": 8.818923327895595, "grad_norm": 0.3674001097679138, "learning_rate": 0.000685784369401078, "loss": 0.0378, "num_input_tokens_seen": 116604672, "step": 54060 }, { "epoch": 8.81973898858075, "grad_norm": 0.17711563408374786, "learning_rate": 0.0006857182837836994, "loss": 0.021, "num_input_tokens_seen": 116616384, "step": 54065 }, { "epoch": 8.820554649265905, "grad_norm": 0.24790586531162262, "learning_rate": 0.0006856521944025041, "loss": 0.1746, "num_input_tokens_seen": 116627264, "step": 54070 }, { "epoch": 8.82137030995106, "grad_norm": 0.12287767231464386, "learning_rate": 0.0006855861012588316, "loss": 0.0483, "num_input_tokens_seen": 116637344, "step": 54075 }, { "epoch": 8.822185970636216, "grad_norm": 0.22150813043117523, "learning_rate": 0.0006855200043540213, "loss": 0.029, "num_input_tokens_seen": 116648352, "step": 54080 }, { "epoch": 8.82300163132137, "grad_norm": 0.2915731966495514, "learning_rate": 0.0006854539036894128, "loss": 0.1743, "num_input_tokens_seen": 116658496, "step": 54085 }, { "epoch": 8.823817292006526, "grad_norm": 0.08894834667444229, "learning_rate": 0.0006853877992663456, "loss": 0.1099, "num_input_tokens_seen": 116668864, "step": 54090 }, { "epoch": 8.82463295269168, "grad_norm": 0.016573136672377586, "learning_rate": 0.0006853216910861595, "loss": 0.0449, "num_input_tokens_seen": 116679264, "step": 54095 }, { "epoch": 8.825448613376835, "grad_norm": 0.022801194339990616, "learning_rate": 0.0006852555791501942, "loss": 0.0283, "num_input_tokens_seen": 116689408, "step": 54100 }, { "epoch": 8.826264274061991, "grad_norm": 0.4085477590560913, "learning_rate": 0.0006851894634597898, "loss": 0.1538, "num_input_tokens_seen": 116700224, "step": 54105 }, { "epoch": 8.827079934747145, "grad_norm": 0.0037660538218915462, "learning_rate": 0.0006851233440162858, "loss": 0.0616, "num_input_tokens_seen": 116711616, "step": 54110 }, { "epoch": 8.8278955954323, "grad_norm": 0.019361043348908424, "learning_rate": 0.0006850572208210223, "loss": 0.1029, "num_input_tokens_seen": 116723168, "step": 54115 }, { "epoch": 8.828711256117455, "grad_norm": 0.016678938642144203, "learning_rate": 0.0006849910938753396, "loss": 0.1946, "num_input_tokens_seen": 116735136, "step": 54120 }, { "epoch": 8.82952691680261, "grad_norm": 0.12006314843893051, "learning_rate": 0.0006849249631805777, "loss": 0.143, "num_input_tokens_seen": 116745792, "step": 54125 }, { "epoch": 8.830342577487766, "grad_norm": 0.29284971952438354, "learning_rate": 0.0006848588287380769, "loss": 0.0767, "num_input_tokens_seen": 116757344, "step": 54130 }, { "epoch": 8.83115823817292, "grad_norm": 0.14746196568012238, "learning_rate": 0.0006847926905491771, "loss": 0.1876, "num_input_tokens_seen": 116768096, "step": 54135 }, { "epoch": 8.831973898858076, "grad_norm": 0.009986791759729385, "learning_rate": 0.0006847265486152192, "loss": 0.0595, "num_input_tokens_seen": 116778496, "step": 54140 }, { "epoch": 8.83278955954323, "grad_norm": 0.01148443203419447, "learning_rate": 0.0006846604029375435, "loss": 0.076, "num_input_tokens_seen": 116789472, "step": 54145 }, { "epoch": 8.833605220228385, "grad_norm": 0.5052831172943115, "learning_rate": 0.0006845942535174905, "loss": 0.077, "num_input_tokens_seen": 116800000, "step": 54150 }, { "epoch": 8.83442088091354, "grad_norm": 0.002669984707608819, "learning_rate": 0.0006845281003564007, "loss": 0.0478, "num_input_tokens_seen": 116812000, "step": 54155 }, { "epoch": 8.835236541598695, "grad_norm": 0.23330183327198029, "learning_rate": 0.0006844619434556149, "loss": 0.035, "num_input_tokens_seen": 116822816, "step": 54160 }, { "epoch": 8.83605220228385, "grad_norm": 0.22590875625610352, "learning_rate": 0.0006843957828164737, "loss": 0.0671, "num_input_tokens_seen": 116833792, "step": 54165 }, { "epoch": 8.836867862969005, "grad_norm": 0.23232512176036835, "learning_rate": 0.0006843296184403182, "loss": 0.0993, "num_input_tokens_seen": 116844448, "step": 54170 }, { "epoch": 8.83768352365416, "grad_norm": 0.029411524534225464, "learning_rate": 0.0006842634503284891, "loss": 0.015, "num_input_tokens_seen": 116854848, "step": 54175 }, { "epoch": 8.838499184339314, "grad_norm": 0.4057464897632599, "learning_rate": 0.0006841972784823274, "loss": 0.0872, "num_input_tokens_seen": 116865600, "step": 54180 }, { "epoch": 8.83931484502447, "grad_norm": 0.044737253338098526, "learning_rate": 0.0006841311029031742, "loss": 0.0921, "num_input_tokens_seen": 116876864, "step": 54185 }, { "epoch": 8.840130505709626, "grad_norm": 0.30428585410118103, "learning_rate": 0.0006840649235923706, "loss": 0.1106, "num_input_tokens_seen": 116887584, "step": 54190 }, { "epoch": 8.84094616639478, "grad_norm": 0.14722320437431335, "learning_rate": 0.0006839987405512577, "loss": 0.077, "num_input_tokens_seen": 116897856, "step": 54195 }, { "epoch": 8.841761827079935, "grad_norm": 0.007436060346662998, "learning_rate": 0.000683932553781177, "loss": 0.168, "num_input_tokens_seen": 116909184, "step": 54200 }, { "epoch": 8.84257748776509, "grad_norm": 0.2358637899160385, "learning_rate": 0.0006838663632834697, "loss": 0.0414, "num_input_tokens_seen": 116920608, "step": 54205 }, { "epoch": 8.843393148450245, "grad_norm": 0.129672572016716, "learning_rate": 0.0006838001690594775, "loss": 0.0939, "num_input_tokens_seen": 116932928, "step": 54210 }, { "epoch": 8.844208809135399, "grad_norm": 0.32588890194892883, "learning_rate": 0.0006837339711105414, "loss": 0.0863, "num_input_tokens_seen": 116944096, "step": 54215 }, { "epoch": 8.845024469820554, "grad_norm": 0.04853362590074539, "learning_rate": 0.0006836677694380035, "loss": 0.0874, "num_input_tokens_seen": 116954592, "step": 54220 }, { "epoch": 8.84584013050571, "grad_norm": 0.16925173997879028, "learning_rate": 0.0006836015640432054, "loss": 0.064, "num_input_tokens_seen": 116964224, "step": 54225 }, { "epoch": 8.846655791190864, "grad_norm": 0.17469853162765503, "learning_rate": 0.0006835353549274885, "loss": 0.082, "num_input_tokens_seen": 116975328, "step": 54230 }, { "epoch": 8.84747145187602, "grad_norm": 0.08106009662151337, "learning_rate": 0.0006834691420921948, "loss": 0.0713, "num_input_tokens_seen": 116986624, "step": 54235 }, { "epoch": 8.848287112561174, "grad_norm": 0.012566989287734032, "learning_rate": 0.0006834029255386663, "loss": 0.065, "num_input_tokens_seen": 116997408, "step": 54240 }, { "epoch": 8.84910277324633, "grad_norm": 0.016507655382156372, "learning_rate": 0.0006833367052682446, "loss": 0.0863, "num_input_tokens_seen": 117008288, "step": 54245 }, { "epoch": 8.849918433931485, "grad_norm": 0.1043224185705185, "learning_rate": 0.0006832704812822722, "loss": 0.0218, "num_input_tokens_seen": 117018464, "step": 54250 }, { "epoch": 8.850734094616639, "grad_norm": 0.013194949366152287, "learning_rate": 0.0006832042535820911, "loss": 0.1686, "num_input_tokens_seen": 117030560, "step": 54255 }, { "epoch": 8.851549755301795, "grad_norm": 0.08643972128629684, "learning_rate": 0.0006831380221690431, "loss": 0.0394, "num_input_tokens_seen": 117041024, "step": 54260 }, { "epoch": 8.852365415986949, "grad_norm": 0.0344260148704052, "learning_rate": 0.0006830717870444709, "loss": 0.0127, "num_input_tokens_seen": 117051840, "step": 54265 }, { "epoch": 8.853181076672104, "grad_norm": 0.01754196174442768, "learning_rate": 0.0006830055482097168, "loss": 0.119, "num_input_tokens_seen": 117062784, "step": 54270 }, { "epoch": 8.85399673735726, "grad_norm": 0.32617369294166565, "learning_rate": 0.000682939305666123, "loss": 0.047, "num_input_tokens_seen": 117072992, "step": 54275 }, { "epoch": 8.854812398042414, "grad_norm": 0.17297236621379852, "learning_rate": 0.000682873059415032, "loss": 0.0486, "num_input_tokens_seen": 117083840, "step": 54280 }, { "epoch": 8.85562805872757, "grad_norm": 0.018565313890576363, "learning_rate": 0.0006828068094577864, "loss": 0.0132, "num_input_tokens_seen": 117093536, "step": 54285 }, { "epoch": 8.856443719412724, "grad_norm": 0.044480543583631516, "learning_rate": 0.0006827405557957291, "loss": 0.0141, "num_input_tokens_seen": 117102560, "step": 54290 }, { "epoch": 8.85725938009788, "grad_norm": 0.05789607763290405, "learning_rate": 0.0006826742984302026, "loss": 0.0775, "num_input_tokens_seen": 117113568, "step": 54295 }, { "epoch": 8.858075040783035, "grad_norm": 0.046839237213134766, "learning_rate": 0.0006826080373625496, "loss": 0.062, "num_input_tokens_seen": 117123456, "step": 54300 }, { "epoch": 8.858890701468189, "grad_norm": 0.38783201575279236, "learning_rate": 0.0006825417725941132, "loss": 0.1207, "num_input_tokens_seen": 117133728, "step": 54305 }, { "epoch": 8.859706362153345, "grad_norm": 0.04466322064399719, "learning_rate": 0.0006824755041262361, "loss": 0.0957, "num_input_tokens_seen": 117144480, "step": 54310 }, { "epoch": 8.860522022838499, "grad_norm": 0.12455623596906662, "learning_rate": 0.0006824092319602614, "loss": 0.047, "num_input_tokens_seen": 117155424, "step": 54315 }, { "epoch": 8.861337683523654, "grad_norm": 0.48800867795944214, "learning_rate": 0.0006823429560975323, "loss": 0.0608, "num_input_tokens_seen": 117165248, "step": 54320 }, { "epoch": 8.86215334420881, "grad_norm": 0.02248143032193184, "learning_rate": 0.0006822766765393919, "loss": 0.1176, "num_input_tokens_seen": 117174880, "step": 54325 }, { "epoch": 8.862969004893964, "grad_norm": 0.0054356809705495834, "learning_rate": 0.0006822103932871832, "loss": 0.0213, "num_input_tokens_seen": 117186688, "step": 54330 }, { "epoch": 8.86378466557912, "grad_norm": 0.05461570993065834, "learning_rate": 0.00068214410634225, "loss": 0.0243, "num_input_tokens_seen": 117197056, "step": 54335 }, { "epoch": 8.864600326264274, "grad_norm": 0.01330614648759365, "learning_rate": 0.0006820778157059353, "loss": 0.0629, "num_input_tokens_seen": 117208640, "step": 54340 }, { "epoch": 8.86541598694943, "grad_norm": 0.17214065790176392, "learning_rate": 0.0006820115213795827, "loss": 0.1255, "num_input_tokens_seen": 117219424, "step": 54345 }, { "epoch": 8.866231647634583, "grad_norm": 0.047885213047266006, "learning_rate": 0.0006819452233645357, "loss": 0.0768, "num_input_tokens_seen": 117231040, "step": 54350 }, { "epoch": 8.867047308319739, "grad_norm": 0.042638979852199554, "learning_rate": 0.0006818789216621379, "loss": 0.0509, "num_input_tokens_seen": 117242304, "step": 54355 }, { "epoch": 8.867862969004895, "grad_norm": 0.0021995645947754383, "learning_rate": 0.0006818126162737332, "loss": 0.0216, "num_input_tokens_seen": 117252640, "step": 54360 }, { "epoch": 8.868678629690049, "grad_norm": 0.46108919382095337, "learning_rate": 0.000681746307200665, "loss": 0.1075, "num_input_tokens_seen": 117263232, "step": 54365 }, { "epoch": 8.869494290375204, "grad_norm": 0.7686156034469604, "learning_rate": 0.0006816799944442774, "loss": 0.1788, "num_input_tokens_seen": 117274560, "step": 54370 }, { "epoch": 8.870309951060358, "grad_norm": 0.7064616680145264, "learning_rate": 0.0006816136780059142, "loss": 0.1875, "num_input_tokens_seen": 117285696, "step": 54375 }, { "epoch": 8.871125611745514, "grad_norm": 0.21526634693145752, "learning_rate": 0.0006815473578869194, "loss": 0.1272, "num_input_tokens_seen": 117296384, "step": 54380 }, { "epoch": 8.87194127243067, "grad_norm": 0.24779769778251648, "learning_rate": 0.0006814810340886372, "loss": 0.1256, "num_input_tokens_seen": 117306592, "step": 54385 }, { "epoch": 8.872756933115824, "grad_norm": 0.4300159811973572, "learning_rate": 0.0006814147066124116, "loss": 0.1947, "num_input_tokens_seen": 117319264, "step": 54390 }, { "epoch": 8.87357259380098, "grad_norm": 0.045188531279563904, "learning_rate": 0.0006813483754595867, "loss": 0.0452, "num_input_tokens_seen": 117330976, "step": 54395 }, { "epoch": 8.874388254486133, "grad_norm": 0.030934225767850876, "learning_rate": 0.000681282040631507, "loss": 0.2237, "num_input_tokens_seen": 117341472, "step": 54400 }, { "epoch": 8.875203915171289, "grad_norm": 0.39201873540878296, "learning_rate": 0.0006812157021295167, "loss": 0.077, "num_input_tokens_seen": 117351392, "step": 54405 }, { "epoch": 8.876019575856443, "grad_norm": 0.07623272389173508, "learning_rate": 0.0006811493599549603, "loss": 0.1783, "num_input_tokens_seen": 117362176, "step": 54410 }, { "epoch": 8.876835236541599, "grad_norm": 0.16923709213733673, "learning_rate": 0.0006810830141091825, "loss": 0.0221, "num_input_tokens_seen": 117372160, "step": 54415 }, { "epoch": 8.877650897226754, "grad_norm": 0.06658641993999481, "learning_rate": 0.0006810166645935276, "loss": 0.0825, "num_input_tokens_seen": 117382688, "step": 54420 }, { "epoch": 8.878466557911908, "grad_norm": 0.053967878222465515, "learning_rate": 0.0006809503114093403, "loss": 0.0426, "num_input_tokens_seen": 117392832, "step": 54425 }, { "epoch": 8.879282218597064, "grad_norm": 0.09863808006048203, "learning_rate": 0.0006808839545579655, "loss": 0.0874, "num_input_tokens_seen": 117403840, "step": 54430 }, { "epoch": 8.880097879282218, "grad_norm": 0.18387866020202637, "learning_rate": 0.0006808175940407477, "loss": 0.0751, "num_input_tokens_seen": 117413920, "step": 54435 }, { "epoch": 8.880913539967374, "grad_norm": 0.3852348327636719, "learning_rate": 0.0006807512298590321, "loss": 0.2041, "num_input_tokens_seen": 117424736, "step": 54440 }, { "epoch": 8.88172920065253, "grad_norm": 0.5888033509254456, "learning_rate": 0.0006806848620141636, "loss": 0.1031, "num_input_tokens_seen": 117436288, "step": 54445 }, { "epoch": 8.882544861337683, "grad_norm": 0.048137366771698, "learning_rate": 0.0006806184905074871, "loss": 0.0493, "num_input_tokens_seen": 117447008, "step": 54450 }, { "epoch": 8.883360522022839, "grad_norm": 0.2139514833688736, "learning_rate": 0.0006805521153403476, "loss": 0.0448, "num_input_tokens_seen": 117458624, "step": 54455 }, { "epoch": 8.884176182707993, "grad_norm": 0.2428581267595291, "learning_rate": 0.0006804857365140906, "loss": 0.0443, "num_input_tokens_seen": 117469600, "step": 54460 }, { "epoch": 8.884991843393149, "grad_norm": 0.2865869402885437, "learning_rate": 0.0006804193540300612, "loss": 0.0474, "num_input_tokens_seen": 117480608, "step": 54465 }, { "epoch": 8.885807504078304, "grad_norm": 0.04660974442958832, "learning_rate": 0.0006803529678896047, "loss": 0.0658, "num_input_tokens_seen": 117490912, "step": 54470 }, { "epoch": 8.886623164763458, "grad_norm": 0.8448531031608582, "learning_rate": 0.0006802865780940663, "loss": 0.1021, "num_input_tokens_seen": 117502976, "step": 54475 }, { "epoch": 8.887438825448614, "grad_norm": 0.4184205234050751, "learning_rate": 0.000680220184644792, "loss": 0.1563, "num_input_tokens_seen": 117513728, "step": 54480 }, { "epoch": 8.888254486133768, "grad_norm": 0.1628236174583435, "learning_rate": 0.0006801537875431269, "loss": 0.0603, "num_input_tokens_seen": 117524320, "step": 54485 }, { "epoch": 8.889070146818923, "grad_norm": 0.2718461751937866, "learning_rate": 0.0006800873867904167, "loss": 0.19, "num_input_tokens_seen": 117533632, "step": 54490 }, { "epoch": 8.88988580750408, "grad_norm": 0.35751858353614807, "learning_rate": 0.0006800209823880072, "loss": 0.046, "num_input_tokens_seen": 117543744, "step": 54495 }, { "epoch": 8.890701468189233, "grad_norm": 0.02659350261092186, "learning_rate": 0.0006799545743372442, "loss": 0.0106, "num_input_tokens_seen": 117555360, "step": 54500 }, { "epoch": 8.891517128874389, "grad_norm": 0.22558395564556122, "learning_rate": 0.0006798881626394734, "loss": 0.0868, "num_input_tokens_seen": 117566016, "step": 54505 }, { "epoch": 8.892332789559543, "grad_norm": 0.004167194478213787, "learning_rate": 0.0006798217472960407, "loss": 0.0405, "num_input_tokens_seen": 117577216, "step": 54510 }, { "epoch": 8.893148450244698, "grad_norm": 0.1533912569284439, "learning_rate": 0.0006797553283082922, "loss": 0.0419, "num_input_tokens_seen": 117586944, "step": 54515 }, { "epoch": 8.893964110929852, "grad_norm": 0.0040937019512057304, "learning_rate": 0.000679688905677574, "loss": 0.0746, "num_input_tokens_seen": 117597312, "step": 54520 }, { "epoch": 8.894779771615008, "grad_norm": 0.060172103345394135, "learning_rate": 0.0006796224794052322, "loss": 0.0235, "num_input_tokens_seen": 117608544, "step": 54525 }, { "epoch": 8.895595432300164, "grad_norm": 0.3353100121021271, "learning_rate": 0.0006795560494926129, "loss": 0.1339, "num_input_tokens_seen": 117618944, "step": 54530 }, { "epoch": 8.896411092985318, "grad_norm": 0.10125717520713806, "learning_rate": 0.0006794896159410625, "loss": 0.0326, "num_input_tokens_seen": 117630368, "step": 54535 }, { "epoch": 8.897226753670473, "grad_norm": 0.01982436515390873, "learning_rate": 0.0006794231787519274, "loss": 0.0373, "num_input_tokens_seen": 117639968, "step": 54540 }, { "epoch": 8.898042414355627, "grad_norm": 0.40384426712989807, "learning_rate": 0.000679356737926554, "loss": 0.0984, "num_input_tokens_seen": 117652480, "step": 54545 }, { "epoch": 8.898858075040783, "grad_norm": 0.5491912364959717, "learning_rate": 0.0006792902934662885, "loss": 0.2385, "num_input_tokens_seen": 117662624, "step": 54550 }, { "epoch": 8.899673735725939, "grad_norm": 1.276809811592102, "learning_rate": 0.000679223845372478, "loss": 0.2138, "num_input_tokens_seen": 117674400, "step": 54555 }, { "epoch": 8.900489396411093, "grad_norm": 0.6144986748695374, "learning_rate": 0.0006791573936464689, "loss": 0.0641, "num_input_tokens_seen": 117685760, "step": 54560 }, { "epoch": 8.901305057096248, "grad_norm": 0.05851384624838829, "learning_rate": 0.0006790909382896079, "loss": 0.0378, "num_input_tokens_seen": 117695808, "step": 54565 }, { "epoch": 8.902120717781402, "grad_norm": 0.26752036809921265, "learning_rate": 0.0006790244793032418, "loss": 0.1137, "num_input_tokens_seen": 117706176, "step": 54570 }, { "epoch": 8.902936378466558, "grad_norm": 0.07598751783370972, "learning_rate": 0.0006789580166887176, "loss": 0.1318, "num_input_tokens_seen": 117716608, "step": 54575 }, { "epoch": 8.903752039151712, "grad_norm": 0.2976522445678711, "learning_rate": 0.0006788915504473822, "loss": 0.0491, "num_input_tokens_seen": 117728480, "step": 54580 }, { "epoch": 8.904567699836868, "grad_norm": 0.20857731997966766, "learning_rate": 0.0006788250805805824, "loss": 0.0612, "num_input_tokens_seen": 117738880, "step": 54585 }, { "epoch": 8.905383360522023, "grad_norm": 0.017023596912622452, "learning_rate": 0.0006787586070896657, "loss": 0.0094, "num_input_tokens_seen": 117750176, "step": 54590 }, { "epoch": 8.906199021207177, "grad_norm": 0.02123020775616169, "learning_rate": 0.0006786921299759789, "loss": 0.0171, "num_input_tokens_seen": 117760896, "step": 54595 }, { "epoch": 8.907014681892333, "grad_norm": 0.09059813618659973, "learning_rate": 0.0006786256492408694, "loss": 0.0682, "num_input_tokens_seen": 117771168, "step": 54600 }, { "epoch": 8.907830342577487, "grad_norm": 0.10467185080051422, "learning_rate": 0.0006785591648856846, "loss": 0.1196, "num_input_tokens_seen": 117783040, "step": 54605 }, { "epoch": 8.908646003262643, "grad_norm": 0.051233965903520584, "learning_rate": 0.0006784926769117717, "loss": 0.2428, "num_input_tokens_seen": 117794592, "step": 54610 }, { "epoch": 8.909461663947798, "grad_norm": 0.036431558430194855, "learning_rate": 0.0006784261853204783, "loss": 0.064, "num_input_tokens_seen": 117804928, "step": 54615 }, { "epoch": 8.910277324632952, "grad_norm": 0.01083289086818695, "learning_rate": 0.0006783596901131521, "loss": 0.0557, "num_input_tokens_seen": 117816128, "step": 54620 }, { "epoch": 8.911092985318108, "grad_norm": 0.019782673567533493, "learning_rate": 0.0006782931912911402, "loss": 0.0385, "num_input_tokens_seen": 117826432, "step": 54625 }, { "epoch": 8.911908646003262, "grad_norm": 0.42851898074150085, "learning_rate": 0.0006782266888557909, "loss": 0.1593, "num_input_tokens_seen": 117836448, "step": 54630 }, { "epoch": 8.912724306688418, "grad_norm": 0.04012679681181908, "learning_rate": 0.0006781601828084513, "loss": 0.0748, "num_input_tokens_seen": 117847136, "step": 54635 }, { "epoch": 8.913539967373573, "grad_norm": 0.11656112223863602, "learning_rate": 0.0006780936731504699, "loss": 0.1204, "num_input_tokens_seen": 117857792, "step": 54640 }, { "epoch": 8.914355628058727, "grad_norm": 0.012323557399213314, "learning_rate": 0.0006780271598831942, "loss": 0.0122, "num_input_tokens_seen": 117867008, "step": 54645 }, { "epoch": 8.915171288743883, "grad_norm": 0.024469271302223206, "learning_rate": 0.0006779606430079723, "loss": 0.1782, "num_input_tokens_seen": 117878720, "step": 54650 }, { "epoch": 8.915986949429037, "grad_norm": 0.2572644054889679, "learning_rate": 0.0006778941225261522, "loss": 0.1424, "num_input_tokens_seen": 117889600, "step": 54655 }, { "epoch": 8.916802610114193, "grad_norm": 0.1707257628440857, "learning_rate": 0.0006778275984390819, "loss": 0.026, "num_input_tokens_seen": 117900288, "step": 54660 }, { "epoch": 8.917618270799348, "grad_norm": 0.006852397229522467, "learning_rate": 0.0006777610707481099, "loss": 0.0743, "num_input_tokens_seen": 117911136, "step": 54665 }, { "epoch": 8.918433931484502, "grad_norm": 0.4134812653064728, "learning_rate": 0.0006776945394545841, "loss": 0.0757, "num_input_tokens_seen": 117921376, "step": 54670 }, { "epoch": 8.919249592169658, "grad_norm": 0.01503437664359808, "learning_rate": 0.0006776280045598533, "loss": 0.0639, "num_input_tokens_seen": 117933120, "step": 54675 }, { "epoch": 8.920065252854812, "grad_norm": 0.44065535068511963, "learning_rate": 0.0006775614660652655, "loss": 0.1215, "num_input_tokens_seen": 117944512, "step": 54680 }, { "epoch": 8.920880913539968, "grad_norm": 0.030026542022824287, "learning_rate": 0.0006774949239721692, "loss": 0.0256, "num_input_tokens_seen": 117955968, "step": 54685 }, { "epoch": 8.921696574225122, "grad_norm": 0.11978883296251297, "learning_rate": 0.0006774283782819133, "loss": 0.0593, "num_input_tokens_seen": 117967712, "step": 54690 }, { "epoch": 8.922512234910277, "grad_norm": 0.02312198281288147, "learning_rate": 0.0006773618289958462, "loss": 0.0241, "num_input_tokens_seen": 117980000, "step": 54695 }, { "epoch": 8.923327895595433, "grad_norm": 0.024243975058197975, "learning_rate": 0.0006772952761153167, "loss": 0.0354, "num_input_tokens_seen": 117989824, "step": 54700 }, { "epoch": 8.924143556280587, "grad_norm": 0.2831211984157562, "learning_rate": 0.0006772287196416733, "loss": 0.0625, "num_input_tokens_seen": 118000480, "step": 54705 }, { "epoch": 8.924959216965743, "grad_norm": 0.03328729048371315, "learning_rate": 0.0006771621595762652, "loss": 0.0272, "num_input_tokens_seen": 118012576, "step": 54710 }, { "epoch": 8.925774877650896, "grad_norm": 0.011533121578395367, "learning_rate": 0.0006770955959204412, "loss": 0.0525, "num_input_tokens_seen": 118024032, "step": 54715 }, { "epoch": 8.926590538336052, "grad_norm": 0.8412138819694519, "learning_rate": 0.0006770290286755503, "loss": 0.0719, "num_input_tokens_seen": 118035136, "step": 54720 }, { "epoch": 8.927406199021208, "grad_norm": 0.06643106788396835, "learning_rate": 0.0006769624578429414, "loss": 0.1595, "num_input_tokens_seen": 118046528, "step": 54725 }, { "epoch": 8.928221859706362, "grad_norm": 0.013766370713710785, "learning_rate": 0.0006768958834239639, "loss": 0.1325, "num_input_tokens_seen": 118057280, "step": 54730 }, { "epoch": 8.929037520391518, "grad_norm": 0.35406073927879333, "learning_rate": 0.0006768293054199669, "loss": 0.0523, "num_input_tokens_seen": 118068608, "step": 54735 }, { "epoch": 8.929853181076671, "grad_norm": 0.09868857264518738, "learning_rate": 0.0006767627238322998, "loss": 0.0383, "num_input_tokens_seen": 118079392, "step": 54740 }, { "epoch": 8.930668841761827, "grad_norm": 0.02761208824813366, "learning_rate": 0.0006766961386623118, "loss": 0.1156, "num_input_tokens_seen": 118090752, "step": 54745 }, { "epoch": 8.931484502446983, "grad_norm": 0.28495508432388306, "learning_rate": 0.0006766295499113524, "loss": 0.048, "num_input_tokens_seen": 118100864, "step": 54750 }, { "epoch": 8.932300163132137, "grad_norm": 0.004174153320491314, "learning_rate": 0.000676562957580771, "loss": 0.0344, "num_input_tokens_seen": 118111456, "step": 54755 }, { "epoch": 8.933115823817293, "grad_norm": 0.10229144990444183, "learning_rate": 0.0006764963616719174, "loss": 0.0199, "num_input_tokens_seen": 118120608, "step": 54760 }, { "epoch": 8.933931484502446, "grad_norm": 0.25565290451049805, "learning_rate": 0.000676429762186141, "loss": 0.0392, "num_input_tokens_seen": 118130304, "step": 54765 }, { "epoch": 8.934747145187602, "grad_norm": 0.007273535709828138, "learning_rate": 0.0006763631591247917, "loss": 0.1065, "num_input_tokens_seen": 118142144, "step": 54770 }, { "epoch": 8.935562805872756, "grad_norm": 0.0053012920543551445, "learning_rate": 0.0006762965524892194, "loss": 0.1462, "num_input_tokens_seen": 118153440, "step": 54775 }, { "epoch": 8.936378466557912, "grad_norm": 0.009744161739945412, "learning_rate": 0.0006762299422807737, "loss": 0.0703, "num_input_tokens_seen": 118163520, "step": 54780 }, { "epoch": 8.937194127243067, "grad_norm": 0.007423016708344221, "learning_rate": 0.0006761633285008046, "loss": 0.0344, "num_input_tokens_seen": 118174560, "step": 54785 }, { "epoch": 8.938009787928221, "grad_norm": 0.06237594410777092, "learning_rate": 0.0006760967111506623, "loss": 0.0315, "num_input_tokens_seen": 118185504, "step": 54790 }, { "epoch": 8.938825448613377, "grad_norm": 0.061692263931035995, "learning_rate": 0.0006760300902316967, "loss": 0.0384, "num_input_tokens_seen": 118197664, "step": 54795 }, { "epoch": 8.939641109298531, "grad_norm": 0.13261817395687103, "learning_rate": 0.000675963465745258, "loss": 0.0583, "num_input_tokens_seen": 118208032, "step": 54800 }, { "epoch": 8.940456769983687, "grad_norm": 0.006056450307369232, "learning_rate": 0.0006758968376926965, "loss": 0.016, "num_input_tokens_seen": 118219424, "step": 54805 }, { "epoch": 8.941272430668842, "grad_norm": 0.12354525178670883, "learning_rate": 0.0006758302060753624, "loss": 0.0362, "num_input_tokens_seen": 118231200, "step": 54810 }, { "epoch": 8.942088091353996, "grad_norm": 0.17180338501930237, "learning_rate": 0.000675763570894606, "loss": 0.0206, "num_input_tokens_seen": 118242272, "step": 54815 }, { "epoch": 8.942903752039152, "grad_norm": 0.6526899337768555, "learning_rate": 0.0006756969321517781, "loss": 0.1195, "num_input_tokens_seen": 118252288, "step": 54820 }, { "epoch": 8.943719412724306, "grad_norm": 0.04230908676981926, "learning_rate": 0.0006756302898482288, "loss": 0.2476, "num_input_tokens_seen": 118263072, "step": 54825 }, { "epoch": 8.944535073409462, "grad_norm": 0.3013417720794678, "learning_rate": 0.0006755636439853089, "loss": 0.0431, "num_input_tokens_seen": 118274048, "step": 54830 }, { "epoch": 8.945350734094617, "grad_norm": 0.018392786383628845, "learning_rate": 0.0006754969945643689, "loss": 0.0402, "num_input_tokens_seen": 118284672, "step": 54835 }, { "epoch": 8.946166394779771, "grad_norm": 0.04745260253548622, "learning_rate": 0.0006754303415867599, "loss": 0.0316, "num_input_tokens_seen": 118295008, "step": 54840 }, { "epoch": 8.946982055464927, "grad_norm": 0.5212236046791077, "learning_rate": 0.0006753636850538325, "loss": 0.1218, "num_input_tokens_seen": 118306816, "step": 54845 }, { "epoch": 8.947797716150081, "grad_norm": 0.12133591622114182, "learning_rate": 0.0006752970249669374, "loss": 0.068, "num_input_tokens_seen": 118317760, "step": 54850 }, { "epoch": 8.948613376835237, "grad_norm": 0.011474822647869587, "learning_rate": 0.0006752303613274257, "loss": 0.0062, "num_input_tokens_seen": 118328000, "step": 54855 }, { "epoch": 8.949429037520392, "grad_norm": 0.08659780770540237, "learning_rate": 0.0006751636941366486, "loss": 0.0352, "num_input_tokens_seen": 118338304, "step": 54860 }, { "epoch": 8.950244698205546, "grad_norm": 0.004791200626641512, "learning_rate": 0.000675097023395957, "loss": 0.0668, "num_input_tokens_seen": 118349152, "step": 54865 }, { "epoch": 8.951060358890702, "grad_norm": 0.00642451923340559, "learning_rate": 0.0006750303491067021, "loss": 0.0068, "num_input_tokens_seen": 118360000, "step": 54870 }, { "epoch": 8.951876019575856, "grad_norm": 0.058599695563316345, "learning_rate": 0.0006749636712702349, "loss": 0.0274, "num_input_tokens_seen": 118370592, "step": 54875 }, { "epoch": 8.952691680261012, "grad_norm": 0.006119515281170607, "learning_rate": 0.0006748969898879071, "loss": 0.1346, "num_input_tokens_seen": 118381728, "step": 54880 }, { "epoch": 8.953507340946166, "grad_norm": 0.24306419491767883, "learning_rate": 0.00067483030496107, "loss": 0.06, "num_input_tokens_seen": 118392384, "step": 54885 }, { "epoch": 8.954323001631321, "grad_norm": 0.4660106897354126, "learning_rate": 0.000674763616491075, "loss": 0.1018, "num_input_tokens_seen": 118402592, "step": 54890 }, { "epoch": 8.955138662316477, "grad_norm": 0.018377551808953285, "learning_rate": 0.0006746969244792734, "loss": 0.1082, "num_input_tokens_seen": 118413920, "step": 54895 }, { "epoch": 8.955954323001631, "grad_norm": 0.004553763195872307, "learning_rate": 0.0006746302289270172, "loss": 0.0651, "num_input_tokens_seen": 118424128, "step": 54900 }, { "epoch": 8.956769983686787, "grad_norm": 0.04405802860856056, "learning_rate": 0.0006745635298356579, "loss": 0.0654, "num_input_tokens_seen": 118433920, "step": 54905 }, { "epoch": 8.95758564437194, "grad_norm": 0.01027133408933878, "learning_rate": 0.0006744968272065469, "loss": 0.1562, "num_input_tokens_seen": 118445152, "step": 54910 }, { "epoch": 8.958401305057096, "grad_norm": 0.03573455661535263, "learning_rate": 0.0006744301210410366, "loss": 0.0265, "num_input_tokens_seen": 118455680, "step": 54915 }, { "epoch": 8.959216965742252, "grad_norm": 0.0032424440141767263, "learning_rate": 0.0006743634113404786, "loss": 0.0138, "num_input_tokens_seen": 118466688, "step": 54920 }, { "epoch": 8.960032626427406, "grad_norm": 0.016394954174757004, "learning_rate": 0.0006742966981062249, "loss": 0.0815, "num_input_tokens_seen": 118476032, "step": 54925 }, { "epoch": 8.960848287112562, "grad_norm": 0.14690245687961578, "learning_rate": 0.0006742299813396274, "loss": 0.1538, "num_input_tokens_seen": 118487168, "step": 54930 }, { "epoch": 8.961663947797716, "grad_norm": 0.005423164460808039, "learning_rate": 0.0006741632610420384, "loss": 0.1281, "num_input_tokens_seen": 118498368, "step": 54935 }, { "epoch": 8.962479608482871, "grad_norm": 0.007161595858633518, "learning_rate": 0.0006740965372148098, "loss": 0.0996, "num_input_tokens_seen": 118509056, "step": 54940 }, { "epoch": 8.963295269168025, "grad_norm": 0.2641008794307709, "learning_rate": 0.0006740298098592941, "loss": 0.1037, "num_input_tokens_seen": 118520512, "step": 54945 }, { "epoch": 8.964110929853181, "grad_norm": 0.06889292597770691, "learning_rate": 0.0006739630789768436, "loss": 0.0336, "num_input_tokens_seen": 118531456, "step": 54950 }, { "epoch": 8.964926590538337, "grad_norm": 0.058766067028045654, "learning_rate": 0.0006738963445688107, "loss": 0.0095, "num_input_tokens_seen": 118541248, "step": 54955 }, { "epoch": 8.96574225122349, "grad_norm": 0.00939859263598919, "learning_rate": 0.0006738296066365476, "loss": 0.004, "num_input_tokens_seen": 118552832, "step": 54960 }, { "epoch": 8.966557911908646, "grad_norm": 0.006169438827782869, "learning_rate": 0.000673762865181407, "loss": 0.0973, "num_input_tokens_seen": 118563648, "step": 54965 }, { "epoch": 8.9673735725938, "grad_norm": 0.1622277945280075, "learning_rate": 0.0006736961202047417, "loss": 0.0294, "num_input_tokens_seen": 118575456, "step": 54970 }, { "epoch": 8.968189233278956, "grad_norm": 0.01568516343832016, "learning_rate": 0.0006736293717079041, "loss": 0.0201, "num_input_tokens_seen": 118586208, "step": 54975 }, { "epoch": 8.969004893964112, "grad_norm": 0.07656209915876389, "learning_rate": 0.0006735626196922469, "loss": 0.0212, "num_input_tokens_seen": 118597824, "step": 54980 }, { "epoch": 8.969820554649266, "grad_norm": 0.0062323641031980515, "learning_rate": 0.0006734958641591231, "loss": 0.0167, "num_input_tokens_seen": 118609088, "step": 54985 }, { "epoch": 8.970636215334421, "grad_norm": 0.010267527773976326, "learning_rate": 0.0006734291051098856, "loss": 0.0709, "num_input_tokens_seen": 118620288, "step": 54990 }, { "epoch": 8.971451876019575, "grad_norm": 0.44062483310699463, "learning_rate": 0.0006733623425458871, "loss": 0.1233, "num_input_tokens_seen": 118629984, "step": 54995 }, { "epoch": 8.97226753670473, "grad_norm": 0.578577995300293, "learning_rate": 0.000673295576468481, "loss": 0.0907, "num_input_tokens_seen": 118641216, "step": 55000 }, { "epoch": 8.973083197389887, "grad_norm": 0.2516621947288513, "learning_rate": 0.00067322880687902, "loss": 0.0593, "num_input_tokens_seen": 118652608, "step": 55005 }, { "epoch": 8.97389885807504, "grad_norm": 0.007286702282726765, "learning_rate": 0.0006731620337788576, "loss": 0.0088, "num_input_tokens_seen": 118663104, "step": 55010 }, { "epoch": 8.974714518760196, "grad_norm": 0.2218107134103775, "learning_rate": 0.0006730952571693469, "loss": 0.0736, "num_input_tokens_seen": 118674528, "step": 55015 }, { "epoch": 8.97553017944535, "grad_norm": 0.42903921008110046, "learning_rate": 0.0006730284770518412, "loss": 0.0623, "num_input_tokens_seen": 118685024, "step": 55020 }, { "epoch": 8.976345840130506, "grad_norm": 0.18602146208286285, "learning_rate": 0.0006729616934276939, "loss": 0.0383, "num_input_tokens_seen": 118695840, "step": 55025 }, { "epoch": 8.977161500815662, "grad_norm": 0.0311795212328434, "learning_rate": 0.0006728949062982585, "loss": 0.115, "num_input_tokens_seen": 118706368, "step": 55030 }, { "epoch": 8.977977161500815, "grad_norm": 0.061059702187776566, "learning_rate": 0.0006728281156648885, "loss": 0.056, "num_input_tokens_seen": 118716384, "step": 55035 }, { "epoch": 8.978792822185971, "grad_norm": 0.6891406774520874, "learning_rate": 0.0006727613215289374, "loss": 0.1294, "num_input_tokens_seen": 118726880, "step": 55040 }, { "epoch": 8.979608482871125, "grad_norm": 0.103432796895504, "learning_rate": 0.0006726945238917589, "loss": 0.0145, "num_input_tokens_seen": 118736224, "step": 55045 }, { "epoch": 8.98042414355628, "grad_norm": 0.2318846434354782, "learning_rate": 0.000672627722754707, "loss": 0.0351, "num_input_tokens_seen": 118747488, "step": 55050 }, { "epoch": 8.981239804241435, "grad_norm": 0.18085940182209015, "learning_rate": 0.0006725609181191352, "loss": 0.0417, "num_input_tokens_seen": 118759648, "step": 55055 }, { "epoch": 8.98205546492659, "grad_norm": 0.39847949147224426, "learning_rate": 0.0006724941099863975, "loss": 0.0882, "num_input_tokens_seen": 118771168, "step": 55060 }, { "epoch": 8.982871125611746, "grad_norm": 0.9012908935546875, "learning_rate": 0.0006724272983578478, "loss": 0.1655, "num_input_tokens_seen": 118783552, "step": 55065 }, { "epoch": 8.9836867862969, "grad_norm": 0.10730496793985367, "learning_rate": 0.0006723604832348403, "loss": 0.0153, "num_input_tokens_seen": 118793920, "step": 55070 }, { "epoch": 8.984502446982056, "grad_norm": 0.006363843567669392, "learning_rate": 0.0006722936646187288, "loss": 0.0488, "num_input_tokens_seen": 118804256, "step": 55075 }, { "epoch": 8.98531810766721, "grad_norm": 0.00794894341379404, "learning_rate": 0.0006722268425108675, "loss": 0.0221, "num_input_tokens_seen": 118813952, "step": 55080 }, { "epoch": 8.986133768352365, "grad_norm": 0.07998554408550262, "learning_rate": 0.000672160016912611, "loss": 0.0117, "num_input_tokens_seen": 118823744, "step": 55085 }, { "epoch": 8.986949429037521, "grad_norm": 0.0025027068331837654, "learning_rate": 0.0006720931878253133, "loss": 0.0737, "num_input_tokens_seen": 118834496, "step": 55090 }, { "epoch": 8.987765089722675, "grad_norm": 0.21643787622451782, "learning_rate": 0.0006720263552503288, "loss": 0.0726, "num_input_tokens_seen": 118844608, "step": 55095 }, { "epoch": 8.98858075040783, "grad_norm": 0.11720778048038483, "learning_rate": 0.000671959519189012, "loss": 0.0371, "num_input_tokens_seen": 118853568, "step": 55100 }, { "epoch": 8.989396411092985, "grad_norm": 0.060814548283815384, "learning_rate": 0.0006718926796427174, "loss": 0.0503, "num_input_tokens_seen": 118864384, "step": 55105 }, { "epoch": 8.99021207177814, "grad_norm": 0.00238768570125103, "learning_rate": 0.0006718258366127995, "loss": 0.0134, "num_input_tokens_seen": 118875232, "step": 55110 }, { "epoch": 8.991027732463294, "grad_norm": 0.26203107833862305, "learning_rate": 0.0006717589901006131, "loss": 0.1057, "num_input_tokens_seen": 118885856, "step": 55115 }, { "epoch": 8.99184339314845, "grad_norm": 0.12555529177188873, "learning_rate": 0.0006716921401075129, "loss": 0.0467, "num_input_tokens_seen": 118897632, "step": 55120 }, { "epoch": 8.992659053833606, "grad_norm": 0.015696818009018898, "learning_rate": 0.0006716252866348537, "loss": 0.0786, "num_input_tokens_seen": 118910112, "step": 55125 }, { "epoch": 8.99347471451876, "grad_norm": 0.05970093235373497, "learning_rate": 0.0006715584296839903, "loss": 0.1336, "num_input_tokens_seen": 118919936, "step": 55130 }, { "epoch": 8.994290375203915, "grad_norm": 0.012708078138530254, "learning_rate": 0.0006714915692562777, "loss": 0.0168, "num_input_tokens_seen": 118930592, "step": 55135 }, { "epoch": 8.99510603588907, "grad_norm": 0.006796054542064667, "learning_rate": 0.0006714247053530709, "loss": 0.01, "num_input_tokens_seen": 118942176, "step": 55140 }, { "epoch": 8.995921696574225, "grad_norm": 0.18596763908863068, "learning_rate": 0.0006713578379757251, "loss": 0.0942, "num_input_tokens_seen": 118953760, "step": 55145 }, { "epoch": 8.99673735725938, "grad_norm": 0.003850656095892191, "learning_rate": 0.0006712909671255952, "loss": 0.0304, "num_input_tokens_seen": 118963776, "step": 55150 }, { "epoch": 8.997553017944535, "grad_norm": 0.027069244533777237, "learning_rate": 0.0006712240928040363, "loss": 0.0261, "num_input_tokens_seen": 118973696, "step": 55155 }, { "epoch": 8.99836867862969, "grad_norm": 0.1889045238494873, "learning_rate": 0.0006711572150124043, "loss": 0.0424, "num_input_tokens_seen": 118984352, "step": 55160 }, { "epoch": 8.999184339314844, "grad_norm": 0.030533069744706154, "learning_rate": 0.0006710903337520539, "loss": 0.086, "num_input_tokens_seen": 118994528, "step": 55165 }, { "epoch": 9.0, "grad_norm": 0.8355327248573303, "learning_rate": 0.0006710234490243412, "loss": 0.1028, "num_input_tokens_seen": 119004560, "step": 55170 }, { "epoch": 9.0, "eval_loss": 0.1462053805589676, "eval_runtime": 104.7577, "eval_samples_per_second": 26.012, "eval_steps_per_second": 6.51, "num_input_tokens_seen": 119004560, "step": 55170 }, { "epoch": 9.000815660685156, "grad_norm": 0.12789714336395264, "learning_rate": 0.0006709565608306212, "loss": 0.0497, "num_input_tokens_seen": 119014672, "step": 55175 }, { "epoch": 9.00163132137031, "grad_norm": 0.867985188961029, "learning_rate": 0.0006708896691722495, "loss": 0.0804, "num_input_tokens_seen": 119025712, "step": 55180 }, { "epoch": 9.002446982055465, "grad_norm": 0.013566500507295132, "learning_rate": 0.0006708227740505822, "loss": 0.017, "num_input_tokens_seen": 119037008, "step": 55185 }, { "epoch": 9.00326264274062, "grad_norm": 0.12729640305042267, "learning_rate": 0.0006707558754669744, "loss": 0.0268, "num_input_tokens_seen": 119047344, "step": 55190 }, { "epoch": 9.004078303425775, "grad_norm": 0.10256603360176086, "learning_rate": 0.0006706889734227823, "loss": 0.008, "num_input_tokens_seen": 119057136, "step": 55195 }, { "epoch": 9.00489396411093, "grad_norm": 0.13957393169403076, "learning_rate": 0.0006706220679193614, "loss": 0.1394, "num_input_tokens_seen": 119066864, "step": 55200 }, { "epoch": 9.005709624796085, "grad_norm": 0.030765770003199577, "learning_rate": 0.000670555158958068, "loss": 0.1578, "num_input_tokens_seen": 119078032, "step": 55205 }, { "epoch": 9.00652528548124, "grad_norm": 0.16550298035144806, "learning_rate": 0.0006704882465402579, "loss": 0.0498, "num_input_tokens_seen": 119088624, "step": 55210 }, { "epoch": 9.007340946166394, "grad_norm": 0.005055101588368416, "learning_rate": 0.0006704213306672873, "loss": 0.0859, "num_input_tokens_seen": 119100464, "step": 55215 }, { "epoch": 9.00815660685155, "grad_norm": 0.01219368726015091, "learning_rate": 0.0006703544113405122, "loss": 0.0597, "num_input_tokens_seen": 119111376, "step": 55220 }, { "epoch": 9.008972267536704, "grad_norm": 0.06764821708202362, "learning_rate": 0.0006702874885612887, "loss": 0.0133, "num_input_tokens_seen": 119121712, "step": 55225 }, { "epoch": 9.00978792822186, "grad_norm": 0.03630441427230835, "learning_rate": 0.0006702205623309734, "loss": 0.0506, "num_input_tokens_seen": 119133872, "step": 55230 }, { "epoch": 9.010603588907015, "grad_norm": 0.6200546622276306, "learning_rate": 0.0006701536326509224, "loss": 0.1617, "num_input_tokens_seen": 119143760, "step": 55235 }, { "epoch": 9.01141924959217, "grad_norm": 0.37775129079818726, "learning_rate": 0.0006700866995224921, "loss": 0.0522, "num_input_tokens_seen": 119154768, "step": 55240 }, { "epoch": 9.012234910277325, "grad_norm": 0.049315109848976135, "learning_rate": 0.0006700197629470393, "loss": 0.0275, "num_input_tokens_seen": 119165328, "step": 55245 }, { "epoch": 9.013050570962479, "grad_norm": 0.19377802312374115, "learning_rate": 0.00066995282292592, "loss": 0.0193, "num_input_tokens_seen": 119175920, "step": 55250 }, { "epoch": 9.013866231647635, "grad_norm": 0.0023321984335780144, "learning_rate": 0.0006698858794604914, "loss": 0.0308, "num_input_tokens_seen": 119184944, "step": 55255 }, { "epoch": 9.01468189233279, "grad_norm": 0.09050610661506653, "learning_rate": 0.0006698189325521097, "loss": 0.0423, "num_input_tokens_seen": 119195952, "step": 55260 }, { "epoch": 9.015497553017944, "grad_norm": 0.44453856348991394, "learning_rate": 0.000669751982202132, "loss": 0.0893, "num_input_tokens_seen": 119207280, "step": 55265 }, { "epoch": 9.0163132137031, "grad_norm": 0.4181574583053589, "learning_rate": 0.0006696850284119151, "loss": 0.0642, "num_input_tokens_seen": 119218640, "step": 55270 }, { "epoch": 9.017128874388254, "grad_norm": 0.207797110080719, "learning_rate": 0.0006696180711828159, "loss": 0.0673, "num_input_tokens_seen": 119229648, "step": 55275 }, { "epoch": 9.01794453507341, "grad_norm": 0.5937560796737671, "learning_rate": 0.0006695511105161913, "loss": 0.0897, "num_input_tokens_seen": 119239920, "step": 55280 }, { "epoch": 9.018760195758565, "grad_norm": 0.030247537419199944, "learning_rate": 0.0006694841464133981, "loss": 0.0196, "num_input_tokens_seen": 119251312, "step": 55285 }, { "epoch": 9.01957585644372, "grad_norm": 0.005840882193297148, "learning_rate": 0.0006694171788757939, "loss": 0.1193, "num_input_tokens_seen": 119262352, "step": 55290 }, { "epoch": 9.020391517128875, "grad_norm": 0.41207802295684814, "learning_rate": 0.0006693502079047356, "loss": 0.0144, "num_input_tokens_seen": 119273168, "step": 55295 }, { "epoch": 9.021207177814029, "grad_norm": 0.10349519550800323, "learning_rate": 0.0006692832335015806, "loss": 0.2274, "num_input_tokens_seen": 119284688, "step": 55300 }, { "epoch": 9.022022838499185, "grad_norm": 0.056495025753974915, "learning_rate": 0.000669216255667686, "loss": 0.0522, "num_input_tokens_seen": 119295056, "step": 55305 }, { "epoch": 9.022838499184338, "grad_norm": 0.006413584109395742, "learning_rate": 0.0006691492744044093, "loss": 0.1206, "num_input_tokens_seen": 119305232, "step": 55310 }, { "epoch": 9.023654159869494, "grad_norm": 0.01080095674842596, "learning_rate": 0.000669082289713108, "loss": 0.0322, "num_input_tokens_seen": 119316208, "step": 55315 }, { "epoch": 9.02446982055465, "grad_norm": 0.43201345205307007, "learning_rate": 0.0006690153015951397, "loss": 0.1073, "num_input_tokens_seen": 119326768, "step": 55320 }, { "epoch": 9.025285481239804, "grad_norm": 0.22295348346233368, "learning_rate": 0.0006689483100518617, "loss": 0.08, "num_input_tokens_seen": 119337776, "step": 55325 }, { "epoch": 9.02610114192496, "grad_norm": 0.12635333836078644, "learning_rate": 0.000668881315084632, "loss": 0.0215, "num_input_tokens_seen": 119349040, "step": 55330 }, { "epoch": 9.026916802610113, "grad_norm": 0.07496989518404007, "learning_rate": 0.0006688143166948082, "loss": 0.0352, "num_input_tokens_seen": 119359184, "step": 55335 }, { "epoch": 9.02773246329527, "grad_norm": 0.23639574646949768, "learning_rate": 0.0006687473148837482, "loss": 0.0288, "num_input_tokens_seen": 119369296, "step": 55340 }, { "epoch": 9.028548123980425, "grad_norm": 0.006493065971881151, "learning_rate": 0.0006686803096528096, "loss": 0.1492, "num_input_tokens_seen": 119381328, "step": 55345 }, { "epoch": 9.029363784665579, "grad_norm": 0.3914169371128082, "learning_rate": 0.0006686133010033507, "loss": 0.11, "num_input_tokens_seen": 119391376, "step": 55350 }, { "epoch": 9.030179445350734, "grad_norm": 0.02614215388894081, "learning_rate": 0.0006685462889367293, "loss": 0.0208, "num_input_tokens_seen": 119401744, "step": 55355 }, { "epoch": 9.030995106035888, "grad_norm": 0.6184800267219543, "learning_rate": 0.0006684792734543036, "loss": 0.2011, "num_input_tokens_seen": 119412080, "step": 55360 }, { "epoch": 9.031810766721044, "grad_norm": 0.011108556762337685, "learning_rate": 0.0006684122545574315, "loss": 0.0528, "num_input_tokens_seen": 119422864, "step": 55365 }, { "epoch": 9.0326264274062, "grad_norm": 0.33638569712638855, "learning_rate": 0.0006683452322474715, "loss": 0.1003, "num_input_tokens_seen": 119432880, "step": 55370 }, { "epoch": 9.033442088091354, "grad_norm": 0.04050545394420624, "learning_rate": 0.0006682782065257818, "loss": 0.0197, "num_input_tokens_seen": 119442544, "step": 55375 }, { "epoch": 9.03425774877651, "grad_norm": 0.02553357183933258, "learning_rate": 0.000668211177393721, "loss": 0.0237, "num_input_tokens_seen": 119454352, "step": 55380 }, { "epoch": 9.035073409461663, "grad_norm": 0.005218474194407463, "learning_rate": 0.0006681441448526471, "loss": 0.0048, "num_input_tokens_seen": 119466704, "step": 55385 }, { "epoch": 9.035889070146819, "grad_norm": 0.07839210331439972, "learning_rate": 0.0006680771089039188, "loss": 0.0116, "num_input_tokens_seen": 119476048, "step": 55390 }, { "epoch": 9.036704730831975, "grad_norm": 0.0407678447663784, "learning_rate": 0.0006680100695488946, "loss": 0.2338, "num_input_tokens_seen": 119487376, "step": 55395 }, { "epoch": 9.037520391517129, "grad_norm": 0.0023660878650844097, "learning_rate": 0.0006679430267889332, "loss": 0.0854, "num_input_tokens_seen": 119497168, "step": 55400 }, { "epoch": 9.038336052202284, "grad_norm": 0.13383476436138153, "learning_rate": 0.0006678759806253933, "loss": 0.0381, "num_input_tokens_seen": 119508208, "step": 55405 }, { "epoch": 9.039151712887438, "grad_norm": 0.016330936923623085, "learning_rate": 0.0006678089310596339, "loss": 0.0378, "num_input_tokens_seen": 119518864, "step": 55410 }, { "epoch": 9.039967373572594, "grad_norm": 0.0238396767526865, "learning_rate": 0.0006677418780930136, "loss": 0.0146, "num_input_tokens_seen": 119528784, "step": 55415 }, { "epoch": 9.040783034257748, "grad_norm": 0.6130561232566833, "learning_rate": 0.0006676748217268912, "loss": 0.0924, "num_input_tokens_seen": 119539344, "step": 55420 }, { "epoch": 9.041598694942904, "grad_norm": 0.04567724093794823, "learning_rate": 0.0006676077619626259, "loss": 0.006, "num_input_tokens_seen": 119550544, "step": 55425 }, { "epoch": 9.04241435562806, "grad_norm": 0.5983039140701294, "learning_rate": 0.0006675406988015766, "loss": 0.0535, "num_input_tokens_seen": 119561904, "step": 55430 }, { "epoch": 9.043230016313213, "grad_norm": 0.04347669705748558, "learning_rate": 0.0006674736322451027, "loss": 0.0252, "num_input_tokens_seen": 119572560, "step": 55435 }, { "epoch": 9.044045676998369, "grad_norm": 0.4338512420654297, "learning_rate": 0.000667406562294563, "loss": 0.0675, "num_input_tokens_seen": 119583632, "step": 55440 }, { "epoch": 9.044861337683523, "grad_norm": 0.28417837619781494, "learning_rate": 0.0006673394889513169, "loss": 0.1334, "num_input_tokens_seen": 119595632, "step": 55445 }, { "epoch": 9.045676998368679, "grad_norm": 0.13589511811733246, "learning_rate": 0.000667272412216724, "loss": 0.158, "num_input_tokens_seen": 119607440, "step": 55450 }, { "epoch": 9.046492659053834, "grad_norm": 0.2303483635187149, "learning_rate": 0.0006672053320921433, "loss": 0.0978, "num_input_tokens_seen": 119618448, "step": 55455 }, { "epoch": 9.047308319738988, "grad_norm": 0.11304701119661331, "learning_rate": 0.0006671382485789344, "loss": 0.1533, "num_input_tokens_seen": 119628976, "step": 55460 }, { "epoch": 9.048123980424144, "grad_norm": 0.1525658369064331, "learning_rate": 0.0006670711616784571, "loss": 0.0539, "num_input_tokens_seen": 119637872, "step": 55465 }, { "epoch": 9.048939641109298, "grad_norm": 0.014404415152966976, "learning_rate": 0.0006670040713920704, "loss": 0.0762, "num_input_tokens_seen": 119648624, "step": 55470 }, { "epoch": 9.049755301794454, "grad_norm": 0.10361255705356598, "learning_rate": 0.0006669369777211344, "loss": 0.0206, "num_input_tokens_seen": 119659376, "step": 55475 }, { "epoch": 9.05057096247961, "grad_norm": 0.7863866090774536, "learning_rate": 0.000666869880667009, "loss": 0.1128, "num_input_tokens_seen": 119670512, "step": 55480 }, { "epoch": 9.051386623164763, "grad_norm": 0.1947966068983078, "learning_rate": 0.0006668027802310537, "loss": 0.0205, "num_input_tokens_seen": 119681392, "step": 55485 }, { "epoch": 9.052202283849919, "grad_norm": 0.11835118383169174, "learning_rate": 0.0006667356764146284, "loss": 0.0209, "num_input_tokens_seen": 119691824, "step": 55490 }, { "epoch": 9.053017944535073, "grad_norm": 0.009567261673510075, "learning_rate": 0.0006666685692190931, "loss": 0.01, "num_input_tokens_seen": 119701648, "step": 55495 }, { "epoch": 9.053833605220229, "grad_norm": 0.01262793317437172, "learning_rate": 0.0006666014586458079, "loss": 0.0612, "num_input_tokens_seen": 119712368, "step": 55500 }, { "epoch": 9.054649265905383, "grad_norm": 0.020150871947407722, "learning_rate": 0.0006665343446961327, "loss": 0.0134, "num_input_tokens_seen": 119723472, "step": 55505 }, { "epoch": 9.055464926590538, "grad_norm": 0.21810318529605865, "learning_rate": 0.0006664672273714278, "loss": 0.023, "num_input_tokens_seen": 119734576, "step": 55510 }, { "epoch": 9.056280587275694, "grad_norm": 0.1977030336856842, "learning_rate": 0.0006664001066730532, "loss": 0.0702, "num_input_tokens_seen": 119744880, "step": 55515 }, { "epoch": 9.057096247960848, "grad_norm": 0.2083820104598999, "learning_rate": 0.0006663329826023696, "loss": 0.1047, "num_input_tokens_seen": 119756176, "step": 55520 }, { "epoch": 9.057911908646004, "grad_norm": 0.1383449137210846, "learning_rate": 0.000666265855160737, "loss": 0.0343, "num_input_tokens_seen": 119766704, "step": 55525 }, { "epoch": 9.058727569331158, "grad_norm": 0.012325000949203968, "learning_rate": 0.0006661987243495159, "loss": 0.026, "num_input_tokens_seen": 119777968, "step": 55530 }, { "epoch": 9.059543230016313, "grad_norm": 0.0018320106901228428, "learning_rate": 0.0006661315901700668, "loss": 0.035, "num_input_tokens_seen": 119786960, "step": 55535 }, { "epoch": 9.060358890701469, "grad_norm": 0.09194603562355042, "learning_rate": 0.0006660644526237502, "loss": 0.0448, "num_input_tokens_seen": 119798960, "step": 55540 }, { "epoch": 9.061174551386623, "grad_norm": 0.5157580971717834, "learning_rate": 0.0006659973117119269, "loss": 0.207, "num_input_tokens_seen": 119808880, "step": 55545 }, { "epoch": 9.061990212071779, "grad_norm": 0.6132428050041199, "learning_rate": 0.0006659301674359575, "loss": 0.0307, "num_input_tokens_seen": 119819376, "step": 55550 }, { "epoch": 9.062805872756933, "grad_norm": 0.49757030606269836, "learning_rate": 0.0006658630197972027, "loss": 0.0383, "num_input_tokens_seen": 119830576, "step": 55555 }, { "epoch": 9.063621533442088, "grad_norm": 0.15536803007125854, "learning_rate": 0.0006657958687970233, "loss": 0.083, "num_input_tokens_seen": 119841136, "step": 55560 }, { "epoch": 9.064437194127244, "grad_norm": 0.051004983484745026, "learning_rate": 0.0006657287144367805, "loss": 0.1118, "num_input_tokens_seen": 119850000, "step": 55565 }, { "epoch": 9.065252854812398, "grad_norm": 0.03996096923947334, "learning_rate": 0.000665661556717835, "loss": 0.0128, "num_input_tokens_seen": 119860368, "step": 55570 }, { "epoch": 9.066068515497554, "grad_norm": 0.10328884422779083, "learning_rate": 0.0006655943956415479, "loss": 0.168, "num_input_tokens_seen": 119870800, "step": 55575 }, { "epoch": 9.066884176182707, "grad_norm": 0.007891271263360977, "learning_rate": 0.0006655272312092802, "loss": 0.0504, "num_input_tokens_seen": 119882064, "step": 55580 }, { "epoch": 9.067699836867863, "grad_norm": 0.02266419306397438, "learning_rate": 0.0006654600634223933, "loss": 0.0255, "num_input_tokens_seen": 119892624, "step": 55585 }, { "epoch": 9.068515497553017, "grad_norm": 0.06656887382268906, "learning_rate": 0.0006653928922822482, "loss": 0.0155, "num_input_tokens_seen": 119902352, "step": 55590 }, { "epoch": 9.069331158238173, "grad_norm": 0.587614119052887, "learning_rate": 0.0006653257177902063, "loss": 0.0974, "num_input_tokens_seen": 119912144, "step": 55595 }, { "epoch": 9.070146818923329, "grad_norm": 0.5886010527610779, "learning_rate": 0.0006652585399476292, "loss": 0.0779, "num_input_tokens_seen": 119923312, "step": 55600 }, { "epoch": 9.070962479608482, "grad_norm": 0.3767647445201874, "learning_rate": 0.000665191358755878, "loss": 0.0657, "num_input_tokens_seen": 119934960, "step": 55605 }, { "epoch": 9.071778140293638, "grad_norm": 0.020891567692160606, "learning_rate": 0.0006651241742163143, "loss": 0.0057, "num_input_tokens_seen": 119945232, "step": 55610 }, { "epoch": 9.072593800978792, "grad_norm": 0.03240369260311127, "learning_rate": 0.0006650569863302999, "loss": 0.1158, "num_input_tokens_seen": 119954768, "step": 55615 }, { "epoch": 9.073409461663948, "grad_norm": 0.03866568207740784, "learning_rate": 0.0006649897950991962, "loss": 0.1416, "num_input_tokens_seen": 119964336, "step": 55620 }, { "epoch": 9.074225122349104, "grad_norm": 0.003994786646217108, "learning_rate": 0.000664922600524365, "loss": 0.0504, "num_input_tokens_seen": 119975920, "step": 55625 }, { "epoch": 9.075040783034257, "grad_norm": 0.0053985086269676685, "learning_rate": 0.000664855402607168, "loss": 0.0223, "num_input_tokens_seen": 119986992, "step": 55630 }, { "epoch": 9.075856443719413, "grad_norm": 0.004717818461358547, "learning_rate": 0.0006647882013489674, "loss": 0.0092, "num_input_tokens_seen": 119997200, "step": 55635 }, { "epoch": 9.076672104404567, "grad_norm": 0.16305413842201233, "learning_rate": 0.0006647209967511245, "loss": 0.1185, "num_input_tokens_seen": 120010512, "step": 55640 }, { "epoch": 9.077487765089723, "grad_norm": 0.09873753041028976, "learning_rate": 0.0006646537888150019, "loss": 0.0396, "num_input_tokens_seen": 120021968, "step": 55645 }, { "epoch": 9.078303425774878, "grad_norm": 0.8446091413497925, "learning_rate": 0.0006645865775419613, "loss": 0.1696, "num_input_tokens_seen": 120034256, "step": 55650 }, { "epoch": 9.079119086460032, "grad_norm": 0.03716664761304855, "learning_rate": 0.0006645193629333649, "loss": 0.1514, "num_input_tokens_seen": 120044560, "step": 55655 }, { "epoch": 9.079934747145188, "grad_norm": 0.3885654807090759, "learning_rate": 0.0006644521449905749, "loss": 0.2012, "num_input_tokens_seen": 120055920, "step": 55660 }, { "epoch": 9.080750407830342, "grad_norm": 0.22820302844047546, "learning_rate": 0.0006643849237149536, "loss": 0.0615, "num_input_tokens_seen": 120067408, "step": 55665 }, { "epoch": 9.081566068515498, "grad_norm": 0.33249443769454956, "learning_rate": 0.0006643176991078632, "loss": 0.1151, "num_input_tokens_seen": 120079184, "step": 55670 }, { "epoch": 9.082381729200652, "grad_norm": 0.0899425595998764, "learning_rate": 0.0006642504711706663, "loss": 0.0266, "num_input_tokens_seen": 120090672, "step": 55675 }, { "epoch": 9.083197389885807, "grad_norm": 0.02980557456612587, "learning_rate": 0.000664183239904725, "loss": 0.0411, "num_input_tokens_seen": 120101968, "step": 55680 }, { "epoch": 9.084013050570963, "grad_norm": 0.9618743062019348, "learning_rate": 0.0006641160053114021, "loss": 0.0696, "num_input_tokens_seen": 120112816, "step": 55685 }, { "epoch": 9.084828711256117, "grad_norm": 0.017354147508740425, "learning_rate": 0.0006640487673920605, "loss": 0.1516, "num_input_tokens_seen": 120123664, "step": 55690 }, { "epoch": 9.085644371941273, "grad_norm": 0.21114613115787506, "learning_rate": 0.0006639815261480622, "loss": 0.0524, "num_input_tokens_seen": 120135472, "step": 55695 }, { "epoch": 9.086460032626427, "grad_norm": 0.6886735558509827, "learning_rate": 0.0006639142815807704, "loss": 0.0998, "num_input_tokens_seen": 120144848, "step": 55700 }, { "epoch": 9.087275693311582, "grad_norm": 0.031912095844745636, "learning_rate": 0.0006638470336915477, "loss": 0.0464, "num_input_tokens_seen": 120156080, "step": 55705 }, { "epoch": 9.088091353996738, "grad_norm": 0.019507847726345062, "learning_rate": 0.0006637797824817569, "loss": 0.0825, "num_input_tokens_seen": 120166256, "step": 55710 }, { "epoch": 9.088907014681892, "grad_norm": 0.025199616327881813, "learning_rate": 0.000663712527952761, "loss": 0.0324, "num_input_tokens_seen": 120177520, "step": 55715 }, { "epoch": 9.089722675367048, "grad_norm": 0.145310178399086, "learning_rate": 0.0006636452701059232, "loss": 0.1292, "num_input_tokens_seen": 120188464, "step": 55720 }, { "epoch": 9.090538336052202, "grad_norm": 0.012959443032741547, "learning_rate": 0.0006635780089426065, "loss": 0.176, "num_input_tokens_seen": 120200112, "step": 55725 }, { "epoch": 9.091353996737357, "grad_norm": 0.2598142623901367, "learning_rate": 0.0006635107444641737, "loss": 0.0276, "num_input_tokens_seen": 120210832, "step": 55730 }, { "epoch": 9.092169657422513, "grad_norm": 0.015528635121881962, "learning_rate": 0.0006634434766719883, "loss": 0.0502, "num_input_tokens_seen": 120222160, "step": 55735 }, { "epoch": 9.092985318107667, "grad_norm": 0.01663241907954216, "learning_rate": 0.0006633762055674136, "loss": 0.0334, "num_input_tokens_seen": 120231376, "step": 55740 }, { "epoch": 9.093800978792823, "grad_norm": 0.048493362963199615, "learning_rate": 0.0006633089311518128, "loss": 0.2616, "num_input_tokens_seen": 120243024, "step": 55745 }, { "epoch": 9.094616639477977, "grad_norm": 0.0799279436469078, "learning_rate": 0.0006632416534265493, "loss": 0.0704, "num_input_tokens_seen": 120254832, "step": 55750 }, { "epoch": 9.095432300163132, "grad_norm": 0.2075689285993576, "learning_rate": 0.0006631743723929867, "loss": 0.0529, "num_input_tokens_seen": 120264688, "step": 55755 }, { "epoch": 9.096247960848286, "grad_norm": 0.027742646634578705, "learning_rate": 0.0006631070880524883, "loss": 0.0463, "num_input_tokens_seen": 120275792, "step": 55760 }, { "epoch": 9.097063621533442, "grad_norm": 0.054742153733968735, "learning_rate": 0.0006630398004064179, "loss": 0.0202, "num_input_tokens_seen": 120285776, "step": 55765 }, { "epoch": 9.097879282218598, "grad_norm": 0.10826899856328964, "learning_rate": 0.0006629725094561392, "loss": 0.1357, "num_input_tokens_seen": 120295408, "step": 55770 }, { "epoch": 9.098694942903752, "grad_norm": 0.13032063841819763, "learning_rate": 0.0006629052152030158, "loss": 0.1463, "num_input_tokens_seen": 120306160, "step": 55775 }, { "epoch": 9.099510603588907, "grad_norm": 0.01495547778904438, "learning_rate": 0.0006628379176484115, "loss": 0.0553, "num_input_tokens_seen": 120317872, "step": 55780 }, { "epoch": 9.100326264274061, "grad_norm": 0.04284167289733887, "learning_rate": 0.0006627706167936903, "loss": 0.1283, "num_input_tokens_seen": 120328784, "step": 55785 }, { "epoch": 9.101141924959217, "grad_norm": 0.12099170684814453, "learning_rate": 0.0006627033126402159, "loss": 0.0336, "num_input_tokens_seen": 120340112, "step": 55790 }, { "epoch": 9.101957585644373, "grad_norm": 0.834470272064209, "learning_rate": 0.0006626360051893526, "loss": 0.1135, "num_input_tokens_seen": 120351472, "step": 55795 }, { "epoch": 9.102773246329527, "grad_norm": 0.28302061557769775, "learning_rate": 0.0006625686944424642, "loss": 0.0595, "num_input_tokens_seen": 120362320, "step": 55800 }, { "epoch": 9.103588907014682, "grad_norm": 0.017118629068136215, "learning_rate": 0.0006625013804009152, "loss": 0.0803, "num_input_tokens_seen": 120372624, "step": 55805 }, { "epoch": 9.104404567699836, "grad_norm": 0.026390070095658302, "learning_rate": 0.0006624340630660695, "loss": 0.118, "num_input_tokens_seen": 120383920, "step": 55810 }, { "epoch": 9.105220228384992, "grad_norm": 0.23710112273693085, "learning_rate": 0.0006623667424392914, "loss": 0.0666, "num_input_tokens_seen": 120394704, "step": 55815 }, { "epoch": 9.106035889070148, "grad_norm": 0.38931578397750854, "learning_rate": 0.0006622994185219453, "loss": 0.0819, "num_input_tokens_seen": 120405584, "step": 55820 }, { "epoch": 9.106851549755302, "grad_norm": 0.42046496272087097, "learning_rate": 0.0006622320913153957, "loss": 0.1129, "num_input_tokens_seen": 120416560, "step": 55825 }, { "epoch": 9.107667210440457, "grad_norm": 0.4778141379356384, "learning_rate": 0.0006621647608210068, "loss": 0.1091, "num_input_tokens_seen": 120427568, "step": 55830 }, { "epoch": 9.108482871125611, "grad_norm": 0.03009987808763981, "learning_rate": 0.0006620974270401434, "loss": 0.0408, "num_input_tokens_seen": 120438128, "step": 55835 }, { "epoch": 9.109298531810767, "grad_norm": 0.2173631191253662, "learning_rate": 0.00066203008997417, "loss": 0.0534, "num_input_tokens_seen": 120449008, "step": 55840 }, { "epoch": 9.11011419249592, "grad_norm": 0.004104393068701029, "learning_rate": 0.0006619627496244513, "loss": 0.0405, "num_input_tokens_seen": 120460528, "step": 55845 }, { "epoch": 9.110929853181077, "grad_norm": 0.04671208932995796, "learning_rate": 0.0006618954059923517, "loss": 0.1114, "num_input_tokens_seen": 120471856, "step": 55850 }, { "epoch": 9.111745513866232, "grad_norm": 0.3068602681159973, "learning_rate": 0.0006618280590792367, "loss": 0.0514, "num_input_tokens_seen": 120482320, "step": 55855 }, { "epoch": 9.112561174551386, "grad_norm": 0.004558865446597338, "learning_rate": 0.0006617607088864706, "loss": 0.0448, "num_input_tokens_seen": 120492880, "step": 55860 }, { "epoch": 9.113376835236542, "grad_norm": 0.08361528813838959, "learning_rate": 0.0006616933554154186, "loss": 0.2659, "num_input_tokens_seen": 120503472, "step": 55865 }, { "epoch": 9.114192495921696, "grad_norm": 0.19932563602924347, "learning_rate": 0.0006616259986674456, "loss": 0.0638, "num_input_tokens_seen": 120513616, "step": 55870 }, { "epoch": 9.115008156606851, "grad_norm": 0.05301324650645256, "learning_rate": 0.0006615586386439169, "loss": 0.0645, "num_input_tokens_seen": 120523408, "step": 55875 }, { "epoch": 9.115823817292007, "grad_norm": 0.017941607162356377, "learning_rate": 0.0006614912753461973, "loss": 0.0387, "num_input_tokens_seen": 120533904, "step": 55880 }, { "epoch": 9.116639477977161, "grad_norm": 0.23517172038555145, "learning_rate": 0.0006614239087756519, "loss": 0.0759, "num_input_tokens_seen": 120545424, "step": 55885 }, { "epoch": 9.117455138662317, "grad_norm": 0.019830500707030296, "learning_rate": 0.0006613565389336465, "loss": 0.0121, "num_input_tokens_seen": 120557104, "step": 55890 }, { "epoch": 9.11827079934747, "grad_norm": 0.6013330817222595, "learning_rate": 0.0006612891658215461, "loss": 0.0771, "num_input_tokens_seen": 120567856, "step": 55895 }, { "epoch": 9.119086460032626, "grad_norm": 0.4540465176105499, "learning_rate": 0.000661221789440716, "loss": 0.2828, "num_input_tokens_seen": 120577840, "step": 55900 }, { "epoch": 9.119902120717782, "grad_norm": 0.019881321117281914, "learning_rate": 0.0006611544097925219, "loss": 0.1651, "num_input_tokens_seen": 120589104, "step": 55905 }, { "epoch": 9.120717781402936, "grad_norm": 0.4489336907863617, "learning_rate": 0.0006610870268783292, "loss": 0.1217, "num_input_tokens_seen": 120600400, "step": 55910 }, { "epoch": 9.121533442088092, "grad_norm": 0.015544923022389412, "learning_rate": 0.0006610196406995038, "loss": 0.0855, "num_input_tokens_seen": 120611472, "step": 55915 }, { "epoch": 9.122349102773246, "grad_norm": 0.19141457974910736, "learning_rate": 0.0006609522512574107, "loss": 0.0445, "num_input_tokens_seen": 120623792, "step": 55920 }, { "epoch": 9.123164763458401, "grad_norm": 0.014635547995567322, "learning_rate": 0.0006608848585534164, "loss": 0.0614, "num_input_tokens_seen": 120634704, "step": 55925 }, { "epoch": 9.123980424143557, "grad_norm": 0.07866846024990082, "learning_rate": 0.0006608174625888862, "loss": 0.0314, "num_input_tokens_seen": 120644624, "step": 55930 }, { "epoch": 9.124796084828711, "grad_norm": 0.012827079743146896, "learning_rate": 0.000660750063365186, "loss": 0.0169, "num_input_tokens_seen": 120656688, "step": 55935 }, { "epoch": 9.125611745513867, "grad_norm": 0.018985828384757042, "learning_rate": 0.000660682660883682, "loss": 0.0324, "num_input_tokens_seen": 120665808, "step": 55940 }, { "epoch": 9.12642740619902, "grad_norm": 0.5553588271141052, "learning_rate": 0.0006606152551457401, "loss": 0.0579, "num_input_tokens_seen": 120675632, "step": 55945 }, { "epoch": 9.127243066884176, "grad_norm": 0.04444368556141853, "learning_rate": 0.0006605478461527262, "loss": 0.0249, "num_input_tokens_seen": 120686544, "step": 55950 }, { "epoch": 9.12805872756933, "grad_norm": 0.1786317229270935, "learning_rate": 0.0006604804339060065, "loss": 0.1777, "num_input_tokens_seen": 120698352, "step": 55955 }, { "epoch": 9.128874388254486, "grad_norm": 0.46378782391548157, "learning_rate": 0.0006604130184069472, "loss": 0.1393, "num_input_tokens_seen": 120709712, "step": 55960 }, { "epoch": 9.129690048939642, "grad_norm": 0.16575656831264496, "learning_rate": 0.0006603455996569146, "loss": 0.03, "num_input_tokens_seen": 120721424, "step": 55965 }, { "epoch": 9.130505709624796, "grad_norm": 0.018115028738975525, "learning_rate": 0.0006602781776572752, "loss": 0.0469, "num_input_tokens_seen": 120732752, "step": 55970 }, { "epoch": 9.131321370309951, "grad_norm": 0.016577819362282753, "learning_rate": 0.000660210752409395, "loss": 0.0982, "num_input_tokens_seen": 120743664, "step": 55975 }, { "epoch": 9.132137030995105, "grad_norm": 0.02281651459634304, "learning_rate": 0.0006601433239146407, "loss": 0.0192, "num_input_tokens_seen": 120754224, "step": 55980 }, { "epoch": 9.132952691680261, "grad_norm": 0.20791949331760406, "learning_rate": 0.0006600758921743788, "loss": 0.1402, "num_input_tokens_seen": 120764848, "step": 55985 }, { "epoch": 9.133768352365417, "grad_norm": 0.023971468210220337, "learning_rate": 0.0006600084571899758, "loss": 0.0384, "num_input_tokens_seen": 120775952, "step": 55990 }, { "epoch": 9.13458401305057, "grad_norm": 0.13164128363132477, "learning_rate": 0.0006599410189627985, "loss": 0.0624, "num_input_tokens_seen": 120787280, "step": 55995 }, { "epoch": 9.135399673735726, "grad_norm": 0.08347659558057785, "learning_rate": 0.0006598735774942135, "loss": 0.0212, "num_input_tokens_seen": 120798160, "step": 56000 }, { "epoch": 9.13621533442088, "grad_norm": 0.39179304242134094, "learning_rate": 0.0006598061327855876, "loss": 0.0675, "num_input_tokens_seen": 120809200, "step": 56005 }, { "epoch": 9.137030995106036, "grad_norm": 0.4155136048793793, "learning_rate": 0.0006597386848382878, "loss": 0.1251, "num_input_tokens_seen": 120819600, "step": 56010 }, { "epoch": 9.137846655791192, "grad_norm": 0.02046937681734562, "learning_rate": 0.000659671233653681, "loss": 0.0552, "num_input_tokens_seen": 120830768, "step": 56015 }, { "epoch": 9.138662316476346, "grad_norm": 0.13241569697856903, "learning_rate": 0.0006596037792331338, "loss": 0.05, "num_input_tokens_seen": 120842736, "step": 56020 }, { "epoch": 9.139477977161501, "grad_norm": 0.39709997177124023, "learning_rate": 0.0006595363215780137, "loss": 0.129, "num_input_tokens_seen": 120853008, "step": 56025 }, { "epoch": 9.140293637846655, "grad_norm": 0.028569212183356285, "learning_rate": 0.0006594688606896877, "loss": 0.0428, "num_input_tokens_seen": 120864112, "step": 56030 }, { "epoch": 9.141109298531811, "grad_norm": 0.329426646232605, "learning_rate": 0.0006594013965695229, "loss": 0.0273, "num_input_tokens_seen": 120875248, "step": 56035 }, { "epoch": 9.141924959216965, "grad_norm": 0.43424251675605774, "learning_rate": 0.0006593339292188865, "loss": 0.2462, "num_input_tokens_seen": 120887024, "step": 56040 }, { "epoch": 9.14274061990212, "grad_norm": 0.027440574020147324, "learning_rate": 0.0006592664586391461, "loss": 0.0255, "num_input_tokens_seen": 120897808, "step": 56045 }, { "epoch": 9.143556280587276, "grad_norm": 0.014710106886923313, "learning_rate": 0.0006591989848316687, "loss": 0.0749, "num_input_tokens_seen": 120908720, "step": 56050 }, { "epoch": 9.14437194127243, "grad_norm": 0.049178145825862885, "learning_rate": 0.0006591315077978221, "loss": 0.028, "num_input_tokens_seen": 120919888, "step": 56055 }, { "epoch": 9.145187601957586, "grad_norm": 0.16164851188659668, "learning_rate": 0.0006590640275389734, "loss": 0.0725, "num_input_tokens_seen": 120930256, "step": 56060 }, { "epoch": 9.14600326264274, "grad_norm": 0.002598636085167527, "learning_rate": 0.0006589965440564905, "loss": 0.0204, "num_input_tokens_seen": 120940336, "step": 56065 }, { "epoch": 9.146818923327896, "grad_norm": 0.019609270617365837, "learning_rate": 0.000658929057351741, "loss": 0.0751, "num_input_tokens_seen": 120950864, "step": 56070 }, { "epoch": 9.147634584013051, "grad_norm": 0.3208036422729492, "learning_rate": 0.0006588615674260925, "loss": 0.026, "num_input_tokens_seen": 120961392, "step": 56075 }, { "epoch": 9.148450244698205, "grad_norm": 0.13847127556800842, "learning_rate": 0.0006587940742809127, "loss": 0.0752, "num_input_tokens_seen": 120971632, "step": 56080 }, { "epoch": 9.149265905383361, "grad_norm": 0.030213642865419388, "learning_rate": 0.0006587265779175696, "loss": 0.0142, "num_input_tokens_seen": 120981168, "step": 56085 }, { "epoch": 9.150081566068515, "grad_norm": 0.6880441308021545, "learning_rate": 0.0006586590783374311, "loss": 0.0567, "num_input_tokens_seen": 120991664, "step": 56090 }, { "epoch": 9.15089722675367, "grad_norm": 0.2707865536212921, "learning_rate": 0.000658591575541865, "loss": 0.0366, "num_input_tokens_seen": 121002928, "step": 56095 }, { "epoch": 9.151712887438826, "grad_norm": 0.035509396344423294, "learning_rate": 0.0006585240695322395, "loss": 0.0154, "num_input_tokens_seen": 121012976, "step": 56100 }, { "epoch": 9.15252854812398, "grad_norm": 0.053702786564826965, "learning_rate": 0.0006584565603099227, "loss": 0.0217, "num_input_tokens_seen": 121024144, "step": 56105 }, { "epoch": 9.153344208809136, "grad_norm": 0.16139712929725647, "learning_rate": 0.0006583890478762824, "loss": 0.0518, "num_input_tokens_seen": 121034928, "step": 56110 }, { "epoch": 9.15415986949429, "grad_norm": 0.4832369089126587, "learning_rate": 0.0006583215322326874, "loss": 0.1536, "num_input_tokens_seen": 121045424, "step": 56115 }, { "epoch": 9.154975530179446, "grad_norm": 0.09216664731502533, "learning_rate": 0.0006582540133805056, "loss": 0.0707, "num_input_tokens_seen": 121056624, "step": 56120 }, { "epoch": 9.1557911908646, "grad_norm": 0.05360868573188782, "learning_rate": 0.0006581864913211055, "loss": 0.0928, "num_input_tokens_seen": 121068176, "step": 56125 }, { "epoch": 9.156606851549755, "grad_norm": 0.01583525538444519, "learning_rate": 0.0006581189660558554, "loss": 0.1761, "num_input_tokens_seen": 121079408, "step": 56130 }, { "epoch": 9.15742251223491, "grad_norm": 0.10853324830532074, "learning_rate": 0.000658051437586124, "loss": 0.0344, "num_input_tokens_seen": 121090000, "step": 56135 }, { "epoch": 9.158238172920065, "grad_norm": 0.9389238953590393, "learning_rate": 0.0006579839059132796, "loss": 0.0425, "num_input_tokens_seen": 121101200, "step": 56140 }, { "epoch": 9.15905383360522, "grad_norm": 0.3436606228351593, "learning_rate": 0.000657916371038691, "loss": 0.0985, "num_input_tokens_seen": 121111120, "step": 56145 }, { "epoch": 9.159869494290374, "grad_norm": 0.006948577240109444, "learning_rate": 0.0006578488329637268, "loss": 0.0101, "num_input_tokens_seen": 121122064, "step": 56150 }, { "epoch": 9.16068515497553, "grad_norm": 0.16838285326957703, "learning_rate": 0.0006577812916897558, "loss": 0.0276, "num_input_tokens_seen": 121133136, "step": 56155 }, { "epoch": 9.161500815660686, "grad_norm": 0.11950661242008209, "learning_rate": 0.0006577137472181466, "loss": 0.1037, "num_input_tokens_seen": 121145168, "step": 56160 }, { "epoch": 9.16231647634584, "grad_norm": 0.47877535223960876, "learning_rate": 0.0006576461995502682, "loss": 0.1379, "num_input_tokens_seen": 121156816, "step": 56165 }, { "epoch": 9.163132137030995, "grad_norm": 0.01069714780896902, "learning_rate": 0.0006575786486874897, "loss": 0.0168, "num_input_tokens_seen": 121166960, "step": 56170 }, { "epoch": 9.16394779771615, "grad_norm": 0.2525418698787689, "learning_rate": 0.0006575110946311801, "loss": 0.1705, "num_input_tokens_seen": 121178832, "step": 56175 }, { "epoch": 9.164763458401305, "grad_norm": 0.10531517118215561, "learning_rate": 0.0006574435373827083, "loss": 0.0427, "num_input_tokens_seen": 121189072, "step": 56180 }, { "epoch": 9.16557911908646, "grad_norm": 0.04583534970879555, "learning_rate": 0.0006573759769434433, "loss": 0.0149, "num_input_tokens_seen": 121198800, "step": 56185 }, { "epoch": 9.166394779771615, "grad_norm": 0.19142676889896393, "learning_rate": 0.0006573084133147547, "loss": 0.1201, "num_input_tokens_seen": 121210064, "step": 56190 }, { "epoch": 9.16721044045677, "grad_norm": 0.013017485849559307, "learning_rate": 0.0006572408464980115, "loss": 0.107, "num_input_tokens_seen": 121221456, "step": 56195 }, { "epoch": 9.168026101141924, "grad_norm": 0.043239716440439224, "learning_rate": 0.000657173276494583, "loss": 0.1065, "num_input_tokens_seen": 121232240, "step": 56200 }, { "epoch": 9.16884176182708, "grad_norm": 0.3486689031124115, "learning_rate": 0.0006571057033058386, "loss": 0.0897, "num_input_tokens_seen": 121242704, "step": 56205 }, { "epoch": 9.169657422512234, "grad_norm": 0.035638950765132904, "learning_rate": 0.000657038126933148, "loss": 0.019, "num_input_tokens_seen": 121253584, "step": 56210 }, { "epoch": 9.17047308319739, "grad_norm": 0.2943743169307709, "learning_rate": 0.0006569705473778804, "loss": 0.1186, "num_input_tokens_seen": 121264880, "step": 56215 }, { "epoch": 9.171288743882545, "grad_norm": 0.045788221061229706, "learning_rate": 0.0006569029646414055, "loss": 0.0296, "num_input_tokens_seen": 121276080, "step": 56220 }, { "epoch": 9.1721044045677, "grad_norm": 0.7541148066520691, "learning_rate": 0.0006568353787250931, "loss": 0.1912, "num_input_tokens_seen": 121285424, "step": 56225 }, { "epoch": 9.172920065252855, "grad_norm": 0.09063722938299179, "learning_rate": 0.0006567677896303127, "loss": 0.0713, "num_input_tokens_seen": 121295216, "step": 56230 }, { "epoch": 9.173735725938009, "grad_norm": 0.12074990570545197, "learning_rate": 0.0006567001973584343, "loss": 0.0482, "num_input_tokens_seen": 121305552, "step": 56235 }, { "epoch": 9.174551386623165, "grad_norm": 0.14534588158130646, "learning_rate": 0.0006566326019108275, "loss": 0.0728, "num_input_tokens_seen": 121316368, "step": 56240 }, { "epoch": 9.17536704730832, "grad_norm": 0.8075056672096252, "learning_rate": 0.0006565650032888624, "loss": 0.1458, "num_input_tokens_seen": 121326512, "step": 56245 }, { "epoch": 9.176182707993474, "grad_norm": 0.010498271323740482, "learning_rate": 0.0006564974014939088, "loss": 0.086, "num_input_tokens_seen": 121337168, "step": 56250 }, { "epoch": 9.17699836867863, "grad_norm": 0.006512145511806011, "learning_rate": 0.0006564297965273369, "loss": 0.3307, "num_input_tokens_seen": 121347376, "step": 56255 }, { "epoch": 9.177814029363784, "grad_norm": 0.08957228809595108, "learning_rate": 0.0006563621883905167, "loss": 0.0343, "num_input_tokens_seen": 121356720, "step": 56260 }, { "epoch": 9.17862969004894, "grad_norm": 0.008703351952135563, "learning_rate": 0.0006562945770848183, "loss": 0.1249, "num_input_tokens_seen": 121366256, "step": 56265 }, { "epoch": 9.179445350734095, "grad_norm": 0.09398101270198822, "learning_rate": 0.0006562269626116122, "loss": 0.0824, "num_input_tokens_seen": 121377296, "step": 56270 }, { "epoch": 9.18026101141925, "grad_norm": 0.07159211486577988, "learning_rate": 0.0006561593449722683, "loss": 0.0484, "num_input_tokens_seen": 121388912, "step": 56275 }, { "epoch": 9.181076672104405, "grad_norm": 0.303072988986969, "learning_rate": 0.0006560917241681573, "loss": 0.111, "num_input_tokens_seen": 121398832, "step": 56280 }, { "epoch": 9.181892332789559, "grad_norm": 0.019270233809947968, "learning_rate": 0.0006560241002006495, "loss": 0.0417, "num_input_tokens_seen": 121410320, "step": 56285 }, { "epoch": 9.182707993474715, "grad_norm": 0.08726800233125687, "learning_rate": 0.0006559564730711153, "loss": 0.0809, "num_input_tokens_seen": 121422096, "step": 56290 }, { "epoch": 9.18352365415987, "grad_norm": 0.08240294456481934, "learning_rate": 0.0006558888427809255, "loss": 0.0328, "num_input_tokens_seen": 121433200, "step": 56295 }, { "epoch": 9.184339314845024, "grad_norm": 0.04616318643093109, "learning_rate": 0.0006558212093314504, "loss": 0.0125, "num_input_tokens_seen": 121443856, "step": 56300 }, { "epoch": 9.18515497553018, "grad_norm": 0.19343885779380798, "learning_rate": 0.0006557535727240609, "loss": 0.0402, "num_input_tokens_seen": 121453584, "step": 56305 }, { "epoch": 9.185970636215334, "grad_norm": 0.007104313001036644, "learning_rate": 0.0006556859329601275, "loss": 0.0132, "num_input_tokens_seen": 121462832, "step": 56310 }, { "epoch": 9.18678629690049, "grad_norm": 0.12412558495998383, "learning_rate": 0.0006556182900410213, "loss": 0.0342, "num_input_tokens_seen": 121472624, "step": 56315 }, { "epoch": 9.187601957585644, "grad_norm": 0.07492324709892273, "learning_rate": 0.0006555506439681131, "loss": 0.1172, "num_input_tokens_seen": 121482480, "step": 56320 }, { "epoch": 9.1884176182708, "grad_norm": 0.04366655275225639, "learning_rate": 0.0006554829947427736, "loss": 0.0359, "num_input_tokens_seen": 121491760, "step": 56325 }, { "epoch": 9.189233278955955, "grad_norm": 0.49060362577438354, "learning_rate": 0.0006554153423663741, "loss": 0.0641, "num_input_tokens_seen": 121500816, "step": 56330 }, { "epoch": 9.190048939641109, "grad_norm": 0.253623902797699, "learning_rate": 0.0006553476868402854, "loss": 0.0554, "num_input_tokens_seen": 121510384, "step": 56335 }, { "epoch": 9.190864600326265, "grad_norm": 0.2804422974586487, "learning_rate": 0.0006552800281658789, "loss": 0.0306, "num_input_tokens_seen": 121522192, "step": 56340 }, { "epoch": 9.191680261011419, "grad_norm": 0.17061103880405426, "learning_rate": 0.0006552123663445255, "loss": 0.047, "num_input_tokens_seen": 121533328, "step": 56345 }, { "epoch": 9.192495921696574, "grad_norm": 0.009179173968732357, "learning_rate": 0.0006551447013775967, "loss": 0.024, "num_input_tokens_seen": 121543440, "step": 56350 }, { "epoch": 9.19331158238173, "grad_norm": 0.4576239585876465, "learning_rate": 0.0006550770332664637, "loss": 0.1343, "num_input_tokens_seen": 121554832, "step": 56355 }, { "epoch": 9.194127243066884, "grad_norm": 0.022812338545918465, "learning_rate": 0.0006550093620124979, "loss": 0.046, "num_input_tokens_seen": 121565392, "step": 56360 }, { "epoch": 9.19494290375204, "grad_norm": 0.03383609652519226, "learning_rate": 0.0006549416876170707, "loss": 0.159, "num_input_tokens_seen": 121576912, "step": 56365 }, { "epoch": 9.195758564437194, "grad_norm": 0.17063173651695251, "learning_rate": 0.0006548740100815537, "loss": 0.0223, "num_input_tokens_seen": 121587888, "step": 56370 }, { "epoch": 9.19657422512235, "grad_norm": 0.020198598504066467, "learning_rate": 0.0006548063294073183, "loss": 0.0249, "num_input_tokens_seen": 121599472, "step": 56375 }, { "epoch": 9.197389885807505, "grad_norm": 0.1530260294675827, "learning_rate": 0.0006547386455957364, "loss": 0.0392, "num_input_tokens_seen": 121609520, "step": 56380 }, { "epoch": 9.198205546492659, "grad_norm": 0.005978389643132687, "learning_rate": 0.0006546709586481794, "loss": 0.0432, "num_input_tokens_seen": 121619408, "step": 56385 }, { "epoch": 9.199021207177815, "grad_norm": 0.5324417948722839, "learning_rate": 0.0006546032685660193, "loss": 0.0269, "num_input_tokens_seen": 121629296, "step": 56390 }, { "epoch": 9.199836867862969, "grad_norm": 0.10068287700414658, "learning_rate": 0.000654535575350628, "loss": 0.0178, "num_input_tokens_seen": 121640496, "step": 56395 }, { "epoch": 9.200652528548124, "grad_norm": 0.09097738564014435, "learning_rate": 0.0006544678790033769, "loss": 0.1287, "num_input_tokens_seen": 121651952, "step": 56400 }, { "epoch": 9.201468189233278, "grad_norm": 0.5552481412887573, "learning_rate": 0.0006544001795256385, "loss": 0.0727, "num_input_tokens_seen": 121662096, "step": 56405 }, { "epoch": 9.202283849918434, "grad_norm": 0.13829775154590607, "learning_rate": 0.0006543324769187844, "loss": 0.0309, "num_input_tokens_seen": 121671472, "step": 56410 }, { "epoch": 9.20309951060359, "grad_norm": 0.4387519657611847, "learning_rate": 0.0006542647711841869, "loss": 0.0937, "num_input_tokens_seen": 121682512, "step": 56415 }, { "epoch": 9.203915171288743, "grad_norm": 0.2677414119243622, "learning_rate": 0.0006541970623232183, "loss": 0.0424, "num_input_tokens_seen": 121694480, "step": 56420 }, { "epoch": 9.2047308319739, "grad_norm": 0.008609200827777386, "learning_rate": 0.0006541293503372506, "loss": 0.1133, "num_input_tokens_seen": 121705968, "step": 56425 }, { "epoch": 9.205546492659053, "grad_norm": 0.0954967737197876, "learning_rate": 0.0006540616352276558, "loss": 0.0483, "num_input_tokens_seen": 121716304, "step": 56430 }, { "epoch": 9.206362153344209, "grad_norm": 0.014114760793745518, "learning_rate": 0.0006539939169958067, "loss": 0.0489, "num_input_tokens_seen": 121726832, "step": 56435 }, { "epoch": 9.207177814029365, "grad_norm": 0.07194806635379791, "learning_rate": 0.0006539261956430755, "loss": 0.0934, "num_input_tokens_seen": 121737712, "step": 56440 }, { "epoch": 9.207993474714518, "grad_norm": 0.37339240312576294, "learning_rate": 0.0006538584711708348, "loss": 0.1556, "num_input_tokens_seen": 121747952, "step": 56445 }, { "epoch": 9.208809135399674, "grad_norm": 0.03280416876077652, "learning_rate": 0.0006537907435804569, "loss": 0.0072, "num_input_tokens_seen": 121758672, "step": 56450 }, { "epoch": 9.209624796084828, "grad_norm": 0.05734563246369362, "learning_rate": 0.0006537230128733144, "loss": 0.0139, "num_input_tokens_seen": 121769104, "step": 56455 }, { "epoch": 9.210440456769984, "grad_norm": 0.010936242528259754, "learning_rate": 0.0006536552790507802, "loss": 0.0328, "num_input_tokens_seen": 121780816, "step": 56460 }, { "epoch": 9.21125611745514, "grad_norm": 0.4480190873146057, "learning_rate": 0.0006535875421142267, "loss": 0.0769, "num_input_tokens_seen": 121791824, "step": 56465 }, { "epoch": 9.212071778140293, "grad_norm": 0.2070656418800354, "learning_rate": 0.0006535198020650269, "loss": 0.0214, "num_input_tokens_seen": 121802672, "step": 56470 }, { "epoch": 9.21288743882545, "grad_norm": 0.021591154858469963, "learning_rate": 0.0006534520589045537, "loss": 0.0219, "num_input_tokens_seen": 121813648, "step": 56475 }, { "epoch": 9.213703099510603, "grad_norm": 0.006583984941244125, "learning_rate": 0.0006533843126341795, "loss": 0.0187, "num_input_tokens_seen": 121825200, "step": 56480 }, { "epoch": 9.214518760195759, "grad_norm": 0.06452787667512894, "learning_rate": 0.0006533165632552777, "loss": 0.0496, "num_input_tokens_seen": 121836112, "step": 56485 }, { "epoch": 9.215334420880913, "grad_norm": 0.03967360407114029, "learning_rate": 0.0006532488107692214, "loss": 0.1197, "num_input_tokens_seen": 121846960, "step": 56490 }, { "epoch": 9.216150081566068, "grad_norm": 0.010467417538166046, "learning_rate": 0.0006531810551773836, "loss": 0.0187, "num_input_tokens_seen": 121857520, "step": 56495 }, { "epoch": 9.216965742251224, "grad_norm": 0.4048374593257904, "learning_rate": 0.0006531132964811374, "loss": 0.1829, "num_input_tokens_seen": 121868816, "step": 56500 }, { "epoch": 9.217781402936378, "grad_norm": 0.007898550480604172, "learning_rate": 0.0006530455346818559, "loss": 0.0734, "num_input_tokens_seen": 121880560, "step": 56505 }, { "epoch": 9.218597063621534, "grad_norm": 0.09966639429330826, "learning_rate": 0.0006529777697809125, "loss": 0.0401, "num_input_tokens_seen": 121892240, "step": 56510 }, { "epoch": 9.219412724306688, "grad_norm": 0.014188375324010849, "learning_rate": 0.0006529100017796805, "loss": 0.181, "num_input_tokens_seen": 121902960, "step": 56515 }, { "epoch": 9.220228384991843, "grad_norm": 0.590430736541748, "learning_rate": 0.0006528422306795334, "loss": 0.1079, "num_input_tokens_seen": 121914672, "step": 56520 }, { "epoch": 9.221044045676999, "grad_norm": 0.43762198090553284, "learning_rate": 0.0006527744564818446, "loss": 0.2125, "num_input_tokens_seen": 121924176, "step": 56525 }, { "epoch": 9.221859706362153, "grad_norm": 0.03208107128739357, "learning_rate": 0.0006527066791879875, "loss": 0.0073, "num_input_tokens_seen": 121934736, "step": 56530 }, { "epoch": 9.222675367047309, "grad_norm": 0.04294430837035179, "learning_rate": 0.000652638898799336, "loss": 0.0866, "num_input_tokens_seen": 121944144, "step": 56535 }, { "epoch": 9.223491027732463, "grad_norm": 0.09946151822805405, "learning_rate": 0.0006525711153172635, "loss": 0.0139, "num_input_tokens_seen": 121953552, "step": 56540 }, { "epoch": 9.224306688417618, "grad_norm": 0.02873525768518448, "learning_rate": 0.0006525033287431436, "loss": 0.1053, "num_input_tokens_seen": 121963248, "step": 56545 }, { "epoch": 9.225122349102774, "grad_norm": 0.014545915648341179, "learning_rate": 0.0006524355390783506, "loss": 0.0557, "num_input_tokens_seen": 121973040, "step": 56550 }, { "epoch": 9.225938009787928, "grad_norm": 0.009453125298023224, "learning_rate": 0.0006523677463242579, "loss": 0.0264, "num_input_tokens_seen": 121984464, "step": 56555 }, { "epoch": 9.226753670473084, "grad_norm": 0.15039196610450745, "learning_rate": 0.0006522999504822395, "loss": 0.1419, "num_input_tokens_seen": 121995664, "step": 56560 }, { "epoch": 9.227569331158238, "grad_norm": 0.00834047794342041, "learning_rate": 0.0006522321515536694, "loss": 0.0058, "num_input_tokens_seen": 122006192, "step": 56565 }, { "epoch": 9.228384991843393, "grad_norm": 0.007829360663890839, "learning_rate": 0.0006521643495399217, "loss": 0.0835, "num_input_tokens_seen": 122016240, "step": 56570 }, { "epoch": 9.229200652528547, "grad_norm": 0.038043905049562454, "learning_rate": 0.0006520965444423704, "loss": 0.0069, "num_input_tokens_seen": 122027920, "step": 56575 }, { "epoch": 9.230016313213703, "grad_norm": 1.0720384120941162, "learning_rate": 0.0006520287362623896, "loss": 0.2546, "num_input_tokens_seen": 122039056, "step": 56580 }, { "epoch": 9.230831973898859, "grad_norm": 0.03096429444849491, "learning_rate": 0.0006519609250013538, "loss": 0.0541, "num_input_tokens_seen": 122049424, "step": 56585 }, { "epoch": 9.231647634584013, "grad_norm": 0.12853571772575378, "learning_rate": 0.000651893110660637, "loss": 0.0419, "num_input_tokens_seen": 122058448, "step": 56590 }, { "epoch": 9.232463295269168, "grad_norm": 0.05009813979268074, "learning_rate": 0.0006518252932416135, "loss": 0.0603, "num_input_tokens_seen": 122069072, "step": 56595 }, { "epoch": 9.233278955954322, "grad_norm": 0.14057983458042145, "learning_rate": 0.0006517574727456579, "loss": 0.0278, "num_input_tokens_seen": 122080592, "step": 56600 }, { "epoch": 9.234094616639478, "grad_norm": 0.024889549240469933, "learning_rate": 0.0006516896491741446, "loss": 0.0334, "num_input_tokens_seen": 122091568, "step": 56605 }, { "epoch": 9.234910277324634, "grad_norm": 0.3494698107242584, "learning_rate": 0.000651621822528448, "loss": 0.0488, "num_input_tokens_seen": 122102000, "step": 56610 }, { "epoch": 9.235725938009788, "grad_norm": 0.10505068302154541, "learning_rate": 0.000651553992809943, "loss": 0.0194, "num_input_tokens_seen": 122112304, "step": 56615 }, { "epoch": 9.236541598694943, "grad_norm": 0.007188831921666861, "learning_rate": 0.0006514861600200039, "loss": 0.0469, "num_input_tokens_seen": 122124048, "step": 56620 }, { "epoch": 9.237357259380097, "grad_norm": 0.025524113327264786, "learning_rate": 0.0006514183241600057, "loss": 0.1798, "num_input_tokens_seen": 122135760, "step": 56625 }, { "epoch": 9.238172920065253, "grad_norm": 0.00880588497966528, "learning_rate": 0.000651350485231323, "loss": 0.0114, "num_input_tokens_seen": 122147536, "step": 56630 }, { "epoch": 9.238988580750409, "grad_norm": 0.022494468837976456, "learning_rate": 0.0006512826432353308, "loss": 0.0565, "num_input_tokens_seen": 122157872, "step": 56635 }, { "epoch": 9.239804241435563, "grad_norm": 0.0692828819155693, "learning_rate": 0.000651214798173404, "loss": 0.1149, "num_input_tokens_seen": 122169616, "step": 56640 }, { "epoch": 9.240619902120718, "grad_norm": 0.052207041531801224, "learning_rate": 0.0006511469500469173, "loss": 0.0292, "num_input_tokens_seen": 122179824, "step": 56645 }, { "epoch": 9.241435562805872, "grad_norm": 0.06759151071310043, "learning_rate": 0.0006510790988572459, "loss": 0.0275, "num_input_tokens_seen": 122191248, "step": 56650 }, { "epoch": 9.242251223491028, "grad_norm": 0.07803839445114136, "learning_rate": 0.0006510112446057651, "loss": 0.0108, "num_input_tokens_seen": 122201648, "step": 56655 }, { "epoch": 9.243066884176184, "grad_norm": 0.7878338098526001, "learning_rate": 0.0006509433872938497, "loss": 0.1154, "num_input_tokens_seen": 122213392, "step": 56660 }, { "epoch": 9.243882544861338, "grad_norm": 0.0501755028963089, "learning_rate": 0.0006508755269228752, "loss": 0.0544, "num_input_tokens_seen": 122224816, "step": 56665 }, { "epoch": 9.244698205546493, "grad_norm": 0.023624034598469734, "learning_rate": 0.0006508076634942167, "loss": 0.0747, "num_input_tokens_seen": 122235696, "step": 56670 }, { "epoch": 9.245513866231647, "grad_norm": 0.024630391970276833, "learning_rate": 0.0006507397970092496, "loss": 0.1033, "num_input_tokens_seen": 122246896, "step": 56675 }, { "epoch": 9.246329526916803, "grad_norm": 0.017960689961910248, "learning_rate": 0.0006506719274693492, "loss": 0.0081, "num_input_tokens_seen": 122257712, "step": 56680 }, { "epoch": 9.247145187601957, "grad_norm": 0.12112439423799515, "learning_rate": 0.0006506040548758911, "loss": 0.0816, "num_input_tokens_seen": 122268176, "step": 56685 }, { "epoch": 9.247960848287113, "grad_norm": 0.4968155324459076, "learning_rate": 0.0006505361792302509, "loss": 0.1149, "num_input_tokens_seen": 122279408, "step": 56690 }, { "epoch": 9.248776508972268, "grad_norm": 0.07462597638368607, "learning_rate": 0.0006504683005338039, "loss": 0.0991, "num_input_tokens_seen": 122290032, "step": 56695 }, { "epoch": 9.249592169657422, "grad_norm": 0.003403344890102744, "learning_rate": 0.0006504004187879259, "loss": 0.0164, "num_input_tokens_seen": 122301520, "step": 56700 }, { "epoch": 9.250407830342578, "grad_norm": 0.6421770453453064, "learning_rate": 0.0006503325339939927, "loss": 0.0322, "num_input_tokens_seen": 122311408, "step": 56705 }, { "epoch": 9.251223491027732, "grad_norm": 0.21597881615161896, "learning_rate": 0.0006502646461533798, "loss": 0.0258, "num_input_tokens_seen": 122321552, "step": 56710 }, { "epoch": 9.252039151712887, "grad_norm": 0.056042205542325974, "learning_rate": 0.0006501967552674635, "loss": 0.0752, "num_input_tokens_seen": 122332944, "step": 56715 }, { "epoch": 9.252854812398043, "grad_norm": 0.07725722342729568, "learning_rate": 0.0006501288613376193, "loss": 0.0181, "num_input_tokens_seen": 122344144, "step": 56720 }, { "epoch": 9.253670473083197, "grad_norm": 0.010933129116892815, "learning_rate": 0.0006500609643652234, "loss": 0.0471, "num_input_tokens_seen": 122355664, "step": 56725 }, { "epoch": 9.254486133768353, "grad_norm": 0.003501271363347769, "learning_rate": 0.0006499930643516514, "loss": 0.0152, "num_input_tokens_seen": 122365808, "step": 56730 }, { "epoch": 9.255301794453507, "grad_norm": 0.008863814175128937, "learning_rate": 0.0006499251612982798, "loss": 0.1294, "num_input_tokens_seen": 122376240, "step": 56735 }, { "epoch": 9.256117455138662, "grad_norm": 0.01273048110306263, "learning_rate": 0.0006498572552064847, "loss": 0.0439, "num_input_tokens_seen": 122386640, "step": 56740 }, { "epoch": 9.256933115823816, "grad_norm": 0.017029695212841034, "learning_rate": 0.0006497893460776421, "loss": 0.0456, "num_input_tokens_seen": 122396688, "step": 56745 }, { "epoch": 9.257748776508972, "grad_norm": 0.22377410531044006, "learning_rate": 0.0006497214339131284, "loss": 0.0228, "num_input_tokens_seen": 122406640, "step": 56750 }, { "epoch": 9.258564437194128, "grad_norm": 0.0073784636333584785, "learning_rate": 0.00064965351871432, "loss": 0.0344, "num_input_tokens_seen": 122416592, "step": 56755 }, { "epoch": 9.259380097879282, "grad_norm": 0.024513857439160347, "learning_rate": 0.0006495856004825931, "loss": 0.0295, "num_input_tokens_seen": 122425296, "step": 56760 }, { "epoch": 9.260195758564437, "grad_norm": 0.03520720824599266, "learning_rate": 0.0006495176792193243, "loss": 0.0292, "num_input_tokens_seen": 122436272, "step": 56765 }, { "epoch": 9.261011419249591, "grad_norm": 0.1064448133111, "learning_rate": 0.00064944975492589, "loss": 0.0355, "num_input_tokens_seen": 122447184, "step": 56770 }, { "epoch": 9.261827079934747, "grad_norm": 0.33933040499687195, "learning_rate": 0.0006493818276036669, "loss": 0.0844, "num_input_tokens_seen": 122458768, "step": 56775 }, { "epoch": 9.262642740619903, "grad_norm": 0.4600701630115509, "learning_rate": 0.0006493138972540316, "loss": 0.0388, "num_input_tokens_seen": 122469264, "step": 56780 }, { "epoch": 9.263458401305057, "grad_norm": 0.011696219444274902, "learning_rate": 0.0006492459638783606, "loss": 0.1274, "num_input_tokens_seen": 122480720, "step": 56785 }, { "epoch": 9.264274061990212, "grad_norm": 0.08828704059123993, "learning_rate": 0.0006491780274780308, "loss": 0.03, "num_input_tokens_seen": 122491568, "step": 56790 }, { "epoch": 9.265089722675366, "grad_norm": 0.0105497557669878, "learning_rate": 0.0006491100880544191, "loss": 0.3333, "num_input_tokens_seen": 122501840, "step": 56795 }, { "epoch": 9.265905383360522, "grad_norm": 0.03120383433997631, "learning_rate": 0.0006490421456089023, "loss": 0.1712, "num_input_tokens_seen": 122512048, "step": 56800 }, { "epoch": 9.266721044045678, "grad_norm": 0.015419309027493, "learning_rate": 0.0006489742001428573, "loss": 0.0177, "num_input_tokens_seen": 122523408, "step": 56805 }, { "epoch": 9.267536704730832, "grad_norm": 0.3863442540168762, "learning_rate": 0.0006489062516576613, "loss": 0.1343, "num_input_tokens_seen": 122535184, "step": 56810 }, { "epoch": 9.268352365415987, "grad_norm": 0.40975114703178406, "learning_rate": 0.0006488383001546911, "loss": 0.0307, "num_input_tokens_seen": 122546768, "step": 56815 }, { "epoch": 9.269168026101141, "grad_norm": 0.023028668016195297, "learning_rate": 0.000648770345635324, "loss": 0.0379, "num_input_tokens_seen": 122557520, "step": 56820 }, { "epoch": 9.269983686786297, "grad_norm": 0.01185005996376276, "learning_rate": 0.000648702388100937, "loss": 0.0084, "num_input_tokens_seen": 122568176, "step": 56825 }, { "epoch": 9.270799347471453, "grad_norm": 0.20064646005630493, "learning_rate": 0.0006486344275529076, "loss": 0.0608, "num_input_tokens_seen": 122578096, "step": 56830 }, { "epoch": 9.271615008156607, "grad_norm": 0.004733366426080465, "learning_rate": 0.0006485664639926128, "loss": 0.0871, "num_input_tokens_seen": 122588560, "step": 56835 }, { "epoch": 9.272430668841762, "grad_norm": 0.010005169548094273, "learning_rate": 0.0006484984974214303, "loss": 0.0089, "num_input_tokens_seen": 122599280, "step": 56840 }, { "epoch": 9.273246329526916, "grad_norm": 0.4582314193248749, "learning_rate": 0.0006484305278407373, "loss": 0.1276, "num_input_tokens_seen": 122610576, "step": 56845 }, { "epoch": 9.274061990212072, "grad_norm": 0.0036550303921103477, "learning_rate": 0.0006483625552519114, "loss": 0.2215, "num_input_tokens_seen": 122620880, "step": 56850 }, { "epoch": 9.274877650897226, "grad_norm": 0.2178775519132614, "learning_rate": 0.00064829457965633, "loss": 0.0144, "num_input_tokens_seen": 122630576, "step": 56855 }, { "epoch": 9.275693311582382, "grad_norm": 0.7681192755699158, "learning_rate": 0.0006482266010553707, "loss": 0.2008, "num_input_tokens_seen": 122642384, "step": 56860 }, { "epoch": 9.276508972267537, "grad_norm": 0.013574379496276379, "learning_rate": 0.0006481586194504117, "loss": 0.0141, "num_input_tokens_seen": 122654416, "step": 56865 }, { "epoch": 9.277324632952691, "grad_norm": 0.38579100370407104, "learning_rate": 0.00064809063484283, "loss": 0.198, "num_input_tokens_seen": 122666352, "step": 56870 }, { "epoch": 9.278140293637847, "grad_norm": 0.1396300047636032, "learning_rate": 0.0006480226472340039, "loss": 0.1603, "num_input_tokens_seen": 122677840, "step": 56875 }, { "epoch": 9.278955954323001, "grad_norm": 0.45370587706565857, "learning_rate": 0.0006479546566253109, "loss": 0.1007, "num_input_tokens_seen": 122688528, "step": 56880 }, { "epoch": 9.279771615008157, "grad_norm": 0.13991548120975494, "learning_rate": 0.0006478866630181293, "loss": 0.0452, "num_input_tokens_seen": 122699088, "step": 56885 }, { "epoch": 9.280587275693312, "grad_norm": 0.021167393773794174, "learning_rate": 0.0006478186664138366, "loss": 0.0199, "num_input_tokens_seen": 122710096, "step": 56890 }, { "epoch": 9.281402936378466, "grad_norm": 0.013311784714460373, "learning_rate": 0.0006477506668138113, "loss": 0.0417, "num_input_tokens_seen": 122720720, "step": 56895 }, { "epoch": 9.282218597063622, "grad_norm": 0.04477938264608383, "learning_rate": 0.0006476826642194313, "loss": 0.0212, "num_input_tokens_seen": 122731664, "step": 56900 }, { "epoch": 9.283034257748776, "grad_norm": 0.006502446718513966, "learning_rate": 0.0006476146586320747, "loss": 0.0441, "num_input_tokens_seen": 122741360, "step": 56905 }, { "epoch": 9.283849918433932, "grad_norm": 0.02574063464999199, "learning_rate": 0.0006475466500531198, "loss": 0.0381, "num_input_tokens_seen": 122752208, "step": 56910 }, { "epoch": 9.284665579119087, "grad_norm": 0.011732914485037327, "learning_rate": 0.0006474786384839448, "loss": 0.1223, "num_input_tokens_seen": 122762384, "step": 56915 }, { "epoch": 9.285481239804241, "grad_norm": 0.45654475688934326, "learning_rate": 0.0006474106239259282, "loss": 0.0727, "num_input_tokens_seen": 122773552, "step": 56920 }, { "epoch": 9.286296900489397, "grad_norm": 0.026349002495408058, "learning_rate": 0.0006473426063804483, "loss": 0.0107, "num_input_tokens_seen": 122784688, "step": 56925 }, { "epoch": 9.28711256117455, "grad_norm": 0.08995307981967926, "learning_rate": 0.0006472745858488835, "loss": 0.1043, "num_input_tokens_seen": 122796272, "step": 56930 }, { "epoch": 9.287928221859707, "grad_norm": 0.009724489413201809, "learning_rate": 0.0006472065623326123, "loss": 0.0668, "num_input_tokens_seen": 122807216, "step": 56935 }, { "epoch": 9.28874388254486, "grad_norm": 0.04950760677456856, "learning_rate": 0.0006471385358330135, "loss": 0.0113, "num_input_tokens_seen": 122817104, "step": 56940 }, { "epoch": 9.289559543230016, "grad_norm": 0.07824046164751053, "learning_rate": 0.0006470705063514656, "loss": 0.1202, "num_input_tokens_seen": 122828400, "step": 56945 }, { "epoch": 9.290375203915172, "grad_norm": 0.008284686133265495, "learning_rate": 0.0006470024738893473, "loss": 0.0244, "num_input_tokens_seen": 122838768, "step": 56950 }, { "epoch": 9.291190864600326, "grad_norm": 0.04573056101799011, "learning_rate": 0.0006469344384480374, "loss": 0.0142, "num_input_tokens_seen": 122849872, "step": 56955 }, { "epoch": 9.292006525285482, "grad_norm": 0.07165543735027313, "learning_rate": 0.0006468664000289147, "loss": 0.0659, "num_input_tokens_seen": 122860944, "step": 56960 }, { "epoch": 9.292822185970635, "grad_norm": 0.056287769228219986, "learning_rate": 0.000646798358633358, "loss": 0.0151, "num_input_tokens_seen": 122870576, "step": 56965 }, { "epoch": 9.293637846655791, "grad_norm": 0.264313668012619, "learning_rate": 0.0006467303142627465, "loss": 0.129, "num_input_tokens_seen": 122880656, "step": 56970 }, { "epoch": 9.294453507340947, "grad_norm": 0.41615694761276245, "learning_rate": 0.0006466622669184589, "loss": 0.1016, "num_input_tokens_seen": 122891472, "step": 56975 }, { "epoch": 9.2952691680261, "grad_norm": 0.029547644779086113, "learning_rate": 0.0006465942166018745, "loss": 0.0413, "num_input_tokens_seen": 122901072, "step": 56980 }, { "epoch": 9.296084828711257, "grad_norm": 0.08821138739585876, "learning_rate": 0.0006465261633143722, "loss": 0.0173, "num_input_tokens_seen": 122912016, "step": 56985 }, { "epoch": 9.29690048939641, "grad_norm": 0.5565441846847534, "learning_rate": 0.0006464581070573315, "loss": 0.1141, "num_input_tokens_seen": 122924176, "step": 56990 }, { "epoch": 9.297716150081566, "grad_norm": 0.025371788069605827, "learning_rate": 0.0006463900478321314, "loss": 0.0174, "num_input_tokens_seen": 122935504, "step": 56995 }, { "epoch": 9.298531810766722, "grad_norm": 0.300371378660202, "learning_rate": 0.0006463219856401513, "loss": 0.0753, "num_input_tokens_seen": 122945296, "step": 57000 }, { "epoch": 9.299347471451876, "grad_norm": 0.11130575090646744, "learning_rate": 0.0006462539204827705, "loss": 0.041, "num_input_tokens_seen": 122955888, "step": 57005 }, { "epoch": 9.300163132137031, "grad_norm": 0.23694032430648804, "learning_rate": 0.0006461858523613684, "loss": 0.0248, "num_input_tokens_seen": 122967024, "step": 57010 }, { "epoch": 9.300978792822185, "grad_norm": 0.00780130922794342, "learning_rate": 0.0006461177812773246, "loss": 0.1337, "num_input_tokens_seen": 122978160, "step": 57015 }, { "epoch": 9.301794453507341, "grad_norm": 0.13078653812408447, "learning_rate": 0.0006460497072320186, "loss": 0.0523, "num_input_tokens_seen": 122989872, "step": 57020 }, { "epoch": 9.302610114192497, "grad_norm": 0.026239726692438126, "learning_rate": 0.00064598163022683, "loss": 0.072, "num_input_tokens_seen": 123001104, "step": 57025 }, { "epoch": 9.30342577487765, "grad_norm": 0.029076118022203445, "learning_rate": 0.0006459135502631386, "loss": 0.0096, "num_input_tokens_seen": 123012464, "step": 57030 }, { "epoch": 9.304241435562806, "grad_norm": 0.28119784593582153, "learning_rate": 0.0006458454673423238, "loss": 0.0296, "num_input_tokens_seen": 123024592, "step": 57035 }, { "epoch": 9.30505709624796, "grad_norm": 0.018870066851377487, "learning_rate": 0.0006457773814657657, "loss": 0.1334, "num_input_tokens_seen": 123035696, "step": 57040 }, { "epoch": 9.305872756933116, "grad_norm": 0.35876163840293884, "learning_rate": 0.000645709292634844, "loss": 0.0933, "num_input_tokens_seen": 123046384, "step": 57045 }, { "epoch": 9.30668841761827, "grad_norm": 0.1517433226108551, "learning_rate": 0.0006456412008509387, "loss": 0.0642, "num_input_tokens_seen": 123056752, "step": 57050 }, { "epoch": 9.307504078303426, "grad_norm": 0.19025282561779022, "learning_rate": 0.0006455731061154297, "loss": 0.0607, "num_input_tokens_seen": 123067728, "step": 57055 }, { "epoch": 9.308319738988581, "grad_norm": 0.05165403336286545, "learning_rate": 0.0006455050084296969, "loss": 0.0199, "num_input_tokens_seen": 123077744, "step": 57060 }, { "epoch": 9.309135399673735, "grad_norm": 0.011419638060033321, "learning_rate": 0.0006454369077951206, "loss": 0.0094, "num_input_tokens_seen": 123086576, "step": 57065 }, { "epoch": 9.309951060358891, "grad_norm": 0.4941067695617676, "learning_rate": 0.0006453688042130808, "loss": 0.1179, "num_input_tokens_seen": 123097744, "step": 57070 }, { "epoch": 9.310766721044045, "grad_norm": 0.03404427692294121, "learning_rate": 0.0006453006976849578, "loss": 0.1026, "num_input_tokens_seen": 123107664, "step": 57075 }, { "epoch": 9.3115823817292, "grad_norm": 0.00698222778737545, "learning_rate": 0.0006452325882121319, "loss": 0.2036, "num_input_tokens_seen": 123117648, "step": 57080 }, { "epoch": 9.312398042414356, "grad_norm": 0.01035995688289404, "learning_rate": 0.0006451644757959834, "loss": 0.0073, "num_input_tokens_seen": 123128624, "step": 57085 }, { "epoch": 9.31321370309951, "grad_norm": 1.192231297492981, "learning_rate": 0.0006450963604378926, "loss": 0.0995, "num_input_tokens_seen": 123139536, "step": 57090 }, { "epoch": 9.314029363784666, "grad_norm": 0.44455286860466003, "learning_rate": 0.0006450282421392399, "loss": 0.2192, "num_input_tokens_seen": 123149776, "step": 57095 }, { "epoch": 9.31484502446982, "grad_norm": 0.3648841977119446, "learning_rate": 0.0006449601209014059, "loss": 0.0238, "num_input_tokens_seen": 123161104, "step": 57100 }, { "epoch": 9.315660685154976, "grad_norm": 0.36085715889930725, "learning_rate": 0.0006448919967257711, "loss": 0.0585, "num_input_tokens_seen": 123171664, "step": 57105 }, { "epoch": 9.31647634584013, "grad_norm": 0.10995535552501678, "learning_rate": 0.0006448238696137163, "loss": 0.0163, "num_input_tokens_seen": 123183664, "step": 57110 }, { "epoch": 9.317292006525285, "grad_norm": 0.06522618234157562, "learning_rate": 0.0006447557395666221, "loss": 0.0984, "num_input_tokens_seen": 123194064, "step": 57115 }, { "epoch": 9.318107667210441, "grad_norm": 0.4014452397823334, "learning_rate": 0.0006446876065858691, "loss": 0.1651, "num_input_tokens_seen": 123205872, "step": 57120 }, { "epoch": 9.318923327895595, "grad_norm": 0.033853624016046524, "learning_rate": 0.0006446194706728383, "loss": 0.0117, "num_input_tokens_seen": 123215888, "step": 57125 }, { "epoch": 9.31973898858075, "grad_norm": 0.031119395047426224, "learning_rate": 0.0006445513318289104, "loss": 0.0493, "num_input_tokens_seen": 123226128, "step": 57130 }, { "epoch": 9.320554649265905, "grad_norm": 0.17172104120254517, "learning_rate": 0.0006444831900554664, "loss": 0.0795, "num_input_tokens_seen": 123237328, "step": 57135 }, { "epoch": 9.32137030995106, "grad_norm": 0.02058544009923935, "learning_rate": 0.0006444150453538873, "loss": 0.0128, "num_input_tokens_seen": 123248752, "step": 57140 }, { "epoch": 9.322185970636216, "grad_norm": 0.04754265397787094, "learning_rate": 0.000644346897725554, "loss": 0.2659, "num_input_tokens_seen": 123259920, "step": 57145 }, { "epoch": 9.32300163132137, "grad_norm": 0.22700241208076477, "learning_rate": 0.0006442787471718479, "loss": 0.0872, "num_input_tokens_seen": 123270320, "step": 57150 }, { "epoch": 9.323817292006526, "grad_norm": 0.44227737188339233, "learning_rate": 0.0006442105936941498, "loss": 0.0851, "num_input_tokens_seen": 123279600, "step": 57155 }, { "epoch": 9.32463295269168, "grad_norm": 0.021930988878011703, "learning_rate": 0.000644142437293841, "loss": 0.065, "num_input_tokens_seen": 123289776, "step": 57160 }, { "epoch": 9.325448613376835, "grad_norm": 0.014614114537835121, "learning_rate": 0.000644074277972303, "loss": 0.1218, "num_input_tokens_seen": 123301584, "step": 57165 }, { "epoch": 9.326264274061991, "grad_norm": 0.12655009329319, "learning_rate": 0.000644006115730917, "loss": 0.1067, "num_input_tokens_seen": 123313072, "step": 57170 }, { "epoch": 9.327079934747145, "grad_norm": 0.05467505007982254, "learning_rate": 0.000643937950571064, "loss": 0.1367, "num_input_tokens_seen": 123323056, "step": 57175 }, { "epoch": 9.3278955954323, "grad_norm": 1.0105386972427368, "learning_rate": 0.0006438697824941263, "loss": 0.0498, "num_input_tokens_seen": 123335056, "step": 57180 }, { "epoch": 9.328711256117455, "grad_norm": 0.40358859300613403, "learning_rate": 0.0006438016115014848, "loss": 0.0468, "num_input_tokens_seen": 123346320, "step": 57185 }, { "epoch": 9.32952691680261, "grad_norm": 0.2465229034423828, "learning_rate": 0.0006437334375945212, "loss": 0.1045, "num_input_tokens_seen": 123355408, "step": 57190 }, { "epoch": 9.330342577487766, "grad_norm": 0.0859232097864151, "learning_rate": 0.0006436652607746171, "loss": 0.021, "num_input_tokens_seen": 123367792, "step": 57195 }, { "epoch": 9.33115823817292, "grad_norm": 0.019869893789291382, "learning_rate": 0.0006435970810431544, "loss": 0.0491, "num_input_tokens_seen": 123378928, "step": 57200 }, { "epoch": 9.331973898858076, "grad_norm": 0.22227971255779266, "learning_rate": 0.0006435288984015146, "loss": 0.0346, "num_input_tokens_seen": 123390800, "step": 57205 }, { "epoch": 9.33278955954323, "grad_norm": 0.28912827372550964, "learning_rate": 0.0006434607128510796, "loss": 0.0614, "num_input_tokens_seen": 123403408, "step": 57210 }, { "epoch": 9.333605220228385, "grad_norm": 0.09093629568815231, "learning_rate": 0.0006433925243932312, "loss": 0.0793, "num_input_tokens_seen": 123414608, "step": 57215 }, { "epoch": 9.33442088091354, "grad_norm": 0.5172942876815796, "learning_rate": 0.0006433243330293514, "loss": 0.1913, "num_input_tokens_seen": 123425520, "step": 57220 }, { "epoch": 9.335236541598695, "grad_norm": 0.017480924725532532, "learning_rate": 0.0006432561387608222, "loss": 0.1542, "num_input_tokens_seen": 123434768, "step": 57225 }, { "epoch": 9.33605220228385, "grad_norm": 0.1213253065943718, "learning_rate": 0.0006431879415890256, "loss": 0.0174, "num_input_tokens_seen": 123445552, "step": 57230 }, { "epoch": 9.336867862969005, "grad_norm": 0.04973814636468887, "learning_rate": 0.0006431197415153437, "loss": 0.0323, "num_input_tokens_seen": 123456464, "step": 57235 }, { "epoch": 9.33768352365416, "grad_norm": 0.06378571689128876, "learning_rate": 0.0006430515385411588, "loss": 0.0131, "num_input_tokens_seen": 123466480, "step": 57240 }, { "epoch": 9.338499184339314, "grad_norm": 0.2103002816438675, "learning_rate": 0.0006429833326678529, "loss": 0.1055, "num_input_tokens_seen": 123476752, "step": 57245 }, { "epoch": 9.33931484502447, "grad_norm": 0.07770740985870361, "learning_rate": 0.0006429151238968083, "loss": 0.0171, "num_input_tokens_seen": 123488240, "step": 57250 }, { "epoch": 9.340130505709626, "grad_norm": 0.018016302958130836, "learning_rate": 0.0006428469122294075, "loss": 0.033, "num_input_tokens_seen": 123498544, "step": 57255 }, { "epoch": 9.34094616639478, "grad_norm": 0.425741583108902, "learning_rate": 0.0006427786976670328, "loss": 0.0745, "num_input_tokens_seen": 123509264, "step": 57260 }, { "epoch": 9.341761827079935, "grad_norm": 0.04910760745406151, "learning_rate": 0.0006427104802110667, "loss": 0.0348, "num_input_tokens_seen": 123521168, "step": 57265 }, { "epoch": 9.34257748776509, "grad_norm": 0.028089556843042374, "learning_rate": 0.0006426422598628916, "loss": 0.0097, "num_input_tokens_seen": 123533072, "step": 57270 }, { "epoch": 9.343393148450245, "grad_norm": 0.6508845686912537, "learning_rate": 0.0006425740366238903, "loss": 0.0651, "num_input_tokens_seen": 123544944, "step": 57275 }, { "epoch": 9.3442088091354, "grad_norm": 0.017835708335042, "learning_rate": 0.0006425058104954451, "loss": 0.0872, "num_input_tokens_seen": 123555344, "step": 57280 }, { "epoch": 9.345024469820554, "grad_norm": 0.009106715209782124, "learning_rate": 0.0006424375814789388, "loss": 0.0535, "num_input_tokens_seen": 123565872, "step": 57285 }, { "epoch": 9.34584013050571, "grad_norm": 0.6868069171905518, "learning_rate": 0.0006423693495757545, "loss": 0.2307, "num_input_tokens_seen": 123577040, "step": 57290 }, { "epoch": 9.346655791190864, "grad_norm": 0.025291142985224724, "learning_rate": 0.0006423011147872745, "loss": 0.204, "num_input_tokens_seen": 123587664, "step": 57295 }, { "epoch": 9.34747145187602, "grad_norm": 0.038538653403520584, "learning_rate": 0.000642232877114882, "loss": 0.0151, "num_input_tokens_seen": 123597488, "step": 57300 }, { "epoch": 9.348287112561174, "grad_norm": 0.01078367792069912, "learning_rate": 0.0006421646365599597, "loss": 0.0076, "num_input_tokens_seen": 123607024, "step": 57305 }, { "epoch": 9.34910277324633, "grad_norm": 0.24330471456050873, "learning_rate": 0.0006420963931238907, "loss": 0.0482, "num_input_tokens_seen": 123616720, "step": 57310 }, { "epoch": 9.349918433931485, "grad_norm": 0.040181562304496765, "learning_rate": 0.0006420281468080582, "loss": 0.1466, "num_input_tokens_seen": 123627312, "step": 57315 }, { "epoch": 9.350734094616639, "grad_norm": 0.028878478333353996, "learning_rate": 0.0006419598976138451, "loss": 0.054, "num_input_tokens_seen": 123640016, "step": 57320 }, { "epoch": 9.351549755301795, "grad_norm": 0.038068994879722595, "learning_rate": 0.0006418916455426344, "loss": 0.0627, "num_input_tokens_seen": 123651056, "step": 57325 }, { "epoch": 9.352365415986949, "grad_norm": 0.013620851561427116, "learning_rate": 0.0006418233905958097, "loss": 0.1344, "num_input_tokens_seen": 123660912, "step": 57330 }, { "epoch": 9.353181076672104, "grad_norm": 0.6204990148544312, "learning_rate": 0.000641755132774754, "loss": 0.0533, "num_input_tokens_seen": 123672048, "step": 57335 }, { "epoch": 9.35399673735726, "grad_norm": 0.09751417487859726, "learning_rate": 0.0006416868720808507, "loss": 0.0495, "num_input_tokens_seen": 123682832, "step": 57340 }, { "epoch": 9.354812398042414, "grad_norm": 0.38898274302482605, "learning_rate": 0.0006416186085154833, "loss": 0.0838, "num_input_tokens_seen": 123694384, "step": 57345 }, { "epoch": 9.35562805872757, "grad_norm": 0.07236102968454361, "learning_rate": 0.0006415503420800349, "loss": 0.0266, "num_input_tokens_seen": 123705680, "step": 57350 }, { "epoch": 9.356443719412724, "grad_norm": 0.16955524682998657, "learning_rate": 0.0006414820727758894, "loss": 0.0623, "num_input_tokens_seen": 123717360, "step": 57355 }, { "epoch": 9.35725938009788, "grad_norm": 0.12147585302591324, "learning_rate": 0.0006414138006044303, "loss": 0.0351, "num_input_tokens_seen": 123728720, "step": 57360 }, { "epoch": 9.358075040783035, "grad_norm": 0.03415464237332344, "learning_rate": 0.0006413455255670409, "loss": 0.0264, "num_input_tokens_seen": 123739216, "step": 57365 }, { "epoch": 9.358890701468189, "grad_norm": 0.061313025653362274, "learning_rate": 0.0006412772476651053, "loss": 0.0183, "num_input_tokens_seen": 123750864, "step": 57370 }, { "epoch": 9.359706362153345, "grad_norm": 0.38240087032318115, "learning_rate": 0.0006412089669000071, "loss": 0.0341, "num_input_tokens_seen": 123761936, "step": 57375 }, { "epoch": 9.360522022838499, "grad_norm": 0.48320963978767395, "learning_rate": 0.0006411406832731299, "loss": 0.0619, "num_input_tokens_seen": 123773360, "step": 57380 }, { "epoch": 9.361337683523654, "grad_norm": 0.005240228958427906, "learning_rate": 0.0006410723967858577, "loss": 0.0811, "num_input_tokens_seen": 123783120, "step": 57385 }, { "epoch": 9.362153344208808, "grad_norm": 0.015332483686506748, "learning_rate": 0.0006410041074395744, "loss": 0.043, "num_input_tokens_seen": 123794224, "step": 57390 }, { "epoch": 9.362969004893964, "grad_norm": 0.14019006490707397, "learning_rate": 0.0006409358152356642, "loss": 0.0169, "num_input_tokens_seen": 123805776, "step": 57395 }, { "epoch": 9.36378466557912, "grad_norm": 0.18988363444805145, "learning_rate": 0.0006408675201755107, "loss": 0.0332, "num_input_tokens_seen": 123816272, "step": 57400 }, { "epoch": 9.364600326264274, "grad_norm": 0.4451763331890106, "learning_rate": 0.0006407992222604983, "loss": 0.0892, "num_input_tokens_seen": 123827568, "step": 57405 }, { "epoch": 9.36541598694943, "grad_norm": 0.2533315122127533, "learning_rate": 0.000640730921492011, "loss": 0.0909, "num_input_tokens_seen": 123838192, "step": 57410 }, { "epoch": 9.366231647634583, "grad_norm": 0.4109003245830536, "learning_rate": 0.000640662617871433, "loss": 0.1779, "num_input_tokens_seen": 123847664, "step": 57415 }, { "epoch": 9.367047308319739, "grad_norm": 0.40724319219589233, "learning_rate": 0.0006405943114001486, "loss": 0.056, "num_input_tokens_seen": 123859152, "step": 57420 }, { "epoch": 9.367862969004895, "grad_norm": 0.01567763090133667, "learning_rate": 0.0006405260020795421, "loss": 0.0229, "num_input_tokens_seen": 123869968, "step": 57425 }, { "epoch": 9.368678629690049, "grad_norm": 0.09415046125650406, "learning_rate": 0.0006404576899109981, "loss": 0.0279, "num_input_tokens_seen": 123882416, "step": 57430 }, { "epoch": 9.369494290375204, "grad_norm": 0.08391381800174713, "learning_rate": 0.0006403893748959007, "loss": 0.0272, "num_input_tokens_seen": 123894544, "step": 57435 }, { "epoch": 9.370309951060358, "grad_norm": 0.5241559743881226, "learning_rate": 0.0006403210570356346, "loss": 0.1437, "num_input_tokens_seen": 123905072, "step": 57440 }, { "epoch": 9.371125611745514, "grad_norm": 0.004599989391863346, "learning_rate": 0.0006402527363315843, "loss": 0.0846, "num_input_tokens_seen": 123916336, "step": 57445 }, { "epoch": 9.37194127243067, "grad_norm": 0.43736234307289124, "learning_rate": 0.0006401844127851342, "loss": 0.1823, "num_input_tokens_seen": 123925808, "step": 57450 }, { "epoch": 9.372756933115824, "grad_norm": 0.5837869048118591, "learning_rate": 0.0006401160863976691, "loss": 0.1101, "num_input_tokens_seen": 123936400, "step": 57455 }, { "epoch": 9.37357259380098, "grad_norm": 0.018402792513370514, "learning_rate": 0.000640047757170574, "loss": 0.0238, "num_input_tokens_seen": 123948560, "step": 57460 }, { "epoch": 9.374388254486133, "grad_norm": 0.13549116253852844, "learning_rate": 0.0006399794251052333, "loss": 0.1162, "num_input_tokens_seen": 123959984, "step": 57465 }, { "epoch": 9.375203915171289, "grad_norm": 0.14438430964946747, "learning_rate": 0.000639911090203032, "loss": 0.0296, "num_input_tokens_seen": 123970960, "step": 57470 }, { "epoch": 9.376019575856443, "grad_norm": 0.08182753622531891, "learning_rate": 0.000639842752465355, "loss": 0.0398, "num_input_tokens_seen": 123982064, "step": 57475 }, { "epoch": 9.376835236541599, "grad_norm": 0.06440489739179611, "learning_rate": 0.0006397744118935871, "loss": 0.0158, "num_input_tokens_seen": 123992176, "step": 57480 }, { "epoch": 9.377650897226754, "grad_norm": 0.03893844038248062, "learning_rate": 0.0006397060684891136, "loss": 0.0228, "num_input_tokens_seen": 124002704, "step": 57485 }, { "epoch": 9.378466557911908, "grad_norm": 0.06627460569143295, "learning_rate": 0.0006396377222533192, "loss": 0.0219, "num_input_tokens_seen": 124013296, "step": 57490 }, { "epoch": 9.379282218597064, "grad_norm": 0.025130294263362885, "learning_rate": 0.0006395693731875892, "loss": 0.0597, "num_input_tokens_seen": 124022960, "step": 57495 }, { "epoch": 9.380097879282218, "grad_norm": 0.013595676980912685, "learning_rate": 0.000639501021293309, "loss": 0.3181, "num_input_tokens_seen": 124032560, "step": 57500 }, { "epoch": 9.380913539967374, "grad_norm": 0.2390897572040558, "learning_rate": 0.0006394326665718635, "loss": 0.0253, "num_input_tokens_seen": 124043472, "step": 57505 }, { "epoch": 9.38172920065253, "grad_norm": 0.3374215066432953, "learning_rate": 0.0006393643090246381, "loss": 0.1221, "num_input_tokens_seen": 124053808, "step": 57510 }, { "epoch": 9.382544861337683, "grad_norm": 0.004427711945027113, "learning_rate": 0.0006392959486530183, "loss": 0.013, "num_input_tokens_seen": 124064656, "step": 57515 }, { "epoch": 9.383360522022839, "grad_norm": 0.6444926261901855, "learning_rate": 0.0006392275854583894, "loss": 0.0635, "num_input_tokens_seen": 124074320, "step": 57520 }, { "epoch": 9.384176182707993, "grad_norm": 0.8443613648414612, "learning_rate": 0.0006391592194421367, "loss": 0.0447, "num_input_tokens_seen": 124085296, "step": 57525 }, { "epoch": 9.384991843393149, "grad_norm": 0.06378015130758286, "learning_rate": 0.0006390908506056461, "loss": 0.0348, "num_input_tokens_seen": 124097072, "step": 57530 }, { "epoch": 9.385807504078304, "grad_norm": 0.20636975765228271, "learning_rate": 0.0006390224789503028, "loss": 0.1039, "num_input_tokens_seen": 124107280, "step": 57535 }, { "epoch": 9.386623164763458, "grad_norm": 0.4503277838230133, "learning_rate": 0.0006389541044774927, "loss": 0.0775, "num_input_tokens_seen": 124117840, "step": 57540 }, { "epoch": 9.387438825448614, "grad_norm": 0.4764765202999115, "learning_rate": 0.0006388857271886013, "loss": 0.0723, "num_input_tokens_seen": 124128496, "step": 57545 }, { "epoch": 9.388254486133768, "grad_norm": 0.010932033881545067, "learning_rate": 0.0006388173470850144, "loss": 0.0659, "num_input_tokens_seen": 124139856, "step": 57550 }, { "epoch": 9.389070146818923, "grad_norm": 0.019419420510530472, "learning_rate": 0.0006387489641681181, "loss": 0.0335, "num_input_tokens_seen": 124151376, "step": 57555 }, { "epoch": 9.38988580750408, "grad_norm": 0.6956990957260132, "learning_rate": 0.0006386805784392978, "loss": 0.0838, "num_input_tokens_seen": 124161744, "step": 57560 }, { "epoch": 9.390701468189233, "grad_norm": 0.02534393034875393, "learning_rate": 0.0006386121898999397, "loss": 0.125, "num_input_tokens_seen": 124171632, "step": 57565 }, { "epoch": 9.391517128874389, "grad_norm": 0.5467933416366577, "learning_rate": 0.0006385437985514297, "loss": 0.0598, "num_input_tokens_seen": 124182224, "step": 57570 }, { "epoch": 9.392332789559543, "grad_norm": 0.881614089012146, "learning_rate": 0.000638475404395154, "loss": 0.1352, "num_input_tokens_seen": 124194544, "step": 57575 }, { "epoch": 9.393148450244698, "grad_norm": 0.6056075096130371, "learning_rate": 0.0006384070074324984, "loss": 0.282, "num_input_tokens_seen": 124205104, "step": 57580 }, { "epoch": 9.393964110929852, "grad_norm": 0.00881397444754839, "learning_rate": 0.0006383386076648494, "loss": 0.0518, "num_input_tokens_seen": 124215664, "step": 57585 }, { "epoch": 9.394779771615008, "grad_norm": 0.880608081817627, "learning_rate": 0.0006382702050935929, "loss": 0.1697, "num_input_tokens_seen": 124226160, "step": 57590 }, { "epoch": 9.395595432300164, "grad_norm": 0.6658081412315369, "learning_rate": 0.0006382017997201152, "loss": 0.2338, "num_input_tokens_seen": 124236272, "step": 57595 }, { "epoch": 9.396411092985318, "grad_norm": 0.19590553641319275, "learning_rate": 0.000638133391545803, "loss": 0.0354, "num_input_tokens_seen": 124247120, "step": 57600 }, { "epoch": 9.397226753670473, "grad_norm": 0.05010209232568741, "learning_rate": 0.000638064980572042, "loss": 0.0427, "num_input_tokens_seen": 124256624, "step": 57605 }, { "epoch": 9.398042414355627, "grad_norm": 0.21968361735343933, "learning_rate": 0.0006379965668002192, "loss": 0.0481, "num_input_tokens_seen": 124267024, "step": 57610 }, { "epoch": 9.398858075040783, "grad_norm": 0.02262178249657154, "learning_rate": 0.0006379281502317209, "loss": 0.0459, "num_input_tokens_seen": 124278544, "step": 57615 }, { "epoch": 9.399673735725939, "grad_norm": 0.21410885453224182, "learning_rate": 0.0006378597308679338, "loss": 0.0255, "num_input_tokens_seen": 124289680, "step": 57620 }, { "epoch": 9.400489396411093, "grad_norm": 0.02469925582408905, "learning_rate": 0.0006377913087102443, "loss": 0.0519, "num_input_tokens_seen": 124300080, "step": 57625 }, { "epoch": 9.401305057096248, "grad_norm": 0.03311479836702347, "learning_rate": 0.0006377228837600391, "loss": 0.1647, "num_input_tokens_seen": 124311536, "step": 57630 }, { "epoch": 9.402120717781402, "grad_norm": 0.02629929594695568, "learning_rate": 0.0006376544560187049, "loss": 0.0301, "num_input_tokens_seen": 124322000, "step": 57635 }, { "epoch": 9.402936378466558, "grad_norm": 0.01878642477095127, "learning_rate": 0.0006375860254876286, "loss": 0.1204, "num_input_tokens_seen": 124333360, "step": 57640 }, { "epoch": 9.403752039151712, "grad_norm": 0.14889688789844513, "learning_rate": 0.0006375175921681968, "loss": 0.0396, "num_input_tokens_seen": 124345520, "step": 57645 }, { "epoch": 9.404567699836868, "grad_norm": 0.31262609362602234, "learning_rate": 0.0006374491560617967, "loss": 0.0697, "num_input_tokens_seen": 124356240, "step": 57650 }, { "epoch": 9.405383360522023, "grad_norm": 0.15785035490989685, "learning_rate": 0.0006373807171698151, "loss": 0.0383, "num_input_tokens_seen": 124367376, "step": 57655 }, { "epoch": 9.406199021207177, "grad_norm": 0.6111378073692322, "learning_rate": 0.0006373122754936389, "loss": 0.2585, "num_input_tokens_seen": 124378160, "step": 57660 }, { "epoch": 9.407014681892333, "grad_norm": 0.05887046456336975, "learning_rate": 0.0006372438310346553, "loss": 0.0406, "num_input_tokens_seen": 124388784, "step": 57665 }, { "epoch": 9.407830342577487, "grad_norm": 0.17689675092697144, "learning_rate": 0.0006371753837942513, "loss": 0.1044, "num_input_tokens_seen": 124400208, "step": 57670 }, { "epoch": 9.408646003262643, "grad_norm": 0.018843647092580795, "learning_rate": 0.0006371069337738142, "loss": 0.0645, "num_input_tokens_seen": 124410704, "step": 57675 }, { "epoch": 9.409461663947798, "grad_norm": 0.04309535026550293, "learning_rate": 0.000637038480974731, "loss": 0.0403, "num_input_tokens_seen": 124421200, "step": 57680 }, { "epoch": 9.410277324632952, "grad_norm": 0.08281403034925461, "learning_rate": 0.0006369700253983893, "loss": 0.1156, "num_input_tokens_seen": 124432944, "step": 57685 }, { "epoch": 9.411092985318108, "grad_norm": 0.25544384121894836, "learning_rate": 0.0006369015670461762, "loss": 0.0261, "num_input_tokens_seen": 124444528, "step": 57690 }, { "epoch": 9.411908646003262, "grad_norm": 0.41060805320739746, "learning_rate": 0.0006368331059194792, "loss": 0.113, "num_input_tokens_seen": 124456848, "step": 57695 }, { "epoch": 9.412724306688418, "grad_norm": 0.5155669450759888, "learning_rate": 0.0006367646420196857, "loss": 0.063, "num_input_tokens_seen": 124466512, "step": 57700 }, { "epoch": 9.413539967373573, "grad_norm": 0.5515162944793701, "learning_rate": 0.0006366961753481832, "loss": 0.0492, "num_input_tokens_seen": 124476976, "step": 57705 }, { "epoch": 9.414355628058727, "grad_norm": 0.42589855194091797, "learning_rate": 0.0006366277059063594, "loss": 0.1143, "num_input_tokens_seen": 124487056, "step": 57710 }, { "epoch": 9.415171288743883, "grad_norm": 0.3894360065460205, "learning_rate": 0.0006365592336956017, "loss": 0.0768, "num_input_tokens_seen": 124497136, "step": 57715 }, { "epoch": 9.415986949429037, "grad_norm": 0.6099425554275513, "learning_rate": 0.0006364907587172978, "loss": 0.1073, "num_input_tokens_seen": 124507728, "step": 57720 }, { "epoch": 9.416802610114193, "grad_norm": 0.02873092144727707, "learning_rate": 0.0006364222809728358, "loss": 0.0428, "num_input_tokens_seen": 124517936, "step": 57725 }, { "epoch": 9.417618270799348, "grad_norm": 0.18540111184120178, "learning_rate": 0.0006363538004636032, "loss": 0.0532, "num_input_tokens_seen": 124528688, "step": 57730 }, { "epoch": 9.418433931484502, "grad_norm": 0.025548255071043968, "learning_rate": 0.0006362853171909876, "loss": 0.0759, "num_input_tokens_seen": 124538640, "step": 57735 }, { "epoch": 9.419249592169658, "grad_norm": 0.054950978606939316, "learning_rate": 0.0006362168311563773, "loss": 0.0379, "num_input_tokens_seen": 124548720, "step": 57740 }, { "epoch": 9.420065252854812, "grad_norm": 0.0036392214242368937, "learning_rate": 0.00063614834236116, "loss": 0.0506, "num_input_tokens_seen": 124559408, "step": 57745 }, { "epoch": 9.420880913539968, "grad_norm": 1.0976166725158691, "learning_rate": 0.000636079850806724, "loss": 0.1026, "num_input_tokens_seen": 124569744, "step": 57750 }, { "epoch": 9.421696574225122, "grad_norm": 0.02300839312374592, "learning_rate": 0.0006360113564944571, "loss": 0.0444, "num_input_tokens_seen": 124580208, "step": 57755 }, { "epoch": 9.422512234910277, "grad_norm": 0.007651113905012608, "learning_rate": 0.0006359428594257476, "loss": 0.1412, "num_input_tokens_seen": 124591600, "step": 57760 }, { "epoch": 9.423327895595433, "grad_norm": 0.3336685299873352, "learning_rate": 0.0006358743596019836, "loss": 0.0923, "num_input_tokens_seen": 124602800, "step": 57765 }, { "epoch": 9.424143556280587, "grad_norm": 0.2775976359844208, "learning_rate": 0.0006358058570245532, "loss": 0.0702, "num_input_tokens_seen": 124612624, "step": 57770 }, { "epoch": 9.424959216965743, "grad_norm": 0.005509909242391586, "learning_rate": 0.0006357373516948451, "loss": 0.0262, "num_input_tokens_seen": 124623216, "step": 57775 }, { "epoch": 9.425774877650896, "grad_norm": 0.07859351485967636, "learning_rate": 0.0006356688436142471, "loss": 0.0121, "num_input_tokens_seen": 124634352, "step": 57780 }, { "epoch": 9.426590538336052, "grad_norm": 0.0631135031580925, "learning_rate": 0.000635600332784148, "loss": 0.1584, "num_input_tokens_seen": 124645584, "step": 57785 }, { "epoch": 9.427406199021208, "grad_norm": 0.02061944454908371, "learning_rate": 0.0006355318192059361, "loss": 0.0095, "num_input_tokens_seen": 124656976, "step": 57790 }, { "epoch": 9.428221859706362, "grad_norm": 0.012508010491728783, "learning_rate": 0.0006354633028809999, "loss": 0.0195, "num_input_tokens_seen": 124668304, "step": 57795 }, { "epoch": 9.429037520391518, "grad_norm": 0.06304553151130676, "learning_rate": 0.000635394783810728, "loss": 0.0216, "num_input_tokens_seen": 124679568, "step": 57800 }, { "epoch": 9.429853181076671, "grad_norm": 0.006754495203495026, "learning_rate": 0.0006353262619965091, "loss": 0.0162, "num_input_tokens_seen": 124691184, "step": 57805 }, { "epoch": 9.430668841761827, "grad_norm": 0.012716390192508698, "learning_rate": 0.000635257737439732, "loss": 0.0311, "num_input_tokens_seen": 124703760, "step": 57810 }, { "epoch": 9.431484502446983, "grad_norm": 1.120622158050537, "learning_rate": 0.0006351892101417849, "loss": 0.0804, "num_input_tokens_seen": 124714768, "step": 57815 }, { "epoch": 9.432300163132137, "grad_norm": 0.004914834629744291, "learning_rate": 0.0006351206801040571, "loss": 0.0337, "num_input_tokens_seen": 124725488, "step": 57820 }, { "epoch": 9.433115823817293, "grad_norm": 0.1275549978017807, "learning_rate": 0.0006350521473279374, "loss": 0.0238, "num_input_tokens_seen": 124736880, "step": 57825 }, { "epoch": 9.433931484502446, "grad_norm": 0.06971491128206253, "learning_rate": 0.0006349836118148146, "loss": 0.0256, "num_input_tokens_seen": 124747952, "step": 57830 }, { "epoch": 9.434747145187602, "grad_norm": 0.03171204775571823, "learning_rate": 0.0006349150735660776, "loss": 0.0135, "num_input_tokens_seen": 124759568, "step": 57835 }, { "epoch": 9.435562805872756, "grad_norm": 0.00931501667946577, "learning_rate": 0.0006348465325831155, "loss": 0.089, "num_input_tokens_seen": 124771536, "step": 57840 }, { "epoch": 9.436378466557912, "grad_norm": 0.03530379757285118, "learning_rate": 0.0006347779888673175, "loss": 0.0083, "num_input_tokens_seen": 124781712, "step": 57845 }, { "epoch": 9.437194127243067, "grad_norm": 0.16937944293022156, "learning_rate": 0.0006347094424200724, "loss": 0.0976, "num_input_tokens_seen": 124791696, "step": 57850 }, { "epoch": 9.438009787928221, "grad_norm": 0.051120031625032425, "learning_rate": 0.0006346408932427696, "loss": 0.0814, "num_input_tokens_seen": 124802640, "step": 57855 }, { "epoch": 9.438825448613377, "grad_norm": 0.0062878611497581005, "learning_rate": 0.0006345723413367983, "loss": 0.0891, "num_input_tokens_seen": 124814352, "step": 57860 }, { "epoch": 9.439641109298531, "grad_norm": 0.5621969103813171, "learning_rate": 0.0006345037867035478, "loss": 0.1059, "num_input_tokens_seen": 124826256, "step": 57865 }, { "epoch": 9.440456769983687, "grad_norm": 0.13361355662345886, "learning_rate": 0.0006344352293444073, "loss": 0.0204, "num_input_tokens_seen": 124836880, "step": 57870 }, { "epoch": 9.441272430668842, "grad_norm": 0.04254751279950142, "learning_rate": 0.0006343666692607665, "loss": 0.0196, "num_input_tokens_seen": 124848208, "step": 57875 }, { "epoch": 9.442088091353996, "grad_norm": 0.0023901453241705894, "learning_rate": 0.0006342981064540145, "loss": 0.0302, "num_input_tokens_seen": 124859984, "step": 57880 }, { "epoch": 9.442903752039152, "grad_norm": 0.00675305537879467, "learning_rate": 0.0006342295409255412, "loss": 0.0133, "num_input_tokens_seen": 124871248, "step": 57885 }, { "epoch": 9.443719412724306, "grad_norm": 0.24742509424686432, "learning_rate": 0.000634160972676736, "loss": 0.0613, "num_input_tokens_seen": 124882160, "step": 57890 }, { "epoch": 9.444535073409462, "grad_norm": 0.2846779525279999, "learning_rate": 0.0006340924017089884, "loss": 0.0314, "num_input_tokens_seen": 124893424, "step": 57895 }, { "epoch": 9.445350734094617, "grad_norm": 0.6716398596763611, "learning_rate": 0.0006340238280236882, "loss": 0.0613, "num_input_tokens_seen": 124903920, "step": 57900 }, { "epoch": 9.446166394779771, "grad_norm": 0.024753879755735397, "learning_rate": 0.0006339552516222251, "loss": 0.1007, "num_input_tokens_seen": 124914640, "step": 57905 }, { "epoch": 9.446982055464927, "grad_norm": 0.008205254562199116, "learning_rate": 0.0006338866725059889, "loss": 0.08, "num_input_tokens_seen": 124924848, "step": 57910 }, { "epoch": 9.447797716150081, "grad_norm": 0.2386118769645691, "learning_rate": 0.0006338180906763693, "loss": 0.0349, "num_input_tokens_seen": 124936208, "step": 57915 }, { "epoch": 9.448613376835237, "grad_norm": 0.5665149092674255, "learning_rate": 0.0006337495061347565, "loss": 0.0986, "num_input_tokens_seen": 124946992, "step": 57920 }, { "epoch": 9.449429037520392, "grad_norm": 0.4700169265270233, "learning_rate": 0.0006336809188825401, "loss": 0.0875, "num_input_tokens_seen": 124959344, "step": 57925 }, { "epoch": 9.450244698205546, "grad_norm": 0.03632558137178421, "learning_rate": 0.0006336123289211104, "loss": 0.1204, "num_input_tokens_seen": 124969264, "step": 57930 }, { "epoch": 9.451060358890702, "grad_norm": 0.005097426939755678, "learning_rate": 0.0006335437362518574, "loss": 0.0095, "num_input_tokens_seen": 124980912, "step": 57935 }, { "epoch": 9.451876019575856, "grad_norm": 1.2858837842941284, "learning_rate": 0.0006334751408761712, "loss": 0.0667, "num_input_tokens_seen": 124990704, "step": 57940 }, { "epoch": 9.452691680261012, "grad_norm": 0.060655441135168076, "learning_rate": 0.0006334065427954418, "loss": 0.1629, "num_input_tokens_seen": 125001584, "step": 57945 }, { "epoch": 9.453507340946166, "grad_norm": 0.0631236732006073, "learning_rate": 0.0006333379420110597, "loss": 0.0548, "num_input_tokens_seen": 125013136, "step": 57950 }, { "epoch": 9.454323001631321, "grad_norm": 0.09569914638996124, "learning_rate": 0.000633269338524415, "loss": 0.0813, "num_input_tokens_seen": 125022224, "step": 57955 }, { "epoch": 9.455138662316477, "grad_norm": 0.162586972117424, "learning_rate": 0.0006332007323368983, "loss": 0.0592, "num_input_tokens_seen": 125032240, "step": 57960 }, { "epoch": 9.455954323001631, "grad_norm": 0.5213939547538757, "learning_rate": 0.0006331321234498995, "loss": 0.1041, "num_input_tokens_seen": 125042576, "step": 57965 }, { "epoch": 9.456769983686787, "grad_norm": 0.5058368444442749, "learning_rate": 0.0006330635118648093, "loss": 0.0936, "num_input_tokens_seen": 125054096, "step": 57970 }, { "epoch": 9.45758564437194, "grad_norm": 0.006088830064982176, "learning_rate": 0.0006329948975830184, "loss": 0.1647, "num_input_tokens_seen": 125065808, "step": 57975 }, { "epoch": 9.458401305057096, "grad_norm": 1.0126179456710815, "learning_rate": 0.0006329262806059173, "loss": 0.0806, "num_input_tokens_seen": 125075248, "step": 57980 }, { "epoch": 9.459216965742252, "grad_norm": 0.012858398258686066, "learning_rate": 0.0006328576609348962, "loss": 0.041, "num_input_tokens_seen": 125086096, "step": 57985 }, { "epoch": 9.460032626427406, "grad_norm": 0.1495935618877411, "learning_rate": 0.0006327890385713462, "loss": 0.0101, "num_input_tokens_seen": 125096976, "step": 57990 }, { "epoch": 9.460848287112562, "grad_norm": 0.4352479875087738, "learning_rate": 0.000632720413516658, "loss": 0.0849, "num_input_tokens_seen": 125108048, "step": 57995 }, { "epoch": 9.461663947797716, "grad_norm": 0.19777001440525055, "learning_rate": 0.000632651785772222, "loss": 0.0345, "num_input_tokens_seen": 125118224, "step": 58000 }, { "epoch": 9.462479608482871, "grad_norm": 0.008766602724790573, "learning_rate": 0.0006325831553394294, "loss": 0.0148, "num_input_tokens_seen": 125128944, "step": 58005 }, { "epoch": 9.463295269168025, "grad_norm": 0.031060216948390007, "learning_rate": 0.000632514522219671, "loss": 0.0641, "num_input_tokens_seen": 125140144, "step": 58010 }, { "epoch": 9.464110929853181, "grad_norm": 0.13641168177127838, "learning_rate": 0.0006324458864143377, "loss": 0.1333, "num_input_tokens_seen": 125150608, "step": 58015 }, { "epoch": 9.464926590538337, "grad_norm": 0.10358651727437973, "learning_rate": 0.0006323772479248204, "loss": 0.0183, "num_input_tokens_seen": 125161616, "step": 58020 }, { "epoch": 9.46574225122349, "grad_norm": 0.04064834862947464, "learning_rate": 0.0006323086067525103, "loss": 0.0344, "num_input_tokens_seen": 125171856, "step": 58025 }, { "epoch": 9.466557911908646, "grad_norm": 0.11970902979373932, "learning_rate": 0.0006322399628987984, "loss": 0.0503, "num_input_tokens_seen": 125183632, "step": 58030 }, { "epoch": 9.4673735725938, "grad_norm": 0.3520708978176117, "learning_rate": 0.000632171316365076, "loss": 0.0376, "num_input_tokens_seen": 125194288, "step": 58035 }, { "epoch": 9.468189233278956, "grad_norm": 0.6180605888366699, "learning_rate": 0.000632102667152734, "loss": 0.1303, "num_input_tokens_seen": 125205008, "step": 58040 }, { "epoch": 9.469004893964112, "grad_norm": 0.012467690743505955, "learning_rate": 0.000632034015263164, "loss": 0.0944, "num_input_tokens_seen": 125215568, "step": 58045 }, { "epoch": 9.469820554649266, "grad_norm": 0.006612907163798809, "learning_rate": 0.0006319653606977571, "loss": 0.0432, "num_input_tokens_seen": 125226352, "step": 58050 }, { "epoch": 9.470636215334421, "grad_norm": 0.055331308394670486, "learning_rate": 0.0006318967034579048, "loss": 0.1312, "num_input_tokens_seen": 125238640, "step": 58055 }, { "epoch": 9.471451876019575, "grad_norm": 0.8544214367866516, "learning_rate": 0.0006318280435449985, "loss": 0.0948, "num_input_tokens_seen": 125250320, "step": 58060 }, { "epoch": 9.47226753670473, "grad_norm": 0.4567054808139801, "learning_rate": 0.0006317593809604298, "loss": 0.159, "num_input_tokens_seen": 125261936, "step": 58065 }, { "epoch": 9.473083197389887, "grad_norm": 0.5499236583709717, "learning_rate": 0.00063169071570559, "loss": 0.0759, "num_input_tokens_seen": 125273392, "step": 58070 }, { "epoch": 9.47389885807504, "grad_norm": 0.04006386548280716, "learning_rate": 0.0006316220477818707, "loss": 0.0218, "num_input_tokens_seen": 125283952, "step": 58075 }, { "epoch": 9.474714518760196, "grad_norm": 0.028712088242173195, "learning_rate": 0.0006315533771906638, "loss": 0.0213, "num_input_tokens_seen": 125293488, "step": 58080 }, { "epoch": 9.47553017944535, "grad_norm": 0.13070084154605865, "learning_rate": 0.0006314847039333607, "loss": 0.0546, "num_input_tokens_seen": 125305136, "step": 58085 }, { "epoch": 9.476345840130506, "grad_norm": 0.6025596261024475, "learning_rate": 0.0006314160280113532, "loss": 0.1157, "num_input_tokens_seen": 125316016, "step": 58090 }, { "epoch": 9.477161500815662, "grad_norm": 0.15075118839740753, "learning_rate": 0.0006313473494260333, "loss": 0.0321, "num_input_tokens_seen": 125326736, "step": 58095 }, { "epoch": 9.477977161500815, "grad_norm": 0.08009255677461624, "learning_rate": 0.0006312786681787928, "loss": 0.0661, "num_input_tokens_seen": 125337584, "step": 58100 }, { "epoch": 9.478792822185971, "grad_norm": 0.026056891307234764, "learning_rate": 0.0006312099842710234, "loss": 0.0149, "num_input_tokens_seen": 125346448, "step": 58105 }, { "epoch": 9.479608482871125, "grad_norm": 0.009021613746881485, "learning_rate": 0.0006311412977041172, "loss": 0.0379, "num_input_tokens_seen": 125355920, "step": 58110 }, { "epoch": 9.48042414355628, "grad_norm": 0.10805810987949371, "learning_rate": 0.0006310726084794663, "loss": 0.029, "num_input_tokens_seen": 125367792, "step": 58115 }, { "epoch": 9.481239804241435, "grad_norm": 0.7031973004341125, "learning_rate": 0.0006310039165984628, "loss": 0.217, "num_input_tokens_seen": 125378512, "step": 58120 }, { "epoch": 9.48205546492659, "grad_norm": 0.006835599895566702, "learning_rate": 0.0006309352220624986, "loss": 0.0332, "num_input_tokens_seen": 125389520, "step": 58125 }, { "epoch": 9.482871125611746, "grad_norm": 0.02898775413632393, "learning_rate": 0.0006308665248729662, "loss": 0.0181, "num_input_tokens_seen": 125400496, "step": 58130 }, { "epoch": 9.4836867862969, "grad_norm": 0.046699922531843185, "learning_rate": 0.0006307978250312574, "loss": 0.0265, "num_input_tokens_seen": 125410448, "step": 58135 }, { "epoch": 9.484502446982056, "grad_norm": 0.005299192387610674, "learning_rate": 0.0006307291225387648, "loss": 0.0138, "num_input_tokens_seen": 125421008, "step": 58140 }, { "epoch": 9.48531810766721, "grad_norm": 0.023296967148780823, "learning_rate": 0.0006306604173968808, "loss": 0.0976, "num_input_tokens_seen": 125431696, "step": 58145 }, { "epoch": 9.486133768352365, "grad_norm": 0.015508589334785938, "learning_rate": 0.0006305917096069977, "loss": 0.0664, "num_input_tokens_seen": 125443888, "step": 58150 }, { "epoch": 9.486949429037521, "grad_norm": 0.008938160724937916, "learning_rate": 0.000630522999170508, "loss": 0.1936, "num_input_tokens_seen": 125455856, "step": 58155 }, { "epoch": 9.487765089722675, "grad_norm": 0.17942595481872559, "learning_rate": 0.0006304542860888039, "loss": 0.0187, "num_input_tokens_seen": 125466640, "step": 58160 }, { "epoch": 9.48858075040783, "grad_norm": 0.23976832628250122, "learning_rate": 0.0006303855703632783, "loss": 0.0695, "num_input_tokens_seen": 125478096, "step": 58165 }, { "epoch": 9.489396411092985, "grad_norm": 0.08689501881599426, "learning_rate": 0.0006303168519953238, "loss": 0.1408, "num_input_tokens_seen": 125488976, "step": 58170 }, { "epoch": 9.49021207177814, "grad_norm": 0.008017306216061115, "learning_rate": 0.0006302481309863329, "loss": 0.0088, "num_input_tokens_seen": 125499408, "step": 58175 }, { "epoch": 9.491027732463296, "grad_norm": 1.171169400215149, "learning_rate": 0.0006301794073376985, "loss": 0.0941, "num_input_tokens_seen": 125511088, "step": 58180 }, { "epoch": 9.49184339314845, "grad_norm": 0.05090084671974182, "learning_rate": 0.0006301106810508131, "loss": 0.152, "num_input_tokens_seen": 125520688, "step": 58185 }, { "epoch": 9.492659053833606, "grad_norm": 0.10196384787559509, "learning_rate": 0.0006300419521270697, "loss": 0.0467, "num_input_tokens_seen": 125533200, "step": 58190 }, { "epoch": 9.49347471451876, "grad_norm": 0.11534524708986282, "learning_rate": 0.0006299732205678613, "loss": 0.0633, "num_input_tokens_seen": 125542800, "step": 58195 }, { "epoch": 9.494290375203915, "grad_norm": 0.02116946317255497, "learning_rate": 0.0006299044863745806, "loss": 0.121, "num_input_tokens_seen": 125554064, "step": 58200 }, { "epoch": 9.49510603588907, "grad_norm": 0.2826470732688904, "learning_rate": 0.0006298357495486208, "loss": 0.1774, "num_input_tokens_seen": 125564400, "step": 58205 }, { "epoch": 9.495921696574225, "grad_norm": 0.1315881460905075, "learning_rate": 0.0006297670100913748, "loss": 0.0424, "num_input_tokens_seen": 125574864, "step": 58210 }, { "epoch": 9.49673735725938, "grad_norm": 0.018945792689919472, "learning_rate": 0.0006296982680042357, "loss": 0.0507, "num_input_tokens_seen": 125585104, "step": 58215 }, { "epoch": 9.497553017944535, "grad_norm": 0.007699902635067701, "learning_rate": 0.0006296295232885966, "loss": 0.0215, "num_input_tokens_seen": 125595760, "step": 58220 }, { "epoch": 9.49836867862969, "grad_norm": 0.02608025260269642, "learning_rate": 0.0006295607759458508, "loss": 0.0098, "num_input_tokens_seen": 125606384, "step": 58225 }, { "epoch": 9.499184339314844, "grad_norm": 0.08171609044075012, "learning_rate": 0.0006294920259773915, "loss": 0.1277, "num_input_tokens_seen": 125618288, "step": 58230 }, { "epoch": 9.5, "grad_norm": 0.009978825226426125, "learning_rate": 0.0006294232733846121, "loss": 0.1086, "num_input_tokens_seen": 125629424, "step": 58235 }, { "epoch": 9.500815660685156, "grad_norm": 0.12057749927043915, "learning_rate": 0.0006293545181689057, "loss": 0.0224, "num_input_tokens_seen": 125640912, "step": 58240 }, { "epoch": 9.50163132137031, "grad_norm": 0.0033223330974578857, "learning_rate": 0.000629285760331666, "loss": 0.0216, "num_input_tokens_seen": 125651536, "step": 58245 }, { "epoch": 9.502446982055465, "grad_norm": 0.37659674882888794, "learning_rate": 0.0006292169998742865, "loss": 0.0626, "num_input_tokens_seen": 125661232, "step": 58250 }, { "epoch": 9.50326264274062, "grad_norm": 0.06521254777908325, "learning_rate": 0.0006291482367981605, "loss": 0.0723, "num_input_tokens_seen": 125670960, "step": 58255 }, { "epoch": 9.504078303425775, "grad_norm": 0.08635067939758301, "learning_rate": 0.0006290794711046816, "loss": 0.0197, "num_input_tokens_seen": 125682416, "step": 58260 }, { "epoch": 9.50489396411093, "grad_norm": 0.030727704986929893, "learning_rate": 0.0006290107027952434, "loss": 0.2579, "num_input_tokens_seen": 125694160, "step": 58265 }, { "epoch": 9.505709624796085, "grad_norm": 0.08212758600711823, "learning_rate": 0.0006289419318712397, "loss": 0.0114, "num_input_tokens_seen": 125705008, "step": 58270 }, { "epoch": 9.50652528548124, "grad_norm": 0.23490901291370392, "learning_rate": 0.0006288731583340642, "loss": 0.1739, "num_input_tokens_seen": 125715888, "step": 58275 }, { "epoch": 9.507340946166394, "grad_norm": 0.19396431744098663, "learning_rate": 0.0006288043821851107, "loss": 0.282, "num_input_tokens_seen": 125726928, "step": 58280 }, { "epoch": 9.50815660685155, "grad_norm": 0.2732084393501282, "learning_rate": 0.000628735603425773, "loss": 0.0701, "num_input_tokens_seen": 125737264, "step": 58285 }, { "epoch": 9.508972267536706, "grad_norm": 0.17187979817390442, "learning_rate": 0.0006286668220574448, "loss": 0.0691, "num_input_tokens_seen": 125748496, "step": 58290 }, { "epoch": 9.50978792822186, "grad_norm": 1.2165248394012451, "learning_rate": 0.0006285980380815204, "loss": 0.2298, "num_input_tokens_seen": 125758928, "step": 58295 }, { "epoch": 9.510603588907015, "grad_norm": 0.7220401763916016, "learning_rate": 0.0006285292514993936, "loss": 0.0706, "num_input_tokens_seen": 125769712, "step": 58300 }, { "epoch": 9.51141924959217, "grad_norm": 0.006973011419177055, "learning_rate": 0.0006284604623124585, "loss": 0.0293, "num_input_tokens_seen": 125780400, "step": 58305 }, { "epoch": 9.512234910277325, "grad_norm": 0.3740575611591339, "learning_rate": 0.0006283916705221091, "loss": 0.029, "num_input_tokens_seen": 125791472, "step": 58310 }, { "epoch": 9.513050570962479, "grad_norm": 0.10369005799293518, "learning_rate": 0.0006283228761297396, "loss": 0.0306, "num_input_tokens_seen": 125802576, "step": 58315 }, { "epoch": 9.513866231647635, "grad_norm": 0.005439228378236294, "learning_rate": 0.0006282540791367442, "loss": 0.0849, "num_input_tokens_seen": 125811952, "step": 58320 }, { "epoch": 9.51468189233279, "grad_norm": 0.2789449393749237, "learning_rate": 0.0006281852795445173, "loss": 0.0222, "num_input_tokens_seen": 125822864, "step": 58325 }, { "epoch": 9.515497553017944, "grad_norm": 0.0805715024471283, "learning_rate": 0.000628116477354453, "loss": 0.1678, "num_input_tokens_seen": 125833552, "step": 58330 }, { "epoch": 9.5163132137031, "grad_norm": 0.3602902591228485, "learning_rate": 0.0006280476725679457, "loss": 0.0966, "num_input_tokens_seen": 125845232, "step": 58335 }, { "epoch": 9.517128874388254, "grad_norm": 0.04466300085186958, "learning_rate": 0.00062797886518639, "loss": 0.0502, "num_input_tokens_seen": 125855536, "step": 58340 }, { "epoch": 9.51794453507341, "grad_norm": 0.006288435310125351, "learning_rate": 0.0006279100552111803, "loss": 0.2058, "num_input_tokens_seen": 125866352, "step": 58345 }, { "epoch": 9.518760195758565, "grad_norm": 0.06510628759860992, "learning_rate": 0.0006278412426437109, "loss": 0.0882, "num_input_tokens_seen": 125876784, "step": 58350 }, { "epoch": 9.51957585644372, "grad_norm": 0.007382781710475683, "learning_rate": 0.0006277724274853767, "loss": 0.013, "num_input_tokens_seen": 125887472, "step": 58355 }, { "epoch": 9.520391517128875, "grad_norm": 0.013673797249794006, "learning_rate": 0.0006277036097375719, "loss": 0.0214, "num_input_tokens_seen": 125897968, "step": 58360 }, { "epoch": 9.521207177814029, "grad_norm": 0.11507966369390488, "learning_rate": 0.0006276347894016917, "loss": 0.0406, "num_input_tokens_seen": 125907920, "step": 58365 }, { "epoch": 9.522022838499185, "grad_norm": 0.12790460884571075, "learning_rate": 0.0006275659664791304, "loss": 0.1004, "num_input_tokens_seen": 125919472, "step": 58370 }, { "epoch": 9.522838499184338, "grad_norm": 0.28610751032829285, "learning_rate": 0.0006274971409712831, "loss": 0.0507, "num_input_tokens_seen": 125929168, "step": 58375 }, { "epoch": 9.523654159869494, "grad_norm": 0.9515119791030884, "learning_rate": 0.0006274283128795445, "loss": 0.1359, "num_input_tokens_seen": 125941296, "step": 58380 }, { "epoch": 9.52446982055465, "grad_norm": 0.4753815829753876, "learning_rate": 0.0006273594822053095, "loss": 0.1257, "num_input_tokens_seen": 125951728, "step": 58385 }, { "epoch": 9.525285481239804, "grad_norm": 0.5236465930938721, "learning_rate": 0.000627290648949973, "loss": 0.0845, "num_input_tokens_seen": 125962096, "step": 58390 }, { "epoch": 9.52610114192496, "grad_norm": 0.38971593976020813, "learning_rate": 0.00062722181311493, "loss": 0.082, "num_input_tokens_seen": 125974256, "step": 58395 }, { "epoch": 9.526916802610113, "grad_norm": 0.023498913273215294, "learning_rate": 0.0006271529747015755, "loss": 0.0225, "num_input_tokens_seen": 125984624, "step": 58400 }, { "epoch": 9.52773246329527, "grad_norm": 0.016071060672402382, "learning_rate": 0.0006270841337113047, "loss": 0.0175, "num_input_tokens_seen": 125995440, "step": 58405 }, { "epoch": 9.528548123980425, "grad_norm": 0.0459694042801857, "learning_rate": 0.0006270152901455128, "loss": 0.0186, "num_input_tokens_seen": 126006576, "step": 58410 }, { "epoch": 9.529363784665579, "grad_norm": 0.023586345836520195, "learning_rate": 0.0006269464440055948, "loss": 0.0225, "num_input_tokens_seen": 126016592, "step": 58415 }, { "epoch": 9.530179445350734, "grad_norm": 0.09077483415603638, "learning_rate": 0.0006268775952929462, "loss": 0.0177, "num_input_tokens_seen": 126027792, "step": 58420 }, { "epoch": 9.530995106035888, "grad_norm": 0.0463712252676487, "learning_rate": 0.000626808744008962, "loss": 0.006, "num_input_tokens_seen": 126039472, "step": 58425 }, { "epoch": 9.531810766721044, "grad_norm": 0.02281074970960617, "learning_rate": 0.0006267398901550379, "loss": 0.0917, "num_input_tokens_seen": 126050064, "step": 58430 }, { "epoch": 9.5326264274062, "grad_norm": 0.052454009652137756, "learning_rate": 0.000626671033732569, "loss": 0.0397, "num_input_tokens_seen": 126060720, "step": 58435 }, { "epoch": 9.533442088091354, "grad_norm": 0.02167009562253952, "learning_rate": 0.0006266021747429511, "loss": 0.0171, "num_input_tokens_seen": 126071632, "step": 58440 }, { "epoch": 9.53425774877651, "grad_norm": 0.14274466037750244, "learning_rate": 0.0006265333131875794, "loss": 0.0319, "num_input_tokens_seen": 126081904, "step": 58445 }, { "epoch": 9.535073409461663, "grad_norm": 0.032463740557432175, "learning_rate": 0.0006264644490678496, "loss": 0.0388, "num_input_tokens_seen": 126093328, "step": 58450 }, { "epoch": 9.535889070146819, "grad_norm": 0.028997454792261124, "learning_rate": 0.0006263955823851571, "loss": 0.0499, "num_input_tokens_seen": 126103760, "step": 58455 }, { "epoch": 9.536704730831975, "grad_norm": 0.011113029904663563, "learning_rate": 0.0006263267131408981, "loss": 0.0276, "num_input_tokens_seen": 126114576, "step": 58460 }, { "epoch": 9.537520391517129, "grad_norm": 0.4074894189834595, "learning_rate": 0.0006262578413364679, "loss": 0.17, "num_input_tokens_seen": 126124464, "step": 58465 }, { "epoch": 9.538336052202284, "grad_norm": 0.5466145873069763, "learning_rate": 0.0006261889669732624, "loss": 0.1917, "num_input_tokens_seen": 126133744, "step": 58470 }, { "epoch": 9.539151712887438, "grad_norm": 0.3555784523487091, "learning_rate": 0.0006261200900526773, "loss": 0.0179, "num_input_tokens_seen": 126146128, "step": 58475 }, { "epoch": 9.539967373572594, "grad_norm": 0.013273402117192745, "learning_rate": 0.0006260512105761086, "loss": 0.0734, "num_input_tokens_seen": 126158480, "step": 58480 }, { "epoch": 9.540783034257748, "grad_norm": 0.40700119733810425, "learning_rate": 0.0006259823285449523, "loss": 0.0942, "num_input_tokens_seen": 126169008, "step": 58485 }, { "epoch": 9.541598694942904, "grad_norm": 0.03941665217280388, "learning_rate": 0.0006259134439606043, "loss": 0.086, "num_input_tokens_seen": 126179248, "step": 58490 }, { "epoch": 9.54241435562806, "grad_norm": 0.028568079695105553, "learning_rate": 0.0006258445568244605, "loss": 0.0167, "num_input_tokens_seen": 126190320, "step": 58495 }, { "epoch": 9.543230016313213, "grad_norm": 0.4833282232284546, "learning_rate": 0.0006257756671379172, "loss": 0.0966, "num_input_tokens_seen": 126200880, "step": 58500 }, { "epoch": 9.544045676998369, "grad_norm": 0.7798423171043396, "learning_rate": 0.0006257067749023704, "loss": 0.0824, "num_input_tokens_seen": 126211088, "step": 58505 }, { "epoch": 9.544861337683523, "grad_norm": 0.07707181572914124, "learning_rate": 0.0006256378801192163, "loss": 0.1098, "num_input_tokens_seen": 126221200, "step": 58510 }, { "epoch": 9.545676998368679, "grad_norm": 0.029642900452017784, "learning_rate": 0.0006255689827898512, "loss": 0.0078, "num_input_tokens_seen": 126231152, "step": 58515 }, { "epoch": 9.546492659053834, "grad_norm": 0.032600291073322296, "learning_rate": 0.0006255000829156714, "loss": 0.029, "num_input_tokens_seen": 126241936, "step": 58520 }, { "epoch": 9.547308319738988, "grad_norm": 0.276779443025589, "learning_rate": 0.0006254311804980733, "loss": 0.0654, "num_input_tokens_seen": 126253552, "step": 58525 }, { "epoch": 9.548123980424144, "grad_norm": 0.06900997459888458, "learning_rate": 0.0006253622755384531, "loss": 0.0131, "num_input_tokens_seen": 126263792, "step": 58530 }, { "epoch": 9.548939641109298, "grad_norm": 0.0035610131453722715, "learning_rate": 0.0006252933680382074, "loss": 0.0478, "num_input_tokens_seen": 126274160, "step": 58535 }, { "epoch": 9.549755301794454, "grad_norm": 0.03504409268498421, "learning_rate": 0.0006252244579987327, "loss": 0.0095, "num_input_tokens_seen": 126283952, "step": 58540 }, { "epoch": 9.550570962479608, "grad_norm": 0.23075544834136963, "learning_rate": 0.0006251555454214254, "loss": 0.0835, "num_input_tokens_seen": 126294640, "step": 58545 }, { "epoch": 9.551386623164763, "grad_norm": 0.013807575218379498, "learning_rate": 0.0006250866303076822, "loss": 0.0092, "num_input_tokens_seen": 126306320, "step": 58550 }, { "epoch": 9.552202283849919, "grad_norm": 0.11007317155599594, "learning_rate": 0.0006250177126588998, "loss": 0.021, "num_input_tokens_seen": 126316048, "step": 58555 }, { "epoch": 9.553017944535073, "grad_norm": 0.021631285548210144, "learning_rate": 0.0006249487924764747, "loss": 0.056, "num_input_tokens_seen": 126326864, "step": 58560 }, { "epoch": 9.553833605220229, "grad_norm": 0.48573800921440125, "learning_rate": 0.000624879869761804, "loss": 0.045, "num_input_tokens_seen": 126337392, "step": 58565 }, { "epoch": 9.554649265905383, "grad_norm": 0.04822688549757004, "learning_rate": 0.0006248109445162843, "loss": 0.1139, "num_input_tokens_seen": 126348432, "step": 58570 }, { "epoch": 9.555464926590538, "grad_norm": 0.3569021224975586, "learning_rate": 0.0006247420167413124, "loss": 0.0466, "num_input_tokens_seen": 126358256, "step": 58575 }, { "epoch": 9.556280587275694, "grad_norm": 0.027278177440166473, "learning_rate": 0.0006246730864382853, "loss": 0.1512, "num_input_tokens_seen": 126368400, "step": 58580 }, { "epoch": 9.557096247960848, "grad_norm": 0.006739978678524494, "learning_rate": 0.0006246041536086, "loss": 0.0131, "num_input_tokens_seen": 126379536, "step": 58585 }, { "epoch": 9.557911908646004, "grad_norm": 0.046346716582775116, "learning_rate": 0.0006245352182536535, "loss": 0.2362, "num_input_tokens_seen": 126389648, "step": 58590 }, { "epoch": 9.558727569331158, "grad_norm": 0.873885452747345, "learning_rate": 0.0006244662803748427, "loss": 0.1057, "num_input_tokens_seen": 126399248, "step": 58595 }, { "epoch": 9.559543230016313, "grad_norm": 0.01116516999900341, "learning_rate": 0.0006243973399735649, "loss": 0.039, "num_input_tokens_seen": 126410736, "step": 58600 }, { "epoch": 9.560358890701469, "grad_norm": 0.6334988474845886, "learning_rate": 0.0006243283970512172, "loss": 0.0766, "num_input_tokens_seen": 126421040, "step": 58605 }, { "epoch": 9.561174551386623, "grad_norm": 0.4775483012199402, "learning_rate": 0.0006242594516091967, "loss": 0.231, "num_input_tokens_seen": 126432240, "step": 58610 }, { "epoch": 9.561990212071779, "grad_norm": 0.24963954091072083, "learning_rate": 0.000624190503648901, "loss": 0.0685, "num_input_tokens_seen": 126442640, "step": 58615 }, { "epoch": 9.562805872756933, "grad_norm": 0.022364353761076927, "learning_rate": 0.000624121553171727, "loss": 0.0222, "num_input_tokens_seen": 126453776, "step": 58620 }, { "epoch": 9.563621533442088, "grad_norm": 0.058948613703250885, "learning_rate": 0.0006240526001790723, "loss": 0.0238, "num_input_tokens_seen": 126464976, "step": 58625 }, { "epoch": 9.564437194127244, "grad_norm": 0.026899239048361778, "learning_rate": 0.0006239836446723343, "loss": 0.0296, "num_input_tokens_seen": 126473584, "step": 58630 }, { "epoch": 9.565252854812398, "grad_norm": 0.11287160962820053, "learning_rate": 0.0006239146866529105, "loss": 0.0493, "num_input_tokens_seen": 126484400, "step": 58635 }, { "epoch": 9.566068515497554, "grad_norm": 0.11984999477863312, "learning_rate": 0.0006238457261221983, "loss": 0.0805, "num_input_tokens_seen": 126495536, "step": 58640 }, { "epoch": 9.566884176182707, "grad_norm": 0.11183185875415802, "learning_rate": 0.0006237767630815955, "loss": 0.0792, "num_input_tokens_seen": 126505616, "step": 58645 }, { "epoch": 9.567699836867863, "grad_norm": 0.3792961537837982, "learning_rate": 0.0006237077975324994, "loss": 0.0389, "num_input_tokens_seen": 126516464, "step": 58650 }, { "epoch": 9.568515497553017, "grad_norm": 0.41945192217826843, "learning_rate": 0.0006236388294763079, "loss": 0.0606, "num_input_tokens_seen": 126526768, "step": 58655 }, { "epoch": 9.569331158238173, "grad_norm": 0.023607546463608742, "learning_rate": 0.0006235698589144188, "loss": 0.0809, "num_input_tokens_seen": 126538416, "step": 58660 }, { "epoch": 9.570146818923329, "grad_norm": 0.3828122019767761, "learning_rate": 0.0006235008858482295, "loss": 0.0784, "num_input_tokens_seen": 126549328, "step": 58665 }, { "epoch": 9.570962479608482, "grad_norm": 0.007308707106858492, "learning_rate": 0.0006234319102791382, "loss": 0.0319, "num_input_tokens_seen": 126559408, "step": 58670 }, { "epoch": 9.571778140293638, "grad_norm": 0.01621377468109131, "learning_rate": 0.0006233629322085427, "loss": 0.0459, "num_input_tokens_seen": 126569232, "step": 58675 }, { "epoch": 9.572593800978792, "grad_norm": 0.008858599700033665, "learning_rate": 0.0006232939516378408, "loss": 0.0137, "num_input_tokens_seen": 126580720, "step": 58680 }, { "epoch": 9.573409461663948, "grad_norm": 0.5117453932762146, "learning_rate": 0.0006232249685684306, "loss": 0.0675, "num_input_tokens_seen": 126590608, "step": 58685 }, { "epoch": 9.574225122349104, "grad_norm": 0.06424707919359207, "learning_rate": 0.0006231559830017102, "loss": 0.0191, "num_input_tokens_seen": 126601616, "step": 58690 }, { "epoch": 9.575040783034257, "grad_norm": 0.7022246718406677, "learning_rate": 0.0006230869949390774, "loss": 0.1657, "num_input_tokens_seen": 126612112, "step": 58695 }, { "epoch": 9.575856443719413, "grad_norm": 0.159372940659523, "learning_rate": 0.0006230180043819306, "loss": 0.0666, "num_input_tokens_seen": 126621808, "step": 58700 }, { "epoch": 9.576672104404567, "grad_norm": 0.008968913927674294, "learning_rate": 0.0006229490113316678, "loss": 0.0779, "num_input_tokens_seen": 126632528, "step": 58705 }, { "epoch": 9.577487765089723, "grad_norm": 0.011230770498514175, "learning_rate": 0.0006228800157896874, "loss": 0.0102, "num_input_tokens_seen": 126644592, "step": 58710 }, { "epoch": 9.578303425774878, "grad_norm": 0.5595538020133972, "learning_rate": 0.0006228110177573876, "loss": 0.1067, "num_input_tokens_seen": 126655504, "step": 58715 }, { "epoch": 9.579119086460032, "grad_norm": 0.1580076664686203, "learning_rate": 0.0006227420172361667, "loss": 0.0824, "num_input_tokens_seen": 126665488, "step": 58720 }, { "epoch": 9.579934747145188, "grad_norm": 0.005644900258630514, "learning_rate": 0.0006226730142274232, "loss": 0.1123, "num_input_tokens_seen": 126675536, "step": 58725 }, { "epoch": 9.580750407830342, "grad_norm": 0.009400753304362297, "learning_rate": 0.0006226040087325553, "loss": 0.0206, "num_input_tokens_seen": 126685872, "step": 58730 }, { "epoch": 9.581566068515498, "grad_norm": 0.1368161141872406, "learning_rate": 0.0006225350007529616, "loss": 0.2022, "num_input_tokens_seen": 126696048, "step": 58735 }, { "epoch": 9.582381729200652, "grad_norm": 0.05598703771829605, "learning_rate": 0.0006224659902900408, "loss": 0.0057, "num_input_tokens_seen": 126707696, "step": 58740 }, { "epoch": 9.583197389885807, "grad_norm": 0.4969485402107239, "learning_rate": 0.0006223969773451913, "loss": 0.1047, "num_input_tokens_seen": 126718704, "step": 58745 }, { "epoch": 9.584013050570963, "grad_norm": 0.027688326314091682, "learning_rate": 0.0006223279619198118, "loss": 0.0266, "num_input_tokens_seen": 126728880, "step": 58750 }, { "epoch": 9.584828711256117, "grad_norm": 0.010053487494587898, "learning_rate": 0.000622258944015301, "loss": 0.0302, "num_input_tokens_seen": 126739216, "step": 58755 }, { "epoch": 9.585644371941273, "grad_norm": 0.05750470608472824, "learning_rate": 0.0006221899236330575, "loss": 0.0206, "num_input_tokens_seen": 126750512, "step": 58760 }, { "epoch": 9.586460032626427, "grad_norm": 0.027294941246509552, "learning_rate": 0.0006221209007744803, "loss": 0.1231, "num_input_tokens_seen": 126762448, "step": 58765 }, { "epoch": 9.587275693311582, "grad_norm": 0.00872715562582016, "learning_rate": 0.0006220518754409681, "loss": 0.0365, "num_input_tokens_seen": 126773200, "step": 58770 }, { "epoch": 9.588091353996738, "grad_norm": 0.01879233680665493, "learning_rate": 0.0006219828476339195, "loss": 0.0241, "num_input_tokens_seen": 126783824, "step": 58775 }, { "epoch": 9.588907014681892, "grad_norm": 0.48452168703079224, "learning_rate": 0.0006219138173547341, "loss": 0.0814, "num_input_tokens_seen": 126793936, "step": 58780 }, { "epoch": 9.589722675367048, "grad_norm": 0.3861008286476135, "learning_rate": 0.0006218447846048106, "loss": 0.1341, "num_input_tokens_seen": 126805968, "step": 58785 }, { "epoch": 9.590538336052202, "grad_norm": 0.05191493779420853, "learning_rate": 0.0006217757493855477, "loss": 0.1712, "num_input_tokens_seen": 126817200, "step": 58790 }, { "epoch": 9.591353996737357, "grad_norm": 0.707980215549469, "learning_rate": 0.0006217067116983449, "loss": 0.0482, "num_input_tokens_seen": 126827152, "step": 58795 }, { "epoch": 9.592169657422513, "grad_norm": 0.21882370114326477, "learning_rate": 0.0006216376715446011, "loss": 0.1726, "num_input_tokens_seen": 126837392, "step": 58800 }, { "epoch": 9.592985318107667, "grad_norm": 0.7265588641166687, "learning_rate": 0.0006215686289257156, "loss": 0.1017, "num_input_tokens_seen": 126848496, "step": 58805 }, { "epoch": 9.593800978792823, "grad_norm": 0.052612412720918655, "learning_rate": 0.0006214995838430878, "loss": 0.0229, "num_input_tokens_seen": 126857840, "step": 58810 }, { "epoch": 9.594616639477977, "grad_norm": 0.1485823094844818, "learning_rate": 0.0006214305362981167, "loss": 0.105, "num_input_tokens_seen": 126868560, "step": 58815 }, { "epoch": 9.595432300163132, "grad_norm": 0.03639107942581177, "learning_rate": 0.0006213614862922015, "loss": 0.009, "num_input_tokens_seen": 126880400, "step": 58820 }, { "epoch": 9.596247960848288, "grad_norm": 0.044147249311208725, "learning_rate": 0.0006212924338267421, "loss": 0.0147, "num_input_tokens_seen": 126891568, "step": 58825 }, { "epoch": 9.597063621533442, "grad_norm": 0.024353008717298508, "learning_rate": 0.0006212233789031376, "loss": 0.0133, "num_input_tokens_seen": 126902512, "step": 58830 }, { "epoch": 9.597879282218598, "grad_norm": 0.21190401911735535, "learning_rate": 0.0006211543215227874, "loss": 0.0431, "num_input_tokens_seen": 126912400, "step": 58835 }, { "epoch": 9.598694942903752, "grad_norm": 0.2430083453655243, "learning_rate": 0.0006210852616870913, "loss": 0.0266, "num_input_tokens_seen": 126923120, "step": 58840 }, { "epoch": 9.599510603588907, "grad_norm": 0.006788566708564758, "learning_rate": 0.0006210161993974488, "loss": 0.0628, "num_input_tokens_seen": 126932912, "step": 58845 }, { "epoch": 9.600326264274061, "grad_norm": 0.11717133969068527, "learning_rate": 0.0006209471346552594, "loss": 0.0222, "num_input_tokens_seen": 126944304, "step": 58850 }, { "epoch": 9.601141924959217, "grad_norm": 0.009953130967915058, "learning_rate": 0.000620878067461923, "loss": 0.0322, "num_input_tokens_seen": 126955248, "step": 58855 }, { "epoch": 9.601957585644373, "grad_norm": 0.04601702466607094, "learning_rate": 0.0006208089978188392, "loss": 0.1023, "num_input_tokens_seen": 126966544, "step": 58860 }, { "epoch": 9.602773246329527, "grad_norm": 0.0021631289273500443, "learning_rate": 0.0006207399257274077, "loss": 0.0148, "num_input_tokens_seen": 126977168, "step": 58865 }, { "epoch": 9.603588907014682, "grad_norm": 0.03164103627204895, "learning_rate": 0.0006206708511890286, "loss": 0.0489, "num_input_tokens_seen": 126987632, "step": 58870 }, { "epoch": 9.604404567699836, "grad_norm": 0.12964217364788055, "learning_rate": 0.0006206017742051014, "loss": 0.1237, "num_input_tokens_seen": 126998992, "step": 58875 }, { "epoch": 9.605220228384992, "grad_norm": 0.2892257869243622, "learning_rate": 0.0006205326947770263, "loss": 0.0683, "num_input_tokens_seen": 127009488, "step": 58880 }, { "epoch": 9.606035889070148, "grad_norm": 0.1605965942144394, "learning_rate": 0.0006204636129062034, "loss": 0.0432, "num_input_tokens_seen": 127020912, "step": 58885 }, { "epoch": 9.606851549755302, "grad_norm": 0.43785592913627625, "learning_rate": 0.0006203945285940325, "loss": 0.1907, "num_input_tokens_seen": 127031440, "step": 58890 }, { "epoch": 9.607667210440457, "grad_norm": 0.01647508144378662, "learning_rate": 0.0006203254418419137, "loss": 0.0237, "num_input_tokens_seen": 127041360, "step": 58895 }, { "epoch": 9.608482871125611, "grad_norm": 0.45576223731040955, "learning_rate": 0.0006202563526512471, "loss": 0.077, "num_input_tokens_seen": 127053232, "step": 58900 }, { "epoch": 9.609298531810767, "grad_norm": 0.055365096777677536, "learning_rate": 0.0006201872610234331, "loss": 0.0835, "num_input_tokens_seen": 127064656, "step": 58905 }, { "epoch": 9.61011419249592, "grad_norm": 0.004479246214032173, "learning_rate": 0.0006201181669598717, "loss": 0.0063, "num_input_tokens_seen": 127074896, "step": 58910 }, { "epoch": 9.610929853181077, "grad_norm": 0.006454662885516882, "learning_rate": 0.0006200490704619633, "loss": 0.0854, "num_input_tokens_seen": 127085552, "step": 58915 }, { "epoch": 9.611745513866232, "grad_norm": 0.3500971794128418, "learning_rate": 0.0006199799715311083, "loss": 0.1038, "num_input_tokens_seen": 127096688, "step": 58920 }, { "epoch": 9.612561174551386, "grad_norm": 0.021332917734980583, "learning_rate": 0.0006199108701687068, "loss": 0.0098, "num_input_tokens_seen": 127108624, "step": 58925 }, { "epoch": 9.613376835236542, "grad_norm": 0.02128397487103939, "learning_rate": 0.0006198417663761596, "loss": 0.1153, "num_input_tokens_seen": 127119056, "step": 58930 }, { "epoch": 9.614192495921696, "grad_norm": 0.032950107008218765, "learning_rate": 0.0006197726601548667, "loss": 0.0656, "num_input_tokens_seen": 127129936, "step": 58935 }, { "epoch": 9.615008156606851, "grad_norm": 0.4912428855895996, "learning_rate": 0.0006197035515062291, "loss": 0.0387, "num_input_tokens_seen": 127140528, "step": 58940 }, { "epoch": 9.615823817292007, "grad_norm": 0.20945891737937927, "learning_rate": 0.0006196344404316472, "loss": 0.1632, "num_input_tokens_seen": 127151152, "step": 58945 }, { "epoch": 9.616639477977161, "grad_norm": 0.01846211403608322, "learning_rate": 0.0006195653269325214, "loss": 0.0904, "num_input_tokens_seen": 127161808, "step": 58950 }, { "epoch": 9.617455138662317, "grad_norm": 0.02588370069861412, "learning_rate": 0.0006194962110102528, "loss": 0.0889, "num_input_tokens_seen": 127172304, "step": 58955 }, { "epoch": 9.61827079934747, "grad_norm": 0.523381769657135, "learning_rate": 0.0006194270926662416, "loss": 0.2071, "num_input_tokens_seen": 127181904, "step": 58960 }, { "epoch": 9.619086460032626, "grad_norm": 0.01897239312529564, "learning_rate": 0.000619357971901889, "loss": 0.012, "num_input_tokens_seen": 127193200, "step": 58965 }, { "epoch": 9.619902120717782, "grad_norm": 0.0077952113933861256, "learning_rate": 0.0006192888487185958, "loss": 0.0102, "num_input_tokens_seen": 127204272, "step": 58970 }, { "epoch": 9.620717781402936, "grad_norm": 0.23757962882518768, "learning_rate": 0.0006192197231177627, "loss": 0.0515, "num_input_tokens_seen": 127215344, "step": 58975 }, { "epoch": 9.621533442088092, "grad_norm": 0.07730694860219955, "learning_rate": 0.0006191505951007906, "loss": 0.0293, "num_input_tokens_seen": 127225360, "step": 58980 }, { "epoch": 9.622349102773246, "grad_norm": 0.10588816553354263, "learning_rate": 0.0006190814646690805, "loss": 0.0155, "num_input_tokens_seen": 127236144, "step": 58985 }, { "epoch": 9.623164763458401, "grad_norm": 0.6818053722381592, "learning_rate": 0.0006190123318240335, "loss": 0.0562, "num_input_tokens_seen": 127245808, "step": 58990 }, { "epoch": 9.623980424143557, "grad_norm": 0.02543734386563301, "learning_rate": 0.0006189431965670507, "loss": 0.0335, "num_input_tokens_seen": 127256432, "step": 58995 }, { "epoch": 9.624796084828711, "grad_norm": 0.020855382084846497, "learning_rate": 0.0006188740588995331, "loss": 0.051, "num_input_tokens_seen": 127266736, "step": 59000 }, { "epoch": 9.625611745513867, "grad_norm": 0.370516836643219, "learning_rate": 0.000618804918822882, "loss": 0.1542, "num_input_tokens_seen": 127278928, "step": 59005 }, { "epoch": 9.62642740619902, "grad_norm": 0.05113256722688675, "learning_rate": 0.0006187357763384982, "loss": 0.0081, "num_input_tokens_seen": 127288176, "step": 59010 }, { "epoch": 9.627243066884176, "grad_norm": 0.4826485812664032, "learning_rate": 0.0006186666314477835, "loss": 0.1846, "num_input_tokens_seen": 127298960, "step": 59015 }, { "epoch": 9.62805872756933, "grad_norm": 0.0074044340290129185, "learning_rate": 0.0006185974841521389, "loss": 0.068, "num_input_tokens_seen": 127309936, "step": 59020 }, { "epoch": 9.628874388254486, "grad_norm": 0.024083927273750305, "learning_rate": 0.0006185283344529659, "loss": 0.1293, "num_input_tokens_seen": 127321904, "step": 59025 }, { "epoch": 9.629690048939642, "grad_norm": 0.07417862862348557, "learning_rate": 0.0006184591823516658, "loss": 0.0219, "num_input_tokens_seen": 127333392, "step": 59030 }, { "epoch": 9.630505709624796, "grad_norm": 0.006417748052626848, "learning_rate": 0.00061839002784964, "loss": 0.1108, "num_input_tokens_seen": 127345392, "step": 59035 }, { "epoch": 9.631321370309951, "grad_norm": 0.010149630717933178, "learning_rate": 0.0006183208709482903, "loss": 0.0101, "num_input_tokens_seen": 127354480, "step": 59040 }, { "epoch": 9.632137030995105, "grad_norm": 0.12620042264461517, "learning_rate": 0.0006182517116490179, "loss": 0.0919, "num_input_tokens_seen": 127365936, "step": 59045 }, { "epoch": 9.632952691680261, "grad_norm": 0.006073113530874252, "learning_rate": 0.0006181825499532247, "loss": 0.0324, "num_input_tokens_seen": 127376656, "step": 59050 }, { "epoch": 9.633768352365417, "grad_norm": 0.09124762564897537, "learning_rate": 0.000618113385862312, "loss": 0.1004, "num_input_tokens_seen": 127387376, "step": 59055 }, { "epoch": 9.63458401305057, "grad_norm": 0.06786935776472092, "learning_rate": 0.0006180442193776818, "loss": 0.078, "num_input_tokens_seen": 127397840, "step": 59060 }, { "epoch": 9.635399673735726, "grad_norm": 0.30001193284988403, "learning_rate": 0.0006179750505007357, "loss": 0.0716, "num_input_tokens_seen": 127409264, "step": 59065 }, { "epoch": 9.63621533442088, "grad_norm": 0.38568803668022156, "learning_rate": 0.0006179058792328756, "loss": 0.0553, "num_input_tokens_seen": 127420016, "step": 59070 }, { "epoch": 9.637030995106036, "grad_norm": 0.011678035371005535, "learning_rate": 0.0006178367055755032, "loss": 0.0511, "num_input_tokens_seen": 127431152, "step": 59075 }, { "epoch": 9.63784665579119, "grad_norm": 0.12551720440387726, "learning_rate": 0.0006177675295300206, "loss": 0.0713, "num_input_tokens_seen": 127441872, "step": 59080 }, { "epoch": 9.638662316476346, "grad_norm": 0.17559421062469482, "learning_rate": 0.0006176983510978296, "loss": 0.0362, "num_input_tokens_seen": 127452464, "step": 59085 }, { "epoch": 9.639477977161501, "grad_norm": 0.24982838332653046, "learning_rate": 0.000617629170280332, "loss": 0.1969, "num_input_tokens_seen": 127463568, "step": 59090 }, { "epoch": 9.640293637846655, "grad_norm": 0.016090940684080124, "learning_rate": 0.0006175599870789301, "loss": 0.0097, "num_input_tokens_seen": 127473168, "step": 59095 }, { "epoch": 9.641109298531811, "grad_norm": 0.010737456381320953, "learning_rate": 0.000617490801495026, "loss": 0.021, "num_input_tokens_seen": 127484592, "step": 59100 }, { "epoch": 9.641924959216965, "grad_norm": 0.04577292129397392, "learning_rate": 0.0006174216135300219, "loss": 0.0524, "num_input_tokens_seen": 127496400, "step": 59105 }, { "epoch": 9.64274061990212, "grad_norm": 0.0745958611369133, "learning_rate": 0.0006173524231853197, "loss": 0.0471, "num_input_tokens_seen": 127507600, "step": 59110 }, { "epoch": 9.643556280587276, "grad_norm": 0.005202131345868111, "learning_rate": 0.0006172832304623217, "loss": 0.0398, "num_input_tokens_seen": 127519120, "step": 59115 }, { "epoch": 9.64437194127243, "grad_norm": 0.011529003269970417, "learning_rate": 0.0006172140353624304, "loss": 0.0368, "num_input_tokens_seen": 127529808, "step": 59120 }, { "epoch": 9.645187601957586, "grad_norm": 0.3066558539867401, "learning_rate": 0.0006171448378870479, "loss": 0.0429, "num_input_tokens_seen": 127541296, "step": 59125 }, { "epoch": 9.64600326264274, "grad_norm": 0.44579315185546875, "learning_rate": 0.0006170756380375766, "loss": 0.1892, "num_input_tokens_seen": 127552528, "step": 59130 }, { "epoch": 9.646818923327896, "grad_norm": 0.13914191722869873, "learning_rate": 0.000617006435815419, "loss": 0.1162, "num_input_tokens_seen": 127564368, "step": 59135 }, { "epoch": 9.647634584013051, "grad_norm": 0.061775900423526764, "learning_rate": 0.0006169372312219777, "loss": 0.0439, "num_input_tokens_seen": 127574352, "step": 59140 }, { "epoch": 9.648450244698205, "grad_norm": 0.5701382756233215, "learning_rate": 0.0006168680242586549, "loss": 0.0195, "num_input_tokens_seen": 127585680, "step": 59145 }, { "epoch": 9.649265905383361, "grad_norm": 0.013736234046518803, "learning_rate": 0.0006167988149268533, "loss": 0.0221, "num_input_tokens_seen": 127596272, "step": 59150 }, { "epoch": 9.650081566068515, "grad_norm": 0.19821572303771973, "learning_rate": 0.0006167296032279757, "loss": 0.1068, "num_input_tokens_seen": 127608240, "step": 59155 }, { "epoch": 9.65089722675367, "grad_norm": 0.04824179410934448, "learning_rate": 0.0006166603891634245, "loss": 0.0639, "num_input_tokens_seen": 127619376, "step": 59160 }, { "epoch": 9.651712887438826, "grad_norm": 0.013447104021906853, "learning_rate": 0.0006165911727346025, "loss": 0.0363, "num_input_tokens_seen": 127629712, "step": 59165 }, { "epoch": 9.65252854812398, "grad_norm": 0.008100142702460289, "learning_rate": 0.0006165219539429126, "loss": 0.0147, "num_input_tokens_seen": 127640304, "step": 59170 }, { "epoch": 9.653344208809136, "grad_norm": 0.012568583711981773, "learning_rate": 0.0006164527327897574, "loss": 0.0191, "num_input_tokens_seen": 127650960, "step": 59175 }, { "epoch": 9.65415986949429, "grad_norm": 0.05765538290143013, "learning_rate": 0.0006163835092765399, "loss": 0.068, "num_input_tokens_seen": 127661200, "step": 59180 }, { "epoch": 9.654975530179446, "grad_norm": 0.2561284899711609, "learning_rate": 0.0006163142834046629, "loss": 0.0644, "num_input_tokens_seen": 127671568, "step": 59185 }, { "epoch": 9.655791190864601, "grad_norm": 0.5109092593193054, "learning_rate": 0.0006162450551755295, "loss": 0.088, "num_input_tokens_seen": 127682256, "step": 59190 }, { "epoch": 9.656606851549755, "grad_norm": 0.12591546773910522, "learning_rate": 0.0006161758245905423, "loss": 0.0238, "num_input_tokens_seen": 127692336, "step": 59195 }, { "epoch": 9.65742251223491, "grad_norm": 0.12105773389339447, "learning_rate": 0.0006161065916511047, "loss": 0.0366, "num_input_tokens_seen": 127702384, "step": 59200 }, { "epoch": 9.658238172920065, "grad_norm": 0.11443740874528885, "learning_rate": 0.0006160373563586199, "loss": 0.0104, "num_input_tokens_seen": 127714448, "step": 59205 }, { "epoch": 9.65905383360522, "grad_norm": 0.5324103236198425, "learning_rate": 0.0006159681187144909, "loss": 0.0317, "num_input_tokens_seen": 127726320, "step": 59210 }, { "epoch": 9.659869494290374, "grad_norm": 0.026588650420308113, "learning_rate": 0.0006158988787201208, "loss": 0.0363, "num_input_tokens_seen": 127737232, "step": 59215 }, { "epoch": 9.66068515497553, "grad_norm": 0.7043901681900024, "learning_rate": 0.0006158296363769128, "loss": 0.0419, "num_input_tokens_seen": 127747248, "step": 59220 }, { "epoch": 9.661500815660686, "grad_norm": 0.008077929727733135, "learning_rate": 0.0006157603916862703, "loss": 0.0042, "num_input_tokens_seen": 127758416, "step": 59225 }, { "epoch": 9.66231647634584, "grad_norm": 0.029939042404294014, "learning_rate": 0.0006156911446495967, "loss": 0.0102, "num_input_tokens_seen": 127768304, "step": 59230 }, { "epoch": 9.663132137030995, "grad_norm": 0.015461000613868237, "learning_rate": 0.0006156218952682953, "loss": 0.1658, "num_input_tokens_seen": 127779344, "step": 59235 }, { "epoch": 9.66394779771615, "grad_norm": 0.1992121934890747, "learning_rate": 0.0006155526435437694, "loss": 0.1642, "num_input_tokens_seen": 127789680, "step": 59240 }, { "epoch": 9.664763458401305, "grad_norm": 0.25618046522140503, "learning_rate": 0.0006154833894774226, "loss": 0.0321, "num_input_tokens_seen": 127800880, "step": 59245 }, { "epoch": 9.66557911908646, "grad_norm": 0.01203316543251276, "learning_rate": 0.0006154141330706586, "loss": 0.077, "num_input_tokens_seen": 127810832, "step": 59250 }, { "epoch": 9.666394779771615, "grad_norm": 0.0243932344019413, "learning_rate": 0.0006153448743248805, "loss": 0.0096, "num_input_tokens_seen": 127822064, "step": 59255 }, { "epoch": 9.66721044045677, "grad_norm": 0.008688291534781456, "learning_rate": 0.0006152756132414924, "loss": 0.0283, "num_input_tokens_seen": 127834288, "step": 59260 }, { "epoch": 9.668026101141924, "grad_norm": 0.3940548598766327, "learning_rate": 0.0006152063498218977, "loss": 0.1303, "num_input_tokens_seen": 127844880, "step": 59265 }, { "epoch": 9.66884176182708, "grad_norm": 0.17932185530662537, "learning_rate": 0.0006151370840675001, "loss": 0.0312, "num_input_tokens_seen": 127857104, "step": 59270 }, { "epoch": 9.669657422512234, "grad_norm": 0.5375314950942993, "learning_rate": 0.0006150678159797034, "loss": 0.0359, "num_input_tokens_seen": 127869008, "step": 59275 }, { "epoch": 9.67047308319739, "grad_norm": 0.14569474756717682, "learning_rate": 0.0006149985455599115, "loss": 0.0261, "num_input_tokens_seen": 127880432, "step": 59280 }, { "epoch": 9.671288743882545, "grad_norm": 0.13653336465358734, "learning_rate": 0.0006149292728095283, "loss": 0.0959, "num_input_tokens_seen": 127890384, "step": 59285 }, { "epoch": 9.6721044045677, "grad_norm": 0.003455261467024684, "learning_rate": 0.0006148599977299575, "loss": 0.1408, "num_input_tokens_seen": 127901904, "step": 59290 }, { "epoch": 9.672920065252855, "grad_norm": 0.08904647827148438, "learning_rate": 0.0006147907203226031, "loss": 0.0311, "num_input_tokens_seen": 127910736, "step": 59295 }, { "epoch": 9.673735725938009, "grad_norm": 0.35068783164024353, "learning_rate": 0.0006147214405888692, "loss": 0.0941, "num_input_tokens_seen": 127921552, "step": 59300 }, { "epoch": 9.674551386623165, "grad_norm": 0.03405596315860748, "learning_rate": 0.0006146521585301596, "loss": 0.0159, "num_input_tokens_seen": 127932816, "step": 59305 }, { "epoch": 9.67536704730832, "grad_norm": 0.4466707110404968, "learning_rate": 0.0006145828741478788, "loss": 0.0601, "num_input_tokens_seen": 127942608, "step": 59310 }, { "epoch": 9.676182707993474, "grad_norm": 0.005472542718052864, "learning_rate": 0.0006145135874434305, "loss": 0.0758, "num_input_tokens_seen": 127952816, "step": 59315 }, { "epoch": 9.67699836867863, "grad_norm": 0.05396012216806412, "learning_rate": 0.0006144442984182193, "loss": 0.0077, "num_input_tokens_seen": 127964048, "step": 59320 }, { "epoch": 9.677814029363784, "grad_norm": 0.743674099445343, "learning_rate": 0.0006143750070736491, "loss": 0.039, "num_input_tokens_seen": 127974864, "step": 59325 }, { "epoch": 9.67862969004894, "grad_norm": 0.006385708227753639, "learning_rate": 0.0006143057134111243, "loss": 0.0223, "num_input_tokens_seen": 127985040, "step": 59330 }, { "epoch": 9.679445350734095, "grad_norm": 0.11342454701662064, "learning_rate": 0.0006142364174320492, "loss": 0.0422, "num_input_tokens_seen": 127996432, "step": 59335 }, { "epoch": 9.68026101141925, "grad_norm": 0.38768672943115234, "learning_rate": 0.0006141671191378281, "loss": 0.1784, "num_input_tokens_seen": 128006736, "step": 59340 }, { "epoch": 9.681076672104405, "grad_norm": 0.29378315806388855, "learning_rate": 0.0006140978185298656, "loss": 0.0571, "num_input_tokens_seen": 128018608, "step": 59345 }, { "epoch": 9.681892332789559, "grad_norm": 0.028993673622608185, "learning_rate": 0.0006140285156095661, "loss": 0.1404, "num_input_tokens_seen": 128029072, "step": 59350 }, { "epoch": 9.682707993474715, "grad_norm": 0.14155976474285126, "learning_rate": 0.0006139592103783339, "loss": 0.1134, "num_input_tokens_seen": 128039504, "step": 59355 }, { "epoch": 9.68352365415987, "grad_norm": 0.02189723215997219, "learning_rate": 0.000613889902837574, "loss": 0.0299, "num_input_tokens_seen": 128048880, "step": 59360 }, { "epoch": 9.684339314845024, "grad_norm": 0.023539388552308083, "learning_rate": 0.0006138205929886905, "loss": 0.0305, "num_input_tokens_seen": 128060016, "step": 59365 }, { "epoch": 9.68515497553018, "grad_norm": 0.010362669825553894, "learning_rate": 0.0006137512808330884, "loss": 0.0347, "num_input_tokens_seen": 128071248, "step": 59370 }, { "epoch": 9.685970636215334, "grad_norm": 0.529768168926239, "learning_rate": 0.0006136819663721722, "loss": 0.125, "num_input_tokens_seen": 128082544, "step": 59375 }, { "epoch": 9.68678629690049, "grad_norm": 0.571416437625885, "learning_rate": 0.0006136126496073469, "loss": 0.0569, "num_input_tokens_seen": 128093968, "step": 59380 }, { "epoch": 9.687601957585644, "grad_norm": 0.010441435500979424, "learning_rate": 0.0006135433305400169, "loss": 0.0347, "num_input_tokens_seen": 128104112, "step": 59385 }, { "epoch": 9.6884176182708, "grad_norm": 0.021858040243387222, "learning_rate": 0.0006134740091715875, "loss": 0.0167, "num_input_tokens_seen": 128114704, "step": 59390 }, { "epoch": 9.689233278955955, "grad_norm": 0.005157130304723978, "learning_rate": 0.0006134046855034631, "loss": 0.0649, "num_input_tokens_seen": 128124976, "step": 59395 }, { "epoch": 9.690048939641109, "grad_norm": 0.07616396993398666, "learning_rate": 0.0006133353595370491, "loss": 0.0189, "num_input_tokens_seen": 128135696, "step": 59400 }, { "epoch": 9.690864600326265, "grad_norm": 0.1567595899105072, "learning_rate": 0.0006132660312737502, "loss": 0.0087, "num_input_tokens_seen": 128145872, "step": 59405 }, { "epoch": 9.691680261011419, "grad_norm": 0.01609027199447155, "learning_rate": 0.0006131967007149716, "loss": 0.109, "num_input_tokens_seen": 128158800, "step": 59410 }, { "epoch": 9.692495921696574, "grad_norm": 0.4005237817764282, "learning_rate": 0.000613127367862118, "loss": 0.1556, "num_input_tokens_seen": 128170192, "step": 59415 }, { "epoch": 9.69331158238173, "grad_norm": 0.25563105940818787, "learning_rate": 0.0006130580327165949, "loss": 0.0764, "num_input_tokens_seen": 128181872, "step": 59420 }, { "epoch": 9.694127243066884, "grad_norm": 0.005286304280161858, "learning_rate": 0.0006129886952798074, "loss": 0.0259, "num_input_tokens_seen": 128193904, "step": 59425 }, { "epoch": 9.69494290375204, "grad_norm": 0.563501238822937, "learning_rate": 0.0006129193555531606, "loss": 0.1461, "num_input_tokens_seen": 128206064, "step": 59430 }, { "epoch": 9.695758564437194, "grad_norm": 0.6357414126396179, "learning_rate": 0.0006128500135380598, "loss": 0.0491, "num_input_tokens_seen": 128216848, "step": 59435 }, { "epoch": 9.69657422512235, "grad_norm": 0.5962631106376648, "learning_rate": 0.0006127806692359103, "loss": 0.0507, "num_input_tokens_seen": 128227632, "step": 59440 }, { "epoch": 9.697389885807503, "grad_norm": 0.07188794016838074, "learning_rate": 0.0006127113226481175, "loss": 0.0592, "num_input_tokens_seen": 128237552, "step": 59445 }, { "epoch": 9.698205546492659, "grad_norm": 0.0068653360940515995, "learning_rate": 0.0006126419737760868, "loss": 0.0299, "num_input_tokens_seen": 128248272, "step": 59450 }, { "epoch": 9.699021207177815, "grad_norm": 0.1035524383187294, "learning_rate": 0.0006125726226212236, "loss": 0.1173, "num_input_tokens_seen": 128259216, "step": 59455 }, { "epoch": 9.699836867862969, "grad_norm": 0.06350940465927124, "learning_rate": 0.0006125032691849333, "loss": 0.0342, "num_input_tokens_seen": 128269360, "step": 59460 }, { "epoch": 9.700652528548124, "grad_norm": 0.04358367621898651, "learning_rate": 0.0006124339134686216, "loss": 0.0092, "num_input_tokens_seen": 128280816, "step": 59465 }, { "epoch": 9.701468189233278, "grad_norm": 0.24621592462062836, "learning_rate": 0.0006123645554736941, "loss": 0.0996, "num_input_tokens_seen": 128292240, "step": 59470 }, { "epoch": 9.702283849918434, "grad_norm": 0.04358672350645065, "learning_rate": 0.0006122951952015562, "loss": 0.0193, "num_input_tokens_seen": 128303056, "step": 59475 }, { "epoch": 9.70309951060359, "grad_norm": 0.03317830711603165, "learning_rate": 0.0006122258326536138, "loss": 0.1288, "num_input_tokens_seen": 128314320, "step": 59480 }, { "epoch": 9.703915171288743, "grad_norm": 0.33884885907173157, "learning_rate": 0.0006121564678312724, "loss": 0.0711, "num_input_tokens_seen": 128326000, "step": 59485 }, { "epoch": 9.7047308319739, "grad_norm": 0.20701944828033447, "learning_rate": 0.0006120871007359381, "loss": 0.1154, "num_input_tokens_seen": 128335440, "step": 59490 }, { "epoch": 9.705546492659053, "grad_norm": 0.13118068873882294, "learning_rate": 0.0006120177313690164, "loss": 0.0506, "num_input_tokens_seen": 128345936, "step": 59495 }, { "epoch": 9.706362153344209, "grad_norm": 0.1921592801809311, "learning_rate": 0.0006119483597319132, "loss": 0.1192, "num_input_tokens_seen": 128355184, "step": 59500 }, { "epoch": 9.707177814029365, "grad_norm": 0.04181896522641182, "learning_rate": 0.0006118789858260347, "loss": 0.0737, "num_input_tokens_seen": 128365168, "step": 59505 }, { "epoch": 9.707993474714518, "grad_norm": 0.007728222291916609, "learning_rate": 0.0006118096096527863, "loss": 0.0103, "num_input_tokens_seen": 128376784, "step": 59510 }, { "epoch": 9.708809135399674, "grad_norm": 0.14828740060329437, "learning_rate": 0.0006117402312135746, "loss": 0.0535, "num_input_tokens_seen": 128388400, "step": 59515 }, { "epoch": 9.709624796084828, "grad_norm": 0.4409679174423218, "learning_rate": 0.0006116708505098051, "loss": 0.0825, "num_input_tokens_seen": 128398544, "step": 59520 }, { "epoch": 9.710440456769984, "grad_norm": 0.006540243048220873, "learning_rate": 0.0006116014675428842, "loss": 0.0178, "num_input_tokens_seen": 128409328, "step": 59525 }, { "epoch": 9.71125611745514, "grad_norm": 0.008600655011832714, "learning_rate": 0.0006115320823142182, "loss": 0.0077, "num_input_tokens_seen": 128419600, "step": 59530 }, { "epoch": 9.712071778140293, "grad_norm": 0.011281375773251057, "learning_rate": 0.000611462694825213, "loss": 0.0155, "num_input_tokens_seen": 128430960, "step": 59535 }, { "epoch": 9.71288743882545, "grad_norm": 0.0032797667663544416, "learning_rate": 0.0006113933050772749, "loss": 0.045, "num_input_tokens_seen": 128440720, "step": 59540 }, { "epoch": 9.713703099510603, "grad_norm": 0.08857857435941696, "learning_rate": 0.00061132391307181, "loss": 0.1242, "num_input_tokens_seen": 128450704, "step": 59545 }, { "epoch": 9.714518760195759, "grad_norm": 0.647238552570343, "learning_rate": 0.0006112545188102249, "loss": 0.2091, "num_input_tokens_seen": 128460432, "step": 59550 }, { "epoch": 9.715334420880914, "grad_norm": 0.5638406872749329, "learning_rate": 0.0006111851222939257, "loss": 0.1426, "num_input_tokens_seen": 128470800, "step": 59555 }, { "epoch": 9.716150081566068, "grad_norm": 0.5339933037757874, "learning_rate": 0.0006111157235243192, "loss": 0.0914, "num_input_tokens_seen": 128479856, "step": 59560 }, { "epoch": 9.716965742251224, "grad_norm": 0.0191995520144701, "learning_rate": 0.0006110463225028114, "loss": 0.0185, "num_input_tokens_seen": 128489680, "step": 59565 }, { "epoch": 9.717781402936378, "grad_norm": 0.031790006905794144, "learning_rate": 0.0006109769192308091, "loss": 0.0827, "num_input_tokens_seen": 128501488, "step": 59570 }, { "epoch": 9.718597063621534, "grad_norm": 0.03342828154563904, "learning_rate": 0.0006109075137097188, "loss": 0.0661, "num_input_tokens_seen": 128513008, "step": 59575 }, { "epoch": 9.719412724306688, "grad_norm": 0.03837617486715317, "learning_rate": 0.0006108381059409469, "loss": 0.1236, "num_input_tokens_seen": 128525520, "step": 59580 }, { "epoch": 9.720228384991843, "grad_norm": 0.1065344288945198, "learning_rate": 0.0006107686959259003, "loss": 0.0237, "num_input_tokens_seen": 128536048, "step": 59585 }, { "epoch": 9.721044045676999, "grad_norm": 0.14159248769283295, "learning_rate": 0.0006106992836659853, "loss": 0.0366, "num_input_tokens_seen": 128546384, "step": 59590 }, { "epoch": 9.721859706362153, "grad_norm": 0.007199563086032867, "learning_rate": 0.0006106298691626091, "loss": 0.0238, "num_input_tokens_seen": 128556368, "step": 59595 }, { "epoch": 9.722675367047309, "grad_norm": 0.5850858092308044, "learning_rate": 0.0006105604524171782, "loss": 0.1876, "num_input_tokens_seen": 128566576, "step": 59600 }, { "epoch": 9.723491027732463, "grad_norm": 0.17327198386192322, "learning_rate": 0.0006104910334310996, "loss": 0.1551, "num_input_tokens_seen": 128576464, "step": 59605 }, { "epoch": 9.724306688417618, "grad_norm": 0.05750057101249695, "learning_rate": 0.0006104216122057799, "loss": 0.0165, "num_input_tokens_seen": 128586480, "step": 59610 }, { "epoch": 9.725122349102774, "grad_norm": 0.36712631583213806, "learning_rate": 0.0006103521887426262, "loss": 0.1236, "num_input_tokens_seen": 128596976, "step": 59615 }, { "epoch": 9.725938009787928, "grad_norm": 0.11014421284198761, "learning_rate": 0.0006102827630430454, "loss": 0.04, "num_input_tokens_seen": 128608592, "step": 59620 }, { "epoch": 9.726753670473084, "grad_norm": 0.28624188899993896, "learning_rate": 0.0006102133351084443, "loss": 0.1479, "num_input_tokens_seen": 128619408, "step": 59625 }, { "epoch": 9.727569331158238, "grad_norm": 0.13876740634441376, "learning_rate": 0.0006101439049402304, "loss": 0.0274, "num_input_tokens_seen": 128631312, "step": 59630 }, { "epoch": 9.728384991843393, "grad_norm": 0.39365214109420776, "learning_rate": 0.0006100744725398105, "loss": 0.0741, "num_input_tokens_seen": 128642672, "step": 59635 }, { "epoch": 9.729200652528547, "grad_norm": 0.4415866732597351, "learning_rate": 0.0006100050379085918, "loss": 0.2069, "num_input_tokens_seen": 128654032, "step": 59640 }, { "epoch": 9.730016313213703, "grad_norm": 0.4772409200668335, "learning_rate": 0.0006099356010479814, "loss": 0.0691, "num_input_tokens_seen": 128664656, "step": 59645 }, { "epoch": 9.730831973898859, "grad_norm": 0.003714944003149867, "learning_rate": 0.0006098661619593866, "loss": 0.0296, "num_input_tokens_seen": 128676048, "step": 59650 }, { "epoch": 9.731647634584013, "grad_norm": 0.20215542614459991, "learning_rate": 0.0006097967206442147, "loss": 0.2111, "num_input_tokens_seen": 128687440, "step": 59655 }, { "epoch": 9.732463295269168, "grad_norm": 0.08912170678377151, "learning_rate": 0.0006097272771038728, "loss": 0.0152, "num_input_tokens_seen": 128696784, "step": 59660 }, { "epoch": 9.733278955954322, "grad_norm": 0.009626642800867558, "learning_rate": 0.0006096578313397687, "loss": 0.0161, "num_input_tokens_seen": 128707888, "step": 59665 }, { "epoch": 9.734094616639478, "grad_norm": 0.010801995173096657, "learning_rate": 0.0006095883833533094, "loss": 0.0246, "num_input_tokens_seen": 128718480, "step": 59670 }, { "epoch": 9.734910277324634, "grad_norm": 0.07456929236650467, "learning_rate": 0.0006095189331459024, "loss": 0.0326, "num_input_tokens_seen": 128728208, "step": 59675 }, { "epoch": 9.735725938009788, "grad_norm": 0.005665457807481289, "learning_rate": 0.0006094494807189555, "loss": 0.0405, "num_input_tokens_seen": 128737904, "step": 59680 }, { "epoch": 9.736541598694943, "grad_norm": 0.003741934895515442, "learning_rate": 0.0006093800260738758, "loss": 0.0333, "num_input_tokens_seen": 128749424, "step": 59685 }, { "epoch": 9.737357259380097, "grad_norm": 0.02974558062851429, "learning_rate": 0.0006093105692120712, "loss": 0.0147, "num_input_tokens_seen": 128760880, "step": 59690 }, { "epoch": 9.738172920065253, "grad_norm": 0.11289545148611069, "learning_rate": 0.0006092411101349492, "loss": 0.041, "num_input_tokens_seen": 128771600, "step": 59695 }, { "epoch": 9.738988580750409, "grad_norm": 0.15293298661708832, "learning_rate": 0.0006091716488439177, "loss": 0.1118, "num_input_tokens_seen": 128780656, "step": 59700 }, { "epoch": 9.739804241435563, "grad_norm": 0.06330209225416183, "learning_rate": 0.0006091021853403841, "loss": 0.0245, "num_input_tokens_seen": 128791376, "step": 59705 }, { "epoch": 9.740619902120718, "grad_norm": 0.05996935814619064, "learning_rate": 0.0006090327196257562, "loss": 0.0353, "num_input_tokens_seen": 128802064, "step": 59710 }, { "epoch": 9.741435562805872, "grad_norm": 0.23148603737354279, "learning_rate": 0.000608963251701442, "loss": 0.0452, "num_input_tokens_seen": 128813232, "step": 59715 }, { "epoch": 9.742251223491028, "grad_norm": 0.1062559261918068, "learning_rate": 0.0006088937815688495, "loss": 0.1489, "num_input_tokens_seen": 128824176, "step": 59720 }, { "epoch": 9.743066884176184, "grad_norm": 0.02797776646912098, "learning_rate": 0.0006088243092293861, "loss": 0.019, "num_input_tokens_seen": 128834512, "step": 59725 }, { "epoch": 9.743882544861338, "grad_norm": 0.444024920463562, "learning_rate": 0.0006087548346844601, "loss": 0.0239, "num_input_tokens_seen": 128846032, "step": 59730 }, { "epoch": 9.744698205546493, "grad_norm": 0.054043132811784744, "learning_rate": 0.0006086853579354793, "loss": 0.1395, "num_input_tokens_seen": 128858000, "step": 59735 }, { "epoch": 9.745513866231647, "grad_norm": 0.3209349811077118, "learning_rate": 0.0006086158789838519, "loss": 0.0366, "num_input_tokens_seen": 128868112, "step": 59740 }, { "epoch": 9.746329526916803, "grad_norm": 0.14267776906490326, "learning_rate": 0.0006085463978309861, "loss": 0.0379, "num_input_tokens_seen": 128879536, "step": 59745 }, { "epoch": 9.747145187601957, "grad_norm": 0.5398485064506531, "learning_rate": 0.0006084769144782897, "loss": 0.1206, "num_input_tokens_seen": 128888912, "step": 59750 }, { "epoch": 9.747960848287113, "grad_norm": 0.2112918645143509, "learning_rate": 0.0006084074289271711, "loss": 0.0203, "num_input_tokens_seen": 128899600, "step": 59755 }, { "epoch": 9.748776508972268, "grad_norm": 0.03452795371413231, "learning_rate": 0.0006083379411790383, "loss": 0.1196, "num_input_tokens_seen": 128910576, "step": 59760 }, { "epoch": 9.749592169657422, "grad_norm": 0.01583356410264969, "learning_rate": 0.0006082684512352997, "loss": 0.1388, "num_input_tokens_seen": 128921200, "step": 59765 }, { "epoch": 9.750407830342578, "grad_norm": 0.018591761589050293, "learning_rate": 0.0006081989590973637, "loss": 0.0161, "num_input_tokens_seen": 128932528, "step": 59770 }, { "epoch": 9.751223491027732, "grad_norm": 0.1268494874238968, "learning_rate": 0.0006081294647666385, "loss": 0.0568, "num_input_tokens_seen": 128942896, "step": 59775 }, { "epoch": 9.752039151712887, "grad_norm": 0.06473544239997864, "learning_rate": 0.0006080599682445325, "loss": 0.0362, "num_input_tokens_seen": 128954224, "step": 59780 }, { "epoch": 9.752854812398043, "grad_norm": 0.5049475431442261, "learning_rate": 0.000607990469532454, "loss": 0.0597, "num_input_tokens_seen": 128965776, "step": 59785 }, { "epoch": 9.753670473083197, "grad_norm": 0.01095609087496996, "learning_rate": 0.0006079209686318119, "loss": 0.0217, "num_input_tokens_seen": 128976560, "step": 59790 }, { "epoch": 9.754486133768353, "grad_norm": 0.04790578782558441, "learning_rate": 0.0006078514655440144, "loss": 0.0334, "num_input_tokens_seen": 128987728, "step": 59795 }, { "epoch": 9.755301794453507, "grad_norm": 0.03613391891121864, "learning_rate": 0.0006077819602704702, "loss": 0.0964, "num_input_tokens_seen": 128998352, "step": 59800 }, { "epoch": 9.756117455138662, "grad_norm": 0.013149875216186047, "learning_rate": 0.0006077124528125877, "loss": 0.0325, "num_input_tokens_seen": 129008208, "step": 59805 }, { "epoch": 9.756933115823816, "grad_norm": 0.07882387936115265, "learning_rate": 0.0006076429431717757, "loss": 0.0289, "num_input_tokens_seen": 129018928, "step": 59810 }, { "epoch": 9.757748776508972, "grad_norm": 0.060114238411188126, "learning_rate": 0.000607573431349443, "loss": 0.067, "num_input_tokens_seen": 129028688, "step": 59815 }, { "epoch": 9.758564437194128, "grad_norm": 0.0033841747790575027, "learning_rate": 0.0006075039173469982, "loss": 0.0114, "num_input_tokens_seen": 129038928, "step": 59820 }, { "epoch": 9.759380097879282, "grad_norm": 0.051176197826862335, "learning_rate": 0.0006074344011658501, "loss": 0.0392, "num_input_tokens_seen": 129049808, "step": 59825 }, { "epoch": 9.760195758564437, "grad_norm": 0.3888872563838959, "learning_rate": 0.0006073648828074077, "loss": 0.129, "num_input_tokens_seen": 129061968, "step": 59830 }, { "epoch": 9.761011419249591, "grad_norm": 0.10704643279314041, "learning_rate": 0.0006072953622730796, "loss": 0.1217, "num_input_tokens_seen": 129073040, "step": 59835 }, { "epoch": 9.761827079934747, "grad_norm": 0.02120734378695488, "learning_rate": 0.0006072258395642748, "loss": 0.0594, "num_input_tokens_seen": 129082704, "step": 59840 }, { "epoch": 9.762642740619903, "grad_norm": 0.6762648820877075, "learning_rate": 0.0006071563146824024, "loss": 0.1674, "num_input_tokens_seen": 129093296, "step": 59845 }, { "epoch": 9.763458401305057, "grad_norm": 0.20323128998279572, "learning_rate": 0.0006070867876288715, "loss": 0.0145, "num_input_tokens_seen": 129103760, "step": 59850 }, { "epoch": 9.764274061990212, "grad_norm": 0.43074461817741394, "learning_rate": 0.0006070172584050908, "loss": 0.0755, "num_input_tokens_seen": 129114992, "step": 59855 }, { "epoch": 9.765089722675366, "grad_norm": 0.015071609057486057, "learning_rate": 0.0006069477270124697, "loss": 0.1237, "num_input_tokens_seen": 129127088, "step": 59860 }, { "epoch": 9.765905383360522, "grad_norm": 0.6018599271774292, "learning_rate": 0.0006068781934524172, "loss": 0.11, "num_input_tokens_seen": 129137712, "step": 59865 }, { "epoch": 9.766721044045678, "grad_norm": 0.3518945872783661, "learning_rate": 0.0006068086577263426, "loss": 0.0512, "num_input_tokens_seen": 129150000, "step": 59870 }, { "epoch": 9.767536704730832, "grad_norm": 0.42258796095848083, "learning_rate": 0.0006067391198356551, "loss": 0.2231, "num_input_tokens_seen": 129161200, "step": 59875 }, { "epoch": 9.768352365415987, "grad_norm": 0.015050526708364487, "learning_rate": 0.0006066695797817638, "loss": 0.0177, "num_input_tokens_seen": 129171664, "step": 59880 }, { "epoch": 9.769168026101141, "grad_norm": 0.1276446133852005, "learning_rate": 0.0006066000375660782, "loss": 0.0332, "num_input_tokens_seen": 129182512, "step": 59885 }, { "epoch": 9.769983686786297, "grad_norm": 0.4870022237300873, "learning_rate": 0.0006065304931900076, "loss": 0.1875, "num_input_tokens_seen": 129193328, "step": 59890 }, { "epoch": 9.770799347471453, "grad_norm": 0.030402760952711105, "learning_rate": 0.0006064609466549614, "loss": 0.0854, "num_input_tokens_seen": 129203792, "step": 59895 }, { "epoch": 9.771615008156607, "grad_norm": 0.01551265362650156, "learning_rate": 0.0006063913979623491, "loss": 0.1107, "num_input_tokens_seen": 129215696, "step": 59900 }, { "epoch": 9.772430668841762, "grad_norm": 0.15786214172840118, "learning_rate": 0.0006063218471135801, "loss": 0.0669, "num_input_tokens_seen": 129226320, "step": 59905 }, { "epoch": 9.773246329526916, "grad_norm": 0.021008562296628952, "learning_rate": 0.0006062522941100639, "loss": 0.0637, "num_input_tokens_seen": 129237392, "step": 59910 }, { "epoch": 9.774061990212072, "grad_norm": 0.007064450066536665, "learning_rate": 0.0006061827389532103, "loss": 0.1521, "num_input_tokens_seen": 129247504, "step": 59915 }, { "epoch": 9.774877650897226, "grad_norm": 0.2623698115348816, "learning_rate": 0.0006061131816444287, "loss": 0.0151, "num_input_tokens_seen": 129257680, "step": 59920 }, { "epoch": 9.775693311582382, "grad_norm": 0.342385858297348, "learning_rate": 0.000606043622185129, "loss": 0.0807, "num_input_tokens_seen": 129268752, "step": 59925 }, { "epoch": 9.776508972267537, "grad_norm": 0.1297520101070404, "learning_rate": 0.0006059740605767207, "loss": 0.0425, "num_input_tokens_seen": 129279280, "step": 59930 }, { "epoch": 9.777324632952691, "grad_norm": 0.5785143375396729, "learning_rate": 0.0006059044968206136, "loss": 0.1462, "num_input_tokens_seen": 129291088, "step": 59935 }, { "epoch": 9.778140293637847, "grad_norm": 0.01792210340499878, "learning_rate": 0.0006058349309182176, "loss": 0.0172, "num_input_tokens_seen": 129301456, "step": 59940 }, { "epoch": 9.778955954323001, "grad_norm": 0.6044672727584839, "learning_rate": 0.0006057653628709424, "loss": 0.1584, "num_input_tokens_seen": 129312240, "step": 59945 }, { "epoch": 9.779771615008157, "grad_norm": 0.44258731603622437, "learning_rate": 0.0006056957926801979, "loss": 0.0988, "num_input_tokens_seen": 129322928, "step": 59950 }, { "epoch": 9.780587275693312, "grad_norm": 0.07836981862783432, "learning_rate": 0.0006056262203473941, "loss": 0.0294, "num_input_tokens_seen": 129333680, "step": 59955 }, { "epoch": 9.781402936378466, "grad_norm": 0.06452058255672455, "learning_rate": 0.000605556645873941, "loss": 0.0253, "num_input_tokens_seen": 129345328, "step": 59960 }, { "epoch": 9.782218597063622, "grad_norm": 0.2443358600139618, "learning_rate": 0.0006054870692612487, "loss": 0.1128, "num_input_tokens_seen": 129357008, "step": 59965 }, { "epoch": 9.783034257748776, "grad_norm": 0.12506158649921417, "learning_rate": 0.0006054174905107269, "loss": 0.0229, "num_input_tokens_seen": 129367920, "step": 59970 }, { "epoch": 9.783849918433932, "grad_norm": 0.06048731878399849, "learning_rate": 0.0006053479096237859, "loss": 0.0637, "num_input_tokens_seen": 129378640, "step": 59975 }, { "epoch": 9.784665579119086, "grad_norm": 0.9736197590827942, "learning_rate": 0.000605278326601836, "loss": 0.0738, "num_input_tokens_seen": 129389744, "step": 59980 }, { "epoch": 9.785481239804241, "grad_norm": 0.005157461389899254, "learning_rate": 0.0006052087414462873, "loss": 0.0435, "num_input_tokens_seen": 129400144, "step": 59985 }, { "epoch": 9.786296900489397, "grad_norm": 0.11301448941230774, "learning_rate": 0.00060513915415855, "loss": 0.0502, "num_input_tokens_seen": 129412048, "step": 59990 }, { "epoch": 9.78711256117455, "grad_norm": 0.004482972901314497, "learning_rate": 0.0006050695647400342, "loss": 0.0116, "num_input_tokens_seen": 129423504, "step": 59995 }, { "epoch": 9.787928221859707, "grad_norm": 0.048271965235471725, "learning_rate": 0.0006049999731921504, "loss": 0.054, "num_input_tokens_seen": 129434160, "step": 60000 }, { "epoch": 9.78874388254486, "grad_norm": 0.012818471528589725, "learning_rate": 0.0006049303795163091, "loss": 0.0245, "num_input_tokens_seen": 129443824, "step": 60005 }, { "epoch": 9.789559543230016, "grad_norm": 0.44753003120422363, "learning_rate": 0.0006048607837139204, "loss": 0.0687, "num_input_tokens_seen": 129454160, "step": 60010 }, { "epoch": 9.790375203915172, "grad_norm": 0.25669169425964355, "learning_rate": 0.0006047911857863949, "loss": 0.0411, "num_input_tokens_seen": 129464240, "step": 60015 }, { "epoch": 9.791190864600326, "grad_norm": 0.01666855439543724, "learning_rate": 0.0006047215857351431, "loss": 0.0218, "num_input_tokens_seen": 129474768, "step": 60020 }, { "epoch": 9.792006525285482, "grad_norm": 0.02743428200483322, "learning_rate": 0.0006046519835615756, "loss": 0.0241, "num_input_tokens_seen": 129484464, "step": 60025 }, { "epoch": 9.792822185970635, "grad_norm": 0.6244400143623352, "learning_rate": 0.0006045823792671029, "loss": 0.0984, "num_input_tokens_seen": 129494160, "step": 60030 }, { "epoch": 9.793637846655791, "grad_norm": 0.09618891775608063, "learning_rate": 0.0006045127728531354, "loss": 0.0155, "num_input_tokens_seen": 129504176, "step": 60035 }, { "epoch": 9.794453507340947, "grad_norm": 0.0171664971858263, "learning_rate": 0.0006044431643210842, "loss": 0.015, "num_input_tokens_seen": 129514320, "step": 60040 }, { "epoch": 9.7952691680261, "grad_norm": 0.014444014988839626, "learning_rate": 0.0006043735536723595, "loss": 0.1049, "num_input_tokens_seen": 129525360, "step": 60045 }, { "epoch": 9.796084828711257, "grad_norm": 0.011157829314470291, "learning_rate": 0.0006043039409083726, "loss": 0.0985, "num_input_tokens_seen": 129537328, "step": 60050 }, { "epoch": 9.79690048939641, "grad_norm": 0.22508659958839417, "learning_rate": 0.0006042343260305339, "loss": 0.0907, "num_input_tokens_seen": 129548176, "step": 60055 }, { "epoch": 9.797716150081566, "grad_norm": 0.21338315308094025, "learning_rate": 0.0006041647090402544, "loss": 0.2796, "num_input_tokens_seen": 129559856, "step": 60060 }, { "epoch": 9.798531810766722, "grad_norm": 0.0068143438547849655, "learning_rate": 0.0006040950899389449, "loss": 0.0037, "num_input_tokens_seen": 129570064, "step": 60065 }, { "epoch": 9.799347471451876, "grad_norm": 0.14554664492607117, "learning_rate": 0.0006040254687280163, "loss": 0.0603, "num_input_tokens_seen": 129581648, "step": 60070 }, { "epoch": 9.800163132137031, "grad_norm": 0.01601925864815712, "learning_rate": 0.0006039558454088796, "loss": 0.0998, "num_input_tokens_seen": 129592464, "step": 60075 }, { "epoch": 9.800978792822185, "grad_norm": 0.10687457770109177, "learning_rate": 0.0006038862199829459, "loss": 0.0154, "num_input_tokens_seen": 129601968, "step": 60080 }, { "epoch": 9.801794453507341, "grad_norm": 0.09762312471866608, "learning_rate": 0.0006038165924516262, "loss": 0.0497, "num_input_tokens_seen": 129613456, "step": 60085 }, { "epoch": 9.802610114192497, "grad_norm": 0.07222993671894073, "learning_rate": 0.0006037469628163315, "loss": 0.0116, "num_input_tokens_seen": 129624720, "step": 60090 }, { "epoch": 9.80342577487765, "grad_norm": 0.28654274344444275, "learning_rate": 0.000603677331078473, "loss": 0.2241, "num_input_tokens_seen": 129634224, "step": 60095 }, { "epoch": 9.804241435562806, "grad_norm": 0.3556496500968933, "learning_rate": 0.0006036076972394618, "loss": 0.0424, "num_input_tokens_seen": 129643984, "step": 60100 }, { "epoch": 9.80505709624796, "grad_norm": 0.011473250575363636, "learning_rate": 0.0006035380613007093, "loss": 0.0858, "num_input_tokens_seen": 129655344, "step": 60105 }, { "epoch": 9.805872756933116, "grad_norm": 0.004508579149842262, "learning_rate": 0.0006034684232636266, "loss": 0.0636, "num_input_tokens_seen": 129666096, "step": 60110 }, { "epoch": 9.80668841761827, "grad_norm": 0.04873543605208397, "learning_rate": 0.0006033987831296251, "loss": 0.0286, "num_input_tokens_seen": 129677776, "step": 60115 }, { "epoch": 9.807504078303426, "grad_norm": 0.014568612910807133, "learning_rate": 0.0006033291409001159, "loss": 0.0301, "num_input_tokens_seen": 129688656, "step": 60120 }, { "epoch": 9.808319738988581, "grad_norm": 0.020745690912008286, "learning_rate": 0.0006032594965765107, "loss": 0.0531, "num_input_tokens_seen": 129699248, "step": 60125 }, { "epoch": 9.809135399673735, "grad_norm": 0.21380488574504852, "learning_rate": 0.0006031898501602207, "loss": 0.0244, "num_input_tokens_seen": 129709840, "step": 60130 }, { "epoch": 9.809951060358891, "grad_norm": 0.016890889033675194, "learning_rate": 0.0006031202016526576, "loss": 0.1013, "num_input_tokens_seen": 129721296, "step": 60135 }, { "epoch": 9.810766721044045, "grad_norm": 0.010258971713483334, "learning_rate": 0.0006030505510552329, "loss": 0.0235, "num_input_tokens_seen": 129732624, "step": 60140 }, { "epoch": 9.8115823817292, "grad_norm": 0.027959706261754036, "learning_rate": 0.0006029808983693579, "loss": 0.041, "num_input_tokens_seen": 129742704, "step": 60145 }, { "epoch": 9.812398042414356, "grad_norm": 0.0038482893723994493, "learning_rate": 0.0006029112435964444, "loss": 0.1661, "num_input_tokens_seen": 129753072, "step": 60150 }, { "epoch": 9.81321370309951, "grad_norm": 0.09377842396497726, "learning_rate": 0.0006028415867379039, "loss": 0.0957, "num_input_tokens_seen": 129764208, "step": 60155 }, { "epoch": 9.814029363784666, "grad_norm": 0.02694900520145893, "learning_rate": 0.0006027719277951482, "loss": 0.0594, "num_input_tokens_seen": 129774320, "step": 60160 }, { "epoch": 9.81484502446982, "grad_norm": 0.020052198320627213, "learning_rate": 0.000602702266769589, "loss": 0.0125, "num_input_tokens_seen": 129785840, "step": 60165 }, { "epoch": 9.815660685154976, "grad_norm": 0.00816995371133089, "learning_rate": 0.0006026326036626382, "loss": 0.0524, "num_input_tokens_seen": 129796176, "step": 60170 }, { "epoch": 9.81647634584013, "grad_norm": 0.008058549836277962, "learning_rate": 0.0006025629384757075, "loss": 0.037, "num_input_tokens_seen": 129806448, "step": 60175 }, { "epoch": 9.817292006525285, "grad_norm": 0.4470105469226837, "learning_rate": 0.0006024932712102085, "loss": 0.0401, "num_input_tokens_seen": 129817008, "step": 60180 }, { "epoch": 9.818107667210441, "grad_norm": 0.4783194661140442, "learning_rate": 0.0006024236018675537, "loss": 0.0482, "num_input_tokens_seen": 129827696, "step": 60185 }, { "epoch": 9.818923327895595, "grad_norm": 0.09768476337194443, "learning_rate": 0.0006023539304491544, "loss": 0.1356, "num_input_tokens_seen": 129838864, "step": 60190 }, { "epoch": 9.81973898858075, "grad_norm": 0.3727039098739624, "learning_rate": 0.000602284256956423, "loss": 0.1265, "num_input_tokens_seen": 129847824, "step": 60195 }, { "epoch": 9.820554649265905, "grad_norm": 0.05069934204220772, "learning_rate": 0.0006022145813907713, "loss": 0.02, "num_input_tokens_seen": 129858928, "step": 60200 }, { "epoch": 9.82137030995106, "grad_norm": 0.48469915986061096, "learning_rate": 0.0006021449037536114, "loss": 0.2065, "num_input_tokens_seen": 129870160, "step": 60205 }, { "epoch": 9.822185970636216, "grad_norm": 0.0069075883366167545, "learning_rate": 0.0006020752240463555, "loss": 0.0425, "num_input_tokens_seen": 129881712, "step": 60210 }, { "epoch": 9.82300163132137, "grad_norm": 0.0902118980884552, "learning_rate": 0.0006020055422704156, "loss": 0.0512, "num_input_tokens_seen": 129891664, "step": 60215 }, { "epoch": 9.823817292006526, "grad_norm": 0.05141918361186981, "learning_rate": 0.0006019358584272042, "loss": 0.1633, "num_input_tokens_seen": 129901936, "step": 60220 }, { "epoch": 9.82463295269168, "grad_norm": 0.013908959925174713, "learning_rate": 0.0006018661725181332, "loss": 0.156, "num_input_tokens_seen": 129911344, "step": 60225 }, { "epoch": 9.825448613376835, "grad_norm": 0.49641624093055725, "learning_rate": 0.0006017964845446149, "loss": 0.1052, "num_input_tokens_seen": 129922640, "step": 60230 }, { "epoch": 9.826264274061991, "grad_norm": 0.0926760882139206, "learning_rate": 0.0006017267945080618, "loss": 0.1728, "num_input_tokens_seen": 129934768, "step": 60235 }, { "epoch": 9.827079934747145, "grad_norm": 0.4523102641105652, "learning_rate": 0.000601657102409886, "loss": 0.0586, "num_input_tokens_seen": 129945936, "step": 60240 }, { "epoch": 9.8278955954323, "grad_norm": 0.2736942768096924, "learning_rate": 0.0006015874082515003, "loss": 0.0796, "num_input_tokens_seen": 129957968, "step": 60245 }, { "epoch": 9.828711256117455, "grad_norm": 0.1255037486553192, "learning_rate": 0.0006015177120343168, "loss": 0.0853, "num_input_tokens_seen": 129968688, "step": 60250 }, { "epoch": 9.82952691680261, "grad_norm": 0.11677335202693939, "learning_rate": 0.000601448013759748, "loss": 0.0461, "num_input_tokens_seen": 129980016, "step": 60255 }, { "epoch": 9.830342577487766, "grad_norm": 0.03897833824157715, "learning_rate": 0.0006013783134292067, "loss": 0.0247, "num_input_tokens_seen": 129990768, "step": 60260 }, { "epoch": 9.83115823817292, "grad_norm": 0.05676056817173958, "learning_rate": 0.0006013086110441049, "loss": 0.0753, "num_input_tokens_seen": 130000720, "step": 60265 }, { "epoch": 9.831973898858076, "grad_norm": 0.02339472621679306, "learning_rate": 0.0006012389066058559, "loss": 0.0287, "num_input_tokens_seen": 130011088, "step": 60270 }, { "epoch": 9.83278955954323, "grad_norm": 0.11308763921260834, "learning_rate": 0.0006011692001158719, "loss": 0.083, "num_input_tokens_seen": 130022288, "step": 60275 }, { "epoch": 9.833605220228385, "grad_norm": 0.008174906484782696, "learning_rate": 0.0006010994915755659, "loss": 0.0392, "num_input_tokens_seen": 130033232, "step": 60280 }, { "epoch": 9.83442088091354, "grad_norm": 0.02510620839893818, "learning_rate": 0.0006010297809863503, "loss": 0.0115, "num_input_tokens_seen": 130043504, "step": 60285 }, { "epoch": 9.835236541598695, "grad_norm": 0.01666785217821598, "learning_rate": 0.000600960068349638, "loss": 0.0113, "num_input_tokens_seen": 130052528, "step": 60290 }, { "epoch": 9.83605220228385, "grad_norm": 0.26995569467544556, "learning_rate": 0.000600890353666842, "loss": 0.0859, "num_input_tokens_seen": 130064176, "step": 60295 }, { "epoch": 9.836867862969005, "grad_norm": 0.35379454493522644, "learning_rate": 0.0006008206369393748, "loss": 0.0447, "num_input_tokens_seen": 130074896, "step": 60300 }, { "epoch": 9.83768352365416, "grad_norm": 0.5631797313690186, "learning_rate": 0.0006007509181686496, "loss": 0.1055, "num_input_tokens_seen": 130085360, "step": 60305 }, { "epoch": 9.838499184339314, "grad_norm": 0.15511733293533325, "learning_rate": 0.0006006811973560792, "loss": 0.1365, "num_input_tokens_seen": 130096880, "step": 60310 }, { "epoch": 9.83931484502447, "grad_norm": 0.005963537376374006, "learning_rate": 0.0006006114745030766, "loss": 0.0157, "num_input_tokens_seen": 130106864, "step": 60315 }, { "epoch": 9.840130505709626, "grad_norm": 0.5405645966529846, "learning_rate": 0.0006005417496110549, "loss": 0.1064, "num_input_tokens_seen": 130117200, "step": 60320 }, { "epoch": 9.84094616639478, "grad_norm": 0.3312319815158844, "learning_rate": 0.0006004720226814271, "loss": 0.0383, "num_input_tokens_seen": 130128752, "step": 60325 }, { "epoch": 9.841761827079935, "grad_norm": 0.10563849657773972, "learning_rate": 0.0006004022937156062, "loss": 0.0346, "num_input_tokens_seen": 130139024, "step": 60330 }, { "epoch": 9.84257748776509, "grad_norm": 0.03105340525507927, "learning_rate": 0.0006003325627150054, "loss": 0.0299, "num_input_tokens_seen": 130149488, "step": 60335 }, { "epoch": 9.843393148450245, "grad_norm": 0.03994855284690857, "learning_rate": 0.0006002628296810381, "loss": 0.1242, "num_input_tokens_seen": 130160464, "step": 60340 }, { "epoch": 9.844208809135399, "grad_norm": 0.008972239680588245, "learning_rate": 0.0006001930946151172, "loss": 0.091, "num_input_tokens_seen": 130170384, "step": 60345 }, { "epoch": 9.845024469820554, "grad_norm": 0.006627100985497236, "learning_rate": 0.0006001233575186563, "loss": 0.0985, "num_input_tokens_seen": 130181840, "step": 60350 }, { "epoch": 9.84584013050571, "grad_norm": 0.18770357966423035, "learning_rate": 0.0006000536183930684, "loss": 0.0849, "num_input_tokens_seen": 130192528, "step": 60355 }, { "epoch": 9.846655791190864, "grad_norm": 0.3666689395904541, "learning_rate": 0.000599983877239767, "loss": 0.0656, "num_input_tokens_seen": 130204304, "step": 60360 }, { "epoch": 9.84747145187602, "grad_norm": 0.032251257449388504, "learning_rate": 0.0005999141340601657, "loss": 0.053, "num_input_tokens_seen": 130215024, "step": 60365 }, { "epoch": 9.848287112561174, "grad_norm": 0.24580244719982147, "learning_rate": 0.0005998443888556776, "loss": 0.0411, "num_input_tokens_seen": 130224752, "step": 60370 }, { "epoch": 9.84910277324633, "grad_norm": 0.710914671421051, "learning_rate": 0.0005997746416277162, "loss": 0.03, "num_input_tokens_seen": 130235408, "step": 60375 }, { "epoch": 9.849918433931485, "grad_norm": 0.28853854537010193, "learning_rate": 0.0005997048923776953, "loss": 0.0444, "num_input_tokens_seen": 130245680, "step": 60380 }, { "epoch": 9.850734094616639, "grad_norm": 0.03014230541884899, "learning_rate": 0.000599635141107028, "loss": 0.024, "num_input_tokens_seen": 130255984, "step": 60385 }, { "epoch": 9.851549755301795, "grad_norm": 0.2534084618091583, "learning_rate": 0.0005995653878171283, "loss": 0.0674, "num_input_tokens_seen": 130267440, "step": 60390 }, { "epoch": 9.852365415986949, "grad_norm": 0.01732614077627659, "learning_rate": 0.0005994956325094099, "loss": 0.084, "num_input_tokens_seen": 130277072, "step": 60395 }, { "epoch": 9.853181076672104, "grad_norm": 0.3803198039531708, "learning_rate": 0.000599425875185286, "loss": 0.0598, "num_input_tokens_seen": 130287408, "step": 60400 }, { "epoch": 9.85399673735726, "grad_norm": 0.007414644584059715, "learning_rate": 0.0005993561158461708, "loss": 0.0116, "num_input_tokens_seen": 130298160, "step": 60405 }, { "epoch": 9.854812398042414, "grad_norm": 0.5633655786514282, "learning_rate": 0.0005992863544934777, "loss": 0.0592, "num_input_tokens_seen": 130309840, "step": 60410 }, { "epoch": 9.85562805872757, "grad_norm": 0.010347158648073673, "learning_rate": 0.000599216591128621, "loss": 0.0279, "num_input_tokens_seen": 130321040, "step": 60415 }, { "epoch": 9.856443719412724, "grad_norm": 0.06163579598069191, "learning_rate": 0.000599146825753014, "loss": 0.0425, "num_input_tokens_seen": 130332048, "step": 60420 }, { "epoch": 9.85725938009788, "grad_norm": 0.010226025246083736, "learning_rate": 0.0005990770583680707, "loss": 0.0158, "num_input_tokens_seen": 130342000, "step": 60425 }, { "epoch": 9.858075040783035, "grad_norm": 0.2956731617450714, "learning_rate": 0.0005990072889752052, "loss": 0.0361, "num_input_tokens_seen": 130352976, "step": 60430 }, { "epoch": 9.858890701468189, "grad_norm": 0.19398818910121918, "learning_rate": 0.0005989375175758315, "loss": 0.1084, "num_input_tokens_seen": 130364944, "step": 60435 }, { "epoch": 9.859706362153345, "grad_norm": 0.49993371963500977, "learning_rate": 0.0005988677441713633, "loss": 0.1483, "num_input_tokens_seen": 130374928, "step": 60440 }, { "epoch": 9.860522022838499, "grad_norm": 0.5132774710655212, "learning_rate": 0.000598797968763215, "loss": 0.1323, "num_input_tokens_seen": 130384944, "step": 60445 }, { "epoch": 9.861337683523654, "grad_norm": 1.0541963577270508, "learning_rate": 0.0005987281913528006, "loss": 0.1009, "num_input_tokens_seen": 130395600, "step": 60450 }, { "epoch": 9.86215334420881, "grad_norm": 0.3232889771461487, "learning_rate": 0.0005986584119415339, "loss": 0.0664, "num_input_tokens_seen": 130406352, "step": 60455 }, { "epoch": 9.862969004893964, "grad_norm": 0.0038960338570177555, "learning_rate": 0.0005985886305308295, "loss": 0.0073, "num_input_tokens_seen": 130418160, "step": 60460 }, { "epoch": 9.86378466557912, "grad_norm": 0.3589342534542084, "learning_rate": 0.0005985188471221014, "loss": 0.0711, "num_input_tokens_seen": 130427952, "step": 60465 }, { "epoch": 9.864600326264274, "grad_norm": 0.38039344549179077, "learning_rate": 0.0005984490617167639, "loss": 0.1004, "num_input_tokens_seen": 130439856, "step": 60470 }, { "epoch": 9.86541598694943, "grad_norm": 0.0035143066197633743, "learning_rate": 0.0005983792743162313, "loss": 0.0694, "num_input_tokens_seen": 130450992, "step": 60475 }, { "epoch": 9.866231647634583, "grad_norm": 0.06519393622875214, "learning_rate": 0.0005983094849219177, "loss": 0.1229, "num_input_tokens_seen": 130459696, "step": 60480 }, { "epoch": 9.867047308319739, "grad_norm": 0.004261984955519438, "learning_rate": 0.0005982396935352379, "loss": 0.0207, "num_input_tokens_seen": 130470672, "step": 60485 }, { "epoch": 9.867862969004895, "grad_norm": 0.11394942551851273, "learning_rate": 0.000598169900157606, "loss": 0.0157, "num_input_tokens_seen": 130482032, "step": 60490 }, { "epoch": 9.868678629690049, "grad_norm": 0.02758982591331005, "learning_rate": 0.0005981001047904365, "loss": 0.0062, "num_input_tokens_seen": 130492720, "step": 60495 }, { "epoch": 9.869494290375204, "grad_norm": 0.026079881936311722, "learning_rate": 0.000598030307435144, "loss": 0.1078, "num_input_tokens_seen": 130504240, "step": 60500 }, { "epoch": 9.870309951060358, "grad_norm": 0.004437056370079517, "learning_rate": 0.000597960508093143, "loss": 0.0426, "num_input_tokens_seen": 130515088, "step": 60505 }, { "epoch": 9.871125611745514, "grad_norm": 0.007827182300388813, "learning_rate": 0.0005978907067658479, "loss": 0.1124, "num_input_tokens_seen": 130526512, "step": 60510 }, { "epoch": 9.87194127243067, "grad_norm": 0.009335353039205074, "learning_rate": 0.0005978209034546736, "loss": 0.0476, "num_input_tokens_seen": 130537104, "step": 60515 }, { "epoch": 9.872756933115824, "grad_norm": 0.09219788014888763, "learning_rate": 0.0005977510981610344, "loss": 0.0278, "num_input_tokens_seen": 130549264, "step": 60520 }, { "epoch": 9.87357259380098, "grad_norm": 0.13047052919864655, "learning_rate": 0.0005976812908863454, "loss": 0.0187, "num_input_tokens_seen": 130559120, "step": 60525 }, { "epoch": 9.874388254486133, "grad_norm": 0.21799753606319427, "learning_rate": 0.0005976114816320208, "loss": 0.0604, "num_input_tokens_seen": 130569328, "step": 60530 }, { "epoch": 9.875203915171289, "grad_norm": 0.01951451599597931, "learning_rate": 0.000597541670399476, "loss": 0.0145, "num_input_tokens_seen": 130579376, "step": 60535 }, { "epoch": 9.876019575856443, "grad_norm": 0.095870740711689, "learning_rate": 0.0005974718571901254, "loss": 0.0331, "num_input_tokens_seen": 130591536, "step": 60540 }, { "epoch": 9.876835236541599, "grad_norm": 0.4140845835208893, "learning_rate": 0.0005974020420053841, "loss": 0.0267, "num_input_tokens_seen": 130602000, "step": 60545 }, { "epoch": 9.877650897226754, "grad_norm": 0.07865575700998306, "learning_rate": 0.0005973322248466666, "loss": 0.0146, "num_input_tokens_seen": 130611952, "step": 60550 }, { "epoch": 9.878466557911908, "grad_norm": 0.06738736480474472, "learning_rate": 0.0005972624057153882, "loss": 0.0327, "num_input_tokens_seen": 130622704, "step": 60555 }, { "epoch": 9.879282218597064, "grad_norm": 0.4741089642047882, "learning_rate": 0.0005971925846129639, "loss": 0.1844, "num_input_tokens_seen": 130632304, "step": 60560 }, { "epoch": 9.880097879282218, "grad_norm": 0.027765685692429543, "learning_rate": 0.0005971227615408084, "loss": 0.1377, "num_input_tokens_seen": 130641872, "step": 60565 }, { "epoch": 9.880913539967374, "grad_norm": 0.05565100535750389, "learning_rate": 0.0005970529365003371, "loss": 0.0382, "num_input_tokens_seen": 130652400, "step": 60570 }, { "epoch": 9.88172920065253, "grad_norm": 0.22095130383968353, "learning_rate": 0.0005969831094929648, "loss": 0.0757, "num_input_tokens_seen": 130663408, "step": 60575 }, { "epoch": 9.882544861337683, "grad_norm": 0.1838729977607727, "learning_rate": 0.0005969132805201067, "loss": 0.0918, "num_input_tokens_seen": 130674640, "step": 60580 }, { "epoch": 9.883360522022839, "grad_norm": 0.949181854724884, "learning_rate": 0.0005968434495831781, "loss": 0.0375, "num_input_tokens_seen": 130684336, "step": 60585 }, { "epoch": 9.884176182707993, "grad_norm": 0.12807956337928772, "learning_rate": 0.000596773616683594, "loss": 0.0294, "num_input_tokens_seen": 130694608, "step": 60590 }, { "epoch": 9.884991843393149, "grad_norm": 0.01616070792078972, "learning_rate": 0.0005967037818227701, "loss": 0.1875, "num_input_tokens_seen": 130704944, "step": 60595 }, { "epoch": 9.885807504078304, "grad_norm": 0.709864616394043, "learning_rate": 0.0005966339450021212, "loss": 0.0753, "num_input_tokens_seen": 130716336, "step": 60600 }, { "epoch": 9.886623164763458, "grad_norm": 0.08465084433555603, "learning_rate": 0.0005965641062230627, "loss": 0.1019, "num_input_tokens_seen": 130726896, "step": 60605 }, { "epoch": 9.887438825448614, "grad_norm": 0.12790916860103607, "learning_rate": 0.0005964942654870103, "loss": 0.0079, "num_input_tokens_seen": 130738000, "step": 60610 }, { "epoch": 9.888254486133768, "grad_norm": 0.10872706770896912, "learning_rate": 0.0005964244227953791, "loss": 0.1714, "num_input_tokens_seen": 130747312, "step": 60615 }, { "epoch": 9.889070146818923, "grad_norm": 0.15647926926612854, "learning_rate": 0.0005963545781495847, "loss": 0.0382, "num_input_tokens_seen": 130758832, "step": 60620 }, { "epoch": 9.88988580750408, "grad_norm": 0.09480641782283783, "learning_rate": 0.0005962847315510426, "loss": 0.0341, "num_input_tokens_seen": 130769456, "step": 60625 }, { "epoch": 9.890701468189233, "grad_norm": 0.45121249556541443, "learning_rate": 0.0005962148830011681, "loss": 0.1677, "num_input_tokens_seen": 130780432, "step": 60630 }, { "epoch": 9.891517128874389, "grad_norm": 0.00740869389846921, "learning_rate": 0.0005961450325013771, "loss": 0.1508, "num_input_tokens_seen": 130791216, "step": 60635 }, { "epoch": 9.892332789559543, "grad_norm": 0.00788277480751276, "learning_rate": 0.0005960751800530849, "loss": 0.0221, "num_input_tokens_seen": 130802096, "step": 60640 }, { "epoch": 9.893148450244698, "grad_norm": 0.11565227806568146, "learning_rate": 0.0005960053256577073, "loss": 0.1051, "num_input_tokens_seen": 130812208, "step": 60645 }, { "epoch": 9.893964110929852, "grad_norm": 1.633353352546692, "learning_rate": 0.0005959354693166601, "loss": 0.0678, "num_input_tokens_seen": 130821552, "step": 60650 }, { "epoch": 9.894779771615008, "grad_norm": 0.023939277976751328, "learning_rate": 0.0005958656110313589, "loss": 0.0117, "num_input_tokens_seen": 130831152, "step": 60655 }, { "epoch": 9.895595432300164, "grad_norm": 0.32127076387405396, "learning_rate": 0.0005957957508032194, "loss": 0.0186, "num_input_tokens_seen": 130841296, "step": 60660 }, { "epoch": 9.896411092985318, "grad_norm": 0.09769389033317566, "learning_rate": 0.0005957258886336575, "loss": 0.1578, "num_input_tokens_seen": 130851824, "step": 60665 }, { "epoch": 9.897226753670473, "grad_norm": 0.28010886907577515, "learning_rate": 0.0005956560245240891, "loss": 0.028, "num_input_tokens_seen": 130862512, "step": 60670 }, { "epoch": 9.898042414355627, "grad_norm": 0.016588253900408745, "learning_rate": 0.0005955861584759298, "loss": 0.0372, "num_input_tokens_seen": 130872368, "step": 60675 }, { "epoch": 9.898858075040783, "grad_norm": 0.7766941785812378, "learning_rate": 0.0005955162904905959, "loss": 0.0973, "num_input_tokens_seen": 130882992, "step": 60680 }, { "epoch": 9.899673735725939, "grad_norm": 0.00947907380759716, "learning_rate": 0.0005954464205695033, "loss": 0.028, "num_input_tokens_seen": 130894640, "step": 60685 }, { "epoch": 9.900489396411093, "grad_norm": 0.009547766298055649, "learning_rate": 0.0005953765487140678, "loss": 0.016, "num_input_tokens_seen": 130904432, "step": 60690 }, { "epoch": 9.901305057096248, "grad_norm": 0.256560355424881, "learning_rate": 0.0005953066749257055, "loss": 0.0502, "num_input_tokens_seen": 130915664, "step": 60695 }, { "epoch": 9.902120717781402, "grad_norm": 0.03108804300427437, "learning_rate": 0.0005952367992058326, "loss": 0.0614, "num_input_tokens_seen": 130925456, "step": 60700 }, { "epoch": 9.902936378466558, "grad_norm": 0.16083090007305145, "learning_rate": 0.0005951669215558651, "loss": 0.0825, "num_input_tokens_seen": 130935440, "step": 60705 }, { "epoch": 9.903752039151712, "grad_norm": 0.019032921642065048, "learning_rate": 0.0005950970419772192, "loss": 0.0169, "num_input_tokens_seen": 130945936, "step": 60710 }, { "epoch": 9.904567699836868, "grad_norm": 0.047515127807855606, "learning_rate": 0.0005950271604713111, "loss": 0.14, "num_input_tokens_seen": 130956528, "step": 60715 }, { "epoch": 9.905383360522023, "grad_norm": 0.18238703906536102, "learning_rate": 0.000594957277039557, "loss": 0.047, "num_input_tokens_seen": 130967280, "step": 60720 }, { "epoch": 9.906199021207177, "grad_norm": 0.025907719507813454, "learning_rate": 0.0005948873916833733, "loss": 0.0776, "num_input_tokens_seen": 130977424, "step": 60725 }, { "epoch": 9.907014681892333, "grad_norm": 0.03453850746154785, "learning_rate": 0.0005948175044041764, "loss": 0.4038, "num_input_tokens_seen": 130987536, "step": 60730 }, { "epoch": 9.907830342577487, "grad_norm": 0.5120149850845337, "learning_rate": 0.0005947476152033822, "loss": 0.0347, "num_input_tokens_seen": 131000784, "step": 60735 }, { "epoch": 9.908646003262643, "grad_norm": 0.6088401079177856, "learning_rate": 0.0005946777240824076, "loss": 0.3079, "num_input_tokens_seen": 131011024, "step": 60740 }, { "epoch": 9.909461663947798, "grad_norm": 0.023911427706480026, "learning_rate": 0.0005946078310426687, "loss": 0.0183, "num_input_tokens_seen": 131021648, "step": 60745 }, { "epoch": 9.910277324632952, "grad_norm": 0.01152818650007248, "learning_rate": 0.000594537936085582, "loss": 0.1457, "num_input_tokens_seen": 131031696, "step": 60750 }, { "epoch": 9.911092985318108, "grad_norm": 0.12052475661039352, "learning_rate": 0.0005944680392125643, "loss": 0.0573, "num_input_tokens_seen": 131041776, "step": 60755 }, { "epoch": 9.911908646003262, "grad_norm": 0.448579877614975, "learning_rate": 0.0005943981404250318, "loss": 0.129, "num_input_tokens_seen": 131052880, "step": 60760 }, { "epoch": 9.912724306688418, "grad_norm": 0.06391939520835876, "learning_rate": 0.0005943282397244013, "loss": 0.0326, "num_input_tokens_seen": 131064592, "step": 60765 }, { "epoch": 9.913539967373573, "grad_norm": 0.2718791961669922, "learning_rate": 0.0005942583371120893, "loss": 0.1676, "num_input_tokens_seen": 131075728, "step": 60770 }, { "epoch": 9.914355628058727, "grad_norm": 0.10354335606098175, "learning_rate": 0.0005941884325895127, "loss": 0.0184, "num_input_tokens_seen": 131087696, "step": 60775 }, { "epoch": 9.915171288743883, "grad_norm": 0.08380724489688873, "learning_rate": 0.0005941185261580878, "loss": 0.0445, "num_input_tokens_seen": 131098480, "step": 60780 }, { "epoch": 9.915986949429037, "grad_norm": 0.04952889680862427, "learning_rate": 0.0005940486178192317, "loss": 0.1399, "num_input_tokens_seen": 131109712, "step": 60785 }, { "epoch": 9.916802610114193, "grad_norm": 0.04938492551445961, "learning_rate": 0.000593978707574361, "loss": 0.0197, "num_input_tokens_seen": 131120208, "step": 60790 }, { "epoch": 9.917618270799348, "grad_norm": 0.5455061793327332, "learning_rate": 0.0005939087954248926, "loss": 0.1578, "num_input_tokens_seen": 131130832, "step": 60795 }, { "epoch": 9.918433931484502, "grad_norm": 0.2918844521045685, "learning_rate": 0.0005938388813722432, "loss": 0.0404, "num_input_tokens_seen": 131141808, "step": 60800 }, { "epoch": 9.919249592169658, "grad_norm": 0.037345822900533676, "learning_rate": 0.0005937689654178298, "loss": 0.0354, "num_input_tokens_seen": 131152944, "step": 60805 }, { "epoch": 9.920065252854812, "grad_norm": 0.012990389950573444, "learning_rate": 0.0005936990475630696, "loss": 0.0296, "num_input_tokens_seen": 131163600, "step": 60810 }, { "epoch": 9.920880913539968, "grad_norm": 0.3363749384880066, "learning_rate": 0.0005936291278093793, "loss": 0.1579, "num_input_tokens_seen": 131173264, "step": 60815 }, { "epoch": 9.921696574225122, "grad_norm": 0.6597939729690552, "learning_rate": 0.0005935592061581758, "loss": 0.1115, "num_input_tokens_seen": 131184304, "step": 60820 }, { "epoch": 9.922512234910277, "grad_norm": 0.7053841352462769, "learning_rate": 0.0005934892826108764, "loss": 0.1667, "num_input_tokens_seen": 131194736, "step": 60825 }, { "epoch": 9.923327895595433, "grad_norm": 0.160603329539299, "learning_rate": 0.0005934193571688981, "loss": 0.0238, "num_input_tokens_seen": 131205904, "step": 60830 }, { "epoch": 9.924143556280587, "grad_norm": 0.22924336791038513, "learning_rate": 0.0005933494298336579, "loss": 0.0985, "num_input_tokens_seen": 131217648, "step": 60835 }, { "epoch": 9.924959216965743, "grad_norm": 0.32545581459999084, "learning_rate": 0.0005932795006065732, "loss": 0.058, "num_input_tokens_seen": 131228496, "step": 60840 }, { "epoch": 9.925774877650896, "grad_norm": 0.0928037017583847, "learning_rate": 0.000593209569489061, "loss": 0.1424, "num_input_tokens_seen": 131239216, "step": 60845 }, { "epoch": 9.926590538336052, "grad_norm": 0.847078263759613, "learning_rate": 0.0005931396364825387, "loss": 0.0534, "num_input_tokens_seen": 131250512, "step": 60850 }, { "epoch": 9.927406199021208, "grad_norm": 0.04070904850959778, "learning_rate": 0.0005930697015884234, "loss": 0.0637, "num_input_tokens_seen": 131261552, "step": 60855 }, { "epoch": 9.928221859706362, "grad_norm": 0.03348315879702568, "learning_rate": 0.0005929997648081327, "loss": 0.0424, "num_input_tokens_seen": 131272720, "step": 60860 }, { "epoch": 9.929037520391518, "grad_norm": 0.006665338296443224, "learning_rate": 0.0005929298261430837, "loss": 0.0355, "num_input_tokens_seen": 131282672, "step": 60865 }, { "epoch": 9.929853181076671, "grad_norm": 0.1498643010854721, "learning_rate": 0.0005928598855946939, "loss": 0.0769, "num_input_tokens_seen": 131293008, "step": 60870 }, { "epoch": 9.930668841761827, "grad_norm": 0.10557324439287186, "learning_rate": 0.0005927899431643807, "loss": 0.1209, "num_input_tokens_seen": 131302672, "step": 60875 }, { "epoch": 9.931484502446983, "grad_norm": 0.04179172217845917, "learning_rate": 0.0005927199988535616, "loss": 0.0312, "num_input_tokens_seen": 131313968, "step": 60880 }, { "epoch": 9.932300163132137, "grad_norm": 0.03051568567752838, "learning_rate": 0.0005926500526636542, "loss": 0.1088, "num_input_tokens_seen": 131323600, "step": 60885 }, { "epoch": 9.933115823817293, "grad_norm": 0.09377099573612213, "learning_rate": 0.0005925801045960757, "loss": 0.1592, "num_input_tokens_seen": 131333968, "step": 60890 }, { "epoch": 9.933931484502446, "grad_norm": 0.04638167843222618, "learning_rate": 0.0005925101546522441, "loss": 0.0226, "num_input_tokens_seen": 131345296, "step": 60895 }, { "epoch": 9.934747145187602, "grad_norm": 0.007143331691622734, "learning_rate": 0.0005924402028335769, "loss": 0.1017, "num_input_tokens_seen": 131355888, "step": 60900 }, { "epoch": 9.935562805872756, "grad_norm": 0.025095529854297638, "learning_rate": 0.0005923702491414916, "loss": 0.1512, "num_input_tokens_seen": 131367440, "step": 60905 }, { "epoch": 9.936378466557912, "grad_norm": 0.0349934846162796, "learning_rate": 0.000592300293577406, "loss": 0.0538, "num_input_tokens_seen": 131378320, "step": 60910 }, { "epoch": 9.937194127243067, "grad_norm": 0.021588705480098724, "learning_rate": 0.0005922303361427379, "loss": 0.0343, "num_input_tokens_seen": 131387312, "step": 60915 }, { "epoch": 9.938009787928221, "grad_norm": 0.11879408359527588, "learning_rate": 0.0005921603768389051, "loss": 0.1785, "num_input_tokens_seen": 131396752, "step": 60920 }, { "epoch": 9.938825448613377, "grad_norm": 0.20185640454292297, "learning_rate": 0.0005920904156673254, "loss": 0.0367, "num_input_tokens_seen": 131408400, "step": 60925 }, { "epoch": 9.939641109298531, "grad_norm": 0.9917058944702148, "learning_rate": 0.0005920204526294165, "loss": 0.0895, "num_input_tokens_seen": 131418768, "step": 60930 }, { "epoch": 9.940456769983687, "grad_norm": 0.05641289800405502, "learning_rate": 0.0005919504877265965, "loss": 0.0442, "num_input_tokens_seen": 131430480, "step": 60935 }, { "epoch": 9.941272430668842, "grad_norm": 0.4553598463535309, "learning_rate": 0.000591880520960283, "loss": 0.1182, "num_input_tokens_seen": 131439984, "step": 60940 }, { "epoch": 9.942088091353996, "grad_norm": 0.06321804225444794, "learning_rate": 0.0005918105523318944, "loss": 0.0733, "num_input_tokens_seen": 131451216, "step": 60945 }, { "epoch": 9.942903752039152, "grad_norm": 0.04557380452752113, "learning_rate": 0.0005917405818428484, "loss": 0.0673, "num_input_tokens_seen": 131462928, "step": 60950 }, { "epoch": 9.943719412724306, "grad_norm": 0.019453784450888634, "learning_rate": 0.0005916706094945631, "loss": 0.1141, "num_input_tokens_seen": 131475088, "step": 60955 }, { "epoch": 9.944535073409462, "grad_norm": 0.024048911407589912, "learning_rate": 0.0005916006352884567, "loss": 0.0996, "num_input_tokens_seen": 131486096, "step": 60960 }, { "epoch": 9.945350734094617, "grad_norm": 0.09786482155323029, "learning_rate": 0.0005915306592259471, "loss": 0.1103, "num_input_tokens_seen": 131497008, "step": 60965 }, { "epoch": 9.946166394779771, "grad_norm": 0.24518489837646484, "learning_rate": 0.0005914606813084526, "loss": 0.0451, "num_input_tokens_seen": 131507888, "step": 60970 }, { "epoch": 9.946982055464927, "grad_norm": 0.01737568899989128, "learning_rate": 0.0005913907015373915, "loss": 0.013, "num_input_tokens_seen": 131519504, "step": 60975 }, { "epoch": 9.947797716150081, "grad_norm": 0.011724404990673065, "learning_rate": 0.0005913207199141818, "loss": 0.0186, "num_input_tokens_seen": 131531216, "step": 60980 }, { "epoch": 9.948613376835237, "grad_norm": 0.3961881399154663, "learning_rate": 0.0005912507364402419, "loss": 0.074, "num_input_tokens_seen": 131542640, "step": 60985 }, { "epoch": 9.949429037520392, "grad_norm": 0.006749361753463745, "learning_rate": 0.0005911807511169899, "loss": 0.2492, "num_input_tokens_seen": 131553424, "step": 60990 }, { "epoch": 9.950244698205546, "grad_norm": 0.03477555140852928, "learning_rate": 0.0005911107639458444, "loss": 0.0308, "num_input_tokens_seen": 131565008, "step": 60995 }, { "epoch": 9.951060358890702, "grad_norm": 0.029503030702471733, "learning_rate": 0.0005910407749282237, "loss": 0.0754, "num_input_tokens_seen": 131577360, "step": 61000 }, { "epoch": 9.951876019575856, "grad_norm": 0.005080615635961294, "learning_rate": 0.0005909707840655462, "loss": 0.0959, "num_input_tokens_seen": 131588208, "step": 61005 }, { "epoch": 9.952691680261012, "grad_norm": 0.04830136150121689, "learning_rate": 0.0005909007913592304, "loss": 0.0574, "num_input_tokens_seen": 131598736, "step": 61010 }, { "epoch": 9.953507340946166, "grad_norm": 0.35426515340805054, "learning_rate": 0.0005908307968106948, "loss": 0.0286, "num_input_tokens_seen": 131609104, "step": 61015 }, { "epoch": 9.954323001631321, "grad_norm": 0.030167724937200546, "learning_rate": 0.0005907608004213577, "loss": 0.1004, "num_input_tokens_seen": 131620848, "step": 61020 }, { "epoch": 9.955138662316477, "grad_norm": 0.09699372202157974, "learning_rate": 0.0005906908021926379, "loss": 0.0184, "num_input_tokens_seen": 131632176, "step": 61025 }, { "epoch": 9.955954323001631, "grad_norm": 0.03444267809391022, "learning_rate": 0.000590620802125954, "loss": 0.0184, "num_input_tokens_seen": 131642512, "step": 61030 }, { "epoch": 9.956769983686787, "grad_norm": 0.011276466771960258, "learning_rate": 0.0005905508002227247, "loss": 0.0138, "num_input_tokens_seen": 131653712, "step": 61035 }, { "epoch": 9.95758564437194, "grad_norm": 0.07355879247188568, "learning_rate": 0.0005904807964843684, "loss": 0.1429, "num_input_tokens_seen": 131664464, "step": 61040 }, { "epoch": 9.958401305057096, "grad_norm": 0.009992639534175396, "learning_rate": 0.0005904107909123039, "loss": 0.0245, "num_input_tokens_seen": 131675024, "step": 61045 }, { "epoch": 9.959216965742252, "grad_norm": 0.0561150498688221, "learning_rate": 0.0005903407835079502, "loss": 0.0701, "num_input_tokens_seen": 131685808, "step": 61050 }, { "epoch": 9.960032626427406, "grad_norm": 0.1623920202255249, "learning_rate": 0.000590270774272726, "loss": 0.1036, "num_input_tokens_seen": 131696976, "step": 61055 }, { "epoch": 9.960848287112562, "grad_norm": 0.17517897486686707, "learning_rate": 0.0005902007632080499, "loss": 0.0341, "num_input_tokens_seen": 131708528, "step": 61060 }, { "epoch": 9.961663947797716, "grad_norm": 0.12540830671787262, "learning_rate": 0.0005901307503153408, "loss": 0.0491, "num_input_tokens_seen": 131718832, "step": 61065 }, { "epoch": 9.962479608482871, "grad_norm": 0.06763845682144165, "learning_rate": 0.0005900607355960178, "loss": 0.1014, "num_input_tokens_seen": 131727728, "step": 61070 }, { "epoch": 9.963295269168025, "grad_norm": 0.06722762435674667, "learning_rate": 0.0005899907190514999, "loss": 0.0173, "num_input_tokens_seen": 131737936, "step": 61075 }, { "epoch": 9.964110929853181, "grad_norm": 0.016902245581150055, "learning_rate": 0.0005899207006832056, "loss": 0.0585, "num_input_tokens_seen": 131749328, "step": 61080 }, { "epoch": 9.964926590538337, "grad_norm": 0.06104455143213272, "learning_rate": 0.0005898506804925545, "loss": 0.0333, "num_input_tokens_seen": 131761424, "step": 61085 }, { "epoch": 9.96574225122349, "grad_norm": 0.06223906949162483, "learning_rate": 0.0005897806584809653, "loss": 0.0712, "num_input_tokens_seen": 131772880, "step": 61090 }, { "epoch": 9.966557911908646, "grad_norm": 0.10632332414388657, "learning_rate": 0.0005897106346498571, "loss": 0.0112, "num_input_tokens_seen": 131783696, "step": 61095 }, { "epoch": 9.9673735725938, "grad_norm": 0.46011582016944885, "learning_rate": 0.0005896406090006491, "loss": 0.0753, "num_input_tokens_seen": 131794608, "step": 61100 }, { "epoch": 9.968189233278956, "grad_norm": 0.0562114417552948, "learning_rate": 0.0005895705815347605, "loss": 0.0224, "num_input_tokens_seen": 131806160, "step": 61105 }, { "epoch": 9.969004893964112, "grad_norm": 0.008589844219386578, "learning_rate": 0.0005895005522536104, "loss": 0.097, "num_input_tokens_seen": 131817616, "step": 61110 }, { "epoch": 9.969820554649266, "grad_norm": 0.09932585060596466, "learning_rate": 0.000589430521158618, "loss": 0.0707, "num_input_tokens_seen": 131828112, "step": 61115 }, { "epoch": 9.970636215334421, "grad_norm": 0.024618972092866898, "learning_rate": 0.0005893604882512027, "loss": 0.0492, "num_input_tokens_seen": 131838160, "step": 61120 }, { "epoch": 9.971451876019575, "grad_norm": 0.050446897745132446, "learning_rate": 0.0005892904535327837, "loss": 0.1008, "num_input_tokens_seen": 131846864, "step": 61125 }, { "epoch": 9.97226753670473, "grad_norm": 0.10901523381471634, "learning_rate": 0.0005892204170047804, "loss": 0.0526, "num_input_tokens_seen": 131857072, "step": 61130 }, { "epoch": 9.973083197389887, "grad_norm": 0.24630193412303925, "learning_rate": 0.0005891503786686123, "loss": 0.0433, "num_input_tokens_seen": 131867824, "step": 61135 }, { "epoch": 9.97389885807504, "grad_norm": 0.27165156602859497, "learning_rate": 0.0005890803385256985, "loss": 0.2057, "num_input_tokens_seen": 131878640, "step": 61140 }, { "epoch": 9.974714518760196, "grad_norm": 0.015620440244674683, "learning_rate": 0.0005890102965774587, "loss": 0.1561, "num_input_tokens_seen": 131889680, "step": 61145 }, { "epoch": 9.97553017944535, "grad_norm": 0.0077781639993190765, "learning_rate": 0.0005889402528253124, "loss": 0.1514, "num_input_tokens_seen": 131899728, "step": 61150 }, { "epoch": 9.976345840130506, "grad_norm": 0.0180341973900795, "learning_rate": 0.0005888702072706788, "loss": 0.1275, "num_input_tokens_seen": 131910000, "step": 61155 }, { "epoch": 9.977161500815662, "grad_norm": 0.013503906317055225, "learning_rate": 0.0005888001599149781, "loss": 0.0456, "num_input_tokens_seen": 131920624, "step": 61160 }, { "epoch": 9.977977161500815, "grad_norm": 0.32608669996261597, "learning_rate": 0.0005887301107596292, "loss": 0.1201, "num_input_tokens_seen": 131931312, "step": 61165 }, { "epoch": 9.978792822185971, "grad_norm": 0.09953126311302185, "learning_rate": 0.0005886600598060522, "loss": 0.0184, "num_input_tokens_seen": 131942384, "step": 61170 }, { "epoch": 9.979608482871125, "grad_norm": 0.018058426678180695, "learning_rate": 0.0005885900070556665, "loss": 0.1189, "num_input_tokens_seen": 131952944, "step": 61175 }, { "epoch": 9.98042414355628, "grad_norm": 0.27167809009552, "learning_rate": 0.0005885199525098919, "loss": 0.046, "num_input_tokens_seen": 131963600, "step": 61180 }, { "epoch": 9.981239804241435, "grad_norm": 0.086937814950943, "learning_rate": 0.0005884498961701483, "loss": 0.0169, "num_input_tokens_seen": 131974960, "step": 61185 }, { "epoch": 9.98205546492659, "grad_norm": 0.12010996043682098, "learning_rate": 0.0005883798380378554, "loss": 0.015, "num_input_tokens_seen": 131985648, "step": 61190 }, { "epoch": 9.982871125611746, "grad_norm": 0.17354534566402435, "learning_rate": 0.0005883097781144329, "loss": 0.0206, "num_input_tokens_seen": 131995984, "step": 61195 }, { "epoch": 9.9836867862969, "grad_norm": 0.3734014332294464, "learning_rate": 0.0005882397164013005, "loss": 0.0663, "num_input_tokens_seen": 132007344, "step": 61200 }, { "epoch": 9.984502446982056, "grad_norm": 0.007803584448993206, "learning_rate": 0.0005881696528998785, "loss": 0.0454, "num_input_tokens_seen": 132017584, "step": 61205 }, { "epoch": 9.98531810766721, "grad_norm": 0.026356784626841545, "learning_rate": 0.0005880995876115868, "loss": 0.091, "num_input_tokens_seen": 132028112, "step": 61210 }, { "epoch": 9.986133768352365, "grad_norm": 0.06008047237992287, "learning_rate": 0.0005880295205378449, "loss": 0.0903, "num_input_tokens_seen": 132039408, "step": 61215 }, { "epoch": 9.986949429037521, "grad_norm": 0.06083710491657257, "learning_rate": 0.0005879594516800732, "loss": 0.0197, "num_input_tokens_seen": 132048720, "step": 61220 }, { "epoch": 9.987765089722675, "grad_norm": 0.009052700363099575, "learning_rate": 0.0005878893810396916, "loss": 0.0862, "num_input_tokens_seen": 132059472, "step": 61225 }, { "epoch": 9.98858075040783, "grad_norm": 0.011163829825818539, "learning_rate": 0.0005878193086181203, "loss": 0.0047, "num_input_tokens_seen": 132071312, "step": 61230 }, { "epoch": 9.989396411092985, "grad_norm": 0.026182830333709717, "learning_rate": 0.0005877492344167792, "loss": 0.053, "num_input_tokens_seen": 132083248, "step": 61235 }, { "epoch": 9.99021207177814, "grad_norm": 0.027238667011260986, "learning_rate": 0.0005876791584370886, "loss": 0.0748, "num_input_tokens_seen": 132092848, "step": 61240 }, { "epoch": 9.991027732463294, "grad_norm": 0.1023106500506401, "learning_rate": 0.0005876090806804686, "loss": 0.0704, "num_input_tokens_seen": 132103728, "step": 61245 }, { "epoch": 9.99184339314845, "grad_norm": 0.19057044386863708, "learning_rate": 0.0005875390011483394, "loss": 0.0759, "num_input_tokens_seen": 132114960, "step": 61250 }, { "epoch": 9.992659053833606, "grad_norm": 0.03020823746919632, "learning_rate": 0.0005874689198421214, "loss": 0.0781, "num_input_tokens_seen": 132124432, "step": 61255 }, { "epoch": 9.99347471451876, "grad_norm": 0.1120600476861, "learning_rate": 0.0005873988367632347, "loss": 0.0208, "num_input_tokens_seen": 132135440, "step": 61260 }, { "epoch": 9.994290375203915, "grad_norm": 0.06063985452055931, "learning_rate": 0.0005873287519130997, "loss": 0.0943, "num_input_tokens_seen": 132146320, "step": 61265 }, { "epoch": 9.99510603588907, "grad_norm": 0.31514763832092285, "learning_rate": 0.0005872586652931368, "loss": 0.0577, "num_input_tokens_seen": 132157616, "step": 61270 }, { "epoch": 9.995921696574225, "grad_norm": 0.058251649141311646, "learning_rate": 0.0005871885769047664, "loss": 0.2073, "num_input_tokens_seen": 132168624, "step": 61275 }, { "epoch": 9.99673735725938, "grad_norm": 0.013234366662800312, "learning_rate": 0.0005871184867494088, "loss": 0.0629, "num_input_tokens_seen": 132179600, "step": 61280 }, { "epoch": 9.997553017944535, "grad_norm": 0.30558326840400696, "learning_rate": 0.0005870483948284845, "loss": 0.0664, "num_input_tokens_seen": 132190224, "step": 61285 }, { "epoch": 9.99836867862969, "grad_norm": 0.004525907803326845, "learning_rate": 0.0005869783011434141, "loss": 0.021, "num_input_tokens_seen": 132201968, "step": 61290 }, { "epoch": 9.999184339314844, "grad_norm": 0.09373323619365692, "learning_rate": 0.0005869082056956181, "loss": 0.0314, "num_input_tokens_seen": 132213072, "step": 61295 }, { "epoch": 10.0, "grad_norm": 0.016201334074139595, "learning_rate": 0.000586838108486517, "loss": 0.1022, "num_input_tokens_seen": 132223184, "step": 61300 }, { "epoch": 10.0, "eval_loss": 0.1377226710319519, "eval_runtime": 104.6608, "eval_samples_per_second": 26.036, "eval_steps_per_second": 6.516, "num_input_tokens_seen": 132223184, "step": 61300 }, { "epoch": 10.000815660685156, "grad_norm": 0.2678169012069702, "learning_rate": 0.0005867680095175315, "loss": 0.0735, "num_input_tokens_seen": 132234864, "step": 61305 }, { "epoch": 10.00163132137031, "grad_norm": 0.18137049674987793, "learning_rate": 0.0005866979087900822, "loss": 0.0296, "num_input_tokens_seen": 132243888, "step": 61310 }, { "epoch": 10.002446982055465, "grad_norm": 0.005366615019738674, "learning_rate": 0.0005866278063055898, "loss": 0.0229, "num_input_tokens_seen": 132255568, "step": 61315 }, { "epoch": 10.00326264274062, "grad_norm": 0.04574661701917648, "learning_rate": 0.0005865577020654751, "loss": 0.1479, "num_input_tokens_seen": 132266096, "step": 61320 }, { "epoch": 10.004078303425775, "grad_norm": 0.46023669838905334, "learning_rate": 0.0005864875960711588, "loss": 0.0418, "num_input_tokens_seen": 132277424, "step": 61325 }, { "epoch": 10.00489396411093, "grad_norm": 0.026542169973254204, "learning_rate": 0.0005864174883240614, "loss": 0.0671, "num_input_tokens_seen": 132289328, "step": 61330 }, { "epoch": 10.005709624796085, "grad_norm": 0.314367413520813, "learning_rate": 0.0005863473788256042, "loss": 0.0411, "num_input_tokens_seen": 132300368, "step": 61335 }, { "epoch": 10.00652528548124, "grad_norm": 0.11848754435777664, "learning_rate": 0.0005862772675772076, "loss": 0.0308, "num_input_tokens_seen": 132310896, "step": 61340 }, { "epoch": 10.007340946166394, "grad_norm": 0.40424296259880066, "learning_rate": 0.000586207154580293, "loss": 0.0206, "num_input_tokens_seen": 132320592, "step": 61345 }, { "epoch": 10.00815660685155, "grad_norm": 0.2124911993741989, "learning_rate": 0.0005861370398362809, "loss": 0.05, "num_input_tokens_seen": 132331344, "step": 61350 }, { "epoch": 10.008972267536704, "grad_norm": 0.013648834079504013, "learning_rate": 0.0005860669233465925, "loss": 0.0886, "num_input_tokens_seen": 132341968, "step": 61355 }, { "epoch": 10.00978792822186, "grad_norm": 0.021844767034053802, "learning_rate": 0.0005859968051126486, "loss": 0.0966, "num_input_tokens_seen": 132353072, "step": 61360 }, { "epoch": 10.010603588907015, "grad_norm": 0.8775524497032166, "learning_rate": 0.0005859266851358704, "loss": 0.1554, "num_input_tokens_seen": 132365008, "step": 61365 }, { "epoch": 10.01141924959217, "grad_norm": 0.0030676175374537706, "learning_rate": 0.0005858565634176789, "loss": 0.0256, "num_input_tokens_seen": 132375696, "step": 61370 }, { "epoch": 10.012234910277325, "grad_norm": 0.0289422869682312, "learning_rate": 0.0005857864399594953, "loss": 0.0303, "num_input_tokens_seen": 132386512, "step": 61375 }, { "epoch": 10.013050570962479, "grad_norm": 0.12274732440710068, "learning_rate": 0.0005857163147627406, "loss": 0.0487, "num_input_tokens_seen": 132396976, "step": 61380 }, { "epoch": 10.013866231647635, "grad_norm": 0.21211713552474976, "learning_rate": 0.000585646187828836, "loss": 0.0252, "num_input_tokens_seen": 132408432, "step": 61385 }, { "epoch": 10.01468189233279, "grad_norm": 0.15673518180847168, "learning_rate": 0.000585576059159203, "loss": 0.0089, "num_input_tokens_seen": 132420016, "step": 61390 }, { "epoch": 10.015497553017944, "grad_norm": 0.011132924817502499, "learning_rate": 0.0005855059287552623, "loss": 0.0863, "num_input_tokens_seen": 132431280, "step": 61395 }, { "epoch": 10.0163132137031, "grad_norm": 0.0725574865937233, "learning_rate": 0.0005854357966184356, "loss": 0.0683, "num_input_tokens_seen": 132441808, "step": 61400 }, { "epoch": 10.017128874388254, "grad_norm": 0.009463079273700714, "learning_rate": 0.0005853656627501442, "loss": 0.0157, "num_input_tokens_seen": 132452976, "step": 61405 }, { "epoch": 10.01794453507341, "grad_norm": 0.03040081448853016, "learning_rate": 0.0005852955271518092, "loss": 0.0392, "num_input_tokens_seen": 132463888, "step": 61410 }, { "epoch": 10.018760195758565, "grad_norm": 0.051599469035863876, "learning_rate": 0.0005852253898248522, "loss": 0.0323, "num_input_tokens_seen": 132474448, "step": 61415 }, { "epoch": 10.01957585644372, "grad_norm": 0.4508799910545349, "learning_rate": 0.0005851552507706945, "loss": 0.0378, "num_input_tokens_seen": 132485872, "step": 61420 }, { "epoch": 10.020391517128875, "grad_norm": 0.2546553611755371, "learning_rate": 0.0005850851099907577, "loss": 0.0418, "num_input_tokens_seen": 132495856, "step": 61425 }, { "epoch": 10.021207177814029, "grad_norm": 0.7121838927268982, "learning_rate": 0.0005850149674864631, "loss": 0.0621, "num_input_tokens_seen": 132506064, "step": 61430 }, { "epoch": 10.022022838499185, "grad_norm": 0.08141548186540604, "learning_rate": 0.0005849448232592324, "loss": 0.0394, "num_input_tokens_seen": 132517808, "step": 61435 }, { "epoch": 10.022838499184338, "grad_norm": 0.0146437743678689, "learning_rate": 0.0005848746773104871, "loss": 0.0103, "num_input_tokens_seen": 132528272, "step": 61440 }, { "epoch": 10.023654159869494, "grad_norm": 0.24976637959480286, "learning_rate": 0.0005848045296416488, "loss": 0.037, "num_input_tokens_seen": 132538960, "step": 61445 }, { "epoch": 10.02446982055465, "grad_norm": 0.024098461493849754, "learning_rate": 0.0005847343802541391, "loss": 0.0068, "num_input_tokens_seen": 132550704, "step": 61450 }, { "epoch": 10.025285481239804, "grad_norm": 0.006092616356909275, "learning_rate": 0.0005846642291493796, "loss": 0.0149, "num_input_tokens_seen": 132561296, "step": 61455 }, { "epoch": 10.02610114192496, "grad_norm": 0.008456066250801086, "learning_rate": 0.0005845940763287923, "loss": 0.0621, "num_input_tokens_seen": 132571696, "step": 61460 }, { "epoch": 10.026916802610113, "grad_norm": 0.008161747828125954, "learning_rate": 0.0005845239217937986, "loss": 0.165, "num_input_tokens_seen": 132582512, "step": 61465 }, { "epoch": 10.02773246329527, "grad_norm": 0.02738736756145954, "learning_rate": 0.0005844537655458203, "loss": 0.0087, "num_input_tokens_seen": 132591408, "step": 61470 }, { "epoch": 10.028548123980425, "grad_norm": 0.016410566866397858, "learning_rate": 0.0005843836075862794, "loss": 0.0082, "num_input_tokens_seen": 132602160, "step": 61475 }, { "epoch": 10.029363784665579, "grad_norm": 0.5761615633964539, "learning_rate": 0.0005843134479165977, "loss": 0.0874, "num_input_tokens_seen": 132612720, "step": 61480 }, { "epoch": 10.030179445350734, "grad_norm": 0.19983547925949097, "learning_rate": 0.0005842432865381971, "loss": 0.057, "num_input_tokens_seen": 132624048, "step": 61485 }, { "epoch": 10.030995106035888, "grad_norm": 0.06637218594551086, "learning_rate": 0.0005841731234524993, "loss": 0.0159, "num_input_tokens_seen": 132634224, "step": 61490 }, { "epoch": 10.031810766721044, "grad_norm": 0.10646211355924606, "learning_rate": 0.0005841029586609263, "loss": 0.0766, "num_input_tokens_seen": 132643664, "step": 61495 }, { "epoch": 10.0326264274062, "grad_norm": 0.027791617438197136, "learning_rate": 0.0005840327921649003, "loss": 0.0127, "num_input_tokens_seen": 132653744, "step": 61500 }, { "epoch": 10.033442088091354, "grad_norm": 0.052600327879190445, "learning_rate": 0.0005839626239658431, "loss": 0.0044, "num_input_tokens_seen": 132665136, "step": 61505 }, { "epoch": 10.03425774877651, "grad_norm": 0.04009760916233063, "learning_rate": 0.0005838924540651769, "loss": 0.0227, "num_input_tokens_seen": 132676368, "step": 61510 }, { "epoch": 10.035073409461663, "grad_norm": 0.15309365093708038, "learning_rate": 0.0005838222824643235, "loss": 0.0623, "num_input_tokens_seen": 132685904, "step": 61515 }, { "epoch": 10.035889070146819, "grad_norm": 0.002563132205978036, "learning_rate": 0.0005837521091647054, "loss": 0.0811, "num_input_tokens_seen": 132697200, "step": 61520 }, { "epoch": 10.036704730831975, "grad_norm": 0.062458232045173645, "learning_rate": 0.0005836819341677444, "loss": 0.0743, "num_input_tokens_seen": 132706992, "step": 61525 }, { "epoch": 10.037520391517129, "grad_norm": 0.017263075336813927, "learning_rate": 0.0005836117574748629, "loss": 0.0939, "num_input_tokens_seen": 132718128, "step": 61530 }, { "epoch": 10.038336052202284, "grad_norm": 0.644141435623169, "learning_rate": 0.0005835415790874832, "loss": 0.0914, "num_input_tokens_seen": 132728304, "step": 61535 }, { "epoch": 10.039151712887438, "grad_norm": 0.2830989360809326, "learning_rate": 0.0005834713990070273, "loss": 0.0639, "num_input_tokens_seen": 132739792, "step": 61540 }, { "epoch": 10.039967373572594, "grad_norm": 0.1408531665802002, "learning_rate": 0.0005834012172349174, "loss": 0.0363, "num_input_tokens_seen": 132750288, "step": 61545 }, { "epoch": 10.040783034257748, "grad_norm": 0.15121932327747345, "learning_rate": 0.0005833310337725764, "loss": 0.0492, "num_input_tokens_seen": 132761040, "step": 61550 }, { "epoch": 10.041598694942904, "grad_norm": 0.009859373793005943, "learning_rate": 0.0005832608486214261, "loss": 0.0423, "num_input_tokens_seen": 132770768, "step": 61555 }, { "epoch": 10.04241435562806, "grad_norm": 0.04514402523636818, "learning_rate": 0.0005831906617828892, "loss": 0.1762, "num_input_tokens_seen": 132780688, "step": 61560 }, { "epoch": 10.043230016313213, "grad_norm": 0.10863786190748215, "learning_rate": 0.0005831204732583879, "loss": 0.0858, "num_input_tokens_seen": 132790704, "step": 61565 }, { "epoch": 10.044045676998369, "grad_norm": 0.01585051603615284, "learning_rate": 0.0005830502830493447, "loss": 0.0042, "num_input_tokens_seen": 132801584, "step": 61570 }, { "epoch": 10.044861337683523, "grad_norm": 0.15733830630779266, "learning_rate": 0.0005829800911571824, "loss": 0.0306, "num_input_tokens_seen": 132812464, "step": 61575 }, { "epoch": 10.045676998368679, "grad_norm": 0.0340900644659996, "learning_rate": 0.000582909897583323, "loss": 0.0131, "num_input_tokens_seen": 132822640, "step": 61580 }, { "epoch": 10.046492659053834, "grad_norm": 0.00634809909388423, "learning_rate": 0.0005828397023291895, "loss": 0.0185, "num_input_tokens_seen": 132833136, "step": 61585 }, { "epoch": 10.047308319738988, "grad_norm": 0.004743380472064018, "learning_rate": 0.0005827695053962043, "loss": 0.0374, "num_input_tokens_seen": 132843728, "step": 61590 }, { "epoch": 10.048123980424144, "grad_norm": 0.0059966123662889, "learning_rate": 0.0005826993067857901, "loss": 0.0816, "num_input_tokens_seen": 132854416, "step": 61595 }, { "epoch": 10.048939641109298, "grad_norm": 0.014437413774430752, "learning_rate": 0.0005826291064993695, "loss": 0.0346, "num_input_tokens_seen": 132865008, "step": 61600 }, { "epoch": 10.049755301794454, "grad_norm": 0.17344315350055695, "learning_rate": 0.0005825589045383654, "loss": 0.0271, "num_input_tokens_seen": 132877456, "step": 61605 }, { "epoch": 10.05057096247961, "grad_norm": 0.005999527871608734, "learning_rate": 0.0005824887009042002, "loss": 0.1807, "num_input_tokens_seen": 132887440, "step": 61610 }, { "epoch": 10.051386623164763, "grad_norm": 0.05266229808330536, "learning_rate": 0.0005824184955982967, "loss": 0.0986, "num_input_tokens_seen": 132898384, "step": 61615 }, { "epoch": 10.052202283849919, "grad_norm": 0.03723113611340523, "learning_rate": 0.000582348288622078, "loss": 0.0525, "num_input_tokens_seen": 132909744, "step": 61620 }, { "epoch": 10.053017944535073, "grad_norm": 0.010554548352956772, "learning_rate": 0.0005822780799769667, "loss": 0.0068, "num_input_tokens_seen": 132919664, "step": 61625 }, { "epoch": 10.053833605220229, "grad_norm": 0.004291731398552656, "learning_rate": 0.0005822078696643859, "loss": 0.0035, "num_input_tokens_seen": 132930544, "step": 61630 }, { "epoch": 10.054649265905383, "grad_norm": 0.4468053877353668, "learning_rate": 0.0005821376576857582, "loss": 0.0945, "num_input_tokens_seen": 132941712, "step": 61635 }, { "epoch": 10.055464926590538, "grad_norm": 0.40709027647972107, "learning_rate": 0.0005820674440425067, "loss": 0.0507, "num_input_tokens_seen": 132952400, "step": 61640 }, { "epoch": 10.056280587275694, "grad_norm": 0.298998087644577, "learning_rate": 0.0005819972287360543, "loss": 0.084, "num_input_tokens_seen": 132962608, "step": 61645 }, { "epoch": 10.057096247960848, "grad_norm": 0.15763355791568756, "learning_rate": 0.0005819270117678239, "loss": 0.0271, "num_input_tokens_seen": 132972016, "step": 61650 }, { "epoch": 10.057911908646004, "grad_norm": 0.004148266278207302, "learning_rate": 0.0005818567931392389, "loss": 0.0245, "num_input_tokens_seen": 132981872, "step": 61655 }, { "epoch": 10.058727569331158, "grad_norm": 0.006985764019191265, "learning_rate": 0.000581786572851722, "loss": 0.0057, "num_input_tokens_seen": 132991472, "step": 61660 }, { "epoch": 10.059543230016313, "grad_norm": 0.006122528109699488, "learning_rate": 0.0005817163509066966, "loss": 0.0092, "num_input_tokens_seen": 133001712, "step": 61665 }, { "epoch": 10.060358890701469, "grad_norm": 0.07483422011137009, "learning_rate": 0.0005816461273055857, "loss": 0.0175, "num_input_tokens_seen": 133012848, "step": 61670 }, { "epoch": 10.061174551386623, "grad_norm": 0.03318703547120094, "learning_rate": 0.0005815759020498122, "loss": 0.0582, "num_input_tokens_seen": 133024720, "step": 61675 }, { "epoch": 10.061990212071779, "grad_norm": 0.35977837443351746, "learning_rate": 0.0005815056751407999, "loss": 0.1247, "num_input_tokens_seen": 133036496, "step": 61680 }, { "epoch": 10.062805872756933, "grad_norm": 0.027384331449866295, "learning_rate": 0.0005814354465799715, "loss": 0.0142, "num_input_tokens_seen": 133047728, "step": 61685 }, { "epoch": 10.063621533442088, "grad_norm": 0.007883036509156227, "learning_rate": 0.0005813652163687504, "loss": 0.0426, "num_input_tokens_seen": 133058576, "step": 61690 }, { "epoch": 10.064437194127244, "grad_norm": 0.007013268768787384, "learning_rate": 0.0005812949845085601, "loss": 0.0143, "num_input_tokens_seen": 133069136, "step": 61695 }, { "epoch": 10.065252854812398, "grad_norm": 0.3014785647392273, "learning_rate": 0.0005812247510008238, "loss": 0.1548, "num_input_tokens_seen": 133079312, "step": 61700 }, { "epoch": 10.066068515497554, "grad_norm": 0.39774078130722046, "learning_rate": 0.0005811545158469649, "loss": 0.1453, "num_input_tokens_seen": 133090256, "step": 61705 }, { "epoch": 10.066884176182707, "grad_norm": 0.0037517540622502565, "learning_rate": 0.0005810842790484066, "loss": 0.012, "num_input_tokens_seen": 133100848, "step": 61710 }, { "epoch": 10.067699836867863, "grad_norm": 0.18614134192466736, "learning_rate": 0.0005810140406065727, "loss": 0.0141, "num_input_tokens_seen": 133110160, "step": 61715 }, { "epoch": 10.068515497553017, "grad_norm": 0.02847582846879959, "learning_rate": 0.0005809438005228866, "loss": 0.0247, "num_input_tokens_seen": 133120816, "step": 61720 }, { "epoch": 10.069331158238173, "grad_norm": 0.07765766978263855, "learning_rate": 0.0005808735587987714, "loss": 0.0176, "num_input_tokens_seen": 133131120, "step": 61725 }, { "epoch": 10.070146818923329, "grad_norm": 0.050454381853342056, "learning_rate": 0.0005808033154356511, "loss": 0.0312, "num_input_tokens_seen": 133142512, "step": 61730 }, { "epoch": 10.070962479608482, "grad_norm": 0.10077342391014099, "learning_rate": 0.0005807330704349492, "loss": 0.0138, "num_input_tokens_seen": 133151472, "step": 61735 }, { "epoch": 10.071778140293638, "grad_norm": 0.008212494663894176, "learning_rate": 0.0005806628237980891, "loss": 0.0637, "num_input_tokens_seen": 133161776, "step": 61740 }, { "epoch": 10.072593800978792, "grad_norm": 0.003061407245695591, "learning_rate": 0.0005805925755264945, "loss": 0.015, "num_input_tokens_seen": 133173872, "step": 61745 }, { "epoch": 10.073409461663948, "grad_norm": 0.009130405262112617, "learning_rate": 0.0005805223256215891, "loss": 0.0178, "num_input_tokens_seen": 133184752, "step": 61750 }, { "epoch": 10.074225122349104, "grad_norm": 0.016217553988099098, "learning_rate": 0.0005804520740847966, "loss": 0.0502, "num_input_tokens_seen": 133195824, "step": 61755 }, { "epoch": 10.075040783034257, "grad_norm": 0.022380659356713295, "learning_rate": 0.0005803818209175409, "loss": 0.0267, "num_input_tokens_seen": 133206544, "step": 61760 }, { "epoch": 10.075856443719413, "grad_norm": 0.05859624221920967, "learning_rate": 0.0005803115661212456, "loss": 0.0242, "num_input_tokens_seen": 133217904, "step": 61765 }, { "epoch": 10.076672104404567, "grad_norm": 0.6733582615852356, "learning_rate": 0.0005802413096973345, "loss": 0.043, "num_input_tokens_seen": 133228304, "step": 61770 }, { "epoch": 10.077487765089723, "grad_norm": 0.04086980223655701, "learning_rate": 0.0005801710516472315, "loss": 0.0293, "num_input_tokens_seen": 133238512, "step": 61775 }, { "epoch": 10.078303425774878, "grad_norm": 0.339104026556015, "learning_rate": 0.0005801007919723605, "loss": 0.0281, "num_input_tokens_seen": 133248912, "step": 61780 }, { "epoch": 10.079119086460032, "grad_norm": 0.03331382945179939, "learning_rate": 0.000580030530674145, "loss": 0.0208, "num_input_tokens_seen": 133259696, "step": 61785 }, { "epoch": 10.079934747145188, "grad_norm": 0.02295873872935772, "learning_rate": 0.0005799602677540095, "loss": 0.0248, "num_input_tokens_seen": 133269456, "step": 61790 }, { "epoch": 10.080750407830342, "grad_norm": 0.015243462286889553, "learning_rate": 0.0005798900032133778, "loss": 0.0203, "num_input_tokens_seen": 133279504, "step": 61795 }, { "epoch": 10.081566068515498, "grad_norm": 0.04454034939408302, "learning_rate": 0.0005798197370536737, "loss": 0.0063, "num_input_tokens_seen": 133290736, "step": 61800 }, { "epoch": 10.082381729200652, "grad_norm": 0.0063231950625777245, "learning_rate": 0.0005797494692763215, "loss": 0.0824, "num_input_tokens_seen": 133300592, "step": 61805 }, { "epoch": 10.083197389885807, "grad_norm": 0.03869770094752312, "learning_rate": 0.0005796791998827451, "loss": 0.0202, "num_input_tokens_seen": 133311472, "step": 61810 }, { "epoch": 10.084013050570963, "grad_norm": 0.15234312415122986, "learning_rate": 0.0005796089288743687, "loss": 0.0201, "num_input_tokens_seen": 133322544, "step": 61815 }, { "epoch": 10.084828711256117, "grad_norm": 0.002217642031610012, "learning_rate": 0.0005795386562526163, "loss": 0.009, "num_input_tokens_seen": 133332400, "step": 61820 }, { "epoch": 10.085644371941273, "grad_norm": 0.009056595154106617, "learning_rate": 0.000579468382018912, "loss": 0.0064, "num_input_tokens_seen": 133342608, "step": 61825 }, { "epoch": 10.086460032626427, "grad_norm": 0.03752760589122772, "learning_rate": 0.0005793981061746802, "loss": 0.0108, "num_input_tokens_seen": 133354288, "step": 61830 }, { "epoch": 10.087275693311582, "grad_norm": 0.3855893313884735, "learning_rate": 0.0005793278287213453, "loss": 0.0181, "num_input_tokens_seen": 133363472, "step": 61835 }, { "epoch": 10.088091353996738, "grad_norm": 0.0027312436141073704, "learning_rate": 0.000579257549660331, "loss": 0.1167, "num_input_tokens_seen": 133374032, "step": 61840 }, { "epoch": 10.088907014681892, "grad_norm": 0.6444520950317383, "learning_rate": 0.0005791872689930621, "loss": 0.0473, "num_input_tokens_seen": 133384944, "step": 61845 }, { "epoch": 10.089722675367048, "grad_norm": 0.27100419998168945, "learning_rate": 0.0005791169867209626, "loss": 0.0089, "num_input_tokens_seen": 133396400, "step": 61850 }, { "epoch": 10.090538336052202, "grad_norm": 0.47414451837539673, "learning_rate": 0.0005790467028454571, "loss": 0.0436, "num_input_tokens_seen": 133406416, "step": 61855 }, { "epoch": 10.091353996737357, "grad_norm": 0.6902989745140076, "learning_rate": 0.0005789764173679698, "loss": 0.21, "num_input_tokens_seen": 133417808, "step": 61860 }, { "epoch": 10.092169657422513, "grad_norm": 0.005177111830562353, "learning_rate": 0.0005789061302899252, "loss": 0.0176, "num_input_tokens_seen": 133428496, "step": 61865 }, { "epoch": 10.092985318107667, "grad_norm": 0.011229160241782665, "learning_rate": 0.0005788358416127478, "loss": 0.0706, "num_input_tokens_seen": 133438928, "step": 61870 }, { "epoch": 10.093800978792823, "grad_norm": 0.40886592864990234, "learning_rate": 0.0005787655513378622, "loss": 0.0595, "num_input_tokens_seen": 133448432, "step": 61875 }, { "epoch": 10.094616639477977, "grad_norm": 0.003765393514186144, "learning_rate": 0.0005786952594666925, "loss": 0.045, "num_input_tokens_seen": 133460912, "step": 61880 }, { "epoch": 10.095432300163132, "grad_norm": 0.016948604956269264, "learning_rate": 0.0005786249660006638, "loss": 0.1006, "num_input_tokens_seen": 133472240, "step": 61885 }, { "epoch": 10.096247960848286, "grad_norm": 0.002912787487730384, "learning_rate": 0.0005785546709412004, "loss": 0.0026, "num_input_tokens_seen": 133483472, "step": 61890 }, { "epoch": 10.097063621533442, "grad_norm": 0.02974146418273449, "learning_rate": 0.0005784843742897268, "loss": 0.0176, "num_input_tokens_seen": 133494672, "step": 61895 }, { "epoch": 10.097879282218598, "grad_norm": 0.007090809289366007, "learning_rate": 0.0005784140760476679, "loss": 0.1732, "num_input_tokens_seen": 133505680, "step": 61900 }, { "epoch": 10.098694942903752, "grad_norm": 0.006513925269246101, "learning_rate": 0.0005783437762164483, "loss": 0.0355, "num_input_tokens_seen": 133516176, "step": 61905 }, { "epoch": 10.099510603588907, "grad_norm": 0.035911157727241516, "learning_rate": 0.0005782734747974926, "loss": 0.0736, "num_input_tokens_seen": 133527248, "step": 61910 }, { "epoch": 10.100326264274061, "grad_norm": 1.5193142890930176, "learning_rate": 0.0005782031717922256, "loss": 0.0727, "num_input_tokens_seen": 133537040, "step": 61915 }, { "epoch": 10.101141924959217, "grad_norm": 0.008320252411067486, "learning_rate": 0.0005781328672020723, "loss": 0.0145, "num_input_tokens_seen": 133547152, "step": 61920 }, { "epoch": 10.101957585644373, "grad_norm": 0.5464676022529602, "learning_rate": 0.0005780625610284572, "loss": 0.1228, "num_input_tokens_seen": 133557552, "step": 61925 }, { "epoch": 10.102773246329527, "grad_norm": 0.014367361553013325, "learning_rate": 0.000577992253272805, "loss": 0.0818, "num_input_tokens_seen": 133568304, "step": 61930 }, { "epoch": 10.103588907014682, "grad_norm": 0.0032405138481408358, "learning_rate": 0.0005779219439365411, "loss": 0.0511, "num_input_tokens_seen": 133578032, "step": 61935 }, { "epoch": 10.104404567699836, "grad_norm": 0.023005958646535873, "learning_rate": 0.0005778516330210902, "loss": 0.0248, "num_input_tokens_seen": 133590352, "step": 61940 }, { "epoch": 10.105220228384992, "grad_norm": 0.006340945605188608, "learning_rate": 0.0005777813205278772, "loss": 0.0385, "num_input_tokens_seen": 133601200, "step": 61945 }, { "epoch": 10.106035889070148, "grad_norm": 0.07047683745622635, "learning_rate": 0.0005777110064583271, "loss": 0.048, "num_input_tokens_seen": 133612336, "step": 61950 }, { "epoch": 10.106851549755302, "grad_norm": 0.02920965477824211, "learning_rate": 0.0005776406908138648, "loss": 0.0334, "num_input_tokens_seen": 133623408, "step": 61955 }, { "epoch": 10.107667210440457, "grad_norm": 0.019985370337963104, "learning_rate": 0.0005775703735959155, "loss": 0.0038, "num_input_tokens_seen": 133633904, "step": 61960 }, { "epoch": 10.108482871125611, "grad_norm": 0.0194255318492651, "learning_rate": 0.000577500054805904, "loss": 0.0855, "num_input_tokens_seen": 133644496, "step": 61965 }, { "epoch": 10.109298531810767, "grad_norm": 0.2530505657196045, "learning_rate": 0.0005774297344452556, "loss": 0.0178, "num_input_tokens_seen": 133655952, "step": 61970 }, { "epoch": 10.11011419249592, "grad_norm": 1.0987221002578735, "learning_rate": 0.0005773594125153955, "loss": 0.159, "num_input_tokens_seen": 133667056, "step": 61975 }, { "epoch": 10.110929853181077, "grad_norm": 0.04169277846813202, "learning_rate": 0.0005772890890177487, "loss": 0.022, "num_input_tokens_seen": 133677232, "step": 61980 }, { "epoch": 10.111745513866232, "grad_norm": 0.004866071045398712, "learning_rate": 0.0005772187639537405, "loss": 0.0077, "num_input_tokens_seen": 133688016, "step": 61985 }, { "epoch": 10.112561174551386, "grad_norm": 0.010460522025823593, "learning_rate": 0.000577148437324796, "loss": 0.0141, "num_input_tokens_seen": 133698320, "step": 61990 }, { "epoch": 10.113376835236542, "grad_norm": 0.026330674067139626, "learning_rate": 0.0005770781091323407, "loss": 0.109, "num_input_tokens_seen": 133709872, "step": 61995 }, { "epoch": 10.114192495921696, "grad_norm": 0.00981769897043705, "learning_rate": 0.0005770077793777996, "loss": 0.0232, "num_input_tokens_seen": 133720592, "step": 62000 }, { "epoch": 10.115008156606851, "grad_norm": 4.685029029846191, "learning_rate": 0.0005769374480625983, "loss": 0.0981, "num_input_tokens_seen": 133731856, "step": 62005 }, { "epoch": 10.115823817292007, "grad_norm": 0.014458171091973782, "learning_rate": 0.000576867115188162, "loss": 0.0132, "num_input_tokens_seen": 133742032, "step": 62010 }, { "epoch": 10.116639477977161, "grad_norm": 0.03550034761428833, "learning_rate": 0.000576796780755916, "loss": 0.1902, "num_input_tokens_seen": 133751728, "step": 62015 }, { "epoch": 10.117455138662317, "grad_norm": 0.3246733546257019, "learning_rate": 0.0005767264447672859, "loss": 0.0505, "num_input_tokens_seen": 133763056, "step": 62020 }, { "epoch": 10.11827079934747, "grad_norm": 0.13192394375801086, "learning_rate": 0.000576656107223697, "loss": 0.0268, "num_input_tokens_seen": 133773456, "step": 62025 }, { "epoch": 10.119086460032626, "grad_norm": 0.00867773313075304, "learning_rate": 0.0005765857681265749, "loss": 0.1475, "num_input_tokens_seen": 133784784, "step": 62030 }, { "epoch": 10.119902120717782, "grad_norm": 0.4968458116054535, "learning_rate": 0.000576515427477345, "loss": 0.1069, "num_input_tokens_seen": 133795952, "step": 62035 }, { "epoch": 10.120717781402936, "grad_norm": 0.018576500937342644, "learning_rate": 0.0005764450852774329, "loss": 0.1408, "num_input_tokens_seen": 133805616, "step": 62040 }, { "epoch": 10.121533442088092, "grad_norm": 0.10699335485696793, "learning_rate": 0.0005763747415282642, "loss": 0.0471, "num_input_tokens_seen": 133816688, "step": 62045 }, { "epoch": 10.122349102773246, "grad_norm": 0.0135413883253932, "learning_rate": 0.0005763043962312644, "loss": 0.0451, "num_input_tokens_seen": 133828144, "step": 62050 }, { "epoch": 10.123164763458401, "grad_norm": 0.30159252882003784, "learning_rate": 0.0005762340493878593, "loss": 0.1399, "num_input_tokens_seen": 133840080, "step": 62055 }, { "epoch": 10.123980424143557, "grad_norm": 0.037092968821525574, "learning_rate": 0.0005761637009994745, "loss": 0.0314, "num_input_tokens_seen": 133851088, "step": 62060 }, { "epoch": 10.124796084828711, "grad_norm": 0.007264563348144293, "learning_rate": 0.0005760933510675356, "loss": 0.0034, "num_input_tokens_seen": 133862000, "step": 62065 }, { "epoch": 10.125611745513867, "grad_norm": 0.02116227149963379, "learning_rate": 0.0005760229995934684, "loss": 0.0992, "num_input_tokens_seen": 133873136, "step": 62070 }, { "epoch": 10.12642740619902, "grad_norm": 0.1036268100142479, "learning_rate": 0.0005759526465786986, "loss": 0.0133, "num_input_tokens_seen": 133884240, "step": 62075 }, { "epoch": 10.127243066884176, "grad_norm": 0.02696262300014496, "learning_rate": 0.0005758822920246523, "loss": 0.1178, "num_input_tokens_seen": 133896368, "step": 62080 }, { "epoch": 10.12805872756933, "grad_norm": 0.07249107956886292, "learning_rate": 0.000575811935932755, "loss": 0.014, "num_input_tokens_seen": 133907760, "step": 62085 }, { "epoch": 10.128874388254486, "grad_norm": 0.02204158529639244, "learning_rate": 0.0005757415783044325, "loss": 0.1125, "num_input_tokens_seen": 133919056, "step": 62090 }, { "epoch": 10.129690048939642, "grad_norm": 0.016496529802680016, "learning_rate": 0.0005756712191411109, "loss": 0.089, "num_input_tokens_seen": 133929776, "step": 62095 }, { "epoch": 10.130505709624796, "grad_norm": 0.016657959669828415, "learning_rate": 0.0005756008584442161, "loss": 0.1002, "num_input_tokens_seen": 133939984, "step": 62100 }, { "epoch": 10.131321370309951, "grad_norm": 0.08338290452957153, "learning_rate": 0.0005755304962151739, "loss": 0.0133, "num_input_tokens_seen": 133950864, "step": 62105 }, { "epoch": 10.132137030995105, "grad_norm": 0.3505701720714569, "learning_rate": 0.0005754601324554104, "loss": 0.0383, "num_input_tokens_seen": 133961232, "step": 62110 }, { "epoch": 10.132952691680261, "grad_norm": 0.007745380979031324, "learning_rate": 0.0005753897671663518, "loss": 0.0257, "num_input_tokens_seen": 133972176, "step": 62115 }, { "epoch": 10.133768352365417, "grad_norm": 0.08587710559368134, "learning_rate": 0.0005753194003494237, "loss": 0.0938, "num_input_tokens_seen": 133983024, "step": 62120 }, { "epoch": 10.13458401305057, "grad_norm": 0.44414669275283813, "learning_rate": 0.0005752490320060524, "loss": 0.0337, "num_input_tokens_seen": 133993520, "step": 62125 }, { "epoch": 10.135399673735726, "grad_norm": 0.017452040687203407, "learning_rate": 0.0005751786621376641, "loss": 0.006, "num_input_tokens_seen": 134003504, "step": 62130 }, { "epoch": 10.13621533442088, "grad_norm": 0.11180145293474197, "learning_rate": 0.0005751082907456849, "loss": 0.0189, "num_input_tokens_seen": 134013680, "step": 62135 }, { "epoch": 10.137030995106036, "grad_norm": 0.4557669162750244, "learning_rate": 0.0005750379178315408, "loss": 0.1944, "num_input_tokens_seen": 134024080, "step": 62140 }, { "epoch": 10.137846655791192, "grad_norm": 0.34465929865837097, "learning_rate": 0.0005749675433966581, "loss": 0.1356, "num_input_tokens_seen": 134034448, "step": 62145 }, { "epoch": 10.138662316476346, "grad_norm": 0.39242857694625854, "learning_rate": 0.0005748971674424631, "loss": 0.024, "num_input_tokens_seen": 134045616, "step": 62150 }, { "epoch": 10.139477977161501, "grad_norm": 0.03713627904653549, "learning_rate": 0.0005748267899703819, "loss": 0.0185, "num_input_tokens_seen": 134058512, "step": 62155 }, { "epoch": 10.140293637846655, "grad_norm": 0.029162008315324783, "learning_rate": 0.000574756410981841, "loss": 0.035, "num_input_tokens_seen": 134069808, "step": 62160 }, { "epoch": 10.141109298531811, "grad_norm": 0.007242150139063597, "learning_rate": 0.0005746860304782665, "loss": 0.0138, "num_input_tokens_seen": 134080272, "step": 62165 }, { "epoch": 10.141924959216965, "grad_norm": 0.007788899354636669, "learning_rate": 0.0005746156484610849, "loss": 0.0176, "num_input_tokens_seen": 134090928, "step": 62170 }, { "epoch": 10.14274061990212, "grad_norm": 1.2558012008666992, "learning_rate": 0.0005745452649317225, "loss": 0.0789, "num_input_tokens_seen": 134101264, "step": 62175 }, { "epoch": 10.143556280587276, "grad_norm": 0.05152910575270653, "learning_rate": 0.0005744748798916057, "loss": 0.0459, "num_input_tokens_seen": 134111216, "step": 62180 }, { "epoch": 10.14437194127243, "grad_norm": 0.17185325920581818, "learning_rate": 0.0005744044933421609, "loss": 0.1146, "num_input_tokens_seen": 134122640, "step": 62185 }, { "epoch": 10.145187601957586, "grad_norm": 0.006490977481007576, "learning_rate": 0.0005743341052848147, "loss": 0.0166, "num_input_tokens_seen": 134134128, "step": 62190 }, { "epoch": 10.14600326264274, "grad_norm": 0.5435214042663574, "learning_rate": 0.0005742637157209936, "loss": 0.1053, "num_input_tokens_seen": 134144400, "step": 62195 }, { "epoch": 10.146818923327896, "grad_norm": 0.5340539216995239, "learning_rate": 0.0005741933246521243, "loss": 0.1256, "num_input_tokens_seen": 134155504, "step": 62200 }, { "epoch": 10.147634584013051, "grad_norm": 0.010587858967483044, "learning_rate": 0.0005741229320796329, "loss": 0.088, "num_input_tokens_seen": 134168336, "step": 62205 }, { "epoch": 10.148450244698205, "grad_norm": 0.31097400188446045, "learning_rate": 0.0005740525380049464, "loss": 0.195, "num_input_tokens_seen": 134178768, "step": 62210 }, { "epoch": 10.149265905383361, "grad_norm": 0.7456463575363159, "learning_rate": 0.0005739821424294911, "loss": 0.0767, "num_input_tokens_seen": 134189296, "step": 62215 }, { "epoch": 10.150081566068515, "grad_norm": 0.013184874318540096, "learning_rate": 0.000573911745354694, "loss": 0.0891, "num_input_tokens_seen": 134201328, "step": 62220 }, { "epoch": 10.15089722675367, "grad_norm": 0.02348286472260952, "learning_rate": 0.0005738413467819816, "loss": 0.0285, "num_input_tokens_seen": 134210480, "step": 62225 }, { "epoch": 10.151712887438826, "grad_norm": 0.049068301916122437, "learning_rate": 0.0005737709467127805, "loss": 0.0198, "num_input_tokens_seen": 134221936, "step": 62230 }, { "epoch": 10.15252854812398, "grad_norm": 0.029392356052994728, "learning_rate": 0.0005737005451485177, "loss": 0.0756, "num_input_tokens_seen": 134231952, "step": 62235 }, { "epoch": 10.153344208809136, "grad_norm": 0.007911349646747112, "learning_rate": 0.0005736301420906196, "loss": 0.0182, "num_input_tokens_seen": 134242480, "step": 62240 }, { "epoch": 10.15415986949429, "grad_norm": 1.3132518529891968, "learning_rate": 0.0005735597375405135, "loss": 0.0295, "num_input_tokens_seen": 134253840, "step": 62245 }, { "epoch": 10.154975530179446, "grad_norm": 0.005830275360494852, "learning_rate": 0.000573489331499626, "loss": 0.013, "num_input_tokens_seen": 134263984, "step": 62250 }, { "epoch": 10.1557911908646, "grad_norm": 0.12323333323001862, "learning_rate": 0.000573418923969384, "loss": 0.0803, "num_input_tokens_seen": 134274704, "step": 62255 }, { "epoch": 10.156606851549755, "grad_norm": 0.14877955615520477, "learning_rate": 0.0005733485149512143, "loss": 0.0326, "num_input_tokens_seen": 134285328, "step": 62260 }, { "epoch": 10.15742251223491, "grad_norm": 0.00594542920589447, "learning_rate": 0.000573278104446544, "loss": 0.0165, "num_input_tokens_seen": 134296848, "step": 62265 }, { "epoch": 10.158238172920065, "grad_norm": 0.006854815874248743, "learning_rate": 0.0005732076924567999, "loss": 0.0064, "num_input_tokens_seen": 134307440, "step": 62270 }, { "epoch": 10.15905383360522, "grad_norm": 0.2644030451774597, "learning_rate": 0.0005731372789834089, "loss": 0.1519, "num_input_tokens_seen": 134318672, "step": 62275 }, { "epoch": 10.159869494290374, "grad_norm": 0.06025613844394684, "learning_rate": 0.0005730668640277983, "loss": 0.1049, "num_input_tokens_seen": 134329456, "step": 62280 }, { "epoch": 10.16068515497553, "grad_norm": 0.007477723527699709, "learning_rate": 0.0005729964475913949, "loss": 0.0194, "num_input_tokens_seen": 134340240, "step": 62285 }, { "epoch": 10.161500815660686, "grad_norm": 0.43554967641830444, "learning_rate": 0.0005729260296756259, "loss": 0.034, "num_input_tokens_seen": 134352144, "step": 62290 }, { "epoch": 10.16231647634584, "grad_norm": 0.010769812390208244, "learning_rate": 0.0005728556102819185, "loss": 0.0143, "num_input_tokens_seen": 134361936, "step": 62295 }, { "epoch": 10.163132137030995, "grad_norm": 0.012514322996139526, "learning_rate": 0.0005727851894116997, "loss": 0.0233, "num_input_tokens_seen": 134371632, "step": 62300 }, { "epoch": 10.16394779771615, "grad_norm": 0.0023373812437057495, "learning_rate": 0.0005727147670663967, "loss": 0.005, "num_input_tokens_seen": 134381936, "step": 62305 }, { "epoch": 10.164763458401305, "grad_norm": 0.008871118538081646, "learning_rate": 0.0005726443432474366, "loss": 0.072, "num_input_tokens_seen": 134393264, "step": 62310 }, { "epoch": 10.16557911908646, "grad_norm": 0.036336179822683334, "learning_rate": 0.0005725739179562469, "loss": 0.0398, "num_input_tokens_seen": 134404784, "step": 62315 }, { "epoch": 10.166394779771615, "grad_norm": 0.06922555714845657, "learning_rate": 0.0005725034911942546, "loss": 0.0255, "num_input_tokens_seen": 134415664, "step": 62320 }, { "epoch": 10.16721044045677, "grad_norm": 0.013306976296007633, "learning_rate": 0.0005724330629628871, "loss": 0.1483, "num_input_tokens_seen": 134425136, "step": 62325 }, { "epoch": 10.168026101141924, "grad_norm": 0.7215395569801331, "learning_rate": 0.0005723626332635717, "loss": 0.0892, "num_input_tokens_seen": 134435856, "step": 62330 }, { "epoch": 10.16884176182708, "grad_norm": 0.012294365093111992, "learning_rate": 0.0005722922020977356, "loss": 0.1589, "num_input_tokens_seen": 134445808, "step": 62335 }, { "epoch": 10.169657422512234, "grad_norm": 0.5038813948631287, "learning_rate": 0.0005722217694668065, "loss": 0.103, "num_input_tokens_seen": 134456496, "step": 62340 }, { "epoch": 10.17047308319739, "grad_norm": 0.3173074424266815, "learning_rate": 0.0005721513353722116, "loss": 0.0334, "num_input_tokens_seen": 134468368, "step": 62345 }, { "epoch": 10.171288743882545, "grad_norm": 0.023631680756807327, "learning_rate": 0.0005720808998153782, "loss": 0.0064, "num_input_tokens_seen": 134479920, "step": 62350 }, { "epoch": 10.1721044045677, "grad_norm": 0.6158096790313721, "learning_rate": 0.000572010462797734, "loss": 0.2134, "num_input_tokens_seen": 134490672, "step": 62355 }, { "epoch": 10.172920065252855, "grad_norm": 0.019042370840907097, "learning_rate": 0.0005719400243207065, "loss": 0.0074, "num_input_tokens_seen": 134501200, "step": 62360 }, { "epoch": 10.173735725938009, "grad_norm": 0.1468971073627472, "learning_rate": 0.0005718695843857231, "loss": 0.0221, "num_input_tokens_seen": 134511728, "step": 62365 }, { "epoch": 10.174551386623165, "grad_norm": 0.02168126218020916, "learning_rate": 0.0005717991429942114, "loss": 0.0238, "num_input_tokens_seen": 134522448, "step": 62370 }, { "epoch": 10.17536704730832, "grad_norm": 0.008950801566243172, "learning_rate": 0.000571728700147599, "loss": 0.0237, "num_input_tokens_seen": 134533648, "step": 62375 }, { "epoch": 10.176182707993474, "grad_norm": 0.07155296951532364, "learning_rate": 0.0005716582558473136, "loss": 0.1035, "num_input_tokens_seen": 134544304, "step": 62380 }, { "epoch": 10.17699836867863, "grad_norm": 0.505325973033905, "learning_rate": 0.0005715878100947824, "loss": 0.0436, "num_input_tokens_seen": 134555600, "step": 62385 }, { "epoch": 10.177814029363784, "grad_norm": 0.05268160253763199, "learning_rate": 0.0005715173628914336, "loss": 0.0409, "num_input_tokens_seen": 134566864, "step": 62390 }, { "epoch": 10.17862969004894, "grad_norm": 0.060194388031959534, "learning_rate": 0.0005714469142386948, "loss": 0.0271, "num_input_tokens_seen": 134577744, "step": 62395 }, { "epoch": 10.179445350734095, "grad_norm": 0.008379959501326084, "learning_rate": 0.0005713764641379936, "loss": 0.1332, "num_input_tokens_seen": 134588496, "step": 62400 }, { "epoch": 10.18026101141925, "grad_norm": 0.5389543175697327, "learning_rate": 0.0005713060125907578, "loss": 0.2619, "num_input_tokens_seen": 134600624, "step": 62405 }, { "epoch": 10.181076672104405, "grad_norm": 0.21804223954677582, "learning_rate": 0.0005712355595984151, "loss": 0.0207, "num_input_tokens_seen": 134610992, "step": 62410 }, { "epoch": 10.181892332789559, "grad_norm": 0.08661021292209625, "learning_rate": 0.0005711651051623935, "loss": 0.0174, "num_input_tokens_seen": 134620528, "step": 62415 }, { "epoch": 10.182707993474715, "grad_norm": 0.24981293082237244, "learning_rate": 0.0005710946492841208, "loss": 0.0886, "num_input_tokens_seen": 134629872, "step": 62420 }, { "epoch": 10.18352365415987, "grad_norm": 0.05358084291219711, "learning_rate": 0.0005710241919650248, "loss": 0.0553, "num_input_tokens_seen": 134642128, "step": 62425 }, { "epoch": 10.184339314845024, "grad_norm": 0.41562560200691223, "learning_rate": 0.0005709537332065335, "loss": 0.1314, "num_input_tokens_seen": 134653040, "step": 62430 }, { "epoch": 10.18515497553018, "grad_norm": 0.11145520955324173, "learning_rate": 0.0005708832730100747, "loss": 0.1048, "num_input_tokens_seen": 134663120, "step": 62435 }, { "epoch": 10.185970636215334, "grad_norm": 0.042409516870975494, "learning_rate": 0.0005708128113770765, "loss": 0.0561, "num_input_tokens_seen": 134673008, "step": 62440 }, { "epoch": 10.18678629690049, "grad_norm": 0.027601126581430435, "learning_rate": 0.0005707423483089669, "loss": 0.0585, "num_input_tokens_seen": 134684240, "step": 62445 }, { "epoch": 10.187601957585644, "grad_norm": 0.11550195515155792, "learning_rate": 0.0005706718838071738, "loss": 0.05, "num_input_tokens_seen": 134696208, "step": 62450 }, { "epoch": 10.1884176182708, "grad_norm": 0.24068880081176758, "learning_rate": 0.0005706014178731253, "loss": 0.1134, "num_input_tokens_seen": 134705968, "step": 62455 }, { "epoch": 10.189233278955955, "grad_norm": 0.2038469761610031, "learning_rate": 0.0005705309505082496, "loss": 0.0229, "num_input_tokens_seen": 134716624, "step": 62460 }, { "epoch": 10.190048939641109, "grad_norm": 1.0574994087219238, "learning_rate": 0.0005704604817139747, "loss": 0.0743, "num_input_tokens_seen": 134728144, "step": 62465 }, { "epoch": 10.190864600326265, "grad_norm": 0.03372318670153618, "learning_rate": 0.0005703900114917286, "loss": 0.0118, "num_input_tokens_seen": 134738448, "step": 62470 }, { "epoch": 10.191680261011419, "grad_norm": 0.07903827726840973, "learning_rate": 0.0005703195398429397, "loss": 0.12, "num_input_tokens_seen": 134749040, "step": 62475 }, { "epoch": 10.192495921696574, "grad_norm": 0.04518524929881096, "learning_rate": 0.0005702490667690363, "loss": 0.0144, "num_input_tokens_seen": 134760624, "step": 62480 }, { "epoch": 10.19331158238173, "grad_norm": 0.04731779918074608, "learning_rate": 0.0005701785922714461, "loss": 0.0121, "num_input_tokens_seen": 134772016, "step": 62485 }, { "epoch": 10.194127243066884, "grad_norm": 0.5301098227500916, "learning_rate": 0.000570108116351598, "loss": 0.1378, "num_input_tokens_seen": 134782480, "step": 62490 }, { "epoch": 10.19494290375204, "grad_norm": 0.052599433809518814, "learning_rate": 0.0005700376390109198, "loss": 0.0129, "num_input_tokens_seen": 134793040, "step": 62495 }, { "epoch": 10.195758564437194, "grad_norm": 0.013229180127382278, "learning_rate": 0.00056996716025084, "loss": 0.0383, "num_input_tokens_seen": 134803376, "step": 62500 }, { "epoch": 10.19657422512235, "grad_norm": 0.15712787210941315, "learning_rate": 0.000569896680072787, "loss": 0.0188, "num_input_tokens_seen": 134813008, "step": 62505 }, { "epoch": 10.197389885807505, "grad_norm": 0.11198220402002335, "learning_rate": 0.0005698261984781891, "loss": 0.0322, "num_input_tokens_seen": 134822576, "step": 62510 }, { "epoch": 10.198205546492659, "grad_norm": 0.005141271743923426, "learning_rate": 0.0005697557154684749, "loss": 0.0353, "num_input_tokens_seen": 134833264, "step": 62515 }, { "epoch": 10.199021207177815, "grad_norm": 0.006401033140718937, "learning_rate": 0.0005696852310450723, "loss": 0.0154, "num_input_tokens_seen": 134843504, "step": 62520 }, { "epoch": 10.199836867862969, "grad_norm": 0.026176193729043007, "learning_rate": 0.0005696147452094102, "loss": 0.0389, "num_input_tokens_seen": 134853104, "step": 62525 }, { "epoch": 10.200652528548124, "grad_norm": 0.02058955281972885, "learning_rate": 0.000569544257962917, "loss": 0.0212, "num_input_tokens_seen": 134864432, "step": 62530 }, { "epoch": 10.201468189233278, "grad_norm": 0.37435677647590637, "learning_rate": 0.0005694737693070213, "loss": 0.1443, "num_input_tokens_seen": 134875696, "step": 62535 }, { "epoch": 10.202283849918434, "grad_norm": 0.05597110092639923, "learning_rate": 0.0005694032792431515, "loss": 0.0165, "num_input_tokens_seen": 134885968, "step": 62540 }, { "epoch": 10.20309951060359, "grad_norm": 0.26195210218429565, "learning_rate": 0.0005693327877727361, "loss": 0.036, "num_input_tokens_seen": 134897008, "step": 62545 }, { "epoch": 10.203915171288743, "grad_norm": 0.011689431965351105, "learning_rate": 0.0005692622948972039, "loss": 0.0273, "num_input_tokens_seen": 134908208, "step": 62550 }, { "epoch": 10.2047308319739, "grad_norm": 0.315701425075531, "learning_rate": 0.0005691918006179833, "loss": 0.0295, "num_input_tokens_seen": 134918000, "step": 62555 }, { "epoch": 10.205546492659053, "grad_norm": 0.019179554656147957, "learning_rate": 0.0005691213049365031, "loss": 0.0356, "num_input_tokens_seen": 134928976, "step": 62560 }, { "epoch": 10.206362153344209, "grad_norm": 0.015334351919591427, "learning_rate": 0.000569050807854192, "loss": 0.052, "num_input_tokens_seen": 134940080, "step": 62565 }, { "epoch": 10.207177814029365, "grad_norm": 0.01013575866818428, "learning_rate": 0.0005689803093724788, "loss": 0.0296, "num_input_tokens_seen": 134951376, "step": 62570 }, { "epoch": 10.207993474714518, "grad_norm": 0.05382871255278587, "learning_rate": 0.0005689098094927921, "loss": 0.1674, "num_input_tokens_seen": 134961328, "step": 62575 }, { "epoch": 10.208809135399674, "grad_norm": 0.05677931383252144, "learning_rate": 0.0005688393082165605, "loss": 0.0504, "num_input_tokens_seen": 134970384, "step": 62580 }, { "epoch": 10.209624796084828, "grad_norm": 0.03482932224869728, "learning_rate": 0.0005687688055452132, "loss": 0.055, "num_input_tokens_seen": 134981072, "step": 62585 }, { "epoch": 10.210440456769984, "grad_norm": 0.0077704740688204765, "learning_rate": 0.0005686983014801787, "loss": 0.0834, "num_input_tokens_seen": 134991888, "step": 62590 }, { "epoch": 10.21125611745514, "grad_norm": 0.08381374925374985, "learning_rate": 0.000568627796022886, "loss": 0.0469, "num_input_tokens_seen": 135003504, "step": 62595 }, { "epoch": 10.212071778140293, "grad_norm": 0.2603074014186859, "learning_rate": 0.0005685572891747639, "loss": 0.0631, "num_input_tokens_seen": 135013936, "step": 62600 }, { "epoch": 10.21288743882545, "grad_norm": 0.005078943911939859, "learning_rate": 0.0005684867809372415, "loss": 0.0087, "num_input_tokens_seen": 135024208, "step": 62605 }, { "epoch": 10.213703099510603, "grad_norm": 0.539273738861084, "learning_rate": 0.0005684162713117473, "loss": 0.1037, "num_input_tokens_seen": 135034672, "step": 62610 }, { "epoch": 10.214518760195759, "grad_norm": 0.6135087013244629, "learning_rate": 0.0005683457602997108, "loss": 0.1145, "num_input_tokens_seen": 135045104, "step": 62615 }, { "epoch": 10.215334420880913, "grad_norm": 0.3409031927585602, "learning_rate": 0.0005682752479025608, "loss": 0.1417, "num_input_tokens_seen": 135055792, "step": 62620 }, { "epoch": 10.216150081566068, "grad_norm": 0.09703811258077621, "learning_rate": 0.0005682047341217262, "loss": 0.0471, "num_input_tokens_seen": 135065744, "step": 62625 }, { "epoch": 10.216965742251224, "grad_norm": 0.02706652134656906, "learning_rate": 0.0005681342189586362, "loss": 0.012, "num_input_tokens_seen": 135077008, "step": 62630 }, { "epoch": 10.217781402936378, "grad_norm": 0.16657434403896332, "learning_rate": 0.0005680637024147199, "loss": 0.0233, "num_input_tokens_seen": 135087792, "step": 62635 }, { "epoch": 10.218597063621534, "grad_norm": 0.09191132336854935, "learning_rate": 0.0005679931844914061, "loss": 0.0234, "num_input_tokens_seen": 135099152, "step": 62640 }, { "epoch": 10.219412724306688, "grad_norm": 0.1349492371082306, "learning_rate": 0.0005679226651901243, "loss": 0.0495, "num_input_tokens_seen": 135109136, "step": 62645 }, { "epoch": 10.220228384991843, "grad_norm": 0.026435913518071175, "learning_rate": 0.0005678521445123036, "loss": 0.1674, "num_input_tokens_seen": 135119472, "step": 62650 }, { "epoch": 10.221044045676999, "grad_norm": 0.02591535449028015, "learning_rate": 0.0005677816224593731, "loss": 0.0115, "num_input_tokens_seen": 135130864, "step": 62655 }, { "epoch": 10.221859706362153, "grad_norm": 0.36795172095298767, "learning_rate": 0.0005677110990327618, "loss": 0.1115, "num_input_tokens_seen": 135141136, "step": 62660 }, { "epoch": 10.222675367047309, "grad_norm": 0.032265905290842056, "learning_rate": 0.0005676405742338995, "loss": 0.0997, "num_input_tokens_seen": 135152208, "step": 62665 }, { "epoch": 10.223491027732463, "grad_norm": 0.12540049850940704, "learning_rate": 0.0005675700480642149, "loss": 0.0162, "num_input_tokens_seen": 135163216, "step": 62670 }, { "epoch": 10.224306688417618, "grad_norm": 0.01594599336385727, "learning_rate": 0.0005674995205251376, "loss": 0.0594, "num_input_tokens_seen": 135174160, "step": 62675 }, { "epoch": 10.225122349102774, "grad_norm": 0.5691853165626526, "learning_rate": 0.000567428991618097, "loss": 0.1531, "num_input_tokens_seen": 135184912, "step": 62680 }, { "epoch": 10.225938009787928, "grad_norm": 0.010983028449118137, "learning_rate": 0.0005673584613445223, "loss": 0.0594, "num_input_tokens_seen": 135196432, "step": 62685 }, { "epoch": 10.226753670473084, "grad_norm": 0.49172157049179077, "learning_rate": 0.000567287929705843, "loss": 0.083, "num_input_tokens_seen": 135208080, "step": 62690 }, { "epoch": 10.227569331158238, "grad_norm": 0.45068296790122986, "learning_rate": 0.0005672173967034883, "loss": 0.0209, "num_input_tokens_seen": 135219920, "step": 62695 }, { "epoch": 10.228384991843393, "grad_norm": 0.5505008101463318, "learning_rate": 0.0005671468623388878, "loss": 0.1357, "num_input_tokens_seen": 135229488, "step": 62700 }, { "epoch": 10.229200652528547, "grad_norm": 0.03593357279896736, "learning_rate": 0.000567076326613471, "loss": 0.0067, "num_input_tokens_seen": 135237904, "step": 62705 }, { "epoch": 10.230016313213703, "grad_norm": 0.5823007225990295, "learning_rate": 0.0005670057895286674, "loss": 0.029, "num_input_tokens_seen": 135249648, "step": 62710 }, { "epoch": 10.230831973898859, "grad_norm": 0.2044401466846466, "learning_rate": 0.0005669352510859063, "loss": 0.0428, "num_input_tokens_seen": 135260112, "step": 62715 }, { "epoch": 10.231647634584013, "grad_norm": 0.014225736260414124, "learning_rate": 0.0005668647112866175, "loss": 0.0378, "num_input_tokens_seen": 135270480, "step": 62720 }, { "epoch": 10.232463295269168, "grad_norm": 0.1549697369337082, "learning_rate": 0.0005667941701322305, "loss": 0.0223, "num_input_tokens_seen": 135282544, "step": 62725 }, { "epoch": 10.233278955954322, "grad_norm": 0.20286306738853455, "learning_rate": 0.000566723627624175, "loss": 0.0222, "num_input_tokens_seen": 135293488, "step": 62730 }, { "epoch": 10.234094616639478, "grad_norm": 0.04591246694326401, "learning_rate": 0.0005666530837638805, "loss": 0.0263, "num_input_tokens_seen": 135303504, "step": 62735 }, { "epoch": 10.234910277324634, "grad_norm": 0.06446409970521927, "learning_rate": 0.0005665825385527766, "loss": 0.037, "num_input_tokens_seen": 135313008, "step": 62740 }, { "epoch": 10.235725938009788, "grad_norm": 0.018541956320405006, "learning_rate": 0.0005665119919922932, "loss": 0.0213, "num_input_tokens_seen": 135322832, "step": 62745 }, { "epoch": 10.236541598694943, "grad_norm": 0.6051971912384033, "learning_rate": 0.0005664414440838598, "loss": 0.0934, "num_input_tokens_seen": 135332816, "step": 62750 }, { "epoch": 10.237357259380097, "grad_norm": 0.6212651133537292, "learning_rate": 0.0005663708948289065, "loss": 0.1158, "num_input_tokens_seen": 135344976, "step": 62755 }, { "epoch": 10.238172920065253, "grad_norm": 0.15038177371025085, "learning_rate": 0.0005663003442288626, "loss": 0.0185, "num_input_tokens_seen": 135355600, "step": 62760 }, { "epoch": 10.238988580750409, "grad_norm": 0.31420812010765076, "learning_rate": 0.0005662297922851583, "loss": 0.02, "num_input_tokens_seen": 135367536, "step": 62765 }, { "epoch": 10.239804241435563, "grad_norm": 0.005929914303123951, "learning_rate": 0.0005661592389992231, "loss": 0.0079, "num_input_tokens_seen": 135378384, "step": 62770 }, { "epoch": 10.240619902120718, "grad_norm": 0.3754366636276245, "learning_rate": 0.0005660886843724869, "loss": 0.0123, "num_input_tokens_seen": 135387952, "step": 62775 }, { "epoch": 10.241435562805872, "grad_norm": 0.06475894153118134, "learning_rate": 0.0005660181284063798, "loss": 0.087, "num_input_tokens_seen": 135399696, "step": 62780 }, { "epoch": 10.242251223491028, "grad_norm": 0.005752126220613718, "learning_rate": 0.0005659475711023317, "loss": 0.0849, "num_input_tokens_seen": 135412240, "step": 62785 }, { "epoch": 10.243066884176184, "grad_norm": 0.1944762021303177, "learning_rate": 0.0005658770124617722, "loss": 0.0153, "num_input_tokens_seen": 135422896, "step": 62790 }, { "epoch": 10.243882544861338, "grad_norm": 0.04992297664284706, "learning_rate": 0.0005658064524861315, "loss": 0.1287, "num_input_tokens_seen": 135433744, "step": 62795 }, { "epoch": 10.244698205546493, "grad_norm": 0.047272030264139175, "learning_rate": 0.0005657358911768395, "loss": 0.015, "num_input_tokens_seen": 135445264, "step": 62800 }, { "epoch": 10.245513866231647, "grad_norm": 0.33140531182289124, "learning_rate": 0.0005656653285353265, "loss": 0.0478, "num_input_tokens_seen": 135455664, "step": 62805 }, { "epoch": 10.246329526916803, "grad_norm": 0.18322815001010895, "learning_rate": 0.0005655947645630222, "loss": 0.0361, "num_input_tokens_seen": 135465552, "step": 62810 }, { "epoch": 10.247145187601957, "grad_norm": 0.48981189727783203, "learning_rate": 0.0005655241992613566, "loss": 0.1473, "num_input_tokens_seen": 135476336, "step": 62815 }, { "epoch": 10.247960848287113, "grad_norm": 0.012519458308815956, "learning_rate": 0.0005654536326317602, "loss": 0.0042, "num_input_tokens_seen": 135486576, "step": 62820 }, { "epoch": 10.248776508972268, "grad_norm": 0.06337398290634155, "learning_rate": 0.0005653830646756629, "loss": 0.091, "num_input_tokens_seen": 135498512, "step": 62825 }, { "epoch": 10.249592169657422, "grad_norm": 0.009690511971712112, "learning_rate": 0.0005653124953944947, "loss": 0.0754, "num_input_tokens_seen": 135509040, "step": 62830 }, { "epoch": 10.250407830342578, "grad_norm": 0.04498912766575813, "learning_rate": 0.0005652419247896861, "loss": 0.0947, "num_input_tokens_seen": 135519280, "step": 62835 }, { "epoch": 10.251223491027732, "grad_norm": 0.8288303017616272, "learning_rate": 0.000565171352862667, "loss": 0.1952, "num_input_tokens_seen": 135529744, "step": 62840 }, { "epoch": 10.252039151712887, "grad_norm": 0.04391171783208847, "learning_rate": 0.0005651007796148678, "loss": 0.0523, "num_input_tokens_seen": 135541200, "step": 62845 }, { "epoch": 10.252854812398043, "grad_norm": 0.05937337130308151, "learning_rate": 0.0005650302050477187, "loss": 0.0307, "num_input_tokens_seen": 135551056, "step": 62850 }, { "epoch": 10.253670473083197, "grad_norm": 1.166271448135376, "learning_rate": 0.0005649596291626501, "loss": 0.1077, "num_input_tokens_seen": 135561648, "step": 62855 }, { "epoch": 10.254486133768353, "grad_norm": 0.20711597800254822, "learning_rate": 0.0005648890519610921, "loss": 0.0206, "num_input_tokens_seen": 135572432, "step": 62860 }, { "epoch": 10.255301794453507, "grad_norm": 0.02406413108110428, "learning_rate": 0.0005648184734444753, "loss": 0.0344, "num_input_tokens_seen": 135582000, "step": 62865 }, { "epoch": 10.256117455138662, "grad_norm": 0.011656362563371658, "learning_rate": 0.0005647478936142296, "loss": 0.0593, "num_input_tokens_seen": 135592208, "step": 62870 }, { "epoch": 10.256933115823816, "grad_norm": 0.06082538887858391, "learning_rate": 0.0005646773124717858, "loss": 0.0122, "num_input_tokens_seen": 135603600, "step": 62875 }, { "epoch": 10.257748776508972, "grad_norm": 0.008337998762726784, "learning_rate": 0.0005646067300185744, "loss": 0.1263, "num_input_tokens_seen": 135614224, "step": 62880 }, { "epoch": 10.258564437194128, "grad_norm": 0.4667803645133972, "learning_rate": 0.0005645361462560256, "loss": 0.0239, "num_input_tokens_seen": 135624368, "step": 62885 }, { "epoch": 10.259380097879282, "grad_norm": 0.2749415934085846, "learning_rate": 0.0005644655611855698, "loss": 0.0289, "num_input_tokens_seen": 135635856, "step": 62890 }, { "epoch": 10.260195758564437, "grad_norm": 0.06583676487207413, "learning_rate": 0.0005643949748086377, "loss": 0.0213, "num_input_tokens_seen": 135646416, "step": 62895 }, { "epoch": 10.261011419249591, "grad_norm": 0.1848277896642685, "learning_rate": 0.0005643243871266598, "loss": 0.0183, "num_input_tokens_seen": 135656176, "step": 62900 }, { "epoch": 10.261827079934747, "grad_norm": 0.05676567181944847, "learning_rate": 0.0005642537981410665, "loss": 0.0777, "num_input_tokens_seen": 135667184, "step": 62905 }, { "epoch": 10.262642740619903, "grad_norm": 0.5600584149360657, "learning_rate": 0.0005641832078532886, "loss": 0.0949, "num_input_tokens_seen": 135677904, "step": 62910 }, { "epoch": 10.263458401305057, "grad_norm": 0.4165595471858978, "learning_rate": 0.0005641126162647564, "loss": 0.0265, "num_input_tokens_seen": 135689456, "step": 62915 }, { "epoch": 10.264274061990212, "grad_norm": 0.7945905923843384, "learning_rate": 0.0005640420233769008, "loss": 0.0321, "num_input_tokens_seen": 135699696, "step": 62920 }, { "epoch": 10.265089722675366, "grad_norm": 1.0050934553146362, "learning_rate": 0.0005639714291911524, "loss": 0.0795, "num_input_tokens_seen": 135711408, "step": 62925 }, { "epoch": 10.265905383360522, "grad_norm": 0.0816318541765213, "learning_rate": 0.0005639008337089416, "loss": 0.1045, "num_input_tokens_seen": 135721744, "step": 62930 }, { "epoch": 10.266721044045678, "grad_norm": 0.02676548808813095, "learning_rate": 0.0005638302369316995, "loss": 0.0137, "num_input_tokens_seen": 135731344, "step": 62935 }, { "epoch": 10.267536704730832, "grad_norm": 0.009084532037377357, "learning_rate": 0.0005637596388608567, "loss": 0.0191, "num_input_tokens_seen": 135742480, "step": 62940 }, { "epoch": 10.268352365415987, "grad_norm": 0.34411120414733887, "learning_rate": 0.0005636890394978439, "loss": 0.0525, "num_input_tokens_seen": 135752400, "step": 62945 }, { "epoch": 10.269168026101141, "grad_norm": 0.618686854839325, "learning_rate": 0.0005636184388440919, "loss": 0.0442, "num_input_tokens_seen": 135763312, "step": 62950 }, { "epoch": 10.269983686786297, "grad_norm": 0.011867786757647991, "learning_rate": 0.0005635478369010316, "loss": 0.0983, "num_input_tokens_seen": 135774480, "step": 62955 }, { "epoch": 10.270799347471453, "grad_norm": 0.13335591554641724, "learning_rate": 0.0005634772336700937, "loss": 0.0281, "num_input_tokens_seen": 135785680, "step": 62960 }, { "epoch": 10.271615008156607, "grad_norm": 0.6448015570640564, "learning_rate": 0.0005634066291527092, "loss": 0.0722, "num_input_tokens_seen": 135796784, "step": 62965 }, { "epoch": 10.272430668841762, "grad_norm": 0.29328083992004395, "learning_rate": 0.000563336023350309, "loss": 0.1525, "num_input_tokens_seen": 135808176, "step": 62970 }, { "epoch": 10.273246329526916, "grad_norm": 0.4624020457267761, "learning_rate": 0.0005632654162643239, "loss": 0.0224, "num_input_tokens_seen": 135819344, "step": 62975 }, { "epoch": 10.274061990212072, "grad_norm": 0.03284401446580887, "learning_rate": 0.0005631948078961847, "loss": 0.1059, "num_input_tokens_seen": 135830064, "step": 62980 }, { "epoch": 10.274877650897226, "grad_norm": 0.28184202313423157, "learning_rate": 0.0005631241982473227, "loss": 0.0763, "num_input_tokens_seen": 135840592, "step": 62985 }, { "epoch": 10.275693311582382, "grad_norm": 0.007004639599472284, "learning_rate": 0.0005630535873191687, "loss": 0.019, "num_input_tokens_seen": 135851344, "step": 62990 }, { "epoch": 10.276508972267537, "grad_norm": 0.01525322999805212, "learning_rate": 0.0005629829751131538, "loss": 0.0763, "num_input_tokens_seen": 135863600, "step": 62995 }, { "epoch": 10.277324632952691, "grad_norm": 0.015915412455797195, "learning_rate": 0.0005629123616307089, "loss": 0.0524, "num_input_tokens_seen": 135875728, "step": 63000 }, { "epoch": 10.278140293637847, "grad_norm": 0.12701416015625, "learning_rate": 0.0005628417468732653, "loss": 0.0313, "num_input_tokens_seen": 135885776, "step": 63005 }, { "epoch": 10.278955954323001, "grad_norm": 0.04895911365747452, "learning_rate": 0.0005627711308422539, "loss": 0.0103, "num_input_tokens_seen": 135897200, "step": 63010 }, { "epoch": 10.279771615008157, "grad_norm": 0.034936245530843735, "learning_rate": 0.000562700513539106, "loss": 0.0131, "num_input_tokens_seen": 135909232, "step": 63015 }, { "epoch": 10.280587275693312, "grad_norm": 0.13237547874450684, "learning_rate": 0.0005626298949652524, "loss": 0.1573, "num_input_tokens_seen": 135920272, "step": 63020 }, { "epoch": 10.281402936378466, "grad_norm": 0.3582659959793091, "learning_rate": 0.0005625592751221248, "loss": 0.1163, "num_input_tokens_seen": 135930832, "step": 63025 }, { "epoch": 10.282218597063622, "grad_norm": 0.03465120121836662, "learning_rate": 0.000562488654011154, "loss": 0.0507, "num_input_tokens_seen": 135941072, "step": 63030 }, { "epoch": 10.283034257748776, "grad_norm": 0.011232134886085987, "learning_rate": 0.0005624180316337715, "loss": 0.0089, "num_input_tokens_seen": 135952912, "step": 63035 }, { "epoch": 10.283849918433932, "grad_norm": 0.05320986360311508, "learning_rate": 0.0005623474079914082, "loss": 0.0529, "num_input_tokens_seen": 135964080, "step": 63040 }, { "epoch": 10.284665579119087, "grad_norm": 0.24157589673995972, "learning_rate": 0.0005622767830854957, "loss": 0.0366, "num_input_tokens_seen": 135974640, "step": 63045 }, { "epoch": 10.285481239804241, "grad_norm": 0.007266675122082233, "learning_rate": 0.0005622061569174651, "loss": 0.013, "num_input_tokens_seen": 135986128, "step": 63050 }, { "epoch": 10.286296900489397, "grad_norm": 0.02779882401227951, "learning_rate": 0.0005621355294887479, "loss": 0.0382, "num_input_tokens_seen": 135997072, "step": 63055 }, { "epoch": 10.28711256117455, "grad_norm": 0.4344821274280548, "learning_rate": 0.0005620649008007755, "loss": 0.0681, "num_input_tokens_seen": 136009008, "step": 63060 }, { "epoch": 10.287928221859707, "grad_norm": 0.0533701553940773, "learning_rate": 0.0005619942708549789, "loss": 0.0226, "num_input_tokens_seen": 136019952, "step": 63065 }, { "epoch": 10.28874388254486, "grad_norm": 0.41163161396980286, "learning_rate": 0.0005619236396527899, "loss": 0.0714, "num_input_tokens_seen": 136031248, "step": 63070 }, { "epoch": 10.289559543230016, "grad_norm": 0.03365471214056015, "learning_rate": 0.0005618530071956397, "loss": 0.0232, "num_input_tokens_seen": 136042384, "step": 63075 }, { "epoch": 10.290375203915172, "grad_norm": 0.0382050946354866, "learning_rate": 0.00056178237348496, "loss": 0.0093, "num_input_tokens_seen": 136053712, "step": 63080 }, { "epoch": 10.291190864600326, "grad_norm": 0.08874277770519257, "learning_rate": 0.0005617117385221819, "loss": 0.1783, "num_input_tokens_seen": 136065136, "step": 63085 }, { "epoch": 10.292006525285482, "grad_norm": 0.02770037204027176, "learning_rate": 0.0005616411023087373, "loss": 0.1906, "num_input_tokens_seen": 136075952, "step": 63090 }, { "epoch": 10.292822185970635, "grad_norm": 0.31035342812538147, "learning_rate": 0.0005615704648460575, "loss": 0.0386, "num_input_tokens_seen": 136087472, "step": 63095 }, { "epoch": 10.293637846655791, "grad_norm": 0.0030887925531715155, "learning_rate": 0.0005614998261355741, "loss": 0.1363, "num_input_tokens_seen": 136098832, "step": 63100 }, { "epoch": 10.294453507340947, "grad_norm": 0.4564250111579895, "learning_rate": 0.0005614291861787188, "loss": 0.0695, "num_input_tokens_seen": 136110256, "step": 63105 }, { "epoch": 10.2952691680261, "grad_norm": 0.04325089976191521, "learning_rate": 0.0005613585449769232, "loss": 0.0259, "num_input_tokens_seen": 136120144, "step": 63110 }, { "epoch": 10.296084828711257, "grad_norm": 0.08346651494503021, "learning_rate": 0.0005612879025316186, "loss": 0.028, "num_input_tokens_seen": 136131632, "step": 63115 }, { "epoch": 10.29690048939641, "grad_norm": 0.05566442385315895, "learning_rate": 0.000561217258844237, "loss": 0.0877, "num_input_tokens_seen": 136143376, "step": 63120 }, { "epoch": 10.297716150081566, "grad_norm": 0.02049890160560608, "learning_rate": 0.0005611466139162101, "loss": 0.0588, "num_input_tokens_seen": 136153808, "step": 63125 }, { "epoch": 10.298531810766722, "grad_norm": 0.017896132543683052, "learning_rate": 0.0005610759677489694, "loss": 0.0539, "num_input_tokens_seen": 136165488, "step": 63130 }, { "epoch": 10.299347471451876, "grad_norm": 0.11901908367872238, "learning_rate": 0.0005610053203439467, "loss": 0.0315, "num_input_tokens_seen": 136174640, "step": 63135 }, { "epoch": 10.300163132137031, "grad_norm": 0.017384737730026245, "learning_rate": 0.0005609346717025737, "loss": 0.1693, "num_input_tokens_seen": 136185104, "step": 63140 }, { "epoch": 10.300978792822185, "grad_norm": 0.05314183607697487, "learning_rate": 0.0005608640218262825, "loss": 0.0439, "num_input_tokens_seen": 136196432, "step": 63145 }, { "epoch": 10.301794453507341, "grad_norm": 0.004131925292313099, "learning_rate": 0.0005607933707165046, "loss": 0.0106, "num_input_tokens_seen": 136206704, "step": 63150 }, { "epoch": 10.302610114192497, "grad_norm": 0.43982335925102234, "learning_rate": 0.000560722718374672, "loss": 0.0565, "num_input_tokens_seen": 136216912, "step": 63155 }, { "epoch": 10.30342577487765, "grad_norm": 0.6024689674377441, "learning_rate": 0.0005606520648022164, "loss": 0.1006, "num_input_tokens_seen": 136227056, "step": 63160 }, { "epoch": 10.304241435562806, "grad_norm": 0.01105036772787571, "learning_rate": 0.0005605814100005696, "loss": 0.0151, "num_input_tokens_seen": 136238064, "step": 63165 }, { "epoch": 10.30505709624796, "grad_norm": 0.06677806377410889, "learning_rate": 0.0005605107539711639, "loss": 0.0237, "num_input_tokens_seen": 136249904, "step": 63170 }, { "epoch": 10.305872756933116, "grad_norm": 0.8918985724449158, "learning_rate": 0.000560440096715431, "loss": 0.2142, "num_input_tokens_seen": 136260976, "step": 63175 }, { "epoch": 10.30668841761827, "grad_norm": 0.019323671236634254, "learning_rate": 0.0005603694382348027, "loss": 0.0521, "num_input_tokens_seen": 136272752, "step": 63180 }, { "epoch": 10.307504078303426, "grad_norm": 0.006140450015664101, "learning_rate": 0.0005602987785307112, "loss": 0.014, "num_input_tokens_seen": 136282736, "step": 63185 }, { "epoch": 10.308319738988581, "grad_norm": 0.28479525446891785, "learning_rate": 0.0005602281176045885, "loss": 0.0414, "num_input_tokens_seen": 136293520, "step": 63190 }, { "epoch": 10.309135399673735, "grad_norm": 0.22275696694850922, "learning_rate": 0.0005601574554578666, "loss": 0.0322, "num_input_tokens_seen": 136304976, "step": 63195 }, { "epoch": 10.309951060358891, "grad_norm": 0.030748821794986725, "learning_rate": 0.0005600867920919775, "loss": 0.0159, "num_input_tokens_seen": 136316784, "step": 63200 }, { "epoch": 10.310766721044045, "grad_norm": 0.18005597591400146, "learning_rate": 0.0005600161275083535, "loss": 0.113, "num_input_tokens_seen": 136327568, "step": 63205 }, { "epoch": 10.3115823817292, "grad_norm": 0.7377886772155762, "learning_rate": 0.0005599454617084264, "loss": 0.0643, "num_input_tokens_seen": 136338416, "step": 63210 }, { "epoch": 10.312398042414356, "grad_norm": 0.10712673515081406, "learning_rate": 0.0005598747946936285, "loss": 0.0208, "num_input_tokens_seen": 136349232, "step": 63215 }, { "epoch": 10.31321370309951, "grad_norm": 0.01814027689397335, "learning_rate": 0.0005598041264653919, "loss": 0.0505, "num_input_tokens_seen": 136358224, "step": 63220 }, { "epoch": 10.314029363784666, "grad_norm": 0.24749046564102173, "learning_rate": 0.0005597334570251489, "loss": 0.0361, "num_input_tokens_seen": 136369648, "step": 63225 }, { "epoch": 10.31484502446982, "grad_norm": 0.5052238702774048, "learning_rate": 0.0005596627863743316, "loss": 0.0574, "num_input_tokens_seen": 136379120, "step": 63230 }, { "epoch": 10.315660685154976, "grad_norm": 0.1470906138420105, "learning_rate": 0.0005595921145143722, "loss": 0.1961, "num_input_tokens_seen": 136390640, "step": 63235 }, { "epoch": 10.31647634584013, "grad_norm": 0.03053382970392704, "learning_rate": 0.0005595214414467029, "loss": 0.1165, "num_input_tokens_seen": 136401040, "step": 63240 }, { "epoch": 10.317292006525285, "grad_norm": 0.016087966039776802, "learning_rate": 0.0005594507671727563, "loss": 0.0585, "num_input_tokens_seen": 136411728, "step": 63245 }, { "epoch": 10.318107667210441, "grad_norm": 0.002599406987428665, "learning_rate": 0.0005593800916939642, "loss": 0.0524, "num_input_tokens_seen": 136421648, "step": 63250 }, { "epoch": 10.318923327895595, "grad_norm": 0.008174429647624493, "learning_rate": 0.0005593094150117595, "loss": 0.0834, "num_input_tokens_seen": 136433104, "step": 63255 }, { "epoch": 10.31973898858075, "grad_norm": 0.05433814972639084, "learning_rate": 0.0005592387371275741, "loss": 0.0407, "num_input_tokens_seen": 136443728, "step": 63260 }, { "epoch": 10.320554649265905, "grad_norm": 0.3313831090927124, "learning_rate": 0.0005591680580428406, "loss": 0.0811, "num_input_tokens_seen": 136454192, "step": 63265 }, { "epoch": 10.32137030995106, "grad_norm": 0.002746083540841937, "learning_rate": 0.0005590973777589912, "loss": 0.0238, "num_input_tokens_seen": 136464496, "step": 63270 }, { "epoch": 10.322185970636216, "grad_norm": 0.07199495285749435, "learning_rate": 0.0005590266962774588, "loss": 0.0725, "num_input_tokens_seen": 136474896, "step": 63275 }, { "epoch": 10.32300163132137, "grad_norm": 0.0022853021509945393, "learning_rate": 0.0005589560135996752, "loss": 0.0327, "num_input_tokens_seen": 136486608, "step": 63280 }, { "epoch": 10.323817292006526, "grad_norm": 0.28687554597854614, "learning_rate": 0.0005588853297270734, "loss": 0.1684, "num_input_tokens_seen": 136497872, "step": 63285 }, { "epoch": 10.32463295269168, "grad_norm": 0.01978861168026924, "learning_rate": 0.0005588146446610855, "loss": 0.152, "num_input_tokens_seen": 136509008, "step": 63290 }, { "epoch": 10.325448613376835, "grad_norm": 0.43222975730895996, "learning_rate": 0.0005587439584031444, "loss": 0.1385, "num_input_tokens_seen": 136520368, "step": 63295 }, { "epoch": 10.326264274061991, "grad_norm": 0.02030717395246029, "learning_rate": 0.0005586732709546824, "loss": 0.0112, "num_input_tokens_seen": 136530256, "step": 63300 }, { "epoch": 10.327079934747145, "grad_norm": 0.42113855481147766, "learning_rate": 0.0005586025823171321, "loss": 0.2053, "num_input_tokens_seen": 136541840, "step": 63305 }, { "epoch": 10.3278955954323, "grad_norm": 0.008576578460633755, "learning_rate": 0.0005585318924919262, "loss": 0.0094, "num_input_tokens_seen": 136553360, "step": 63310 }, { "epoch": 10.328711256117455, "grad_norm": 0.32805389165878296, "learning_rate": 0.0005584612014804972, "loss": 0.0716, "num_input_tokens_seen": 136564688, "step": 63315 }, { "epoch": 10.32952691680261, "grad_norm": 0.13228364288806915, "learning_rate": 0.0005583905092842777, "loss": 0.0134, "num_input_tokens_seen": 136575472, "step": 63320 }, { "epoch": 10.330342577487766, "grad_norm": 0.018416935577988625, "learning_rate": 0.0005583198159047005, "loss": 0.0923, "num_input_tokens_seen": 136587280, "step": 63325 }, { "epoch": 10.33115823817292, "grad_norm": 0.18725548684597015, "learning_rate": 0.0005582491213431983, "loss": 0.145, "num_input_tokens_seen": 136597904, "step": 63330 }, { "epoch": 10.331973898858076, "grad_norm": 0.0064430139027535915, "learning_rate": 0.0005581784256012037, "loss": 0.0266, "num_input_tokens_seen": 136609104, "step": 63335 }, { "epoch": 10.33278955954323, "grad_norm": 0.04725635051727295, "learning_rate": 0.0005581077286801495, "loss": 0.1495, "num_input_tokens_seen": 136620176, "step": 63340 }, { "epoch": 10.333605220228385, "grad_norm": 0.24823887646198273, "learning_rate": 0.0005580370305814686, "loss": 0.0253, "num_input_tokens_seen": 136631440, "step": 63345 }, { "epoch": 10.33442088091354, "grad_norm": 0.018732301890850067, "learning_rate": 0.0005579663313065935, "loss": 0.0192, "num_input_tokens_seen": 136643280, "step": 63350 }, { "epoch": 10.335236541598695, "grad_norm": 0.05741023272275925, "learning_rate": 0.0005578956308569572, "loss": 0.102, "num_input_tokens_seen": 136653456, "step": 63355 }, { "epoch": 10.33605220228385, "grad_norm": 0.05545477569103241, "learning_rate": 0.0005578249292339924, "loss": 0.0616, "num_input_tokens_seen": 136663152, "step": 63360 }, { "epoch": 10.336867862969005, "grad_norm": 0.09622546285390854, "learning_rate": 0.0005577542264391322, "loss": 0.0146, "num_input_tokens_seen": 136674320, "step": 63365 }, { "epoch": 10.33768352365416, "grad_norm": 0.2525721788406372, "learning_rate": 0.0005576835224738092, "loss": 0.0543, "num_input_tokens_seen": 136683728, "step": 63370 }, { "epoch": 10.338499184339314, "grad_norm": 0.03037172742187977, "learning_rate": 0.0005576128173394567, "loss": 0.0081, "num_input_tokens_seen": 136693744, "step": 63375 }, { "epoch": 10.33931484502447, "grad_norm": 0.08694158494472504, "learning_rate": 0.0005575421110375072, "loss": 0.0283, "num_input_tokens_seen": 136704720, "step": 63380 }, { "epoch": 10.340130505709626, "grad_norm": 0.4893362820148468, "learning_rate": 0.0005574714035693938, "loss": 0.0612, "num_input_tokens_seen": 136716464, "step": 63385 }, { "epoch": 10.34094616639478, "grad_norm": 0.017124425619840622, "learning_rate": 0.0005574006949365496, "loss": 0.2298, "num_input_tokens_seen": 136727536, "step": 63390 }, { "epoch": 10.341761827079935, "grad_norm": 0.04489161819219589, "learning_rate": 0.0005573299851404074, "loss": 0.0254, "num_input_tokens_seen": 136737360, "step": 63395 }, { "epoch": 10.34257748776509, "grad_norm": 0.37540313601493835, "learning_rate": 0.0005572592741824003, "loss": 0.0884, "num_input_tokens_seen": 136748400, "step": 63400 }, { "epoch": 10.343393148450245, "grad_norm": 0.02080947533249855, "learning_rate": 0.0005571885620639614, "loss": 0.1072, "num_input_tokens_seen": 136759248, "step": 63405 }, { "epoch": 10.3442088091354, "grad_norm": 0.47219449281692505, "learning_rate": 0.0005571178487865238, "loss": 0.1251, "num_input_tokens_seen": 136770000, "step": 63410 }, { "epoch": 10.345024469820554, "grad_norm": 0.09539025276899338, "learning_rate": 0.0005570471343515205, "loss": 0.0347, "num_input_tokens_seen": 136779952, "step": 63415 }, { "epoch": 10.34584013050571, "grad_norm": 0.034578222781419754, "learning_rate": 0.0005569764187603846, "loss": 0.0317, "num_input_tokens_seen": 136791792, "step": 63420 }, { "epoch": 10.346655791190864, "grad_norm": 0.03256550058722496, "learning_rate": 0.0005569057020145494, "loss": 0.1187, "num_input_tokens_seen": 136803056, "step": 63425 }, { "epoch": 10.34747145187602, "grad_norm": 0.0322863943874836, "learning_rate": 0.0005568349841154479, "loss": 0.0225, "num_input_tokens_seen": 136813200, "step": 63430 }, { "epoch": 10.348287112561174, "grad_norm": 0.04415307939052582, "learning_rate": 0.0005567642650645134, "loss": 0.0939, "num_input_tokens_seen": 136823472, "step": 63435 }, { "epoch": 10.34910277324633, "grad_norm": 0.09202703833580017, "learning_rate": 0.000556693544863179, "loss": 0.0181, "num_input_tokens_seen": 136834000, "step": 63440 }, { "epoch": 10.349918433931485, "grad_norm": 0.6340416669845581, "learning_rate": 0.000556622823512878, "loss": 0.0412, "num_input_tokens_seen": 136843344, "step": 63445 }, { "epoch": 10.350734094616639, "grad_norm": 0.01114475354552269, "learning_rate": 0.0005565521010150436, "loss": 0.1015, "num_input_tokens_seen": 136855408, "step": 63450 }, { "epoch": 10.351549755301795, "grad_norm": 0.3301995098590851, "learning_rate": 0.0005564813773711092, "loss": 0.0541, "num_input_tokens_seen": 136867504, "step": 63455 }, { "epoch": 10.352365415986949, "grad_norm": 0.20060542225837708, "learning_rate": 0.0005564106525825079, "loss": 0.0176, "num_input_tokens_seen": 136877904, "step": 63460 }, { "epoch": 10.353181076672104, "grad_norm": 0.39352941513061523, "learning_rate": 0.0005563399266506734, "loss": 0.1178, "num_input_tokens_seen": 136888976, "step": 63465 }, { "epoch": 10.35399673735726, "grad_norm": 0.6670607328414917, "learning_rate": 0.0005562691995770386, "loss": 0.0925, "num_input_tokens_seen": 136900976, "step": 63470 }, { "epoch": 10.354812398042414, "grad_norm": 0.011798514053225517, "learning_rate": 0.0005561984713630373, "loss": 0.1556, "num_input_tokens_seen": 136911888, "step": 63475 }, { "epoch": 10.35562805872757, "grad_norm": 0.01295718364417553, "learning_rate": 0.0005561277420101026, "loss": 0.0345, "num_input_tokens_seen": 136923344, "step": 63480 }, { "epoch": 10.356443719412724, "grad_norm": 0.4389972686767578, "learning_rate": 0.0005560570115196679, "loss": 0.0505, "num_input_tokens_seen": 136934320, "step": 63485 }, { "epoch": 10.35725938009788, "grad_norm": 0.02383430115878582, "learning_rate": 0.0005559862798931668, "loss": 0.0478, "num_input_tokens_seen": 136945136, "step": 63490 }, { "epoch": 10.358075040783035, "grad_norm": 0.23734000325202942, "learning_rate": 0.0005559155471320326, "loss": 0.0276, "num_input_tokens_seen": 136955280, "step": 63495 }, { "epoch": 10.358890701468189, "grad_norm": 0.7925203442573547, "learning_rate": 0.0005558448132376991, "loss": 0.0413, "num_input_tokens_seen": 136966704, "step": 63500 }, { "epoch": 10.359706362153345, "grad_norm": 0.035174410790205, "learning_rate": 0.0005557740782115995, "loss": 0.0086, "num_input_tokens_seen": 136975760, "step": 63505 }, { "epoch": 10.360522022838499, "grad_norm": 0.47616222500801086, "learning_rate": 0.0005557033420551676, "loss": 0.0533, "num_input_tokens_seen": 136986000, "step": 63510 }, { "epoch": 10.361337683523654, "grad_norm": 1.6294997930526733, "learning_rate": 0.0005556326047698367, "loss": 0.1448, "num_input_tokens_seen": 136997904, "step": 63515 }, { "epoch": 10.362153344208808, "grad_norm": 0.012375717051327229, "learning_rate": 0.0005555618663570405, "loss": 0.0434, "num_input_tokens_seen": 137008048, "step": 63520 }, { "epoch": 10.362969004893964, "grad_norm": 0.04057256504893303, "learning_rate": 0.0005554911268182126, "loss": 0.026, "num_input_tokens_seen": 137018704, "step": 63525 }, { "epoch": 10.36378466557912, "grad_norm": 0.06751630455255508, "learning_rate": 0.0005554203861547866, "loss": 0.0683, "num_input_tokens_seen": 137028848, "step": 63530 }, { "epoch": 10.364600326264274, "grad_norm": 0.01192142441868782, "learning_rate": 0.0005553496443681961, "loss": 0.1382, "num_input_tokens_seen": 137039824, "step": 63535 }, { "epoch": 10.36541598694943, "grad_norm": 0.01332141738384962, "learning_rate": 0.000555278901459875, "loss": 0.0187, "num_input_tokens_seen": 137050960, "step": 63540 }, { "epoch": 10.366231647634583, "grad_norm": 0.07950220257043839, "learning_rate": 0.0005552081574312568, "loss": 0.0178, "num_input_tokens_seen": 137062032, "step": 63545 }, { "epoch": 10.367047308319739, "grad_norm": 0.008421842940151691, "learning_rate": 0.0005551374122837752, "loss": 0.03, "num_input_tokens_seen": 137072240, "step": 63550 }, { "epoch": 10.367862969004895, "grad_norm": 0.04372353106737137, "learning_rate": 0.000555066666018864, "loss": 0.1017, "num_input_tokens_seen": 137081680, "step": 63555 }, { "epoch": 10.368678629690049, "grad_norm": 0.028710391372442245, "learning_rate": 0.0005549959186379569, "loss": 0.0435, "num_input_tokens_seen": 137093232, "step": 63560 }, { "epoch": 10.369494290375204, "grad_norm": 0.12152156978845596, "learning_rate": 0.0005549251701424878, "loss": 0.0394, "num_input_tokens_seen": 137104112, "step": 63565 }, { "epoch": 10.370309951060358, "grad_norm": 0.02806548960506916, "learning_rate": 0.0005548544205338905, "loss": 0.0618, "num_input_tokens_seen": 137115696, "step": 63570 }, { "epoch": 10.371125611745514, "grad_norm": 0.573082685470581, "learning_rate": 0.0005547836698135987, "loss": 0.0568, "num_input_tokens_seen": 137125456, "step": 63575 }, { "epoch": 10.37194127243067, "grad_norm": 0.0704675167798996, "learning_rate": 0.0005547129179830463, "loss": 0.0102, "num_input_tokens_seen": 137136016, "step": 63580 }, { "epoch": 10.372756933115824, "grad_norm": 0.06273286044597626, "learning_rate": 0.0005546421650436674, "loss": 0.1798, "num_input_tokens_seen": 137147280, "step": 63585 }, { "epoch": 10.37357259380098, "grad_norm": 0.3858356773853302, "learning_rate": 0.0005545714109968956, "loss": 0.044, "num_input_tokens_seen": 137158384, "step": 63590 }, { "epoch": 10.374388254486133, "grad_norm": 0.015207291580736637, "learning_rate": 0.0005545006558441649, "loss": 0.0614, "num_input_tokens_seen": 137168528, "step": 63595 }, { "epoch": 10.375203915171289, "grad_norm": 0.3349883258342743, "learning_rate": 0.0005544298995869093, "loss": 0.0418, "num_input_tokens_seen": 137178576, "step": 63600 }, { "epoch": 10.376019575856443, "grad_norm": 0.0033354645129293203, "learning_rate": 0.0005543591422265627, "loss": 0.0805, "num_input_tokens_seen": 137188752, "step": 63605 }, { "epoch": 10.376835236541599, "grad_norm": 0.024082988500595093, "learning_rate": 0.0005542883837645592, "loss": 0.0395, "num_input_tokens_seen": 137197904, "step": 63610 }, { "epoch": 10.377650897226754, "grad_norm": 0.024593215435743332, "learning_rate": 0.0005542176242023326, "loss": 0.0685, "num_input_tokens_seen": 137208624, "step": 63615 }, { "epoch": 10.378466557911908, "grad_norm": 0.027033358812332153, "learning_rate": 0.0005541468635413172, "loss": 0.0375, "num_input_tokens_seen": 137220048, "step": 63620 }, { "epoch": 10.379282218597064, "grad_norm": 0.26668083667755127, "learning_rate": 0.0005540761017829468, "loss": 0.0242, "num_input_tokens_seen": 137230800, "step": 63625 }, { "epoch": 10.380097879282218, "grad_norm": 0.008037866093218327, "learning_rate": 0.0005540053389286556, "loss": 0.0882, "num_input_tokens_seen": 137242832, "step": 63630 }, { "epoch": 10.380913539967374, "grad_norm": 0.018319467082619667, "learning_rate": 0.0005539345749798778, "loss": 0.0481, "num_input_tokens_seen": 137253968, "step": 63635 }, { "epoch": 10.38172920065253, "grad_norm": 0.01667613908648491, "learning_rate": 0.0005538638099380473, "loss": 0.0129, "num_input_tokens_seen": 137264624, "step": 63640 }, { "epoch": 10.382544861337683, "grad_norm": 0.7342408895492554, "learning_rate": 0.0005537930438045984, "loss": 0.1984, "num_input_tokens_seen": 137275280, "step": 63645 }, { "epoch": 10.383360522022839, "grad_norm": 0.09902302175760269, "learning_rate": 0.0005537222765809653, "loss": 0.0389, "num_input_tokens_seen": 137286064, "step": 63650 }, { "epoch": 10.384176182707993, "grad_norm": 0.0026765353977680206, "learning_rate": 0.000553651508268582, "loss": 0.1049, "num_input_tokens_seen": 137296368, "step": 63655 }, { "epoch": 10.384991843393149, "grad_norm": 0.27610719203948975, "learning_rate": 0.000553580738868883, "loss": 0.0683, "num_input_tokens_seen": 137307408, "step": 63660 }, { "epoch": 10.385807504078304, "grad_norm": 0.009612761437892914, "learning_rate": 0.0005535099683833021, "loss": 0.1959, "num_input_tokens_seen": 137319120, "step": 63665 }, { "epoch": 10.386623164763458, "grad_norm": 0.10612490028142929, "learning_rate": 0.0005534391968132741, "loss": 0.04, "num_input_tokens_seen": 137331152, "step": 63670 }, { "epoch": 10.387438825448614, "grad_norm": 0.01533946767449379, "learning_rate": 0.0005533684241602327, "loss": 0.0102, "num_input_tokens_seen": 137342384, "step": 63675 }, { "epoch": 10.388254486133768, "grad_norm": 0.029374638572335243, "learning_rate": 0.0005532976504256127, "loss": 0.0452, "num_input_tokens_seen": 137353456, "step": 63680 }, { "epoch": 10.389070146818923, "grad_norm": 0.14879557490348816, "learning_rate": 0.000553226875610848, "loss": 0.1017, "num_input_tokens_seen": 137363536, "step": 63685 }, { "epoch": 10.38988580750408, "grad_norm": 0.06224306672811508, "learning_rate": 0.0005531560997173733, "loss": 0.0158, "num_input_tokens_seen": 137374736, "step": 63690 }, { "epoch": 10.390701468189233, "grad_norm": 0.6529107689857483, "learning_rate": 0.0005530853227466229, "loss": 0.0585, "num_input_tokens_seen": 137383824, "step": 63695 }, { "epoch": 10.391517128874389, "grad_norm": 0.011391432955861092, "learning_rate": 0.0005530145447000308, "loss": 0.0368, "num_input_tokens_seen": 137393584, "step": 63700 }, { "epoch": 10.392332789559543, "grad_norm": 0.3292117714881897, "learning_rate": 0.0005529437655790319, "loss": 0.0746, "num_input_tokens_seen": 137403920, "step": 63705 }, { "epoch": 10.393148450244698, "grad_norm": 0.5137777328491211, "learning_rate": 0.0005528729853850604, "loss": 0.052, "num_input_tokens_seen": 137415440, "step": 63710 }, { "epoch": 10.393964110929852, "grad_norm": 0.28829124569892883, "learning_rate": 0.0005528022041195507, "loss": 0.2187, "num_input_tokens_seen": 137426960, "step": 63715 }, { "epoch": 10.394779771615008, "grad_norm": 0.006952963769435883, "learning_rate": 0.0005527314217839375, "loss": 0.126, "num_input_tokens_seen": 137436176, "step": 63720 }, { "epoch": 10.395595432300164, "grad_norm": 0.30857205390930176, "learning_rate": 0.0005526606383796551, "loss": 0.0237, "num_input_tokens_seen": 137447184, "step": 63725 }, { "epoch": 10.396411092985318, "grad_norm": 0.15155260264873505, "learning_rate": 0.000552589853908138, "loss": 0.1165, "num_input_tokens_seen": 137458736, "step": 63730 }, { "epoch": 10.397226753670473, "grad_norm": 0.018841926008462906, "learning_rate": 0.0005525190683708207, "loss": 0.2255, "num_input_tokens_seen": 137470160, "step": 63735 }, { "epoch": 10.398042414355627, "grad_norm": 0.08467467874288559, "learning_rate": 0.0005524482817691381, "loss": 0.04, "num_input_tokens_seen": 137480464, "step": 63740 }, { "epoch": 10.398858075040783, "grad_norm": 0.6041029691696167, "learning_rate": 0.0005523774941045244, "loss": 0.0642, "num_input_tokens_seen": 137489968, "step": 63745 }, { "epoch": 10.399673735725939, "grad_norm": 0.09278396517038345, "learning_rate": 0.0005523067053784143, "loss": 0.0661, "num_input_tokens_seen": 137501424, "step": 63750 }, { "epoch": 10.400489396411093, "grad_norm": 0.038348373025655746, "learning_rate": 0.0005522359155922425, "loss": 0.0094, "num_input_tokens_seen": 137511792, "step": 63755 }, { "epoch": 10.401305057096248, "grad_norm": 0.01266828365623951, "learning_rate": 0.0005521651247474436, "loss": 0.0187, "num_input_tokens_seen": 137522832, "step": 63760 }, { "epoch": 10.402120717781402, "grad_norm": 0.007137711625546217, "learning_rate": 0.0005520943328454523, "loss": 0.0866, "num_input_tokens_seen": 137533296, "step": 63765 }, { "epoch": 10.402936378466558, "grad_norm": 0.4042056202888489, "learning_rate": 0.0005520235398877032, "loss": 0.0558, "num_input_tokens_seen": 137545168, "step": 63770 }, { "epoch": 10.403752039151712, "grad_norm": 0.09753723442554474, "learning_rate": 0.0005519527458756312, "loss": 0.0231, "num_input_tokens_seen": 137556464, "step": 63775 }, { "epoch": 10.404567699836868, "grad_norm": 0.029314560815691948, "learning_rate": 0.0005518819508106706, "loss": 0.0804, "num_input_tokens_seen": 137567888, "step": 63780 }, { "epoch": 10.405383360522023, "grad_norm": 0.01772337406873703, "learning_rate": 0.0005518111546942567, "loss": 0.0264, "num_input_tokens_seen": 137579152, "step": 63785 }, { "epoch": 10.406199021207177, "grad_norm": 0.005660946946591139, "learning_rate": 0.000551740357527824, "loss": 0.1304, "num_input_tokens_seen": 137590544, "step": 63790 }, { "epoch": 10.407014681892333, "grad_norm": 0.003444041358307004, "learning_rate": 0.0005516695593128073, "loss": 0.0336, "num_input_tokens_seen": 137600880, "step": 63795 }, { "epoch": 10.407830342577487, "grad_norm": 0.07773767411708832, "learning_rate": 0.0005515987600506414, "loss": 0.1395, "num_input_tokens_seen": 137611952, "step": 63800 }, { "epoch": 10.408646003262643, "grad_norm": 0.0023035933263599873, "learning_rate": 0.0005515279597427612, "loss": 0.0075, "num_input_tokens_seen": 137622192, "step": 63805 }, { "epoch": 10.409461663947798, "grad_norm": 0.49277976155281067, "learning_rate": 0.0005514571583906014, "loss": 0.0321, "num_input_tokens_seen": 137633968, "step": 63810 }, { "epoch": 10.410277324632952, "grad_norm": 0.1581151932477951, "learning_rate": 0.0005513863559955971, "loss": 0.0769, "num_input_tokens_seen": 137643408, "step": 63815 }, { "epoch": 10.411092985318108, "grad_norm": 0.03316400572657585, "learning_rate": 0.0005513155525591831, "loss": 0.1386, "num_input_tokens_seen": 137654352, "step": 63820 }, { "epoch": 10.411908646003262, "grad_norm": 0.027986867353320122, "learning_rate": 0.0005512447480827945, "loss": 0.0316, "num_input_tokens_seen": 137665648, "step": 63825 }, { "epoch": 10.412724306688418, "grad_norm": 0.01837276853621006, "learning_rate": 0.0005511739425678658, "loss": 0.0262, "num_input_tokens_seen": 137677648, "step": 63830 }, { "epoch": 10.413539967373573, "grad_norm": 0.016788076609373093, "learning_rate": 0.0005511031360158324, "loss": 0.0045, "num_input_tokens_seen": 137687440, "step": 63835 }, { "epoch": 10.414355628058727, "grad_norm": 0.05905027687549591, "learning_rate": 0.0005510323284281291, "loss": 0.2464, "num_input_tokens_seen": 137697648, "step": 63840 }, { "epoch": 10.415171288743883, "grad_norm": 0.025583801791071892, "learning_rate": 0.0005509615198061909, "loss": 0.0397, "num_input_tokens_seen": 137708208, "step": 63845 }, { "epoch": 10.415986949429037, "grad_norm": 0.012476097792387009, "learning_rate": 0.0005508907101514529, "loss": 0.0356, "num_input_tokens_seen": 137719376, "step": 63850 }, { "epoch": 10.416802610114193, "grad_norm": 0.5206412672996521, "learning_rate": 0.0005508198994653501, "loss": 0.1534, "num_input_tokens_seen": 137730032, "step": 63855 }, { "epoch": 10.417618270799348, "grad_norm": 0.024470193311572075, "learning_rate": 0.0005507490877493176, "loss": 0.175, "num_input_tokens_seen": 137740912, "step": 63860 }, { "epoch": 10.418433931484502, "grad_norm": 0.026254329830408096, "learning_rate": 0.0005506782750047903, "loss": 0.0969, "num_input_tokens_seen": 137751600, "step": 63865 }, { "epoch": 10.419249592169658, "grad_norm": 0.014383930712938309, "learning_rate": 0.0005506074612332035, "loss": 0.0479, "num_input_tokens_seen": 137762032, "step": 63870 }, { "epoch": 10.420065252854812, "grad_norm": 0.003859440330415964, "learning_rate": 0.0005505366464359924, "loss": 0.0086, "num_input_tokens_seen": 137773104, "step": 63875 }, { "epoch": 10.420880913539968, "grad_norm": 0.11444339156150818, "learning_rate": 0.000550465830614592, "loss": 0.0272, "num_input_tokens_seen": 137782512, "step": 63880 }, { "epoch": 10.421696574225122, "grad_norm": 0.6042308807373047, "learning_rate": 0.0005503950137704374, "loss": 0.1607, "num_input_tokens_seen": 137793488, "step": 63885 }, { "epoch": 10.422512234910277, "grad_norm": 0.022820625454187393, "learning_rate": 0.0005503241959049641, "loss": 0.0165, "num_input_tokens_seen": 137804080, "step": 63890 }, { "epoch": 10.423327895595433, "grad_norm": 0.032168157398700714, "learning_rate": 0.000550253377019607, "loss": 0.0236, "num_input_tokens_seen": 137814896, "step": 63895 }, { "epoch": 10.424143556280587, "grad_norm": 0.008834709413349628, "learning_rate": 0.0005501825571158016, "loss": 0.0259, "num_input_tokens_seen": 137825616, "step": 63900 }, { "epoch": 10.424959216965743, "grad_norm": 0.21443764865398407, "learning_rate": 0.000550111736194983, "loss": 0.0377, "num_input_tokens_seen": 137836848, "step": 63905 }, { "epoch": 10.425774877650896, "grad_norm": 0.05455216392874718, "learning_rate": 0.0005500409142585864, "loss": 0.0466, "num_input_tokens_seen": 137845840, "step": 63910 }, { "epoch": 10.426590538336052, "grad_norm": 0.6302210688591003, "learning_rate": 0.0005499700913080472, "loss": 0.0951, "num_input_tokens_seen": 137856688, "step": 63915 }, { "epoch": 10.427406199021208, "grad_norm": 0.39718711376190186, "learning_rate": 0.0005498992673448008, "loss": 0.0219, "num_input_tokens_seen": 137867664, "step": 63920 }, { "epoch": 10.428221859706362, "grad_norm": 0.5401532053947449, "learning_rate": 0.0005498284423702824, "loss": 0.0544, "num_input_tokens_seen": 137878992, "step": 63925 }, { "epoch": 10.429037520391518, "grad_norm": 0.01007236260920763, "learning_rate": 0.0005497576163859273, "loss": 0.1386, "num_input_tokens_seen": 137890320, "step": 63930 }, { "epoch": 10.429853181076671, "grad_norm": 0.018046928569674492, "learning_rate": 0.0005496867893931711, "loss": 0.0285, "num_input_tokens_seen": 137901424, "step": 63935 }, { "epoch": 10.430668841761827, "grad_norm": 0.48323580622673035, "learning_rate": 0.0005496159613934492, "loss": 0.0195, "num_input_tokens_seen": 137913136, "step": 63940 }, { "epoch": 10.431484502446983, "grad_norm": 0.3995681405067444, "learning_rate": 0.0005495451323881967, "loss": 0.1614, "num_input_tokens_seen": 137923632, "step": 63945 }, { "epoch": 10.432300163132137, "grad_norm": 0.3726401925086975, "learning_rate": 0.0005494743023788493, "loss": 0.0638, "num_input_tokens_seen": 137934256, "step": 63950 }, { "epoch": 10.433115823817293, "grad_norm": 0.6001352071762085, "learning_rate": 0.0005494034713668423, "loss": 0.0933, "num_input_tokens_seen": 137946320, "step": 63955 }, { "epoch": 10.433931484502446, "grad_norm": 0.3359851539134979, "learning_rate": 0.0005493326393536113, "loss": 0.076, "num_input_tokens_seen": 137956432, "step": 63960 }, { "epoch": 10.434747145187602, "grad_norm": 0.09823975712060928, "learning_rate": 0.000549261806340592, "loss": 0.027, "num_input_tokens_seen": 137967408, "step": 63965 }, { "epoch": 10.435562805872756, "grad_norm": 0.005529469344764948, "learning_rate": 0.0005491909723292196, "loss": 0.1554, "num_input_tokens_seen": 137977936, "step": 63970 }, { "epoch": 10.436378466557912, "grad_norm": 0.20471151173114777, "learning_rate": 0.0005491201373209295, "loss": 0.1033, "num_input_tokens_seen": 137988464, "step": 63975 }, { "epoch": 10.437194127243067, "grad_norm": 0.09724956005811691, "learning_rate": 0.0005490493013171578, "loss": 0.0587, "num_input_tokens_seen": 137998672, "step": 63980 }, { "epoch": 10.438009787928221, "grad_norm": 0.2935082018375397, "learning_rate": 0.0005489784643193397, "loss": 0.0175, "num_input_tokens_seen": 138010224, "step": 63985 }, { "epoch": 10.438825448613377, "grad_norm": 0.30549514293670654, "learning_rate": 0.0005489076263289109, "loss": 0.0272, "num_input_tokens_seen": 138021328, "step": 63990 }, { "epoch": 10.439641109298531, "grad_norm": 0.07838549464941025, "learning_rate": 0.000548836787347307, "loss": 0.1025, "num_input_tokens_seen": 138032720, "step": 63995 }, { "epoch": 10.440456769983687, "grad_norm": 0.31619992852211, "learning_rate": 0.0005487659473759635, "loss": 0.0572, "num_input_tokens_seen": 138044432, "step": 64000 }, { "epoch": 10.441272430668842, "grad_norm": 0.027024956420063972, "learning_rate": 0.0005486951064163164, "loss": 0.0405, "num_input_tokens_seen": 138055632, "step": 64005 }, { "epoch": 10.442088091353996, "grad_norm": 0.03015359491109848, "learning_rate": 0.0005486242644698011, "loss": 0.0155, "num_input_tokens_seen": 138066416, "step": 64010 }, { "epoch": 10.442903752039152, "grad_norm": 0.008899150416254997, "learning_rate": 0.0005485534215378535, "loss": 0.0188, "num_input_tokens_seen": 138077488, "step": 64015 }, { "epoch": 10.443719412724306, "grad_norm": 0.08763915300369263, "learning_rate": 0.0005484825776219092, "loss": 0.0442, "num_input_tokens_seen": 138088304, "step": 64020 }, { "epoch": 10.444535073409462, "grad_norm": 0.4398113787174225, "learning_rate": 0.0005484117327234038, "loss": 0.0425, "num_input_tokens_seen": 138098096, "step": 64025 }, { "epoch": 10.445350734094617, "grad_norm": 0.14407259225845337, "learning_rate": 0.0005483408868437734, "loss": 0.2326, "num_input_tokens_seen": 138109808, "step": 64030 }, { "epoch": 10.446166394779771, "grad_norm": 0.010263379663228989, "learning_rate": 0.0005482700399844536, "loss": 0.0057, "num_input_tokens_seen": 138120432, "step": 64035 }, { "epoch": 10.446982055464927, "grad_norm": 0.1767607033252716, "learning_rate": 0.0005481991921468801, "loss": 0.083, "num_input_tokens_seen": 138131536, "step": 64040 }, { "epoch": 10.447797716150081, "grad_norm": 0.02731829695403576, "learning_rate": 0.0005481283433324888, "loss": 0.0954, "num_input_tokens_seen": 138143216, "step": 64045 }, { "epoch": 10.448613376835237, "grad_norm": 0.024048801511526108, "learning_rate": 0.0005480574935427157, "loss": 0.0439, "num_input_tokens_seen": 138153232, "step": 64050 }, { "epoch": 10.449429037520392, "grad_norm": 0.012898556888103485, "learning_rate": 0.0005479866427789965, "loss": 0.0256, "num_input_tokens_seen": 138163024, "step": 64055 }, { "epoch": 10.450244698205546, "grad_norm": 0.012192055583000183, "learning_rate": 0.0005479157910427672, "loss": 0.0932, "num_input_tokens_seen": 138174064, "step": 64060 }, { "epoch": 10.451060358890702, "grad_norm": 0.010896336287260056, "learning_rate": 0.0005478449383354634, "loss": 0.0943, "num_input_tokens_seen": 138185232, "step": 64065 }, { "epoch": 10.451876019575856, "grad_norm": 0.035546451807022095, "learning_rate": 0.0005477740846585213, "loss": 0.0181, "num_input_tokens_seen": 138196816, "step": 64070 }, { "epoch": 10.452691680261012, "grad_norm": 0.004571928642690182, "learning_rate": 0.0005477032300133768, "loss": 0.0076, "num_input_tokens_seen": 138207120, "step": 64075 }, { "epoch": 10.453507340946166, "grad_norm": 0.006645991932600737, "learning_rate": 0.0005476323744014658, "loss": 0.1358, "num_input_tokens_seen": 138218000, "step": 64080 }, { "epoch": 10.454323001631321, "grad_norm": 0.10609616339206696, "learning_rate": 0.0005475615178242244, "loss": 0.0095, "num_input_tokens_seen": 138227696, "step": 64085 }, { "epoch": 10.455138662316477, "grad_norm": 0.16971544921398163, "learning_rate": 0.0005474906602830884, "loss": 0.06, "num_input_tokens_seen": 138237584, "step": 64090 }, { "epoch": 10.455954323001631, "grad_norm": 0.12304145842790604, "learning_rate": 0.0005474198017794939, "loss": 0.0766, "num_input_tokens_seen": 138248880, "step": 64095 }, { "epoch": 10.456769983686787, "grad_norm": 0.07342102378606796, "learning_rate": 0.000547348942314877, "loss": 0.055, "num_input_tokens_seen": 138259472, "step": 64100 }, { "epoch": 10.45758564437194, "grad_norm": 0.01688706874847412, "learning_rate": 0.0005472780818906736, "loss": 0.0647, "num_input_tokens_seen": 138268304, "step": 64105 }, { "epoch": 10.458401305057096, "grad_norm": 0.01488231960684061, "learning_rate": 0.00054720722050832, "loss": 0.1094, "num_input_tokens_seen": 138280112, "step": 64110 }, { "epoch": 10.459216965742252, "grad_norm": 0.005448227748274803, "learning_rate": 0.0005471363581692523, "loss": 0.009, "num_input_tokens_seen": 138291056, "step": 64115 }, { "epoch": 10.460032626427406, "grad_norm": 0.08526018261909485, "learning_rate": 0.0005470654948749065, "loss": 0.021, "num_input_tokens_seen": 138302160, "step": 64120 }, { "epoch": 10.460848287112562, "grad_norm": 0.05652700364589691, "learning_rate": 0.0005469946306267185, "loss": 0.0403, "num_input_tokens_seen": 138312560, "step": 64125 }, { "epoch": 10.461663947797716, "grad_norm": 0.24024170637130737, "learning_rate": 0.0005469237654261249, "loss": 0.0421, "num_input_tokens_seen": 138323440, "step": 64130 }, { "epoch": 10.462479608482871, "grad_norm": 0.44101381301879883, "learning_rate": 0.0005468528992745615, "loss": 0.0737, "num_input_tokens_seen": 138333456, "step": 64135 }, { "epoch": 10.463295269168025, "grad_norm": 0.47098276019096375, "learning_rate": 0.0005467820321734647, "loss": 0.1515, "num_input_tokens_seen": 138343152, "step": 64140 }, { "epoch": 10.464110929853181, "grad_norm": 0.00318201445043087, "learning_rate": 0.0005467111641242709, "loss": 0.0388, "num_input_tokens_seen": 138353808, "step": 64145 }, { "epoch": 10.464926590538337, "grad_norm": 0.14881281554698944, "learning_rate": 0.000546640295128416, "loss": 0.0491, "num_input_tokens_seen": 138366384, "step": 64150 }, { "epoch": 10.46574225122349, "grad_norm": 0.11272022128105164, "learning_rate": 0.0005465694251873362, "loss": 0.0491, "num_input_tokens_seen": 138377808, "step": 64155 }, { "epoch": 10.466557911908646, "grad_norm": 0.1374955028295517, "learning_rate": 0.000546498554302468, "loss": 0.0445, "num_input_tokens_seen": 138388816, "step": 64160 }, { "epoch": 10.4673735725938, "grad_norm": 0.25655078887939453, "learning_rate": 0.0005464276824752477, "loss": 0.0838, "num_input_tokens_seen": 138399920, "step": 64165 }, { "epoch": 10.468189233278956, "grad_norm": 0.15557493269443512, "learning_rate": 0.0005463568097071115, "loss": 0.1476, "num_input_tokens_seen": 138410192, "step": 64170 }, { "epoch": 10.469004893964112, "grad_norm": 0.019318245351314545, "learning_rate": 0.0005462859359994957, "loss": 0.0313, "num_input_tokens_seen": 138420144, "step": 64175 }, { "epoch": 10.469820554649266, "grad_norm": 0.019374024122953415, "learning_rate": 0.0005462150613538366, "loss": 0.0159, "num_input_tokens_seen": 138430128, "step": 64180 }, { "epoch": 10.470636215334421, "grad_norm": 0.0035229274071753025, "learning_rate": 0.0005461441857715708, "loss": 0.0398, "num_input_tokens_seen": 138441968, "step": 64185 }, { "epoch": 10.471451876019575, "grad_norm": 0.015669353306293488, "learning_rate": 0.0005460733092541345, "loss": 0.0744, "num_input_tokens_seen": 138453072, "step": 64190 }, { "epoch": 10.47226753670473, "grad_norm": 0.04297253489494324, "learning_rate": 0.000546002431802964, "loss": 0.0094, "num_input_tokens_seen": 138463856, "step": 64195 }, { "epoch": 10.473083197389887, "grad_norm": 0.3232375383377075, "learning_rate": 0.0005459315534194959, "loss": 0.0461, "num_input_tokens_seen": 138474320, "step": 64200 }, { "epoch": 10.47389885807504, "grad_norm": 0.30045461654663086, "learning_rate": 0.0005458606741051667, "loss": 0.0327, "num_input_tokens_seen": 138485200, "step": 64205 }, { "epoch": 10.474714518760196, "grad_norm": 0.0036539973225444555, "learning_rate": 0.0005457897938614127, "loss": 0.0095, "num_input_tokens_seen": 138496592, "step": 64210 }, { "epoch": 10.47553017944535, "grad_norm": 0.7020013928413391, "learning_rate": 0.0005457189126896704, "loss": 0.126, "num_input_tokens_seen": 138507248, "step": 64215 }, { "epoch": 10.476345840130506, "grad_norm": 0.04570115730166435, "learning_rate": 0.0005456480305913765, "loss": 0.0243, "num_input_tokens_seen": 138517072, "step": 64220 }, { "epoch": 10.477161500815662, "grad_norm": 0.16781480610370636, "learning_rate": 0.0005455771475679673, "loss": 0.1778, "num_input_tokens_seen": 138529072, "step": 64225 }, { "epoch": 10.477977161500815, "grad_norm": 0.19935885071754456, "learning_rate": 0.0005455062636208793, "loss": 0.0472, "num_input_tokens_seen": 138539184, "step": 64230 }, { "epoch": 10.478792822185971, "grad_norm": 0.016239508986473083, "learning_rate": 0.0005454353787515493, "loss": 0.0075, "num_input_tokens_seen": 138551088, "step": 64235 }, { "epoch": 10.479608482871125, "grad_norm": 0.24021486937999725, "learning_rate": 0.0005453644929614136, "loss": 0.0201, "num_input_tokens_seen": 138561648, "step": 64240 }, { "epoch": 10.48042414355628, "grad_norm": 0.3780392110347748, "learning_rate": 0.0005452936062519088, "loss": 0.0268, "num_input_tokens_seen": 138571408, "step": 64245 }, { "epoch": 10.481239804241435, "grad_norm": 0.013503940775990486, "learning_rate": 0.0005452227186244717, "loss": 0.0323, "num_input_tokens_seen": 138583856, "step": 64250 }, { "epoch": 10.48205546492659, "grad_norm": 0.14068900048732758, "learning_rate": 0.0005451518300805389, "loss": 0.0203, "num_input_tokens_seen": 138593808, "step": 64255 }, { "epoch": 10.482871125611746, "grad_norm": 0.15440669655799866, "learning_rate": 0.0005450809406215469, "loss": 0.0964, "num_input_tokens_seen": 138604688, "step": 64260 }, { "epoch": 10.4836867862969, "grad_norm": 0.039056237787008286, "learning_rate": 0.0005450100502489324, "loss": 0.0204, "num_input_tokens_seen": 138616304, "step": 64265 }, { "epoch": 10.484502446982056, "grad_norm": 0.0060806069523096085, "learning_rate": 0.0005449391589641321, "loss": 0.0579, "num_input_tokens_seen": 138628368, "step": 64270 }, { "epoch": 10.48531810766721, "grad_norm": 0.5055065751075745, "learning_rate": 0.0005448682667685829, "loss": 0.0369, "num_input_tokens_seen": 138638800, "step": 64275 }, { "epoch": 10.486133768352365, "grad_norm": 0.9203654527664185, "learning_rate": 0.0005447973736637214, "loss": 0.0566, "num_input_tokens_seen": 138648976, "step": 64280 }, { "epoch": 10.486949429037521, "grad_norm": 0.013024217449128628, "learning_rate": 0.0005447264796509841, "loss": 0.0207, "num_input_tokens_seen": 138659536, "step": 64285 }, { "epoch": 10.487765089722675, "grad_norm": 0.3898140490055084, "learning_rate": 0.0005446555847318081, "loss": 0.0769, "num_input_tokens_seen": 138669456, "step": 64290 }, { "epoch": 10.48858075040783, "grad_norm": 0.3163543939590454, "learning_rate": 0.00054458468890763, "loss": 0.1092, "num_input_tokens_seen": 138680496, "step": 64295 }, { "epoch": 10.489396411092985, "grad_norm": 0.09784460812807083, "learning_rate": 0.0005445137921798866, "loss": 0.1207, "num_input_tokens_seen": 138691408, "step": 64300 }, { "epoch": 10.49021207177814, "grad_norm": 0.0065801567398011684, "learning_rate": 0.0005444428945500147, "loss": 0.014, "num_input_tokens_seen": 138701968, "step": 64305 }, { "epoch": 10.491027732463296, "grad_norm": 0.5571448802947998, "learning_rate": 0.0005443719960194513, "loss": 0.027, "num_input_tokens_seen": 138712464, "step": 64310 }, { "epoch": 10.49184339314845, "grad_norm": 1.098577857017517, "learning_rate": 0.0005443010965896327, "loss": 0.1927, "num_input_tokens_seen": 138721712, "step": 64315 }, { "epoch": 10.492659053833606, "grad_norm": 0.037644777446985245, "learning_rate": 0.0005442301962619965, "loss": 0.0457, "num_input_tokens_seen": 138732048, "step": 64320 }, { "epoch": 10.49347471451876, "grad_norm": 0.015640581026673317, "learning_rate": 0.0005441592950379792, "loss": 0.0053, "num_input_tokens_seen": 138741808, "step": 64325 }, { "epoch": 10.494290375203915, "grad_norm": 0.011389346793293953, "learning_rate": 0.0005440883929190179, "loss": 0.0246, "num_input_tokens_seen": 138752496, "step": 64330 }, { "epoch": 10.49510603588907, "grad_norm": 0.0074125719256699085, "learning_rate": 0.0005440174899065493, "loss": 0.0189, "num_input_tokens_seen": 138763696, "step": 64335 }, { "epoch": 10.495921696574225, "grad_norm": 0.028213627636432648, "learning_rate": 0.0005439465860020104, "loss": 0.0428, "num_input_tokens_seen": 138773808, "step": 64340 }, { "epoch": 10.49673735725938, "grad_norm": 0.030723532661795616, "learning_rate": 0.0005438756812068382, "loss": 0.0474, "num_input_tokens_seen": 138784624, "step": 64345 }, { "epoch": 10.497553017944535, "grad_norm": 0.43396124243736267, "learning_rate": 0.0005438047755224696, "loss": 0.0244, "num_input_tokens_seen": 138795952, "step": 64350 }, { "epoch": 10.49836867862969, "grad_norm": 0.09175914525985718, "learning_rate": 0.0005437338689503417, "loss": 0.0688, "num_input_tokens_seen": 138805680, "step": 64355 }, { "epoch": 10.499184339314844, "grad_norm": 0.007985112257301807, "learning_rate": 0.0005436629614918915, "loss": 0.0491, "num_input_tokens_seen": 138815472, "step": 64360 }, { "epoch": 10.5, "grad_norm": 0.23452095687389374, "learning_rate": 0.0005435920531485559, "loss": 0.0674, "num_input_tokens_seen": 138826736, "step": 64365 }, { "epoch": 10.500815660685156, "grad_norm": 0.1747833639383316, "learning_rate": 0.0005435211439217722, "loss": 0.0174, "num_input_tokens_seen": 138836528, "step": 64370 }, { "epoch": 10.50163132137031, "grad_norm": 0.8240127563476562, "learning_rate": 0.0005434502338129773, "loss": 0.0265, "num_input_tokens_seen": 138846480, "step": 64375 }, { "epoch": 10.502446982055465, "grad_norm": 0.15552246570587158, "learning_rate": 0.0005433793228236081, "loss": 0.0078, "num_input_tokens_seen": 138856880, "step": 64380 }, { "epoch": 10.50326264274062, "grad_norm": 0.1577841192483902, "learning_rate": 0.000543308410955102, "loss": 0.0129, "num_input_tokens_seen": 138867888, "step": 64385 }, { "epoch": 10.504078303425775, "grad_norm": 0.004878859501332045, "learning_rate": 0.0005432374982088961, "loss": 0.0236, "num_input_tokens_seen": 138879760, "step": 64390 }, { "epoch": 10.50489396411093, "grad_norm": 0.007793755270540714, "learning_rate": 0.0005431665845864274, "loss": 0.0076, "num_input_tokens_seen": 138891024, "step": 64395 }, { "epoch": 10.505709624796085, "grad_norm": 0.1626664251089096, "learning_rate": 0.0005430956700891331, "loss": 0.0632, "num_input_tokens_seen": 138902096, "step": 64400 }, { "epoch": 10.50652528548124, "grad_norm": 0.08236097544431686, "learning_rate": 0.0005430247547184504, "loss": 0.0225, "num_input_tokens_seen": 138913264, "step": 64405 }, { "epoch": 10.507340946166394, "grad_norm": 0.06862727552652359, "learning_rate": 0.0005429538384758162, "loss": 0.014, "num_input_tokens_seen": 138924400, "step": 64410 }, { "epoch": 10.50815660685155, "grad_norm": 0.005328190978616476, "learning_rate": 0.0005428829213626683, "loss": 0.0263, "num_input_tokens_seen": 138935280, "step": 64415 }, { "epoch": 10.508972267536706, "grad_norm": 0.4532453715801239, "learning_rate": 0.0005428120033804433, "loss": 0.0873, "num_input_tokens_seen": 138946128, "step": 64420 }, { "epoch": 10.50978792822186, "grad_norm": 0.05264997482299805, "learning_rate": 0.0005427410845305791, "loss": 0.0609, "num_input_tokens_seen": 138957104, "step": 64425 }, { "epoch": 10.510603588907015, "grad_norm": 0.20525546371936798, "learning_rate": 0.0005426701648145124, "loss": 0.0135, "num_input_tokens_seen": 138967696, "step": 64430 }, { "epoch": 10.51141924959217, "grad_norm": 0.4313879609107971, "learning_rate": 0.0005425992442336805, "loss": 0.0397, "num_input_tokens_seen": 138978736, "step": 64435 }, { "epoch": 10.512234910277325, "grad_norm": 0.0016902941279113293, "learning_rate": 0.0005425283227895212, "loss": 0.0307, "num_input_tokens_seen": 138990544, "step": 64440 }, { "epoch": 10.513050570962479, "grad_norm": 0.004030873533338308, "learning_rate": 0.0005424574004834712, "loss": 0.0142, "num_input_tokens_seen": 139001456, "step": 64445 }, { "epoch": 10.513866231647635, "grad_norm": 0.003918727394193411, "learning_rate": 0.0005423864773169683, "loss": 0.0217, "num_input_tokens_seen": 139012720, "step": 64450 }, { "epoch": 10.51468189233279, "grad_norm": 0.419070839881897, "learning_rate": 0.0005423155532914497, "loss": 0.2792, "num_input_tokens_seen": 139022128, "step": 64455 }, { "epoch": 10.515497553017944, "grad_norm": 0.005643937736749649, "learning_rate": 0.0005422446284083527, "loss": 0.0296, "num_input_tokens_seen": 139032464, "step": 64460 }, { "epoch": 10.5163132137031, "grad_norm": 0.005430314689874649, "learning_rate": 0.0005421737026691147, "loss": 0.0796, "num_input_tokens_seen": 139043152, "step": 64465 }, { "epoch": 10.517128874388254, "grad_norm": 0.07572004944086075, "learning_rate": 0.0005421027760751731, "loss": 0.1167, "num_input_tokens_seen": 139054192, "step": 64470 }, { "epoch": 10.51794453507341, "grad_norm": 0.21771161258220673, "learning_rate": 0.0005420318486279653, "loss": 0.1012, "num_input_tokens_seen": 139064880, "step": 64475 }, { "epoch": 10.518760195758565, "grad_norm": 0.014903522096574306, "learning_rate": 0.0005419609203289288, "loss": 0.0804, "num_input_tokens_seen": 139075792, "step": 64480 }, { "epoch": 10.51957585644372, "grad_norm": 0.07839614152908325, "learning_rate": 0.0005418899911795011, "loss": 0.0773, "num_input_tokens_seen": 139086320, "step": 64485 }, { "epoch": 10.520391517128875, "grad_norm": 0.005765294190496206, "learning_rate": 0.0005418190611811194, "loss": 0.0722, "num_input_tokens_seen": 139097296, "step": 64490 }, { "epoch": 10.521207177814029, "grad_norm": 0.49048173427581787, "learning_rate": 0.0005417481303352216, "loss": 0.094, "num_input_tokens_seen": 139108848, "step": 64495 }, { "epoch": 10.522022838499185, "grad_norm": 0.341752827167511, "learning_rate": 0.0005416771986432448, "loss": 0.0503, "num_input_tokens_seen": 139118992, "step": 64500 }, { "epoch": 10.522838499184338, "grad_norm": 0.05072177201509476, "learning_rate": 0.0005416062661066268, "loss": 0.1253, "num_input_tokens_seen": 139130320, "step": 64505 }, { "epoch": 10.523654159869494, "grad_norm": 0.03055431693792343, "learning_rate": 0.000541535332726805, "loss": 0.1402, "num_input_tokens_seen": 139139824, "step": 64510 }, { "epoch": 10.52446982055465, "grad_norm": 0.005871880333870649, "learning_rate": 0.000541464398505217, "loss": 0.0865, "num_input_tokens_seen": 139149616, "step": 64515 }, { "epoch": 10.525285481239804, "grad_norm": 0.8979186415672302, "learning_rate": 0.0005413934634433003, "loss": 0.217, "num_input_tokens_seen": 139161744, "step": 64520 }, { "epoch": 10.52610114192496, "grad_norm": 0.43007710576057434, "learning_rate": 0.0005413225275424926, "loss": 0.0323, "num_input_tokens_seen": 139173008, "step": 64525 }, { "epoch": 10.526916802610113, "grad_norm": 0.24892736971378326, "learning_rate": 0.0005412515908042314, "loss": 0.1323, "num_input_tokens_seen": 139184848, "step": 64530 }, { "epoch": 10.52773246329527, "grad_norm": 0.08679567277431488, "learning_rate": 0.0005411806532299544, "loss": 0.0392, "num_input_tokens_seen": 139196112, "step": 64535 }, { "epoch": 10.528548123980425, "grad_norm": 0.2673286199569702, "learning_rate": 0.0005411097148210992, "loss": 0.0545, "num_input_tokens_seen": 139206352, "step": 64540 }, { "epoch": 10.529363784665579, "grad_norm": 0.36285731196403503, "learning_rate": 0.0005410387755791036, "loss": 0.0419, "num_input_tokens_seen": 139216880, "step": 64545 }, { "epoch": 10.530179445350734, "grad_norm": 0.36109820008277893, "learning_rate": 0.0005409678355054051, "loss": 0.0309, "num_input_tokens_seen": 139228528, "step": 64550 }, { "epoch": 10.530995106035888, "grad_norm": 0.11290938407182693, "learning_rate": 0.0005408968946014416, "loss": 0.0158, "num_input_tokens_seen": 139238160, "step": 64555 }, { "epoch": 10.531810766721044, "grad_norm": 0.0500091090798378, "learning_rate": 0.0005408259528686503, "loss": 0.0613, "num_input_tokens_seen": 139248944, "step": 64560 }, { "epoch": 10.5326264274062, "grad_norm": 0.5163236856460571, "learning_rate": 0.0005407550103084695, "loss": 0.0375, "num_input_tokens_seen": 139259600, "step": 64565 }, { "epoch": 10.533442088091354, "grad_norm": 0.03871779888868332, "learning_rate": 0.0005406840669223367, "loss": 0.0131, "num_input_tokens_seen": 139269520, "step": 64570 }, { "epoch": 10.53425774877651, "grad_norm": 0.004415431991219521, "learning_rate": 0.0005406131227116896, "loss": 0.0203, "num_input_tokens_seen": 139280144, "step": 64575 }, { "epoch": 10.535073409461663, "grad_norm": 0.11878913640975952, "learning_rate": 0.000540542177677966, "loss": 0.0552, "num_input_tokens_seen": 139290928, "step": 64580 }, { "epoch": 10.535889070146819, "grad_norm": 0.014984916895627975, "learning_rate": 0.0005404712318226038, "loss": 0.1557, "num_input_tokens_seen": 139300304, "step": 64585 }, { "epoch": 10.536704730831975, "grad_norm": 0.07112250477075577, "learning_rate": 0.0005404002851470409, "loss": 0.0504, "num_input_tokens_seen": 139310832, "step": 64590 }, { "epoch": 10.537520391517129, "grad_norm": 0.12174956500530243, "learning_rate": 0.0005403293376527148, "loss": 0.1008, "num_input_tokens_seen": 139321904, "step": 64595 }, { "epoch": 10.538336052202284, "grad_norm": 0.03201400861144066, "learning_rate": 0.0005402583893410636, "loss": 0.2007, "num_input_tokens_seen": 139331056, "step": 64600 }, { "epoch": 10.539151712887438, "grad_norm": 0.11586010456085205, "learning_rate": 0.0005401874402135249, "loss": 0.0453, "num_input_tokens_seen": 139342736, "step": 64605 }, { "epoch": 10.539967373572594, "grad_norm": 0.6288331151008606, "learning_rate": 0.000540116490271537, "loss": 0.0581, "num_input_tokens_seen": 139354128, "step": 64610 }, { "epoch": 10.540783034257748, "grad_norm": 0.08417706936597824, "learning_rate": 0.0005400455395165373, "loss": 0.0321, "num_input_tokens_seen": 139364880, "step": 64615 }, { "epoch": 10.541598694942904, "grad_norm": 0.0020906664431095123, "learning_rate": 0.0005399745879499641, "loss": 0.0089, "num_input_tokens_seen": 139375312, "step": 64620 }, { "epoch": 10.54241435562806, "grad_norm": 0.04065576568245888, "learning_rate": 0.0005399036355732552, "loss": 0.0821, "num_input_tokens_seen": 139386384, "step": 64625 }, { "epoch": 10.543230016313213, "grad_norm": 0.013034175150096416, "learning_rate": 0.0005398326823878482, "loss": 0.0186, "num_input_tokens_seen": 139397520, "step": 64630 }, { "epoch": 10.544045676998369, "grad_norm": 0.10967890173196793, "learning_rate": 0.0005397617283951816, "loss": 0.0425, "num_input_tokens_seen": 139407536, "step": 64635 }, { "epoch": 10.544861337683523, "grad_norm": 0.210423544049263, "learning_rate": 0.000539690773596693, "loss": 0.0252, "num_input_tokens_seen": 139418992, "step": 64640 }, { "epoch": 10.545676998368679, "grad_norm": 0.6833847165107727, "learning_rate": 0.0005396198179938208, "loss": 0.0565, "num_input_tokens_seen": 139429072, "step": 64645 }, { "epoch": 10.546492659053834, "grad_norm": 0.14036019146442413, "learning_rate": 0.0005395488615880024, "loss": 0.1921, "num_input_tokens_seen": 139438768, "step": 64650 }, { "epoch": 10.547308319738988, "grad_norm": 0.023060699924826622, "learning_rate": 0.0005394779043806764, "loss": 0.0651, "num_input_tokens_seen": 139449968, "step": 64655 }, { "epoch": 10.548123980424144, "grad_norm": 0.012311146594583988, "learning_rate": 0.0005394069463732805, "loss": 0.0414, "num_input_tokens_seen": 139459856, "step": 64660 }, { "epoch": 10.548939641109298, "grad_norm": 0.007062338758260012, "learning_rate": 0.0005393359875672527, "loss": 0.0174, "num_input_tokens_seen": 139470832, "step": 64665 }, { "epoch": 10.549755301794454, "grad_norm": 0.6710882186889648, "learning_rate": 0.0005392650279640314, "loss": 0.0627, "num_input_tokens_seen": 139482448, "step": 64670 }, { "epoch": 10.550570962479608, "grad_norm": 0.00417950889095664, "learning_rate": 0.0005391940675650545, "loss": 0.0656, "num_input_tokens_seen": 139493008, "step": 64675 }, { "epoch": 10.551386623164763, "grad_norm": 0.017933398485183716, "learning_rate": 0.00053912310637176, "loss": 0.0071, "num_input_tokens_seen": 139503632, "step": 64680 }, { "epoch": 10.552202283849919, "grad_norm": 0.010104028508067131, "learning_rate": 0.0005390521443855861, "loss": 0.0541, "num_input_tokens_seen": 139515024, "step": 64685 }, { "epoch": 10.553017944535073, "grad_norm": 0.004263074137270451, "learning_rate": 0.0005389811816079711, "loss": 0.0178, "num_input_tokens_seen": 139525328, "step": 64690 }, { "epoch": 10.553833605220229, "grad_norm": 0.44047901034355164, "learning_rate": 0.0005389102180403529, "loss": 0.1661, "num_input_tokens_seen": 139537712, "step": 64695 }, { "epoch": 10.554649265905383, "grad_norm": 0.005963977891951799, "learning_rate": 0.0005388392536841697, "loss": 0.0886, "num_input_tokens_seen": 139548880, "step": 64700 }, { "epoch": 10.555464926590538, "grad_norm": 0.019876038655638695, "learning_rate": 0.00053876828854086, "loss": 0.0084, "num_input_tokens_seen": 139558352, "step": 64705 }, { "epoch": 10.556280587275694, "grad_norm": 0.006085327826440334, "learning_rate": 0.0005386973226118615, "loss": 0.064, "num_input_tokens_seen": 139568848, "step": 64710 }, { "epoch": 10.557096247960848, "grad_norm": 0.611207127571106, "learning_rate": 0.0005386263558986127, "loss": 0.1006, "num_input_tokens_seen": 139579408, "step": 64715 }, { "epoch": 10.557911908646004, "grad_norm": 0.08773281425237656, "learning_rate": 0.0005385553884025519, "loss": 0.1386, "num_input_tokens_seen": 139589520, "step": 64720 }, { "epoch": 10.558727569331158, "grad_norm": 0.12624812126159668, "learning_rate": 0.000538484420125117, "loss": 0.0859, "num_input_tokens_seen": 139599088, "step": 64725 }, { "epoch": 10.559543230016313, "grad_norm": 0.18464867770671844, "learning_rate": 0.0005384134510677468, "loss": 0.0261, "num_input_tokens_seen": 139609840, "step": 64730 }, { "epoch": 10.560358890701469, "grad_norm": 0.046585813164711, "learning_rate": 0.0005383424812318791, "loss": 0.1234, "num_input_tokens_seen": 139620784, "step": 64735 }, { "epoch": 10.561174551386623, "grad_norm": 0.10552578419446945, "learning_rate": 0.0005382715106189525, "loss": 0.0222, "num_input_tokens_seen": 139631952, "step": 64740 }, { "epoch": 10.561990212071779, "grad_norm": 0.005807815585285425, "learning_rate": 0.0005382005392304051, "loss": 0.1015, "num_input_tokens_seen": 139644016, "step": 64745 }, { "epoch": 10.562805872756933, "grad_norm": 0.08380457758903503, "learning_rate": 0.0005381295670676752, "loss": 0.0148, "num_input_tokens_seen": 139656560, "step": 64750 }, { "epoch": 10.563621533442088, "grad_norm": 0.008668149821460247, "learning_rate": 0.0005380585941322014, "loss": 0.0196, "num_input_tokens_seen": 139668016, "step": 64755 }, { "epoch": 10.564437194127244, "grad_norm": 0.38901275396347046, "learning_rate": 0.000537987620425422, "loss": 0.0989, "num_input_tokens_seen": 139678640, "step": 64760 }, { "epoch": 10.565252854812398, "grad_norm": 0.03685824200510979, "learning_rate": 0.0005379166459487752, "loss": 0.0096, "num_input_tokens_seen": 139688688, "step": 64765 }, { "epoch": 10.566068515497554, "grad_norm": 0.202426016330719, "learning_rate": 0.0005378456707036995, "loss": 0.0443, "num_input_tokens_seen": 139699984, "step": 64770 }, { "epoch": 10.566884176182707, "grad_norm": 0.1074969470500946, "learning_rate": 0.0005377746946916332, "loss": 0.0498, "num_input_tokens_seen": 139710416, "step": 64775 }, { "epoch": 10.567699836867863, "grad_norm": 0.1614186316728592, "learning_rate": 0.0005377037179140149, "loss": 0.1066, "num_input_tokens_seen": 139721808, "step": 64780 }, { "epoch": 10.568515497553017, "grad_norm": 0.05008689686655998, "learning_rate": 0.0005376327403722828, "loss": 0.0123, "num_input_tokens_seen": 139732752, "step": 64785 }, { "epoch": 10.569331158238173, "grad_norm": 0.09753349423408508, "learning_rate": 0.0005375617620678756, "loss": 0.0806, "num_input_tokens_seen": 139745488, "step": 64790 }, { "epoch": 10.570146818923329, "grad_norm": 0.011999144218862057, "learning_rate": 0.0005374907830022316, "loss": 0.155, "num_input_tokens_seen": 139756880, "step": 64795 }, { "epoch": 10.570962479608482, "grad_norm": 0.06300096958875656, "learning_rate": 0.0005374198031767892, "loss": 0.0627, "num_input_tokens_seen": 139767984, "step": 64800 }, { "epoch": 10.571778140293638, "grad_norm": 0.03064427152276039, "learning_rate": 0.0005373488225929871, "loss": 0.0134, "num_input_tokens_seen": 139777872, "step": 64805 }, { "epoch": 10.572593800978792, "grad_norm": 0.18165799975395203, "learning_rate": 0.0005372778412522638, "loss": 0.0183, "num_input_tokens_seen": 139788880, "step": 64810 }, { "epoch": 10.573409461663948, "grad_norm": 0.03748062998056412, "learning_rate": 0.0005372068591560577, "loss": 0.0412, "num_input_tokens_seen": 139800144, "step": 64815 }, { "epoch": 10.574225122349104, "grad_norm": 0.5680138468742371, "learning_rate": 0.0005371358763058074, "loss": 0.1573, "num_input_tokens_seen": 139811824, "step": 64820 }, { "epoch": 10.575040783034257, "grad_norm": 0.029221855103969574, "learning_rate": 0.0005370648927029515, "loss": 0.023, "num_input_tokens_seen": 139821616, "step": 64825 }, { "epoch": 10.575856443719413, "grad_norm": 0.05404824763536453, "learning_rate": 0.0005369939083489283, "loss": 0.0448, "num_input_tokens_seen": 139831888, "step": 64830 }, { "epoch": 10.576672104404567, "grad_norm": 0.011340592056512833, "learning_rate": 0.0005369229232451769, "loss": 0.0869, "num_input_tokens_seen": 139842960, "step": 64835 }, { "epoch": 10.577487765089723, "grad_norm": 0.020535647869110107, "learning_rate": 0.0005368519373931355, "loss": 0.0868, "num_input_tokens_seen": 139854384, "step": 64840 }, { "epoch": 10.578303425774878, "grad_norm": 0.21963241696357727, "learning_rate": 0.0005367809507942429, "loss": 0.0981, "num_input_tokens_seen": 139865968, "step": 64845 }, { "epoch": 10.579119086460032, "grad_norm": 0.007206362206488848, "learning_rate": 0.0005367099634499375, "loss": 0.1474, "num_input_tokens_seen": 139876720, "step": 64850 }, { "epoch": 10.579934747145188, "grad_norm": 0.005058453418314457, "learning_rate": 0.0005366389753616583, "loss": 0.0086, "num_input_tokens_seen": 139888112, "step": 64855 }, { "epoch": 10.580750407830342, "grad_norm": 0.008400590158998966, "learning_rate": 0.0005365679865308437, "loss": 0.0376, "num_input_tokens_seen": 139898576, "step": 64860 }, { "epoch": 10.581566068515498, "grad_norm": 0.03582179546356201, "learning_rate": 0.0005364969969589325, "loss": 0.0281, "num_input_tokens_seen": 139909296, "step": 64865 }, { "epoch": 10.582381729200652, "grad_norm": 0.030712993815541267, "learning_rate": 0.0005364260066473634, "loss": 0.1246, "num_input_tokens_seen": 139919664, "step": 64870 }, { "epoch": 10.583197389885807, "grad_norm": 0.0163736455142498, "learning_rate": 0.000536355015597575, "loss": 0.0116, "num_input_tokens_seen": 139929968, "step": 64875 }, { "epoch": 10.584013050570963, "grad_norm": 0.060377851128578186, "learning_rate": 0.0005362840238110061, "loss": 0.1517, "num_input_tokens_seen": 139940112, "step": 64880 }, { "epoch": 10.584828711256117, "grad_norm": 0.015612814575433731, "learning_rate": 0.0005362130312890955, "loss": 0.0167, "num_input_tokens_seen": 139950000, "step": 64885 }, { "epoch": 10.585644371941273, "grad_norm": 0.4621472954750061, "learning_rate": 0.0005361420380332818, "loss": 0.0973, "num_input_tokens_seen": 139961264, "step": 64890 }, { "epoch": 10.586460032626427, "grad_norm": 0.1778033822774887, "learning_rate": 0.0005360710440450037, "loss": 0.0702, "num_input_tokens_seen": 139971952, "step": 64895 }, { "epoch": 10.587275693311582, "grad_norm": 0.49138760566711426, "learning_rate": 0.0005360000493257003, "loss": 0.0777, "num_input_tokens_seen": 139983088, "step": 64900 }, { "epoch": 10.588091353996738, "grad_norm": 0.025415783748030663, "learning_rate": 0.0005359290538768102, "loss": 0.025, "num_input_tokens_seen": 139993776, "step": 64905 }, { "epoch": 10.588907014681892, "grad_norm": 0.10553082078695297, "learning_rate": 0.0005358580576997723, "loss": 0.0214, "num_input_tokens_seen": 140004304, "step": 64910 }, { "epoch": 10.589722675367048, "grad_norm": 0.4100148677825928, "learning_rate": 0.0005357870607960255, "loss": 0.028, "num_input_tokens_seen": 140015312, "step": 64915 }, { "epoch": 10.590538336052202, "grad_norm": 0.27414923906326294, "learning_rate": 0.0005357160631670083, "loss": 0.0456, "num_input_tokens_seen": 140026768, "step": 64920 }, { "epoch": 10.591353996737357, "grad_norm": 0.014687271788716316, "learning_rate": 0.0005356450648141599, "loss": 0.0078, "num_input_tokens_seen": 140038192, "step": 64925 }, { "epoch": 10.592169657422513, "grad_norm": 0.007926033809781075, "learning_rate": 0.0005355740657389189, "loss": 0.0385, "num_input_tokens_seen": 140049584, "step": 64930 }, { "epoch": 10.592985318107667, "grad_norm": 0.0072236377745866776, "learning_rate": 0.0005355030659427245, "loss": 0.2304, "num_input_tokens_seen": 140059632, "step": 64935 }, { "epoch": 10.593800978792823, "grad_norm": 0.011273419484496117, "learning_rate": 0.0005354320654270153, "loss": 0.1194, "num_input_tokens_seen": 140069968, "step": 64940 }, { "epoch": 10.594616639477977, "grad_norm": 0.010185904800891876, "learning_rate": 0.0005353610641932304, "loss": 0.0234, "num_input_tokens_seen": 140081808, "step": 64945 }, { "epoch": 10.595432300163132, "grad_norm": 0.01689624786376953, "learning_rate": 0.0005352900622428086, "loss": 0.1193, "num_input_tokens_seen": 140093648, "step": 64950 }, { "epoch": 10.596247960848288, "grad_norm": 0.026090145111083984, "learning_rate": 0.0005352190595771889, "loss": 0.0073, "num_input_tokens_seen": 140104720, "step": 64955 }, { "epoch": 10.597063621533442, "grad_norm": 0.6454436182975769, "learning_rate": 0.0005351480561978103, "loss": 0.1236, "num_input_tokens_seen": 140115472, "step": 64960 }, { "epoch": 10.597879282218598, "grad_norm": 0.31625354290008545, "learning_rate": 0.0005350770521061118, "loss": 0.0851, "num_input_tokens_seen": 140125360, "step": 64965 }, { "epoch": 10.598694942903752, "grad_norm": 0.01373235136270523, "learning_rate": 0.0005350060473035324, "loss": 0.125, "num_input_tokens_seen": 140137040, "step": 64970 }, { "epoch": 10.599510603588907, "grad_norm": 0.02478274516761303, "learning_rate": 0.000534935041791511, "loss": 0.0154, "num_input_tokens_seen": 140148176, "step": 64975 }, { "epoch": 10.600326264274061, "grad_norm": 0.05479089170694351, "learning_rate": 0.0005348640355714866, "loss": 0.1072, "num_input_tokens_seen": 140159312, "step": 64980 }, { "epoch": 10.601141924959217, "grad_norm": 0.04837688058614731, "learning_rate": 0.0005347930286448984, "loss": 0.0212, "num_input_tokens_seen": 140170320, "step": 64985 }, { "epoch": 10.601957585644373, "grad_norm": 0.009426210075616837, "learning_rate": 0.0005347220210131853, "loss": 0.1011, "num_input_tokens_seen": 140180400, "step": 64990 }, { "epoch": 10.602773246329527, "grad_norm": 0.033510513603687286, "learning_rate": 0.0005346510126777864, "loss": 0.0138, "num_input_tokens_seen": 140192560, "step": 64995 }, { "epoch": 10.603588907014682, "grad_norm": 0.02661915123462677, "learning_rate": 0.0005345800036401407, "loss": 0.0165, "num_input_tokens_seen": 140202896, "step": 65000 }, { "epoch": 10.604404567699836, "grad_norm": 0.3637932538986206, "learning_rate": 0.0005345089939016874, "loss": 0.0625, "num_input_tokens_seen": 140213680, "step": 65005 }, { "epoch": 10.605220228384992, "grad_norm": 0.01840251125395298, "learning_rate": 0.0005344379834638656, "loss": 0.2567, "num_input_tokens_seen": 140224944, "step": 65010 }, { "epoch": 10.606035889070148, "grad_norm": 0.02252040058374405, "learning_rate": 0.0005343669723281144, "loss": 0.0191, "num_input_tokens_seen": 140235856, "step": 65015 }, { "epoch": 10.606851549755302, "grad_norm": 0.06929527968168259, "learning_rate": 0.0005342959604958728, "loss": 0.2592, "num_input_tokens_seen": 140247088, "step": 65020 }, { "epoch": 10.607667210440457, "grad_norm": 0.025894179940223694, "learning_rate": 0.0005342249479685801, "loss": 0.0376, "num_input_tokens_seen": 140259376, "step": 65025 }, { "epoch": 10.608482871125611, "grad_norm": 0.029939504340291023, "learning_rate": 0.0005341539347476754, "loss": 0.0253, "num_input_tokens_seen": 140268752, "step": 65030 }, { "epoch": 10.609298531810767, "grad_norm": 0.15071600675582886, "learning_rate": 0.0005340829208345979, "loss": 0.1069, "num_input_tokens_seen": 140279312, "step": 65035 }, { "epoch": 10.61011419249592, "grad_norm": 0.5390784740447998, "learning_rate": 0.0005340119062307866, "loss": 0.1475, "num_input_tokens_seen": 140289488, "step": 65040 }, { "epoch": 10.610929853181077, "grad_norm": 0.23984746634960175, "learning_rate": 0.0005339408909376812, "loss": 0.0545, "num_input_tokens_seen": 140300592, "step": 65045 }, { "epoch": 10.611745513866232, "grad_norm": 0.01038853544741869, "learning_rate": 0.0005338698749567203, "loss": 0.0186, "num_input_tokens_seen": 140311664, "step": 65050 }, { "epoch": 10.612561174551386, "grad_norm": 0.29475951194763184, "learning_rate": 0.0005337988582893436, "loss": 0.0283, "num_input_tokens_seen": 140323408, "step": 65055 }, { "epoch": 10.613376835236542, "grad_norm": 0.05272824317216873, "learning_rate": 0.0005337278409369901, "loss": 0.0306, "num_input_tokens_seen": 140333776, "step": 65060 }, { "epoch": 10.614192495921696, "grad_norm": 0.34929758310317993, "learning_rate": 0.0005336568229010991, "loss": 0.0315, "num_input_tokens_seen": 140343888, "step": 65065 }, { "epoch": 10.615008156606851, "grad_norm": 0.021580711007118225, "learning_rate": 0.0005335858041831099, "loss": 0.0755, "num_input_tokens_seen": 140354896, "step": 65070 }, { "epoch": 10.615823817292007, "grad_norm": 0.023260554298758507, "learning_rate": 0.0005335147847844618, "loss": 0.0257, "num_input_tokens_seen": 140365040, "step": 65075 }, { "epoch": 10.616639477977161, "grad_norm": 0.0206208024173975, "learning_rate": 0.000533443764706594, "loss": 0.0494, "num_input_tokens_seen": 140373840, "step": 65080 }, { "epoch": 10.617455138662317, "grad_norm": 0.061352699995040894, "learning_rate": 0.0005333727439509459, "loss": 0.0587, "num_input_tokens_seen": 140385520, "step": 65085 }, { "epoch": 10.61827079934747, "grad_norm": 0.04550410807132721, "learning_rate": 0.0005333017225189569, "loss": 0.0506, "num_input_tokens_seen": 140395600, "step": 65090 }, { "epoch": 10.619086460032626, "grad_norm": 0.03024260699748993, "learning_rate": 0.0005332307004120662, "loss": 0.1011, "num_input_tokens_seen": 140404720, "step": 65095 }, { "epoch": 10.619902120717782, "grad_norm": 0.05776134505867958, "learning_rate": 0.0005331596776317133, "loss": 0.0152, "num_input_tokens_seen": 140415152, "step": 65100 }, { "epoch": 10.620717781402936, "grad_norm": 0.226288840174675, "learning_rate": 0.0005330886541793372, "loss": 0.0293, "num_input_tokens_seen": 140426512, "step": 65105 }, { "epoch": 10.621533442088092, "grad_norm": 0.030748805031180382, "learning_rate": 0.0005330176300563778, "loss": 0.0785, "num_input_tokens_seen": 140438480, "step": 65110 }, { "epoch": 10.622349102773246, "grad_norm": 0.014461339451372623, "learning_rate": 0.0005329466052642741, "loss": 0.0635, "num_input_tokens_seen": 140449968, "step": 65115 }, { "epoch": 10.623164763458401, "grad_norm": 0.01605989597737789, "learning_rate": 0.0005328755798044658, "loss": 0.0275, "num_input_tokens_seen": 140459216, "step": 65120 }, { "epoch": 10.623980424143557, "grad_norm": 0.5504515171051025, "learning_rate": 0.000532804553678392, "loss": 0.0528, "num_input_tokens_seen": 140469648, "step": 65125 }, { "epoch": 10.624796084828711, "grad_norm": 0.008542252704501152, "learning_rate": 0.0005327335268874924, "loss": 0.0051, "num_input_tokens_seen": 140479824, "step": 65130 }, { "epoch": 10.625611745513867, "grad_norm": 0.035341527312994, "learning_rate": 0.0005326624994332063, "loss": 0.0832, "num_input_tokens_seen": 140489744, "step": 65135 }, { "epoch": 10.62642740619902, "grad_norm": 0.008635364472866058, "learning_rate": 0.0005325914713169733, "loss": 0.0115, "num_input_tokens_seen": 140499280, "step": 65140 }, { "epoch": 10.627243066884176, "grad_norm": 0.07077856361865997, "learning_rate": 0.0005325204425402327, "loss": 0.1339, "num_input_tokens_seen": 140509936, "step": 65145 }, { "epoch": 10.62805872756933, "grad_norm": 0.038388922810554504, "learning_rate": 0.0005324494131044241, "loss": 0.0564, "num_input_tokens_seen": 140520816, "step": 65150 }, { "epoch": 10.628874388254486, "grad_norm": 0.2406747043132782, "learning_rate": 0.000532378383010987, "loss": 0.0535, "num_input_tokens_seen": 140532176, "step": 65155 }, { "epoch": 10.629690048939642, "grad_norm": 0.0690184086561203, "learning_rate": 0.0005323073522613608, "loss": 0.2085, "num_input_tokens_seen": 140541872, "step": 65160 }, { "epoch": 10.630505709624796, "grad_norm": 0.10710988193750381, "learning_rate": 0.0005322363208569851, "loss": 0.112, "num_input_tokens_seen": 140552112, "step": 65165 }, { "epoch": 10.631321370309951, "grad_norm": 0.02112758904695511, "learning_rate": 0.0005321652887992996, "loss": 0.1608, "num_input_tokens_seen": 140562768, "step": 65170 }, { "epoch": 10.632137030995105, "grad_norm": 0.02989118918776512, "learning_rate": 0.0005320942560897436, "loss": 0.0694, "num_input_tokens_seen": 140572016, "step": 65175 }, { "epoch": 10.632952691680261, "grad_norm": 0.0713958889245987, "learning_rate": 0.0005320232227297569, "loss": 0.0198, "num_input_tokens_seen": 140582768, "step": 65180 }, { "epoch": 10.633768352365417, "grad_norm": 0.08218973875045776, "learning_rate": 0.0005319521887207789, "loss": 0.0073, "num_input_tokens_seen": 140592592, "step": 65185 }, { "epoch": 10.63458401305057, "grad_norm": 0.055104997009038925, "learning_rate": 0.0005318811540642493, "loss": 0.0224, "num_input_tokens_seen": 140603312, "step": 65190 }, { "epoch": 10.635399673735726, "grad_norm": 0.013545108027756214, "learning_rate": 0.0005318101187616077, "loss": 0.0228, "num_input_tokens_seen": 140613648, "step": 65195 }, { "epoch": 10.63621533442088, "grad_norm": 0.015664026141166687, "learning_rate": 0.0005317390828142937, "loss": 0.0322, "num_input_tokens_seen": 140623792, "step": 65200 }, { "epoch": 10.637030995106036, "grad_norm": 0.22384244203567505, "learning_rate": 0.0005316680462237468, "loss": 0.2024, "num_input_tokens_seen": 140635952, "step": 65205 }, { "epoch": 10.63784665579119, "grad_norm": 0.11214940249919891, "learning_rate": 0.0005315970089914068, "loss": 0.0406, "num_input_tokens_seen": 140645904, "step": 65210 }, { "epoch": 10.638662316476346, "grad_norm": 0.6119344830513, "learning_rate": 0.0005315259711187134, "loss": 0.0944, "num_input_tokens_seen": 140656848, "step": 65215 }, { "epoch": 10.639477977161501, "grad_norm": 0.025184648111462593, "learning_rate": 0.0005314549326071061, "loss": 0.0127, "num_input_tokens_seen": 140666960, "step": 65220 }, { "epoch": 10.640293637846655, "grad_norm": 0.6606709361076355, "learning_rate": 0.0005313838934580248, "loss": 0.0323, "num_input_tokens_seen": 140678736, "step": 65225 }, { "epoch": 10.641109298531811, "grad_norm": 0.023694856092333794, "learning_rate": 0.0005313128536729091, "loss": 0.0206, "num_input_tokens_seen": 140689968, "step": 65230 }, { "epoch": 10.641924959216965, "grad_norm": 0.4197846055030823, "learning_rate": 0.0005312418132531985, "loss": 0.1762, "num_input_tokens_seen": 140699472, "step": 65235 }, { "epoch": 10.64274061990212, "grad_norm": 0.0142621248960495, "learning_rate": 0.0005311707722003332, "loss": 0.0325, "num_input_tokens_seen": 140710896, "step": 65240 }, { "epoch": 10.643556280587276, "grad_norm": 0.04110664501786232, "learning_rate": 0.0005310997305157524, "loss": 0.0097, "num_input_tokens_seen": 140720880, "step": 65245 }, { "epoch": 10.64437194127243, "grad_norm": 0.7605090141296387, "learning_rate": 0.0005310286882008962, "loss": 0.07, "num_input_tokens_seen": 140731888, "step": 65250 }, { "epoch": 10.645187601957586, "grad_norm": 0.026999490335583687, "learning_rate": 0.0005309576452572043, "loss": 0.087, "num_input_tokens_seen": 140742896, "step": 65255 }, { "epoch": 10.64600326264274, "grad_norm": 0.018997231498360634, "learning_rate": 0.0005308866016861166, "loss": 0.1303, "num_input_tokens_seen": 140752560, "step": 65260 }, { "epoch": 10.646818923327896, "grad_norm": 0.013576850295066833, "learning_rate": 0.0005308155574890725, "loss": 0.0635, "num_input_tokens_seen": 140762704, "step": 65265 }, { "epoch": 10.647634584013051, "grad_norm": 0.1187472864985466, "learning_rate": 0.000530744512667512, "loss": 0.028, "num_input_tokens_seen": 140773840, "step": 65270 }, { "epoch": 10.648450244698205, "grad_norm": 0.08240360021591187, "learning_rate": 0.0005306734672228751, "loss": 0.0744, "num_input_tokens_seen": 140783664, "step": 65275 }, { "epoch": 10.649265905383361, "grad_norm": 0.02886849083006382, "learning_rate": 0.0005306024211566014, "loss": 0.0682, "num_input_tokens_seen": 140795248, "step": 65280 }, { "epoch": 10.650081566068515, "grad_norm": 0.04073810949921608, "learning_rate": 0.0005305313744701309, "loss": 0.1782, "num_input_tokens_seen": 140806608, "step": 65285 }, { "epoch": 10.65089722675367, "grad_norm": 0.279098778963089, "learning_rate": 0.0005304603271649033, "loss": 0.0802, "num_input_tokens_seen": 140817072, "step": 65290 }, { "epoch": 10.651712887438826, "grad_norm": 0.06167510524392128, "learning_rate": 0.0005303892792423585, "loss": 0.0379, "num_input_tokens_seen": 140827056, "step": 65295 }, { "epoch": 10.65252854812398, "grad_norm": 0.020316317677497864, "learning_rate": 0.0005303182307039364, "loss": 0.0106, "num_input_tokens_seen": 140838544, "step": 65300 }, { "epoch": 10.653344208809136, "grad_norm": 0.0034559844061732292, "learning_rate": 0.0005302471815510771, "loss": 0.0164, "num_input_tokens_seen": 140849584, "step": 65305 }, { "epoch": 10.65415986949429, "grad_norm": 0.025866305455565453, "learning_rate": 0.00053017613178522, "loss": 0.0251, "num_input_tokens_seen": 140858352, "step": 65310 }, { "epoch": 10.654975530179446, "grad_norm": 0.12084754556417465, "learning_rate": 0.0005301050814078055, "loss": 0.0288, "num_input_tokens_seen": 140868784, "step": 65315 }, { "epoch": 10.655791190864601, "grad_norm": 0.012867621146142483, "learning_rate": 0.0005300340304202734, "loss": 0.0139, "num_input_tokens_seen": 140880240, "step": 65320 }, { "epoch": 10.656606851549755, "grad_norm": 0.6676931977272034, "learning_rate": 0.0005299629788240634, "loss": 0.2179, "num_input_tokens_seen": 140889520, "step": 65325 }, { "epoch": 10.65742251223491, "grad_norm": 0.21255181729793549, "learning_rate": 0.0005298919266206157, "loss": 0.0515, "num_input_tokens_seen": 140898768, "step": 65330 }, { "epoch": 10.658238172920065, "grad_norm": 0.4611773192882538, "learning_rate": 0.0005298208738113701, "loss": 0.2822, "num_input_tokens_seen": 140908976, "step": 65335 }, { "epoch": 10.65905383360522, "grad_norm": 0.25364717841148376, "learning_rate": 0.0005297498203977668, "loss": 0.0631, "num_input_tokens_seen": 140919472, "step": 65340 }, { "epoch": 10.659869494290374, "grad_norm": 0.10999982804059982, "learning_rate": 0.0005296787663812456, "loss": 0.0307, "num_input_tokens_seen": 140930416, "step": 65345 }, { "epoch": 10.66068515497553, "grad_norm": 0.18474049866199493, "learning_rate": 0.0005296077117632464, "loss": 0.0623, "num_input_tokens_seen": 140942768, "step": 65350 }, { "epoch": 10.661500815660686, "grad_norm": 0.007145730312913656, "learning_rate": 0.0005295366565452094, "loss": 0.054, "num_input_tokens_seen": 140952912, "step": 65355 }, { "epoch": 10.66231647634584, "grad_norm": 0.18517763912677765, "learning_rate": 0.0005294656007285748, "loss": 0.0832, "num_input_tokens_seen": 140961936, "step": 65360 }, { "epoch": 10.663132137030995, "grad_norm": 0.013944596983492374, "learning_rate": 0.0005293945443147821, "loss": 0.1215, "num_input_tokens_seen": 140972048, "step": 65365 }, { "epoch": 10.66394779771615, "grad_norm": 0.012275336310267448, "learning_rate": 0.000529323487305272, "loss": 0.1245, "num_input_tokens_seen": 140982352, "step": 65370 }, { "epoch": 10.664763458401305, "grad_norm": 0.19761279225349426, "learning_rate": 0.0005292524297014842, "loss": 0.0519, "num_input_tokens_seen": 140994000, "step": 65375 }, { "epoch": 10.66557911908646, "grad_norm": 0.010719568468630314, "learning_rate": 0.0005291813715048584, "loss": 0.0204, "num_input_tokens_seen": 141003664, "step": 65380 }, { "epoch": 10.666394779771615, "grad_norm": 0.03829227387905121, "learning_rate": 0.0005291103127168355, "loss": 0.0393, "num_input_tokens_seen": 141015504, "step": 65385 }, { "epoch": 10.66721044045677, "grad_norm": 0.04063795506954193, "learning_rate": 0.000529039253338855, "loss": 0.1025, "num_input_tokens_seen": 141026736, "step": 65390 }, { "epoch": 10.668026101141924, "grad_norm": 0.015031510032713413, "learning_rate": 0.0005289681933723573, "loss": 0.0513, "num_input_tokens_seen": 141037680, "step": 65395 }, { "epoch": 10.66884176182708, "grad_norm": 0.012516951188445091, "learning_rate": 0.0005288971328187824, "loss": 0.0213, "num_input_tokens_seen": 141047600, "step": 65400 }, { "epoch": 10.669657422512234, "grad_norm": 0.08012958616018295, "learning_rate": 0.0005288260716795704, "loss": 0.012, "num_input_tokens_seen": 141058768, "step": 65405 }, { "epoch": 10.67047308319739, "grad_norm": 0.7402973771095276, "learning_rate": 0.0005287550099561614, "loss": 0.2082, "num_input_tokens_seen": 141069040, "step": 65410 }, { "epoch": 10.671288743882545, "grad_norm": 0.012344174087047577, "learning_rate": 0.0005286839476499959, "loss": 0.0325, "num_input_tokens_seen": 141080496, "step": 65415 }, { "epoch": 10.6721044045677, "grad_norm": 0.028689252212643623, "learning_rate": 0.0005286128847625136, "loss": 0.0128, "num_input_tokens_seen": 141092272, "step": 65420 }, { "epoch": 10.672920065252855, "grad_norm": 0.18323378264904022, "learning_rate": 0.0005285418212951549, "loss": 0.2116, "num_input_tokens_seen": 141103856, "step": 65425 }, { "epoch": 10.673735725938009, "grad_norm": 0.1645793914794922, "learning_rate": 0.0005284707572493601, "loss": 0.0374, "num_input_tokens_seen": 141114544, "step": 65430 }, { "epoch": 10.674551386623165, "grad_norm": 0.3749389946460724, "learning_rate": 0.0005283996926265692, "loss": 0.0454, "num_input_tokens_seen": 141125488, "step": 65435 }, { "epoch": 10.67536704730832, "grad_norm": 0.010810227133333683, "learning_rate": 0.0005283286274282226, "loss": 0.0151, "num_input_tokens_seen": 141135440, "step": 65440 }, { "epoch": 10.676182707993474, "grad_norm": 0.01340310275554657, "learning_rate": 0.0005282575616557603, "loss": 0.0529, "num_input_tokens_seen": 141146320, "step": 65445 }, { "epoch": 10.67699836867863, "grad_norm": 0.0324542410671711, "learning_rate": 0.0005281864953106226, "loss": 0.0375, "num_input_tokens_seen": 141157712, "step": 65450 }, { "epoch": 10.677814029363784, "grad_norm": 0.09730778634548187, "learning_rate": 0.0005281154283942501, "loss": 0.0182, "num_input_tokens_seen": 141168080, "step": 65455 }, { "epoch": 10.67862969004894, "grad_norm": 0.020266860723495483, "learning_rate": 0.0005280443609080826, "loss": 0.0512, "num_input_tokens_seen": 141179600, "step": 65460 }, { "epoch": 10.679445350734095, "grad_norm": 0.42096146941185, "learning_rate": 0.0005279732928535606, "loss": 0.104, "num_input_tokens_seen": 141190448, "step": 65465 }, { "epoch": 10.68026101141925, "grad_norm": 0.19911323487758636, "learning_rate": 0.0005279022242321242, "loss": 0.1766, "num_input_tokens_seen": 141201808, "step": 65470 }, { "epoch": 10.681076672104405, "grad_norm": 0.48958820104599, "learning_rate": 0.000527831155045214, "loss": 0.0397, "num_input_tokens_seen": 141212944, "step": 65475 }, { "epoch": 10.681892332789559, "grad_norm": 0.012792887166142464, "learning_rate": 0.00052776008529427, "loss": 0.0414, "num_input_tokens_seen": 141224880, "step": 65480 }, { "epoch": 10.682707993474715, "grad_norm": 0.27004119753837585, "learning_rate": 0.0005276890149807326, "loss": 0.0296, "num_input_tokens_seen": 141236400, "step": 65485 }, { "epoch": 10.68352365415987, "grad_norm": 0.03514759615063667, "learning_rate": 0.0005276179441060423, "loss": 0.0795, "num_input_tokens_seen": 141246768, "step": 65490 }, { "epoch": 10.684339314845024, "grad_norm": 0.050748128443956375, "learning_rate": 0.0005275468726716393, "loss": 0.052, "num_input_tokens_seen": 141258704, "step": 65495 }, { "epoch": 10.68515497553018, "grad_norm": 0.33963674306869507, "learning_rate": 0.000527475800678964, "loss": 0.0949, "num_input_tokens_seen": 141268848, "step": 65500 }, { "epoch": 10.685970636215334, "grad_norm": 0.3999970853328705, "learning_rate": 0.0005274047281294569, "loss": 0.0324, "num_input_tokens_seen": 141279376, "step": 65505 }, { "epoch": 10.68678629690049, "grad_norm": 0.07198058068752289, "learning_rate": 0.000527333655024558, "loss": 0.1111, "num_input_tokens_seen": 141290224, "step": 65510 }, { "epoch": 10.687601957585644, "grad_norm": 0.21202673017978668, "learning_rate": 0.0005272625813657079, "loss": 0.026, "num_input_tokens_seen": 141300240, "step": 65515 }, { "epoch": 10.6884176182708, "grad_norm": 0.5448369979858398, "learning_rate": 0.000527191507154347, "loss": 0.1308, "num_input_tokens_seen": 141310704, "step": 65520 }, { "epoch": 10.689233278955955, "grad_norm": 0.440204918384552, "learning_rate": 0.0005271204323919158, "loss": 0.0954, "num_input_tokens_seen": 141321552, "step": 65525 }, { "epoch": 10.690048939641109, "grad_norm": 0.1459086537361145, "learning_rate": 0.0005270493570798546, "loss": 0.0279, "num_input_tokens_seen": 141331888, "step": 65530 }, { "epoch": 10.690864600326265, "grad_norm": 0.03768517076969147, "learning_rate": 0.000526978281219604, "loss": 0.0275, "num_input_tokens_seen": 141342512, "step": 65535 }, { "epoch": 10.691680261011419, "grad_norm": 0.03881453350186348, "learning_rate": 0.0005269072048126041, "loss": 0.1102, "num_input_tokens_seen": 141353296, "step": 65540 }, { "epoch": 10.692495921696574, "grad_norm": 0.38973429799079895, "learning_rate": 0.0005268361278602957, "loss": 0.3324, "num_input_tokens_seen": 141363920, "step": 65545 }, { "epoch": 10.69331158238173, "grad_norm": 0.076751708984375, "learning_rate": 0.0005267650503641191, "loss": 0.0144, "num_input_tokens_seen": 141373744, "step": 65550 }, { "epoch": 10.694127243066884, "grad_norm": 0.07290762662887573, "learning_rate": 0.0005266939723255148, "loss": 0.0192, "num_input_tokens_seen": 141384144, "step": 65555 }, { "epoch": 10.69494290375204, "grad_norm": 0.4330404996871948, "learning_rate": 0.0005266228937459233, "loss": 0.1381, "num_input_tokens_seen": 141395472, "step": 65560 }, { "epoch": 10.695758564437194, "grad_norm": 0.020248396322131157, "learning_rate": 0.0005265518146267851, "loss": 0.0129, "num_input_tokens_seen": 141406896, "step": 65565 }, { "epoch": 10.69657422512235, "grad_norm": 0.09693227708339691, "learning_rate": 0.0005264807349695406, "loss": 0.0198, "num_input_tokens_seen": 141418160, "step": 65570 }, { "epoch": 10.697389885807503, "grad_norm": 0.03468271344900131, "learning_rate": 0.0005264096547756305, "loss": 0.035, "num_input_tokens_seen": 141427984, "step": 65575 }, { "epoch": 10.698205546492659, "grad_norm": 0.02497992478311062, "learning_rate": 0.0005263385740464951, "loss": 0.0759, "num_input_tokens_seen": 141437680, "step": 65580 }, { "epoch": 10.699021207177815, "grad_norm": 0.46053534746170044, "learning_rate": 0.0005262674927835752, "loss": 0.0772, "num_input_tokens_seen": 141448432, "step": 65585 }, { "epoch": 10.699836867862969, "grad_norm": 0.6338720321655273, "learning_rate": 0.0005261964109883111, "loss": 0.1734, "num_input_tokens_seen": 141459600, "step": 65590 }, { "epoch": 10.700652528548124, "grad_norm": 0.035169441252946854, "learning_rate": 0.0005261253286621437, "loss": 0.0665, "num_input_tokens_seen": 141470544, "step": 65595 }, { "epoch": 10.701468189233278, "grad_norm": 0.5085427165031433, "learning_rate": 0.0005260542458065132, "loss": 0.1326, "num_input_tokens_seen": 141482736, "step": 65600 }, { "epoch": 10.702283849918434, "grad_norm": 0.024001995101571083, "learning_rate": 0.0005259831624228605, "loss": 0.0542, "num_input_tokens_seen": 141493712, "step": 65605 }, { "epoch": 10.70309951060359, "grad_norm": 0.03938017413020134, "learning_rate": 0.000525912078512626, "loss": 0.063, "num_input_tokens_seen": 141503856, "step": 65610 }, { "epoch": 10.703915171288743, "grad_norm": 0.06727879494428635, "learning_rate": 0.0005258409940772504, "loss": 0.0411, "num_input_tokens_seen": 141514320, "step": 65615 }, { "epoch": 10.7047308319739, "grad_norm": 0.16139467060565948, "learning_rate": 0.0005257699091181742, "loss": 0.0141, "num_input_tokens_seen": 141524912, "step": 65620 }, { "epoch": 10.705546492659053, "grad_norm": 0.019714100286364555, "learning_rate": 0.0005256988236368382, "loss": 0.0072, "num_input_tokens_seen": 141536144, "step": 65625 }, { "epoch": 10.706362153344209, "grad_norm": 0.00855976715683937, "learning_rate": 0.0005256277376346829, "loss": 0.1034, "num_input_tokens_seen": 141545648, "step": 65630 }, { "epoch": 10.707177814029365, "grad_norm": 0.06991308927536011, "learning_rate": 0.0005255566511131489, "loss": 0.0726, "num_input_tokens_seen": 141556848, "step": 65635 }, { "epoch": 10.707993474714518, "grad_norm": 0.08617755770683289, "learning_rate": 0.000525485564073677, "loss": 0.1184, "num_input_tokens_seen": 141568432, "step": 65640 }, { "epoch": 10.708809135399674, "grad_norm": 0.5619592666625977, "learning_rate": 0.0005254144765177078, "loss": 0.1102, "num_input_tokens_seen": 141580592, "step": 65645 }, { "epoch": 10.709624796084828, "grad_norm": 0.13612224161624908, "learning_rate": 0.0005253433884466821, "loss": 0.0163, "num_input_tokens_seen": 141592624, "step": 65650 }, { "epoch": 10.710440456769984, "grad_norm": 0.011760419234633446, "learning_rate": 0.0005252722998620403, "loss": 0.0103, "num_input_tokens_seen": 141605008, "step": 65655 }, { "epoch": 10.71125611745514, "grad_norm": 0.005417845211923122, "learning_rate": 0.0005252012107652234, "loss": 0.0793, "num_input_tokens_seen": 141616752, "step": 65660 }, { "epoch": 10.712071778140293, "grad_norm": 0.09083826839923859, "learning_rate": 0.0005251301211576718, "loss": 0.1138, "num_input_tokens_seen": 141626544, "step": 65665 }, { "epoch": 10.71288743882545, "grad_norm": 0.014894891530275345, "learning_rate": 0.0005250590310408266, "loss": 0.0701, "num_input_tokens_seen": 141637552, "step": 65670 }, { "epoch": 10.713703099510603, "grad_norm": 0.007882265374064445, "learning_rate": 0.0005249879404161284, "loss": 0.0922, "num_input_tokens_seen": 141647920, "step": 65675 }, { "epoch": 10.714518760195759, "grad_norm": 0.008900432847440243, "learning_rate": 0.0005249168492850178, "loss": 0.0283, "num_input_tokens_seen": 141658736, "step": 65680 }, { "epoch": 10.715334420880914, "grad_norm": 0.10293632745742798, "learning_rate": 0.0005248457576489356, "loss": 0.1642, "num_input_tokens_seen": 141668432, "step": 65685 }, { "epoch": 10.716150081566068, "grad_norm": 0.026290183886885643, "learning_rate": 0.0005247746655093228, "loss": 0.0456, "num_input_tokens_seen": 141678800, "step": 65690 }, { "epoch": 10.716965742251224, "grad_norm": 0.07370040565729141, "learning_rate": 0.0005247035728676196, "loss": 0.0185, "num_input_tokens_seen": 141690128, "step": 65695 }, { "epoch": 10.717781402936378, "grad_norm": 0.015016176737844944, "learning_rate": 0.0005246324797252674, "loss": 0.0481, "num_input_tokens_seen": 141701328, "step": 65700 }, { "epoch": 10.718597063621534, "grad_norm": 0.005605230573564768, "learning_rate": 0.0005245613860837068, "loss": 0.1115, "num_input_tokens_seen": 141711344, "step": 65705 }, { "epoch": 10.719412724306688, "grad_norm": 0.011927730403840542, "learning_rate": 0.0005244902919443785, "loss": 0.0156, "num_input_tokens_seen": 141721456, "step": 65710 }, { "epoch": 10.720228384991843, "grad_norm": 0.0363427959382534, "learning_rate": 0.0005244191973087233, "loss": 0.0244, "num_input_tokens_seen": 141732208, "step": 65715 }, { "epoch": 10.721044045676999, "grad_norm": 0.11510616540908813, "learning_rate": 0.0005243481021781821, "loss": 0.0623, "num_input_tokens_seen": 141742864, "step": 65720 }, { "epoch": 10.721859706362153, "grad_norm": 0.0431240014731884, "learning_rate": 0.0005242770065541958, "loss": 0.0095, "num_input_tokens_seen": 141753616, "step": 65725 }, { "epoch": 10.722675367047309, "grad_norm": 0.03617875277996063, "learning_rate": 0.0005242059104382052, "loss": 0.0209, "num_input_tokens_seen": 141763120, "step": 65730 }, { "epoch": 10.723491027732463, "grad_norm": 0.458207368850708, "learning_rate": 0.000524134813831651, "loss": 0.1421, "num_input_tokens_seen": 141774704, "step": 65735 }, { "epoch": 10.724306688417618, "grad_norm": 0.017852023243904114, "learning_rate": 0.0005240637167359743, "loss": 0.009, "num_input_tokens_seen": 141785520, "step": 65740 }, { "epoch": 10.725122349102774, "grad_norm": 0.10649161040782928, "learning_rate": 0.0005239926191526157, "loss": 0.1156, "num_input_tokens_seen": 141795696, "step": 65745 }, { "epoch": 10.725938009787928, "grad_norm": 0.05884489789605141, "learning_rate": 0.0005239215210830164, "loss": 0.0435, "num_input_tokens_seen": 141806992, "step": 65750 }, { "epoch": 10.726753670473084, "grad_norm": 0.056067198514938354, "learning_rate": 0.000523850422528617, "loss": 0.2545, "num_input_tokens_seen": 141816208, "step": 65755 }, { "epoch": 10.727569331158238, "grad_norm": 0.018193768337368965, "learning_rate": 0.0005237793234908586, "loss": 0.0968, "num_input_tokens_seen": 141825456, "step": 65760 }, { "epoch": 10.728384991843393, "grad_norm": 0.045108601450920105, "learning_rate": 0.000523708223971182, "loss": 0.035, "num_input_tokens_seen": 141836400, "step": 65765 }, { "epoch": 10.729200652528547, "grad_norm": 0.07621747255325317, "learning_rate": 0.0005236371239710283, "loss": 0.0302, "num_input_tokens_seen": 141847664, "step": 65770 }, { "epoch": 10.730016313213703, "grad_norm": 0.6209467053413391, "learning_rate": 0.0005235660234918381, "loss": 0.0696, "num_input_tokens_seen": 141859312, "step": 65775 }, { "epoch": 10.730831973898859, "grad_norm": 0.06658677011728287, "learning_rate": 0.0005234949225350526, "loss": 0.0287, "num_input_tokens_seen": 141870448, "step": 65780 }, { "epoch": 10.731647634584013, "grad_norm": 0.06654839962720871, "learning_rate": 0.0005234238211021127, "loss": 0.0459, "num_input_tokens_seen": 141881008, "step": 65785 }, { "epoch": 10.732463295269168, "grad_norm": 0.06726885586977005, "learning_rate": 0.0005233527191944593, "loss": 0.0235, "num_input_tokens_seen": 141890992, "step": 65790 }, { "epoch": 10.733278955954322, "grad_norm": 0.1636054813861847, "learning_rate": 0.0005232816168135336, "loss": 0.0355, "num_input_tokens_seen": 141901456, "step": 65795 }, { "epoch": 10.734094616639478, "grad_norm": 0.05554470792412758, "learning_rate": 0.0005232105139607763, "loss": 0.1178, "num_input_tokens_seen": 141911696, "step": 65800 }, { "epoch": 10.734910277324634, "grad_norm": 0.3934885859489441, "learning_rate": 0.0005231394106376283, "loss": 0.0294, "num_input_tokens_seen": 141922224, "step": 65805 }, { "epoch": 10.735725938009788, "grad_norm": 0.9125637412071228, "learning_rate": 0.000523068306845531, "loss": 0.1316, "num_input_tokens_seen": 141934448, "step": 65810 }, { "epoch": 10.736541598694943, "grad_norm": 0.020677465945482254, "learning_rate": 0.0005229972025859252, "loss": 0.0679, "num_input_tokens_seen": 141944496, "step": 65815 }, { "epoch": 10.737357259380097, "grad_norm": 0.00527220731601119, "learning_rate": 0.0005229260978602519, "loss": 0.017, "num_input_tokens_seen": 141954096, "step": 65820 }, { "epoch": 10.738172920065253, "grad_norm": 0.09959209710359573, "learning_rate": 0.0005228549926699521, "loss": 0.0916, "num_input_tokens_seen": 141962832, "step": 65825 }, { "epoch": 10.738988580750409, "grad_norm": 0.32093146443367004, "learning_rate": 0.0005227838870164669, "loss": 0.0467, "num_input_tokens_seen": 141973648, "step": 65830 }, { "epoch": 10.739804241435563, "grad_norm": 0.05958164483308792, "learning_rate": 0.0005227127809012372, "loss": 0.0381, "num_input_tokens_seen": 141984336, "step": 65835 }, { "epoch": 10.740619902120718, "grad_norm": 0.015763558447360992, "learning_rate": 0.0005226416743257043, "loss": 0.0913, "num_input_tokens_seen": 141994672, "step": 65840 }, { "epoch": 10.741435562805872, "grad_norm": 0.0045907460153102875, "learning_rate": 0.0005225705672913092, "loss": 0.0095, "num_input_tokens_seen": 142005136, "step": 65845 }, { "epoch": 10.742251223491028, "grad_norm": 1.5955307483673096, "learning_rate": 0.0005224994597994929, "loss": 0.152, "num_input_tokens_seen": 142016272, "step": 65850 }, { "epoch": 10.743066884176184, "grad_norm": 0.07174677401781082, "learning_rate": 0.0005224283518516965, "loss": 0.0159, "num_input_tokens_seen": 142028144, "step": 65855 }, { "epoch": 10.743882544861338, "grad_norm": 0.7015431523323059, "learning_rate": 0.000522357243449361, "loss": 0.1924, "num_input_tokens_seen": 142039280, "step": 65860 }, { "epoch": 10.744698205546493, "grad_norm": 0.032426830381155014, "learning_rate": 0.0005222861345939278, "loss": 0.0588, "num_input_tokens_seen": 142049552, "step": 65865 }, { "epoch": 10.745513866231647, "grad_norm": 0.14766238629817963, "learning_rate": 0.0005222150252868375, "loss": 0.0156, "num_input_tokens_seen": 142060624, "step": 65870 }, { "epoch": 10.746329526916803, "grad_norm": 0.022274743765592575, "learning_rate": 0.0005221439155295318, "loss": 0.0075, "num_input_tokens_seen": 142071216, "step": 65875 }, { "epoch": 10.747145187601957, "grad_norm": 0.05714645981788635, "learning_rate": 0.0005220728053234514, "loss": 0.0798, "num_input_tokens_seen": 142081744, "step": 65880 }, { "epoch": 10.747960848287113, "grad_norm": 0.0947713628411293, "learning_rate": 0.0005220016946700378, "loss": 0.1, "num_input_tokens_seen": 142092656, "step": 65885 }, { "epoch": 10.748776508972268, "grad_norm": 0.10865871608257294, "learning_rate": 0.0005219305835707318, "loss": 0.016, "num_input_tokens_seen": 142104368, "step": 65890 }, { "epoch": 10.749592169657422, "grad_norm": 0.2609046399593353, "learning_rate": 0.0005218594720269748, "loss": 0.0199, "num_input_tokens_seen": 142115152, "step": 65895 }, { "epoch": 10.750407830342578, "grad_norm": 0.29787567257881165, "learning_rate": 0.0005217883600402076, "loss": 0.1234, "num_input_tokens_seen": 142125200, "step": 65900 }, { "epoch": 10.751223491027732, "grad_norm": 0.38215166330337524, "learning_rate": 0.0005217172476118719, "loss": 0.1176, "num_input_tokens_seen": 142136432, "step": 65905 }, { "epoch": 10.752039151712887, "grad_norm": 0.033840157091617584, "learning_rate": 0.0005216461347434084, "loss": 0.0598, "num_input_tokens_seen": 142147760, "step": 65910 }, { "epoch": 10.752854812398043, "grad_norm": 0.05134151130914688, "learning_rate": 0.0005215750214362588, "loss": 0.0249, "num_input_tokens_seen": 142159120, "step": 65915 }, { "epoch": 10.753670473083197, "grad_norm": 0.039257925003767014, "learning_rate": 0.0005215039076918638, "loss": 0.007, "num_input_tokens_seen": 142169168, "step": 65920 }, { "epoch": 10.754486133768353, "grad_norm": 0.025323187932372093, "learning_rate": 0.0005214327935116651, "loss": 0.0101, "num_input_tokens_seen": 142179856, "step": 65925 }, { "epoch": 10.755301794453507, "grad_norm": 0.10543278604745865, "learning_rate": 0.0005213616788971034, "loss": 0.1113, "num_input_tokens_seen": 142189008, "step": 65930 }, { "epoch": 10.756117455138662, "grad_norm": 0.011239561252295971, "learning_rate": 0.0005212905638496203, "loss": 0.0451, "num_input_tokens_seen": 142199536, "step": 65935 }, { "epoch": 10.756933115823816, "grad_norm": 0.02316836453974247, "learning_rate": 0.0005212194483706569, "loss": 0.0154, "num_input_tokens_seen": 142210320, "step": 65940 }, { "epoch": 10.757748776508972, "grad_norm": 0.015035904943943024, "learning_rate": 0.0005211483324616544, "loss": 0.0096, "num_input_tokens_seen": 142221328, "step": 65945 }, { "epoch": 10.758564437194128, "grad_norm": 0.05971091240644455, "learning_rate": 0.0005210772161240541, "loss": 0.0167, "num_input_tokens_seen": 142232080, "step": 65950 }, { "epoch": 10.759380097879282, "grad_norm": 0.008284551091492176, "learning_rate": 0.0005210060993592973, "loss": 0.0228, "num_input_tokens_seen": 142242480, "step": 65955 }, { "epoch": 10.760195758564437, "grad_norm": 0.08989621698856354, "learning_rate": 0.0005209349821688254, "loss": 0.0207, "num_input_tokens_seen": 142252912, "step": 65960 }, { "epoch": 10.761011419249591, "grad_norm": 1.0601121187210083, "learning_rate": 0.0005208638645540795, "loss": 0.0999, "num_input_tokens_seen": 142263888, "step": 65965 }, { "epoch": 10.761827079934747, "grad_norm": 0.09080396592617035, "learning_rate": 0.0005207927465165007, "loss": 0.0768, "num_input_tokens_seen": 142274544, "step": 65970 }, { "epoch": 10.762642740619903, "grad_norm": 0.04634217172861099, "learning_rate": 0.0005207216280575306, "loss": 0.0122, "num_input_tokens_seen": 142284400, "step": 65975 }, { "epoch": 10.763458401305057, "grad_norm": 0.3500921130180359, "learning_rate": 0.0005206505091786103, "loss": 0.0267, "num_input_tokens_seen": 142295504, "step": 65980 }, { "epoch": 10.764274061990212, "grad_norm": 0.044202182441949844, "learning_rate": 0.0005205793898811814, "loss": 0.0205, "num_input_tokens_seen": 142306320, "step": 65985 }, { "epoch": 10.765089722675366, "grad_norm": 0.005621148739010096, "learning_rate": 0.0005205082701666851, "loss": 0.0273, "num_input_tokens_seen": 142316336, "step": 65990 }, { "epoch": 10.765905383360522, "grad_norm": 0.08013339340686798, "learning_rate": 0.0005204371500365627, "loss": 0.0108, "num_input_tokens_seen": 142327472, "step": 65995 }, { "epoch": 10.766721044045678, "grad_norm": 0.021581266075372696, "learning_rate": 0.0005203660294922554, "loss": 0.0314, "num_input_tokens_seen": 142338288, "step": 66000 }, { "epoch": 10.767536704730832, "grad_norm": 0.07743038982152939, "learning_rate": 0.0005202949085352048, "loss": 0.0532, "num_input_tokens_seen": 142349840, "step": 66005 }, { "epoch": 10.768352365415987, "grad_norm": 0.0696510523557663, "learning_rate": 0.000520223787166852, "loss": 0.0362, "num_input_tokens_seen": 142362192, "step": 66010 }, { "epoch": 10.769168026101141, "grad_norm": 0.02906361036002636, "learning_rate": 0.0005201526653886385, "loss": 0.2154, "num_input_tokens_seen": 142373072, "step": 66015 }, { "epoch": 10.769983686786297, "grad_norm": 0.00962374173104763, "learning_rate": 0.0005200815432020058, "loss": 0.0066, "num_input_tokens_seen": 142384176, "step": 66020 }, { "epoch": 10.770799347471453, "grad_norm": 0.013699590228497982, "learning_rate": 0.0005200104206083951, "loss": 0.0262, "num_input_tokens_seen": 142395600, "step": 66025 }, { "epoch": 10.771615008156607, "grad_norm": 0.9402329325675964, "learning_rate": 0.0005199392976092479, "loss": 0.0715, "num_input_tokens_seen": 142405776, "step": 66030 }, { "epoch": 10.772430668841762, "grad_norm": 0.06350778788328171, "learning_rate": 0.0005198681742060055, "loss": 0.0171, "num_input_tokens_seen": 142416752, "step": 66035 }, { "epoch": 10.773246329526916, "grad_norm": 0.13295669853687286, "learning_rate": 0.0005197970504001091, "loss": 0.1213, "num_input_tokens_seen": 142426992, "step": 66040 }, { "epoch": 10.774061990212072, "grad_norm": 0.002908498514443636, "learning_rate": 0.0005197259261930007, "loss": 0.0241, "num_input_tokens_seen": 142438128, "step": 66045 }, { "epoch": 10.774877650897226, "grad_norm": 1.1991382837295532, "learning_rate": 0.0005196548015861212, "loss": 0.2355, "num_input_tokens_seen": 142448752, "step": 66050 }, { "epoch": 10.775693311582382, "grad_norm": 0.1859101504087448, "learning_rate": 0.0005195836765809123, "loss": 0.037, "num_input_tokens_seen": 142459664, "step": 66055 }, { "epoch": 10.776508972267537, "grad_norm": 0.004806674085557461, "learning_rate": 0.0005195125511788153, "loss": 0.0308, "num_input_tokens_seen": 142471280, "step": 66060 }, { "epoch": 10.777324632952691, "grad_norm": 0.6417043805122375, "learning_rate": 0.0005194414253812718, "loss": 0.0973, "num_input_tokens_seen": 142482416, "step": 66065 }, { "epoch": 10.778140293637847, "grad_norm": 0.06615472584962845, "learning_rate": 0.000519370299189723, "loss": 0.0948, "num_input_tokens_seen": 142492112, "step": 66070 }, { "epoch": 10.778955954323001, "grad_norm": 0.010804622434079647, "learning_rate": 0.0005192991726056107, "loss": 0.0439, "num_input_tokens_seen": 142502416, "step": 66075 }, { "epoch": 10.779771615008157, "grad_norm": 0.054748017340898514, "learning_rate": 0.0005192280456303759, "loss": 0.0799, "num_input_tokens_seen": 142513264, "step": 66080 }, { "epoch": 10.780587275693312, "grad_norm": 0.010598719120025635, "learning_rate": 0.0005191569182654606, "loss": 0.0816, "num_input_tokens_seen": 142524208, "step": 66085 }, { "epoch": 10.781402936378466, "grad_norm": 0.4023333787918091, "learning_rate": 0.000519085790512306, "loss": 0.3613, "num_input_tokens_seen": 142535472, "step": 66090 }, { "epoch": 10.782218597063622, "grad_norm": 0.1402120143175125, "learning_rate": 0.0005190146623723536, "loss": 0.107, "num_input_tokens_seen": 142547664, "step": 66095 }, { "epoch": 10.783034257748776, "grad_norm": 0.04191403463482857, "learning_rate": 0.000518943533847045, "loss": 0.1234, "num_input_tokens_seen": 142558608, "step": 66100 }, { "epoch": 10.783849918433932, "grad_norm": 0.3423280417919159, "learning_rate": 0.0005188724049378216, "loss": 0.0709, "num_input_tokens_seen": 142570000, "step": 66105 }, { "epoch": 10.784665579119086, "grad_norm": 0.0329495333135128, "learning_rate": 0.0005188012756461251, "loss": 0.0309, "num_input_tokens_seen": 142580272, "step": 66110 }, { "epoch": 10.785481239804241, "grad_norm": 0.08309388160705566, "learning_rate": 0.0005187301459733967, "loss": 0.0196, "num_input_tokens_seen": 142591728, "step": 66115 }, { "epoch": 10.786296900489397, "grad_norm": 0.40597599744796753, "learning_rate": 0.0005186590159210783, "loss": 0.0641, "num_input_tokens_seen": 142603120, "step": 66120 }, { "epoch": 10.78711256117455, "grad_norm": 0.012702791951596737, "learning_rate": 0.0005185878854906111, "loss": 0.0621, "num_input_tokens_seen": 142614064, "step": 66125 }, { "epoch": 10.787928221859707, "grad_norm": 0.1517186313867569, "learning_rate": 0.0005185167546834368, "loss": 0.017, "num_input_tokens_seen": 142623280, "step": 66130 }, { "epoch": 10.78874388254486, "grad_norm": 0.23278121650218964, "learning_rate": 0.0005184456235009972, "loss": 0.0619, "num_input_tokens_seen": 142633520, "step": 66135 }, { "epoch": 10.789559543230016, "grad_norm": 0.04242526739835739, "learning_rate": 0.0005183744919447335, "loss": 0.0438, "num_input_tokens_seen": 142644912, "step": 66140 }, { "epoch": 10.790375203915172, "grad_norm": 0.008802259340882301, "learning_rate": 0.0005183033600160875, "loss": 0.1643, "num_input_tokens_seen": 142654640, "step": 66145 }, { "epoch": 10.791190864600326, "grad_norm": 0.6168103218078613, "learning_rate": 0.0005182322277165005, "loss": 0.0808, "num_input_tokens_seen": 142664560, "step": 66150 }, { "epoch": 10.792006525285482, "grad_norm": 0.014035305939614773, "learning_rate": 0.0005181610950474143, "loss": 0.1319, "num_input_tokens_seen": 142674704, "step": 66155 }, { "epoch": 10.792822185970635, "grad_norm": 0.052696533501148224, "learning_rate": 0.0005180899620102707, "loss": 0.0252, "num_input_tokens_seen": 142685264, "step": 66160 }, { "epoch": 10.793637846655791, "grad_norm": 0.14148011803627014, "learning_rate": 0.000518018828606511, "loss": 0.0392, "num_input_tokens_seen": 142695440, "step": 66165 }, { "epoch": 10.794453507340947, "grad_norm": 0.016714558005332947, "learning_rate": 0.0005179476948375767, "loss": 0.0295, "num_input_tokens_seen": 142705264, "step": 66170 }, { "epoch": 10.7952691680261, "grad_norm": 0.029497938230633736, "learning_rate": 0.0005178765607049098, "loss": 0.0617, "num_input_tokens_seen": 142716336, "step": 66175 }, { "epoch": 10.796084828711257, "grad_norm": 0.03631696105003357, "learning_rate": 0.0005178054262099516, "loss": 0.0634, "num_input_tokens_seen": 142724976, "step": 66180 }, { "epoch": 10.79690048939641, "grad_norm": 0.39152753353118896, "learning_rate": 0.000517734291354144, "loss": 0.0399, "num_input_tokens_seen": 142736784, "step": 66185 }, { "epoch": 10.797716150081566, "grad_norm": 0.025955181568861008, "learning_rate": 0.0005176631561389283, "loss": 0.0278, "num_input_tokens_seen": 142746608, "step": 66190 }, { "epoch": 10.798531810766722, "grad_norm": 0.13097329437732697, "learning_rate": 0.0005175920205657465, "loss": 0.0741, "num_input_tokens_seen": 142757424, "step": 66195 }, { "epoch": 10.799347471451876, "grad_norm": 0.01667974144220352, "learning_rate": 0.0005175208846360399, "loss": 0.0452, "num_input_tokens_seen": 142767152, "step": 66200 }, { "epoch": 10.800163132137031, "grad_norm": 0.021667111665010452, "learning_rate": 0.0005174497483512506, "loss": 0.0237, "num_input_tokens_seen": 142777936, "step": 66205 }, { "epoch": 10.800978792822185, "grad_norm": 0.2827494144439697, "learning_rate": 0.0005173786117128198, "loss": 0.0252, "num_input_tokens_seen": 142789296, "step": 66210 }, { "epoch": 10.801794453507341, "grad_norm": 0.003980646841228008, "learning_rate": 0.0005173074747221895, "loss": 0.0359, "num_input_tokens_seen": 142799600, "step": 66215 }, { "epoch": 10.802610114192497, "grad_norm": 1.0152939558029175, "learning_rate": 0.0005172363373808013, "loss": 0.0305, "num_input_tokens_seen": 142811152, "step": 66220 }, { "epoch": 10.80342577487765, "grad_norm": 0.2861131727695465, "learning_rate": 0.0005171651996900967, "loss": 0.0668, "num_input_tokens_seen": 142822192, "step": 66225 }, { "epoch": 10.804241435562806, "grad_norm": 0.2725006937980652, "learning_rate": 0.0005170940616515175, "loss": 0.1181, "num_input_tokens_seen": 142832944, "step": 66230 }, { "epoch": 10.80505709624796, "grad_norm": 0.009392892010509968, "learning_rate": 0.0005170229232665056, "loss": 0.1201, "num_input_tokens_seen": 142844272, "step": 66235 }, { "epoch": 10.805872756933116, "grad_norm": 0.6185192465782166, "learning_rate": 0.0005169517845365025, "loss": 0.1704, "num_input_tokens_seen": 142854736, "step": 66240 }, { "epoch": 10.80668841761827, "grad_norm": 0.5266598463058472, "learning_rate": 0.0005168806454629501, "loss": 0.0862, "num_input_tokens_seen": 142865328, "step": 66245 }, { "epoch": 10.807504078303426, "grad_norm": 0.12782518565654755, "learning_rate": 0.0005168095060472899, "loss": 0.0306, "num_input_tokens_seen": 142875952, "step": 66250 }, { "epoch": 10.808319738988581, "grad_norm": 0.00602505449205637, "learning_rate": 0.0005167383662909638, "loss": 0.1051, "num_input_tokens_seen": 142886672, "step": 66255 }, { "epoch": 10.809135399673735, "grad_norm": 0.3548838794231415, "learning_rate": 0.0005166672261954134, "loss": 0.0464, "num_input_tokens_seen": 142898224, "step": 66260 }, { "epoch": 10.809951060358891, "grad_norm": 0.025879787281155586, "learning_rate": 0.0005165960857620806, "loss": 0.026, "num_input_tokens_seen": 142909328, "step": 66265 }, { "epoch": 10.810766721044045, "grad_norm": 0.12285425513982773, "learning_rate": 0.000516524944992407, "loss": 0.0087, "num_input_tokens_seen": 142919088, "step": 66270 }, { "epoch": 10.8115823817292, "grad_norm": 0.012883170507848263, "learning_rate": 0.0005164538038878345, "loss": 0.0264, "num_input_tokens_seen": 142929424, "step": 66275 }, { "epoch": 10.812398042414356, "grad_norm": 0.13517752289772034, "learning_rate": 0.0005163826624498047, "loss": 0.0194, "num_input_tokens_seen": 142941072, "step": 66280 }, { "epoch": 10.81321370309951, "grad_norm": 0.007750454358756542, "learning_rate": 0.0005163115206797596, "loss": 0.0123, "num_input_tokens_seen": 142951664, "step": 66285 }, { "epoch": 10.814029363784666, "grad_norm": 0.021612824872136116, "learning_rate": 0.0005162403785791408, "loss": 0.1181, "num_input_tokens_seen": 142963760, "step": 66290 }, { "epoch": 10.81484502446982, "grad_norm": 0.009918679483234882, "learning_rate": 0.0005161692361493899, "loss": 0.1478, "num_input_tokens_seen": 142973936, "step": 66295 }, { "epoch": 10.815660685154976, "grad_norm": 0.2536182999610901, "learning_rate": 0.0005160980933919491, "loss": 0.1163, "num_input_tokens_seen": 142985488, "step": 66300 }, { "epoch": 10.81647634584013, "grad_norm": 0.010918602347373962, "learning_rate": 0.00051602695030826, "loss": 0.0586, "num_input_tokens_seen": 142996176, "step": 66305 }, { "epoch": 10.817292006525285, "grad_norm": 0.010426843538880348, "learning_rate": 0.0005159558068997644, "loss": 0.0474, "num_input_tokens_seen": 143005840, "step": 66310 }, { "epoch": 10.818107667210441, "grad_norm": 0.006808142643421888, "learning_rate": 0.0005158846631679041, "loss": 0.0263, "num_input_tokens_seen": 143016656, "step": 66315 }, { "epoch": 10.818923327895595, "grad_norm": 0.4041946530342102, "learning_rate": 0.0005158135191141211, "loss": 0.1148, "num_input_tokens_seen": 143027312, "step": 66320 }, { "epoch": 10.81973898858075, "grad_norm": 0.044392965734004974, "learning_rate": 0.000515742374739857, "loss": 0.0769, "num_input_tokens_seen": 143037264, "step": 66325 }, { "epoch": 10.820554649265905, "grad_norm": 0.09791223704814911, "learning_rate": 0.0005156712300465537, "loss": 0.0508, "num_input_tokens_seen": 143049200, "step": 66330 }, { "epoch": 10.82137030995106, "grad_norm": 0.41816458106040955, "learning_rate": 0.000515600085035653, "loss": 0.0427, "num_input_tokens_seen": 143060272, "step": 66335 }, { "epoch": 10.822185970636216, "grad_norm": 0.14748936891555786, "learning_rate": 0.0005155289397085968, "loss": 0.1081, "num_input_tokens_seen": 143070416, "step": 66340 }, { "epoch": 10.82300163132137, "grad_norm": 0.021624771878123283, "learning_rate": 0.0005154577940668269, "loss": 0.0698, "num_input_tokens_seen": 143080880, "step": 66345 }, { "epoch": 10.823817292006526, "grad_norm": 0.018015755340456963, "learning_rate": 0.0005153866481117852, "loss": 0.1111, "num_input_tokens_seen": 143092688, "step": 66350 }, { "epoch": 10.82463295269168, "grad_norm": 0.013204284943640232, "learning_rate": 0.0005153155018449137, "loss": 0.006, "num_input_tokens_seen": 143103184, "step": 66355 }, { "epoch": 10.825448613376835, "grad_norm": 0.039297837764024734, "learning_rate": 0.000515244355267654, "loss": 0.1064, "num_input_tokens_seen": 143114128, "step": 66360 }, { "epoch": 10.826264274061991, "grad_norm": 0.02965759113430977, "learning_rate": 0.0005151732083814481, "loss": 0.1335, "num_input_tokens_seen": 143125328, "step": 66365 }, { "epoch": 10.827079934747145, "grad_norm": 0.5478432178497314, "learning_rate": 0.000515102061187738, "loss": 0.1313, "num_input_tokens_seen": 143135824, "step": 66370 }, { "epoch": 10.8278955954323, "grad_norm": 0.012006096541881561, "learning_rate": 0.0005150309136879654, "loss": 0.0319, "num_input_tokens_seen": 143145904, "step": 66375 }, { "epoch": 10.828711256117455, "grad_norm": 0.008373947814106941, "learning_rate": 0.0005149597658835722, "loss": 0.0158, "num_input_tokens_seen": 143157616, "step": 66380 }, { "epoch": 10.82952691680261, "grad_norm": 0.2748185396194458, "learning_rate": 0.0005148886177760005, "loss": 0.0827, "num_input_tokens_seen": 143167952, "step": 66385 }, { "epoch": 10.830342577487766, "grad_norm": 0.011499104090034962, "learning_rate": 0.000514817469366692, "loss": 0.0369, "num_input_tokens_seen": 143179056, "step": 66390 }, { "epoch": 10.83115823817292, "grad_norm": 0.37029775977134705, "learning_rate": 0.0005147463206570886, "loss": 0.182, "num_input_tokens_seen": 143189136, "step": 66395 }, { "epoch": 10.831973898858076, "grad_norm": 0.007518650032579899, "learning_rate": 0.0005146751716486324, "loss": 0.0421, "num_input_tokens_seen": 143200784, "step": 66400 }, { "epoch": 10.83278955954323, "grad_norm": 0.030004989355802536, "learning_rate": 0.0005146040223427652, "loss": 0.0233, "num_input_tokens_seen": 143211280, "step": 66405 }, { "epoch": 10.833605220228385, "grad_norm": 0.6341733336448669, "learning_rate": 0.0005145328727409291, "loss": 0.1136, "num_input_tokens_seen": 143221808, "step": 66410 }, { "epoch": 10.83442088091354, "grad_norm": 0.027288202196359634, "learning_rate": 0.0005144617228445657, "loss": 0.0611, "num_input_tokens_seen": 143232080, "step": 66415 }, { "epoch": 10.835236541598695, "grad_norm": 0.005203576758503914, "learning_rate": 0.0005143905726551172, "loss": 0.097, "num_input_tokens_seen": 143242320, "step": 66420 }, { "epoch": 10.83605220228385, "grad_norm": 0.010392607189714909, "learning_rate": 0.0005143194221740255, "loss": 0.0087, "num_input_tokens_seen": 143254576, "step": 66425 }, { "epoch": 10.836867862969005, "grad_norm": 0.012380306608974934, "learning_rate": 0.0005142482714027326, "loss": 0.0101, "num_input_tokens_seen": 143265744, "step": 66430 }, { "epoch": 10.83768352365416, "grad_norm": 0.5805116295814514, "learning_rate": 0.0005141771203426803, "loss": 0.0539, "num_input_tokens_seen": 143276336, "step": 66435 }, { "epoch": 10.838499184339314, "grad_norm": 0.17591452598571777, "learning_rate": 0.0005141059689953107, "loss": 0.0542, "num_input_tokens_seen": 143287408, "step": 66440 }, { "epoch": 10.83931484502447, "grad_norm": 0.021742304787039757, "learning_rate": 0.0005140348173620657, "loss": 0.045, "num_input_tokens_seen": 143298256, "step": 66445 }, { "epoch": 10.840130505709626, "grad_norm": 0.14842836558818817, "learning_rate": 0.0005139636654443874, "loss": 0.0217, "num_input_tokens_seen": 143308656, "step": 66450 }, { "epoch": 10.84094616639478, "grad_norm": 0.26908770203590393, "learning_rate": 0.0005138925132437178, "loss": 0.1219, "num_input_tokens_seen": 143319920, "step": 66455 }, { "epoch": 10.841761827079935, "grad_norm": 0.3170417845249176, "learning_rate": 0.0005138213607614985, "loss": 0.0391, "num_input_tokens_seen": 143331024, "step": 66460 }, { "epoch": 10.84257748776509, "grad_norm": 0.0359996035695076, "learning_rate": 0.000513750207999172, "loss": 0.0805, "num_input_tokens_seen": 143342224, "step": 66465 }, { "epoch": 10.843393148450245, "grad_norm": 0.016904551535844803, "learning_rate": 0.0005136790549581801, "loss": 0.0286, "num_input_tokens_seen": 143351504, "step": 66470 }, { "epoch": 10.844208809135399, "grad_norm": 0.01217916700989008, "learning_rate": 0.0005136079016399647, "loss": 0.0506, "num_input_tokens_seen": 143362768, "step": 66475 }, { "epoch": 10.845024469820554, "grad_norm": 0.05636857822537422, "learning_rate": 0.000513536748045968, "loss": 0.0337, "num_input_tokens_seen": 143375408, "step": 66480 }, { "epoch": 10.84584013050571, "grad_norm": 0.02917717956006527, "learning_rate": 0.000513465594177632, "loss": 0.0172, "num_input_tokens_seen": 143386576, "step": 66485 }, { "epoch": 10.846655791190864, "grad_norm": 0.01661716401576996, "learning_rate": 0.0005133944400363986, "loss": 0.1413, "num_input_tokens_seen": 143397392, "step": 66490 }, { "epoch": 10.84747145187602, "grad_norm": 0.6303054094314575, "learning_rate": 0.0005133232856237098, "loss": 0.0517, "num_input_tokens_seen": 143406096, "step": 66495 }, { "epoch": 10.848287112561174, "grad_norm": 0.06322157382965088, "learning_rate": 0.0005132521309410078, "loss": 0.0123, "num_input_tokens_seen": 143416784, "step": 66500 }, { "epoch": 10.84910277324633, "grad_norm": 0.34654778242111206, "learning_rate": 0.0005131809759897345, "loss": 0.0666, "num_input_tokens_seen": 143428784, "step": 66505 }, { "epoch": 10.849918433931485, "grad_norm": 0.13000568747520447, "learning_rate": 0.000513109820771332, "loss": 0.0294, "num_input_tokens_seen": 143439504, "step": 66510 }, { "epoch": 10.850734094616639, "grad_norm": 0.103849858045578, "learning_rate": 0.0005130386652872423, "loss": 0.0789, "num_input_tokens_seen": 143450288, "step": 66515 }, { "epoch": 10.851549755301795, "grad_norm": 0.0033534602262079716, "learning_rate": 0.0005129675095389076, "loss": 0.0054, "num_input_tokens_seen": 143462256, "step": 66520 }, { "epoch": 10.852365415986949, "grad_norm": 0.21664060652256012, "learning_rate": 0.0005128963535277699, "loss": 0.0598, "num_input_tokens_seen": 143473840, "step": 66525 }, { "epoch": 10.853181076672104, "grad_norm": 0.015227776020765305, "learning_rate": 0.0005128251972552711, "loss": 0.0405, "num_input_tokens_seen": 143483632, "step": 66530 }, { "epoch": 10.85399673735726, "grad_norm": 0.026499086990952492, "learning_rate": 0.0005127540407228535, "loss": 0.2227, "num_input_tokens_seen": 143493840, "step": 66535 }, { "epoch": 10.854812398042414, "grad_norm": 0.027399277314543724, "learning_rate": 0.0005126828839319591, "loss": 0.0424, "num_input_tokens_seen": 143505168, "step": 66540 }, { "epoch": 10.85562805872757, "grad_norm": 0.577487587928772, "learning_rate": 0.0005126117268840299, "loss": 0.0703, "num_input_tokens_seen": 143516144, "step": 66545 }, { "epoch": 10.856443719412724, "grad_norm": 0.014248356223106384, "learning_rate": 0.000512540569580508, "loss": 0.114, "num_input_tokens_seen": 143527152, "step": 66550 }, { "epoch": 10.85725938009788, "grad_norm": 0.46240025758743286, "learning_rate": 0.0005124694120228357, "loss": 0.105, "num_input_tokens_seen": 143538544, "step": 66555 }, { "epoch": 10.858075040783035, "grad_norm": 0.11888233572244644, "learning_rate": 0.0005123982542124549, "loss": 0.0417, "num_input_tokens_seen": 143548528, "step": 66560 }, { "epoch": 10.858890701468189, "grad_norm": 0.003284773323684931, "learning_rate": 0.0005123270961508077, "loss": 0.0144, "num_input_tokens_seen": 143557904, "step": 66565 }, { "epoch": 10.859706362153345, "grad_norm": 0.017011838033795357, "learning_rate": 0.0005122559378393363, "loss": 0.0567, "num_input_tokens_seen": 143568336, "step": 66570 }, { "epoch": 10.860522022838499, "grad_norm": 0.0341564416885376, "learning_rate": 0.0005121847792794828, "loss": 0.1705, "num_input_tokens_seen": 143578832, "step": 66575 }, { "epoch": 10.861337683523654, "grad_norm": 0.00785812083631754, "learning_rate": 0.0005121136204726893, "loss": 0.0183, "num_input_tokens_seen": 143590288, "step": 66580 }, { "epoch": 10.86215334420881, "grad_norm": 0.02114320732653141, "learning_rate": 0.0005120424614203978, "loss": 0.0207, "num_input_tokens_seen": 143600880, "step": 66585 }, { "epoch": 10.862969004893964, "grad_norm": 0.016360154375433922, "learning_rate": 0.0005119713021240507, "loss": 0.0112, "num_input_tokens_seen": 143612176, "step": 66590 }, { "epoch": 10.86378466557912, "grad_norm": 0.30597764253616333, "learning_rate": 0.0005119001425850899, "loss": 0.0352, "num_input_tokens_seen": 143622736, "step": 66595 }, { "epoch": 10.864600326264274, "grad_norm": 0.6524933576583862, "learning_rate": 0.0005118289828049575, "loss": 0.0851, "num_input_tokens_seen": 143631632, "step": 66600 }, { "epoch": 10.86541598694943, "grad_norm": 0.47735321521759033, "learning_rate": 0.0005117578227850958, "loss": 0.1521, "num_input_tokens_seen": 143641552, "step": 66605 }, { "epoch": 10.866231647634583, "grad_norm": 0.2576245367527008, "learning_rate": 0.000511686662526947, "loss": 0.0349, "num_input_tokens_seen": 143651856, "step": 66610 }, { "epoch": 10.867047308319739, "grad_norm": 0.022520892322063446, "learning_rate": 0.0005116155020319531, "loss": 0.0168, "num_input_tokens_seen": 143662224, "step": 66615 }, { "epoch": 10.867862969004895, "grad_norm": 0.09923997521400452, "learning_rate": 0.0005115443413015563, "loss": 0.025, "num_input_tokens_seen": 143673008, "step": 66620 }, { "epoch": 10.868678629690049, "grad_norm": 0.02507423609495163, "learning_rate": 0.0005114731803371988, "loss": 0.0108, "num_input_tokens_seen": 143683216, "step": 66625 }, { "epoch": 10.869494290375204, "grad_norm": 0.19366225600242615, "learning_rate": 0.0005114020191403228, "loss": 0.0956, "num_input_tokens_seen": 143693936, "step": 66630 }, { "epoch": 10.870309951060358, "grad_norm": 0.03327065706253052, "learning_rate": 0.0005113308577123705, "loss": 0.0225, "num_input_tokens_seen": 143705328, "step": 66635 }, { "epoch": 10.871125611745514, "grad_norm": 0.14479467272758484, "learning_rate": 0.0005112596960547838, "loss": 0.0134, "num_input_tokens_seen": 143715728, "step": 66640 }, { "epoch": 10.87194127243067, "grad_norm": 0.09806028753519058, "learning_rate": 0.0005111885341690051, "loss": 0.0638, "num_input_tokens_seen": 143726576, "step": 66645 }, { "epoch": 10.872756933115824, "grad_norm": 0.11055514961481094, "learning_rate": 0.0005111173720564767, "loss": 0.0183, "num_input_tokens_seen": 143736336, "step": 66650 }, { "epoch": 10.87357259380098, "grad_norm": 0.0021656507160514593, "learning_rate": 0.0005110462097186405, "loss": 0.0094, "num_input_tokens_seen": 143747568, "step": 66655 }, { "epoch": 10.874388254486133, "grad_norm": 0.5819531679153442, "learning_rate": 0.0005109750471569388, "loss": 0.1448, "num_input_tokens_seen": 143759472, "step": 66660 }, { "epoch": 10.875203915171289, "grad_norm": 0.09125712513923645, "learning_rate": 0.000510903884372814, "loss": 0.0451, "num_input_tokens_seen": 143771696, "step": 66665 }, { "epoch": 10.876019575856443, "grad_norm": 0.16352109611034393, "learning_rate": 0.0005108327213677081, "loss": 0.119, "num_input_tokens_seen": 143783216, "step": 66670 }, { "epoch": 10.876835236541599, "grad_norm": 0.009362633340060711, "learning_rate": 0.0005107615581430633, "loss": 0.0292, "num_input_tokens_seen": 143794032, "step": 66675 }, { "epoch": 10.877650897226754, "grad_norm": 0.16473910212516785, "learning_rate": 0.0005106903947003221, "loss": 0.052, "num_input_tokens_seen": 143804528, "step": 66680 }, { "epoch": 10.878466557911908, "grad_norm": 0.01291736401617527, "learning_rate": 0.0005106192310409263, "loss": 0.0385, "num_input_tokens_seen": 143815824, "step": 66685 }, { "epoch": 10.879282218597064, "grad_norm": 0.04719436541199684, "learning_rate": 0.0005105480671663183, "loss": 0.0143, "num_input_tokens_seen": 143828464, "step": 66690 }, { "epoch": 10.880097879282218, "grad_norm": 0.0030133582185953856, "learning_rate": 0.0005104769030779404, "loss": 0.0255, "num_input_tokens_seen": 143839152, "step": 66695 }, { "epoch": 10.880913539967374, "grad_norm": 0.5886824131011963, "learning_rate": 0.0005104057387772347, "loss": 0.055, "num_input_tokens_seen": 143850352, "step": 66700 }, { "epoch": 10.88172920065253, "grad_norm": 0.10986584424972534, "learning_rate": 0.0005103345742656437, "loss": 0.2567, "num_input_tokens_seen": 143861680, "step": 66705 }, { "epoch": 10.882544861337683, "grad_norm": 0.009910174645483494, "learning_rate": 0.0005102634095446092, "loss": 0.015, "num_input_tokens_seen": 143872112, "step": 66710 }, { "epoch": 10.883360522022839, "grad_norm": 0.6185861229896545, "learning_rate": 0.0005101922446155738, "loss": 0.2097, "num_input_tokens_seen": 143883152, "step": 66715 }, { "epoch": 10.884176182707993, "grad_norm": 0.05485187843441963, "learning_rate": 0.0005101210794799797, "loss": 0.0685, "num_input_tokens_seen": 143893840, "step": 66720 }, { "epoch": 10.884991843393149, "grad_norm": 0.012285716831684113, "learning_rate": 0.0005100499141392689, "loss": 0.0715, "num_input_tokens_seen": 143905232, "step": 66725 }, { "epoch": 10.885807504078304, "grad_norm": 0.14610104262828827, "learning_rate": 0.0005099787485948839, "loss": 0.0844, "num_input_tokens_seen": 143915760, "step": 66730 }, { "epoch": 10.886623164763458, "grad_norm": 0.4250060021877289, "learning_rate": 0.000509907582848267, "loss": 0.0686, "num_input_tokens_seen": 143926896, "step": 66735 }, { "epoch": 10.887438825448614, "grad_norm": 0.38495898246765137, "learning_rate": 0.0005098364169008604, "loss": 0.0778, "num_input_tokens_seen": 143936912, "step": 66740 }, { "epoch": 10.888254486133768, "grad_norm": 0.6076394319534302, "learning_rate": 0.0005097652507541062, "loss": 0.0429, "num_input_tokens_seen": 143947504, "step": 66745 }, { "epoch": 10.889070146818923, "grad_norm": 0.0653289183974266, "learning_rate": 0.0005096940844094467, "loss": 0.1325, "num_input_tokens_seen": 143959184, "step": 66750 }, { "epoch": 10.88988580750408, "grad_norm": 0.08148347586393356, "learning_rate": 0.0005096229178683244, "loss": 0.0246, "num_input_tokens_seen": 143969936, "step": 66755 }, { "epoch": 10.890701468189233, "grad_norm": 0.04723308980464935, "learning_rate": 0.0005095517511321815, "loss": 0.1273, "num_input_tokens_seen": 143980720, "step": 66760 }, { "epoch": 10.891517128874389, "grad_norm": 0.005398208275437355, "learning_rate": 0.0005094805842024603, "loss": 0.0187, "num_input_tokens_seen": 143992080, "step": 66765 }, { "epoch": 10.892332789559543, "grad_norm": 0.21725358068943024, "learning_rate": 0.000509409417080603, "loss": 0.0269, "num_input_tokens_seen": 144002224, "step": 66770 }, { "epoch": 10.893148450244698, "grad_norm": 0.24836847186088562, "learning_rate": 0.0005093382497680516, "loss": 0.0683, "num_input_tokens_seen": 144013264, "step": 66775 }, { "epoch": 10.893964110929852, "grad_norm": 0.39005765318870544, "learning_rate": 0.000509267082266249, "loss": 0.0727, "num_input_tokens_seen": 144023504, "step": 66780 }, { "epoch": 10.894779771615008, "grad_norm": 0.005149967968463898, "learning_rate": 0.0005091959145766373, "loss": 0.0264, "num_input_tokens_seen": 144034512, "step": 66785 }, { "epoch": 10.895595432300164, "grad_norm": 0.007967822253704071, "learning_rate": 0.0005091247467006588, "loss": 0.1228, "num_input_tokens_seen": 144043120, "step": 66790 }, { "epoch": 10.896411092985318, "grad_norm": 0.09407348930835724, "learning_rate": 0.0005090535786397556, "loss": 0.022, "num_input_tokens_seen": 144055120, "step": 66795 }, { "epoch": 10.897226753670473, "grad_norm": 0.10319585353136063, "learning_rate": 0.0005089824103953701, "loss": 0.0673, "num_input_tokens_seen": 144066608, "step": 66800 }, { "epoch": 10.898042414355627, "grad_norm": 0.04857960343360901, "learning_rate": 0.0005089112419689447, "loss": 0.0146, "num_input_tokens_seen": 144077648, "step": 66805 }, { "epoch": 10.898858075040783, "grad_norm": 0.016697688028216362, "learning_rate": 0.0005088400733619217, "loss": 0.0372, "num_input_tokens_seen": 144088496, "step": 66810 }, { "epoch": 10.899673735725939, "grad_norm": 0.1494343876838684, "learning_rate": 0.0005087689045757433, "loss": 0.0296, "num_input_tokens_seen": 144100176, "step": 66815 }, { "epoch": 10.900489396411093, "grad_norm": 0.12097115814685822, "learning_rate": 0.000508697735611852, "loss": 0.0535, "num_input_tokens_seen": 144110384, "step": 66820 }, { "epoch": 10.901305057096248, "grad_norm": 0.24917173385620117, "learning_rate": 0.0005086265664716901, "loss": 0.0515, "num_input_tokens_seen": 144120976, "step": 66825 }, { "epoch": 10.902120717781402, "grad_norm": 0.04391200467944145, "learning_rate": 0.0005085553971566998, "loss": 0.0249, "num_input_tokens_seen": 144133040, "step": 66830 }, { "epoch": 10.902936378466558, "grad_norm": 0.15596114099025726, "learning_rate": 0.0005084842276683236, "loss": 0.0454, "num_input_tokens_seen": 144144976, "step": 66835 }, { "epoch": 10.903752039151712, "grad_norm": 0.11027966439723969, "learning_rate": 0.0005084130580080038, "loss": 0.1166, "num_input_tokens_seen": 144156304, "step": 66840 }, { "epoch": 10.904567699836868, "grad_norm": 0.12441020458936691, "learning_rate": 0.0005083418881771826, "loss": 0.0341, "num_input_tokens_seen": 144165552, "step": 66845 }, { "epoch": 10.905383360522023, "grad_norm": 0.057395678013563156, "learning_rate": 0.0005082707181773025, "loss": 0.0593, "num_input_tokens_seen": 144178096, "step": 66850 }, { "epoch": 10.906199021207177, "grad_norm": 0.015548653900623322, "learning_rate": 0.0005081995480098057, "loss": 0.0333, "num_input_tokens_seen": 144189712, "step": 66855 }, { "epoch": 10.907014681892333, "grad_norm": 0.0701521560549736, "learning_rate": 0.0005081283776761348, "loss": 0.0358, "num_input_tokens_seen": 144200528, "step": 66860 }, { "epoch": 10.907830342577487, "grad_norm": 0.06002316623926163, "learning_rate": 0.0005080572071777319, "loss": 0.0072, "num_input_tokens_seen": 144211600, "step": 66865 }, { "epoch": 10.908646003262643, "grad_norm": 0.005988378543406725, "learning_rate": 0.0005079860365160395, "loss": 0.0879, "num_input_tokens_seen": 144222096, "step": 66870 }, { "epoch": 10.909461663947798, "grad_norm": 0.01558577362447977, "learning_rate": 0.0005079148656924999, "loss": 0.0078, "num_input_tokens_seen": 144232464, "step": 66875 }, { "epoch": 10.910277324632952, "grad_norm": 0.010351789183914661, "learning_rate": 0.0005078436947085557, "loss": 0.0085, "num_input_tokens_seen": 144242800, "step": 66880 }, { "epoch": 10.911092985318108, "grad_norm": 0.026903511956334114, "learning_rate": 0.0005077725235656488, "loss": 0.084, "num_input_tokens_seen": 144254320, "step": 66885 }, { "epoch": 10.911908646003262, "grad_norm": 0.5576627850532532, "learning_rate": 0.000507701352265222, "loss": 0.0821, "num_input_tokens_seen": 144265296, "step": 66890 }, { "epoch": 10.912724306688418, "grad_norm": 0.006333055440336466, "learning_rate": 0.0005076301808087176, "loss": 0.121, "num_input_tokens_seen": 144274672, "step": 66895 }, { "epoch": 10.913539967373573, "grad_norm": 0.02787243388593197, "learning_rate": 0.0005075590091975779, "loss": 0.089, "num_input_tokens_seen": 144285776, "step": 66900 }, { "epoch": 10.914355628058727, "grad_norm": 0.3476940393447876, "learning_rate": 0.0005074878374332452, "loss": 0.0881, "num_input_tokens_seen": 144296944, "step": 66905 }, { "epoch": 10.915171288743883, "grad_norm": 0.15536561608314514, "learning_rate": 0.000507416665517162, "loss": 0.0107, "num_input_tokens_seen": 144308624, "step": 66910 }, { "epoch": 10.915986949429037, "grad_norm": 1.509933590888977, "learning_rate": 0.0005073454934507708, "loss": 0.076, "num_input_tokens_seen": 144319152, "step": 66915 }, { "epoch": 10.916802610114193, "grad_norm": 0.26060670614242554, "learning_rate": 0.0005072743212355135, "loss": 0.0706, "num_input_tokens_seen": 144330736, "step": 66920 }, { "epoch": 10.917618270799348, "grad_norm": 0.662960410118103, "learning_rate": 0.0005072031488728331, "loss": 0.1384, "num_input_tokens_seen": 144342192, "step": 66925 }, { "epoch": 10.918433931484502, "grad_norm": 0.3072301149368286, "learning_rate": 0.0005071319763641718, "loss": 0.0408, "num_input_tokens_seen": 144353968, "step": 66930 }, { "epoch": 10.919249592169658, "grad_norm": 0.6527560353279114, "learning_rate": 0.0005070608037109718, "loss": 0.1655, "num_input_tokens_seen": 144364624, "step": 66935 }, { "epoch": 10.920065252854812, "grad_norm": 0.10451140999794006, "learning_rate": 0.0005069896309146758, "loss": 0.0448, "num_input_tokens_seen": 144375824, "step": 66940 }, { "epoch": 10.920880913539968, "grad_norm": 0.0444205142557621, "learning_rate": 0.000506918457976726, "loss": 0.0479, "num_input_tokens_seen": 144387184, "step": 66945 }, { "epoch": 10.921696574225122, "grad_norm": 0.2000298798084259, "learning_rate": 0.0005068472848985647, "loss": 0.1167, "num_input_tokens_seen": 144398128, "step": 66950 }, { "epoch": 10.922512234910277, "grad_norm": 0.016129693016409874, "learning_rate": 0.0005067761116816348, "loss": 0.0141, "num_input_tokens_seen": 144409232, "step": 66955 }, { "epoch": 10.923327895595433, "grad_norm": 0.023004846647381783, "learning_rate": 0.0005067049383273783, "loss": 0.0582, "num_input_tokens_seen": 144419728, "step": 66960 }, { "epoch": 10.924143556280587, "grad_norm": 0.009545953013002872, "learning_rate": 0.0005066337648372376, "loss": 0.1254, "num_input_tokens_seen": 144430800, "step": 66965 }, { "epoch": 10.924959216965743, "grad_norm": 0.012447093613445759, "learning_rate": 0.0005065625912126553, "loss": 0.0946, "num_input_tokens_seen": 144440624, "step": 66970 }, { "epoch": 10.925774877650896, "grad_norm": 0.0243876650929451, "learning_rate": 0.0005064914174550737, "loss": 0.0108, "num_input_tokens_seen": 144451984, "step": 66975 }, { "epoch": 10.926590538336052, "grad_norm": 0.0063573336228728294, "learning_rate": 0.0005064202435659354, "loss": 0.032, "num_input_tokens_seen": 144463312, "step": 66980 }, { "epoch": 10.927406199021208, "grad_norm": 0.24652676284313202, "learning_rate": 0.0005063490695466827, "loss": 0.0398, "num_input_tokens_seen": 144474352, "step": 66985 }, { "epoch": 10.928221859706362, "grad_norm": 0.07905858010053635, "learning_rate": 0.000506277895398758, "loss": 0.0182, "num_input_tokens_seen": 144484592, "step": 66990 }, { "epoch": 10.929037520391518, "grad_norm": 0.7213504910469055, "learning_rate": 0.0005062067211236039, "loss": 0.0841, "num_input_tokens_seen": 144495696, "step": 66995 }, { "epoch": 10.929853181076671, "grad_norm": 0.01241478230804205, "learning_rate": 0.0005061355467226626, "loss": 0.0069, "num_input_tokens_seen": 144507408, "step": 67000 }, { "epoch": 10.930668841761827, "grad_norm": 0.12638041377067566, "learning_rate": 0.0005060643721973766, "loss": 0.0446, "num_input_tokens_seen": 144518928, "step": 67005 }, { "epoch": 10.931484502446983, "grad_norm": 0.15448585152626038, "learning_rate": 0.0005059931975491886, "loss": 0.1831, "num_input_tokens_seen": 144528752, "step": 67010 }, { "epoch": 10.932300163132137, "grad_norm": 0.12450391054153442, "learning_rate": 0.0005059220227795409, "loss": 0.0693, "num_input_tokens_seen": 144540208, "step": 67015 }, { "epoch": 10.933115823817293, "grad_norm": 0.006572850979864597, "learning_rate": 0.0005058508478898757, "loss": 0.0223, "num_input_tokens_seen": 144551088, "step": 67020 }, { "epoch": 10.933931484502446, "grad_norm": 0.11077727377414703, "learning_rate": 0.0005057796728816358, "loss": 0.045, "num_input_tokens_seen": 144563056, "step": 67025 }, { "epoch": 10.934747145187602, "grad_norm": 0.074786476790905, "learning_rate": 0.0005057084977562633, "loss": 0.0263, "num_input_tokens_seen": 144574224, "step": 67030 }, { "epoch": 10.935562805872756, "grad_norm": 0.7301462292671204, "learning_rate": 0.0005056373225152009, "loss": 0.0514, "num_input_tokens_seen": 144584400, "step": 67035 }, { "epoch": 10.936378466557912, "grad_norm": 0.019316788762807846, "learning_rate": 0.0005055661471598911, "loss": 0.0582, "num_input_tokens_seen": 144596048, "step": 67040 }, { "epoch": 10.937194127243067, "grad_norm": 0.009320543147623539, "learning_rate": 0.0005054949716917763, "loss": 0.1219, "num_input_tokens_seen": 144606032, "step": 67045 }, { "epoch": 10.938009787928221, "grad_norm": 0.00404919870197773, "learning_rate": 0.0005054237961122989, "loss": 0.1169, "num_input_tokens_seen": 144617872, "step": 67050 }, { "epoch": 10.938825448613377, "grad_norm": 0.43564698100090027, "learning_rate": 0.0005053526204229012, "loss": 0.023, "num_input_tokens_seen": 144629232, "step": 67055 }, { "epoch": 10.939641109298531, "grad_norm": 0.023148978129029274, "learning_rate": 0.000505281444625026, "loss": 0.1683, "num_input_tokens_seen": 144639184, "step": 67060 }, { "epoch": 10.940456769983687, "grad_norm": 0.00494196405634284, "learning_rate": 0.0005052102687201156, "loss": 0.0587, "num_input_tokens_seen": 144650960, "step": 67065 }, { "epoch": 10.941272430668842, "grad_norm": 0.05395887419581413, "learning_rate": 0.0005051390927096125, "loss": 0.0178, "num_input_tokens_seen": 144662160, "step": 67070 }, { "epoch": 10.942088091353996, "grad_norm": 0.026502911001443863, "learning_rate": 0.0005050679165949592, "loss": 0.1397, "num_input_tokens_seen": 144674992, "step": 67075 }, { "epoch": 10.942903752039152, "grad_norm": 0.024523422122001648, "learning_rate": 0.0005049967403775982, "loss": 0.0792, "num_input_tokens_seen": 144685968, "step": 67080 }, { "epoch": 10.943719412724306, "grad_norm": 0.048032671213150024, "learning_rate": 0.0005049255640589718, "loss": 0.0192, "num_input_tokens_seen": 144697936, "step": 67085 }, { "epoch": 10.944535073409462, "grad_norm": 0.015930799767374992, "learning_rate": 0.0005048543876405225, "loss": 0.0138, "num_input_tokens_seen": 144708976, "step": 67090 }, { "epoch": 10.945350734094617, "grad_norm": 0.15005412697792053, "learning_rate": 0.000504783211123693, "loss": 0.0576, "num_input_tokens_seen": 144720080, "step": 67095 }, { "epoch": 10.946166394779771, "grad_norm": 0.023135803639888763, "learning_rate": 0.0005047120345099258, "loss": 0.042, "num_input_tokens_seen": 144731824, "step": 67100 }, { "epoch": 10.946982055464927, "grad_norm": 0.4660382568836212, "learning_rate": 0.0005046408578006631, "loss": 0.0594, "num_input_tokens_seen": 144743216, "step": 67105 }, { "epoch": 10.947797716150081, "grad_norm": 0.2722334563732147, "learning_rate": 0.0005045696809973474, "loss": 0.02, "num_input_tokens_seen": 144753936, "step": 67110 }, { "epoch": 10.948613376835237, "grad_norm": 0.10591111332178116, "learning_rate": 0.0005044985041014217, "loss": 0.1343, "num_input_tokens_seen": 144765616, "step": 67115 }, { "epoch": 10.949429037520392, "grad_norm": 0.02708246558904648, "learning_rate": 0.0005044273271143277, "loss": 0.0278, "num_input_tokens_seen": 144775568, "step": 67120 }, { "epoch": 10.950244698205546, "grad_norm": 0.27803415060043335, "learning_rate": 0.0005043561500375085, "loss": 0.0481, "num_input_tokens_seen": 144786640, "step": 67125 }, { "epoch": 10.951060358890702, "grad_norm": 0.1259400099515915, "learning_rate": 0.0005042849728724064, "loss": 0.0363, "num_input_tokens_seen": 144796592, "step": 67130 }, { "epoch": 10.951876019575856, "grad_norm": 0.026501519605517387, "learning_rate": 0.0005042137956204639, "loss": 0.0106, "num_input_tokens_seen": 144807536, "step": 67135 }, { "epoch": 10.952691680261012, "grad_norm": 0.00257347640581429, "learning_rate": 0.0005041426182831233, "loss": 0.0802, "num_input_tokens_seen": 144818864, "step": 67140 }, { "epoch": 10.953507340946166, "grad_norm": 0.7486910820007324, "learning_rate": 0.0005040714408618275, "loss": 0.1226, "num_input_tokens_seen": 144830384, "step": 67145 }, { "epoch": 10.954323001631321, "grad_norm": 0.08224117010831833, "learning_rate": 0.0005040002633580188, "loss": 0.0846, "num_input_tokens_seen": 144841040, "step": 67150 }, { "epoch": 10.955138662316477, "grad_norm": 0.22220991551876068, "learning_rate": 0.0005039290857731395, "loss": 0.029, "num_input_tokens_seen": 144851984, "step": 67155 }, { "epoch": 10.955954323001631, "grad_norm": 0.13020408153533936, "learning_rate": 0.0005038579081086324, "loss": 0.0489, "num_input_tokens_seen": 144862096, "step": 67160 }, { "epoch": 10.956769983686787, "grad_norm": 0.012740707024931908, "learning_rate": 0.0005037867303659399, "loss": 0.0773, "num_input_tokens_seen": 144871728, "step": 67165 }, { "epoch": 10.95758564437194, "grad_norm": 0.17600229382514954, "learning_rate": 0.0005037155525465046, "loss": 0.0534, "num_input_tokens_seen": 144882512, "step": 67170 }, { "epoch": 10.958401305057096, "grad_norm": 0.016167305409908295, "learning_rate": 0.0005036443746517688, "loss": 0.0691, "num_input_tokens_seen": 144893744, "step": 67175 }, { "epoch": 10.959216965742252, "grad_norm": 0.16848881542682648, "learning_rate": 0.0005035731966831752, "loss": 0.0281, "num_input_tokens_seen": 144904112, "step": 67180 }, { "epoch": 10.960032626427406, "grad_norm": 0.005299835000187159, "learning_rate": 0.0005035020186421661, "loss": 0.0587, "num_input_tokens_seen": 144913744, "step": 67185 }, { "epoch": 10.960848287112562, "grad_norm": 0.006880569271743298, "learning_rate": 0.0005034308405301842, "loss": 0.0752, "num_input_tokens_seen": 144923120, "step": 67190 }, { "epoch": 10.961663947797716, "grad_norm": 0.03748849406838417, "learning_rate": 0.0005033596623486719, "loss": 0.0175, "num_input_tokens_seen": 144933168, "step": 67195 }, { "epoch": 10.962479608482871, "grad_norm": 0.9470998048782349, "learning_rate": 0.0005032884840990719, "loss": 0.0497, "num_input_tokens_seen": 144943408, "step": 67200 }, { "epoch": 10.963295269168025, "grad_norm": 0.03410933166742325, "learning_rate": 0.0005032173057828265, "loss": 0.0566, "num_input_tokens_seen": 144953712, "step": 67205 }, { "epoch": 10.964110929853181, "grad_norm": 0.012641534209251404, "learning_rate": 0.0005031461274013784, "loss": 0.0263, "num_input_tokens_seen": 144964784, "step": 67210 }, { "epoch": 10.964926590538337, "grad_norm": 0.046367861330509186, "learning_rate": 0.0005030749489561701, "loss": 0.0202, "num_input_tokens_seen": 144975248, "step": 67215 }, { "epoch": 10.96574225122349, "grad_norm": 0.005322993732988834, "learning_rate": 0.000503003770448644, "loss": 0.0193, "num_input_tokens_seen": 144986288, "step": 67220 }, { "epoch": 10.966557911908646, "grad_norm": 0.023926878347992897, "learning_rate": 0.0005029325918802426, "loss": 0.0233, "num_input_tokens_seen": 144996880, "step": 67225 }, { "epoch": 10.9673735725938, "grad_norm": 0.00871982891112566, "learning_rate": 0.0005028614132524085, "loss": 0.0104, "num_input_tokens_seen": 145008336, "step": 67230 }, { "epoch": 10.968189233278956, "grad_norm": 0.04353000223636627, "learning_rate": 0.0005027902345665843, "loss": 0.0327, "num_input_tokens_seen": 145019152, "step": 67235 }, { "epoch": 10.969004893964112, "grad_norm": 0.005337163340300322, "learning_rate": 0.0005027190558242124, "loss": 0.0089, "num_input_tokens_seen": 145030672, "step": 67240 }, { "epoch": 10.969820554649266, "grad_norm": 0.03401569277048111, "learning_rate": 0.0005026478770267355, "loss": 0.15, "num_input_tokens_seen": 145042352, "step": 67245 }, { "epoch": 10.970636215334421, "grad_norm": 0.014203323051333427, "learning_rate": 0.0005025766981755959, "loss": 0.1253, "num_input_tokens_seen": 145052624, "step": 67250 }, { "epoch": 10.971451876019575, "grad_norm": 0.008424450643360615, "learning_rate": 0.0005025055192722363, "loss": 0.1207, "num_input_tokens_seen": 145063088, "step": 67255 }, { "epoch": 10.97226753670473, "grad_norm": 0.0026070475578308105, "learning_rate": 0.0005024343403180992, "loss": 0.0487, "num_input_tokens_seen": 145075376, "step": 67260 }, { "epoch": 10.973083197389887, "grad_norm": 0.2636162042617798, "learning_rate": 0.0005023631613146272, "loss": 0.2548, "num_input_tokens_seen": 145087984, "step": 67265 }, { "epoch": 10.97389885807504, "grad_norm": 0.3883775472640991, "learning_rate": 0.0005022919822632625, "loss": 0.0311, "num_input_tokens_seen": 145098384, "step": 67270 }, { "epoch": 10.974714518760196, "grad_norm": 0.12940101325511932, "learning_rate": 0.0005022208031654479, "loss": 0.0986, "num_input_tokens_seen": 145109392, "step": 67275 }, { "epoch": 10.97553017944535, "grad_norm": 0.043512098491191864, "learning_rate": 0.0005021496240226261, "loss": 0.1037, "num_input_tokens_seen": 145119344, "step": 67280 }, { "epoch": 10.976345840130506, "grad_norm": 0.4405783414840698, "learning_rate": 0.0005020784448362393, "loss": 0.0316, "num_input_tokens_seen": 145130896, "step": 67285 }, { "epoch": 10.977161500815662, "grad_norm": 0.05776965990662575, "learning_rate": 0.0005020072656077302, "loss": 0.0178, "num_input_tokens_seen": 145141104, "step": 67290 }, { "epoch": 10.977977161500815, "grad_norm": 0.43091222643852234, "learning_rate": 0.0005019360863385413, "loss": 0.0423, "num_input_tokens_seen": 145151344, "step": 67295 }, { "epoch": 10.978792822185971, "grad_norm": 0.017432650551199913, "learning_rate": 0.0005018649070301152, "loss": 0.0083, "num_input_tokens_seen": 145161424, "step": 67300 }, { "epoch": 10.979608482871125, "grad_norm": 0.41052088141441345, "learning_rate": 0.0005017937276838943, "loss": 0.1273, "num_input_tokens_seen": 145171888, "step": 67305 }, { "epoch": 10.98042414355628, "grad_norm": 0.012393232434988022, "learning_rate": 0.0005017225483013212, "loss": 0.0536, "num_input_tokens_seen": 145183440, "step": 67310 }, { "epoch": 10.981239804241435, "grad_norm": 0.22539760172367096, "learning_rate": 0.0005016513688838387, "loss": 0.0717, "num_input_tokens_seen": 145193584, "step": 67315 }, { "epoch": 10.98205546492659, "grad_norm": 0.8589651584625244, "learning_rate": 0.0005015801894328889, "loss": 0.1493, "num_input_tokens_seen": 145205136, "step": 67320 }, { "epoch": 10.982871125611746, "grad_norm": 0.3181789219379425, "learning_rate": 0.0005015090099499147, "loss": 0.1451, "num_input_tokens_seen": 145216592, "step": 67325 }, { "epoch": 10.9836867862969, "grad_norm": 0.09572309255599976, "learning_rate": 0.0005014378304363584, "loss": 0.0538, "num_input_tokens_seen": 145227696, "step": 67330 }, { "epoch": 10.984502446982056, "grad_norm": 0.028751373291015625, "learning_rate": 0.0005013666508936627, "loss": 0.0113, "num_input_tokens_seen": 145238800, "step": 67335 }, { "epoch": 10.98531810766721, "grad_norm": 0.7009171843528748, "learning_rate": 0.0005012954713232701, "loss": 0.1503, "num_input_tokens_seen": 145249616, "step": 67340 }, { "epoch": 10.986133768352365, "grad_norm": 0.011959579773247242, "learning_rate": 0.0005012242917266232, "loss": 0.0267, "num_input_tokens_seen": 145260688, "step": 67345 }, { "epoch": 10.986949429037521, "grad_norm": 0.05492782220244408, "learning_rate": 0.0005011531121051643, "loss": 0.0235, "num_input_tokens_seen": 145272208, "step": 67350 }, { "epoch": 10.987765089722675, "grad_norm": 0.012244489043951035, "learning_rate": 0.0005010819324603363, "loss": 0.0185, "num_input_tokens_seen": 145283792, "step": 67355 }, { "epoch": 10.98858075040783, "grad_norm": 0.4465917944908142, "learning_rate": 0.0005010107527935815, "loss": 0.1238, "num_input_tokens_seen": 145294064, "step": 67360 }, { "epoch": 10.989396411092985, "grad_norm": 0.22399123013019562, "learning_rate": 0.0005009395731063424, "loss": 0.0488, "num_input_tokens_seen": 145305200, "step": 67365 }, { "epoch": 10.99021207177814, "grad_norm": 0.6184888482093811, "learning_rate": 0.0005008683934000618, "loss": 0.0498, "num_input_tokens_seen": 145316112, "step": 67370 }, { "epoch": 10.991027732463294, "grad_norm": 0.014057490043342113, "learning_rate": 0.000500797213676182, "loss": 0.2034, "num_input_tokens_seen": 145328304, "step": 67375 }, { "epoch": 10.99184339314845, "grad_norm": 0.014840108342468739, "learning_rate": 0.0005007260339361456, "loss": 0.017, "num_input_tokens_seen": 145338320, "step": 67380 }, { "epoch": 10.992659053833606, "grad_norm": 0.38281485438346863, "learning_rate": 0.0005006548541813953, "loss": 0.1341, "num_input_tokens_seen": 145349392, "step": 67385 }, { "epoch": 10.99347471451876, "grad_norm": 0.2294299304485321, "learning_rate": 0.0005005836744133736, "loss": 0.0448, "num_input_tokens_seen": 145361456, "step": 67390 }, { "epoch": 10.994290375203915, "grad_norm": 0.3289146423339844, "learning_rate": 0.0005005124946335229, "loss": 0.0252, "num_input_tokens_seen": 145373072, "step": 67395 }, { "epoch": 10.99510603588907, "grad_norm": 0.03609262779355049, "learning_rate": 0.0005004413148432859, "loss": 0.0353, "num_input_tokens_seen": 145383120, "step": 67400 }, { "epoch": 10.995921696574225, "grad_norm": 0.18514883518218994, "learning_rate": 0.000500370135044105, "loss": 0.1514, "num_input_tokens_seen": 145394288, "step": 67405 }, { "epoch": 10.99673735725938, "grad_norm": 0.011716614477336407, "learning_rate": 0.000500298955237423, "loss": 0.1169, "num_input_tokens_seen": 145404240, "step": 67410 }, { "epoch": 10.997553017944535, "grad_norm": 0.3536709249019623, "learning_rate": 0.0005002277754246822, "loss": 0.0987, "num_input_tokens_seen": 145414800, "step": 67415 }, { "epoch": 10.99836867862969, "grad_norm": 0.11022117733955383, "learning_rate": 0.0005001565956073252, "loss": 0.1163, "num_input_tokens_seen": 145425552, "step": 67420 }, { "epoch": 10.999184339314844, "grad_norm": 0.06947468966245651, "learning_rate": 0.0005000854157867947, "loss": 0.0932, "num_input_tokens_seen": 145437168, "step": 67425 }, { "epoch": 11.0, "grad_norm": 0.05226588249206543, "learning_rate": 0.0005000142359645331, "loss": 0.128, "num_input_tokens_seen": 145445936, "step": 67430 }, { "epoch": 11.0, "eval_loss": 0.14757058024406433, "eval_runtime": 104.8416, "eval_samples_per_second": 25.992, "eval_steps_per_second": 6.505, "num_input_tokens_seen": 145445936, "step": 67430 }, { "epoch": 11.000815660685156, "grad_norm": 0.08277715742588043, "learning_rate": 0.0004999430561419831, "loss": 0.021, "num_input_tokens_seen": 145456240, "step": 67435 }, { "epoch": 11.00163132137031, "grad_norm": 0.03233315050601959, "learning_rate": 0.000499871876320587, "loss": 0.0152, "num_input_tokens_seen": 145467696, "step": 67440 }, { "epoch": 11.002446982055465, "grad_norm": 0.05190197750926018, "learning_rate": 0.0004998006965017876, "loss": 0.0276, "num_input_tokens_seen": 145478160, "step": 67445 }, { "epoch": 11.00326264274062, "grad_norm": 0.44495972990989685, "learning_rate": 0.0004997295166870271, "loss": 0.0512, "num_input_tokens_seen": 145488080, "step": 67450 }, { "epoch": 11.004078303425775, "grad_norm": 0.01162407174706459, "learning_rate": 0.0004996583368777484, "loss": 0.1155, "num_input_tokens_seen": 145498064, "step": 67455 }, { "epoch": 11.00489396411093, "grad_norm": 0.007424779701977968, "learning_rate": 0.000499587157075394, "loss": 0.0403, "num_input_tokens_seen": 145508656, "step": 67460 }, { "epoch": 11.005709624796085, "grad_norm": 0.3345491290092468, "learning_rate": 0.0004995159772814063, "loss": 0.0274, "num_input_tokens_seen": 145518320, "step": 67465 }, { "epoch": 11.00652528548124, "grad_norm": 0.27533575892448425, "learning_rate": 0.0004994447974972281, "loss": 0.0995, "num_input_tokens_seen": 145529488, "step": 67470 }, { "epoch": 11.007340946166394, "grad_norm": 0.05362580344080925, "learning_rate": 0.0004993736177243016, "loss": 0.0156, "num_input_tokens_seen": 145540816, "step": 67475 }, { "epoch": 11.00815660685155, "grad_norm": 0.023270325735211372, "learning_rate": 0.0004993024379640697, "loss": 0.0112, "num_input_tokens_seen": 145550480, "step": 67480 }, { "epoch": 11.008972267536704, "grad_norm": 0.4108964800834656, "learning_rate": 0.0004992312582179746, "loss": 0.0242, "num_input_tokens_seen": 145561008, "step": 67485 }, { "epoch": 11.00978792822186, "grad_norm": 0.37365415692329407, "learning_rate": 0.0004991600784874593, "loss": 0.0493, "num_input_tokens_seen": 145571632, "step": 67490 }, { "epoch": 11.010603588907015, "grad_norm": 0.26880159974098206, "learning_rate": 0.0004990888987739657, "loss": 0.0446, "num_input_tokens_seen": 145582352, "step": 67495 }, { "epoch": 11.01141924959217, "grad_norm": 0.12317536771297455, "learning_rate": 0.0004990177190789371, "loss": 0.0105, "num_input_tokens_seen": 145593584, "step": 67500 }, { "epoch": 11.012234910277325, "grad_norm": 0.05089998245239258, "learning_rate": 0.0004989465394038153, "loss": 0.0571, "num_input_tokens_seen": 145603856, "step": 67505 }, { "epoch": 11.013050570962479, "grad_norm": 0.024789908900856972, "learning_rate": 0.0004988753597500435, "loss": 0.1173, "num_input_tokens_seen": 145614768, "step": 67510 }, { "epoch": 11.013866231647635, "grad_norm": 0.0853211060166359, "learning_rate": 0.0004988041801190638, "loss": 0.0517, "num_input_tokens_seen": 145624976, "step": 67515 }, { "epoch": 11.01468189233279, "grad_norm": 0.01061803475022316, "learning_rate": 0.000498733000512319, "loss": 0.0092, "num_input_tokens_seen": 145635600, "step": 67520 }, { "epoch": 11.015497553017944, "grad_norm": 0.052582383155822754, "learning_rate": 0.0004986618209312515, "loss": 0.0513, "num_input_tokens_seen": 145646640, "step": 67525 }, { "epoch": 11.0163132137031, "grad_norm": 0.40751320123672485, "learning_rate": 0.000498590641377304, "loss": 0.1436, "num_input_tokens_seen": 145656464, "step": 67530 }, { "epoch": 11.017128874388254, "grad_norm": 0.3974672257900238, "learning_rate": 0.0004985194618519188, "loss": 0.0649, "num_input_tokens_seen": 145667088, "step": 67535 }, { "epoch": 11.01794453507341, "grad_norm": 0.025889523327350616, "learning_rate": 0.0004984482823565386, "loss": 0.0565, "num_input_tokens_seen": 145679152, "step": 67540 }, { "epoch": 11.018760195758565, "grad_norm": 0.06833422929048538, "learning_rate": 0.0004983771028926059, "loss": 0.0146, "num_input_tokens_seen": 145691376, "step": 67545 }, { "epoch": 11.01957585644372, "grad_norm": 0.029239406809210777, "learning_rate": 0.0004983059234615635, "loss": 0.0197, "num_input_tokens_seen": 145701136, "step": 67550 }, { "epoch": 11.020391517128875, "grad_norm": 0.03531575947999954, "learning_rate": 0.0004982347440648534, "loss": 0.0094, "num_input_tokens_seen": 145710736, "step": 67555 }, { "epoch": 11.021207177814029, "grad_norm": 0.01962222345173359, "learning_rate": 0.0004981635647039186, "loss": 0.0151, "num_input_tokens_seen": 145722640, "step": 67560 }, { "epoch": 11.022022838499185, "grad_norm": 0.6887209415435791, "learning_rate": 0.0004980923853802015, "loss": 0.1384, "num_input_tokens_seen": 145733040, "step": 67565 }, { "epoch": 11.022838499184338, "grad_norm": 0.05965841934084892, "learning_rate": 0.0004980212060951447, "loss": 0.025, "num_input_tokens_seen": 145743024, "step": 67570 }, { "epoch": 11.023654159869494, "grad_norm": 0.04420540854334831, "learning_rate": 0.0004979500268501905, "loss": 0.0073, "num_input_tokens_seen": 145754544, "step": 67575 }, { "epoch": 11.02446982055465, "grad_norm": 0.0665394589304924, "learning_rate": 0.0004978788476467816, "loss": 0.0056, "num_input_tokens_seen": 145765232, "step": 67580 }, { "epoch": 11.025285481239804, "grad_norm": 0.06753057986497879, "learning_rate": 0.0004978076684863607, "loss": 0.0423, "num_input_tokens_seen": 145776432, "step": 67585 }, { "epoch": 11.02610114192496, "grad_norm": 0.1472737342119217, "learning_rate": 0.0004977364893703701, "loss": 0.0111, "num_input_tokens_seen": 145786928, "step": 67590 }, { "epoch": 11.026916802610113, "grad_norm": 0.0875563696026802, "learning_rate": 0.0004976653103002526, "loss": 0.0087, "num_input_tokens_seen": 145797232, "step": 67595 }, { "epoch": 11.02773246329527, "grad_norm": 0.030784960836172104, "learning_rate": 0.0004975941312774502, "loss": 0.0134, "num_input_tokens_seen": 145808240, "step": 67600 }, { "epoch": 11.028548123980425, "grad_norm": 0.010304808616638184, "learning_rate": 0.0004975229523034061, "loss": 0.0152, "num_input_tokens_seen": 145819792, "step": 67605 }, { "epoch": 11.029363784665579, "grad_norm": 0.01321113295853138, "learning_rate": 0.0004974517733795623, "loss": 0.1318, "num_input_tokens_seen": 145829616, "step": 67610 }, { "epoch": 11.030179445350734, "grad_norm": 0.004699608311057091, "learning_rate": 0.0004973805945073617, "loss": 0.0047, "num_input_tokens_seen": 145839984, "step": 67615 }, { "epoch": 11.030995106035888, "grad_norm": 0.372491717338562, "learning_rate": 0.0004973094156882466, "loss": 0.0493, "num_input_tokens_seen": 145851088, "step": 67620 }, { "epoch": 11.031810766721044, "grad_norm": 0.007300133816897869, "learning_rate": 0.0004972382369236596, "loss": 0.0308, "num_input_tokens_seen": 145860528, "step": 67625 }, { "epoch": 11.0326264274062, "grad_norm": 0.03710523247718811, "learning_rate": 0.0004971670582150431, "loss": 0.1208, "num_input_tokens_seen": 145871088, "step": 67630 }, { "epoch": 11.033442088091354, "grad_norm": 0.009470686316490173, "learning_rate": 0.0004970958795638401, "loss": 0.062, "num_input_tokens_seen": 145882672, "step": 67635 }, { "epoch": 11.03425774877651, "grad_norm": 0.00818817038089037, "learning_rate": 0.0004970247009714924, "loss": 0.0658, "num_input_tokens_seen": 145894192, "step": 67640 }, { "epoch": 11.035073409461663, "grad_norm": 0.19118033349514008, "learning_rate": 0.0004969535224394432, "loss": 0.0792, "num_input_tokens_seen": 145905264, "step": 67645 }, { "epoch": 11.035889070146819, "grad_norm": 0.18684667348861694, "learning_rate": 0.0004968823439691346, "loss": 0.0483, "num_input_tokens_seen": 145916432, "step": 67650 }, { "epoch": 11.036704730831975, "grad_norm": 0.03479033336043358, "learning_rate": 0.0004968111655620093, "loss": 0.0113, "num_input_tokens_seen": 145927632, "step": 67655 }, { "epoch": 11.037520391517129, "grad_norm": 0.011880474165081978, "learning_rate": 0.0004967399872195096, "loss": 0.0206, "num_input_tokens_seen": 145939600, "step": 67660 }, { "epoch": 11.038336052202284, "grad_norm": 0.294475257396698, "learning_rate": 0.0004966688089430785, "loss": 0.021, "num_input_tokens_seen": 145948752, "step": 67665 }, { "epoch": 11.039151712887438, "grad_norm": 0.0014246327336877584, "learning_rate": 0.000496597630734158, "loss": 0.0277, "num_input_tokens_seen": 145958416, "step": 67670 }, { "epoch": 11.039967373572594, "grad_norm": 0.7035305500030518, "learning_rate": 0.0004965264525941908, "loss": 0.1272, "num_input_tokens_seen": 145970032, "step": 67675 }, { "epoch": 11.040783034257748, "grad_norm": 0.13287997245788574, "learning_rate": 0.0004964552745246196, "loss": 0.019, "num_input_tokens_seen": 145980272, "step": 67680 }, { "epoch": 11.041598694942904, "grad_norm": 0.024083945900201797, "learning_rate": 0.0004963840965268866, "loss": 0.0088, "num_input_tokens_seen": 145990608, "step": 67685 }, { "epoch": 11.04241435562806, "grad_norm": 0.010029754601418972, "learning_rate": 0.0004963129186024346, "loss": 0.1531, "num_input_tokens_seen": 146002256, "step": 67690 }, { "epoch": 11.043230016313213, "grad_norm": 0.03101443126797676, "learning_rate": 0.0004962417407527059, "loss": 0.0037, "num_input_tokens_seen": 146013488, "step": 67695 }, { "epoch": 11.044045676998369, "grad_norm": 0.06969520449638367, "learning_rate": 0.0004961705629791431, "loss": 0.0075, "num_input_tokens_seen": 146024496, "step": 67700 }, { "epoch": 11.044861337683523, "grad_norm": 0.030503438785672188, "learning_rate": 0.0004960993852831888, "loss": 0.0111, "num_input_tokens_seen": 146035568, "step": 67705 }, { "epoch": 11.045676998368679, "grad_norm": 0.2159014195203781, "learning_rate": 0.0004960282076662853, "loss": 0.1788, "num_input_tokens_seen": 146046928, "step": 67710 }, { "epoch": 11.046492659053834, "grad_norm": 0.048196107149124146, "learning_rate": 0.0004959570301298752, "loss": 0.0301, "num_input_tokens_seen": 146058192, "step": 67715 }, { "epoch": 11.047308319738988, "grad_norm": 0.008495060727000237, "learning_rate": 0.0004958858526754012, "loss": 0.0207, "num_input_tokens_seen": 146068848, "step": 67720 }, { "epoch": 11.048123980424144, "grad_norm": 0.017228279262781143, "learning_rate": 0.0004958146753043053, "loss": 0.0147, "num_input_tokens_seen": 146079376, "step": 67725 }, { "epoch": 11.048939641109298, "grad_norm": 0.08724914491176605, "learning_rate": 0.0004957434980180307, "loss": 0.1494, "num_input_tokens_seen": 146089648, "step": 67730 }, { "epoch": 11.049755301794454, "grad_norm": 0.7911468744277954, "learning_rate": 0.0004956723208180191, "loss": 0.0831, "num_input_tokens_seen": 146100208, "step": 67735 }, { "epoch": 11.05057096247961, "grad_norm": 0.0055349902249872684, "learning_rate": 0.0004956011437057138, "loss": 0.0968, "num_input_tokens_seen": 146110960, "step": 67740 }, { "epoch": 11.051386623164763, "grad_norm": 0.5615183711051941, "learning_rate": 0.0004955299666825566, "loss": 0.0528, "num_input_tokens_seen": 146121328, "step": 67745 }, { "epoch": 11.052202283849919, "grad_norm": 0.11097829043865204, "learning_rate": 0.0004954587897499905, "loss": 0.0156, "num_input_tokens_seen": 146131728, "step": 67750 }, { "epoch": 11.053017944535073, "grad_norm": 0.051015403121709824, "learning_rate": 0.0004953876129094576, "loss": 0.0203, "num_input_tokens_seen": 146142448, "step": 67755 }, { "epoch": 11.053833605220229, "grad_norm": 0.053235895931720734, "learning_rate": 0.0004953164361624008, "loss": 0.011, "num_input_tokens_seen": 146153296, "step": 67760 }, { "epoch": 11.054649265905383, "grad_norm": 0.06825089454650879, "learning_rate": 0.0004952452595102621, "loss": 0.0699, "num_input_tokens_seen": 146163280, "step": 67765 }, { "epoch": 11.055464926590538, "grad_norm": 0.4075123071670532, "learning_rate": 0.0004951740829544846, "loss": 0.056, "num_input_tokens_seen": 146174064, "step": 67770 }, { "epoch": 11.056280587275694, "grad_norm": 0.6430773138999939, "learning_rate": 0.00049510290649651, "loss": 0.0869, "num_input_tokens_seen": 146186128, "step": 67775 }, { "epoch": 11.057096247960848, "grad_norm": 0.07162778824567795, "learning_rate": 0.0004950317301377813, "loss": 0.0886, "num_input_tokens_seen": 146196048, "step": 67780 }, { "epoch": 11.057911908646004, "grad_norm": 0.004935052245855331, "learning_rate": 0.0004949605538797412, "loss": 0.0176, "num_input_tokens_seen": 146205136, "step": 67785 }, { "epoch": 11.058727569331158, "grad_norm": 0.15272599458694458, "learning_rate": 0.0004948893777238316, "loss": 0.0376, "num_input_tokens_seen": 146214576, "step": 67790 }, { "epoch": 11.059543230016313, "grad_norm": 0.2145013064146042, "learning_rate": 0.0004948182016714954, "loss": 0.0155, "num_input_tokens_seen": 146224784, "step": 67795 }, { "epoch": 11.060358890701469, "grad_norm": 0.04609135538339615, "learning_rate": 0.0004947470257241748, "loss": 0.1097, "num_input_tokens_seen": 146234768, "step": 67800 }, { "epoch": 11.061174551386623, "grad_norm": 0.0032370868138968945, "learning_rate": 0.0004946758498833125, "loss": 0.075, "num_input_tokens_seen": 146245584, "step": 67805 }, { "epoch": 11.061990212071779, "grad_norm": 0.013097151182591915, "learning_rate": 0.0004946046741503507, "loss": 0.0019, "num_input_tokens_seen": 146255504, "step": 67810 }, { "epoch": 11.062805872756933, "grad_norm": 0.07759791612625122, "learning_rate": 0.0004945334985267323, "loss": 0.1643, "num_input_tokens_seen": 146267408, "step": 67815 }, { "epoch": 11.063621533442088, "grad_norm": 0.05265507474541664, "learning_rate": 0.0004944623230138991, "loss": 0.0176, "num_input_tokens_seen": 146276752, "step": 67820 }, { "epoch": 11.064437194127244, "grad_norm": 0.045992277562618256, "learning_rate": 0.0004943911476132943, "loss": 0.0113, "num_input_tokens_seen": 146288688, "step": 67825 }, { "epoch": 11.065252854812398, "grad_norm": 0.01629851758480072, "learning_rate": 0.0004943199723263597, "loss": 0.0067, "num_input_tokens_seen": 146298960, "step": 67830 }, { "epoch": 11.066068515497554, "grad_norm": 0.02240595407783985, "learning_rate": 0.0004942487971545383, "loss": 0.1293, "num_input_tokens_seen": 146308688, "step": 67835 }, { "epoch": 11.066884176182707, "grad_norm": 0.005687420256435871, "learning_rate": 0.0004941776220992722, "loss": 0.0458, "num_input_tokens_seen": 146319760, "step": 67840 }, { "epoch": 11.067699836867863, "grad_norm": 0.030906690284609795, "learning_rate": 0.0004941064471620041, "loss": 0.127, "num_input_tokens_seen": 146330736, "step": 67845 }, { "epoch": 11.068515497553017, "grad_norm": 0.012596914544701576, "learning_rate": 0.0004940352723441763, "loss": 0.028, "num_input_tokens_seen": 146340656, "step": 67850 }, { "epoch": 11.069331158238173, "grad_norm": 0.21691729128360748, "learning_rate": 0.0004939640976472311, "loss": 0.0253, "num_input_tokens_seen": 146352624, "step": 67855 }, { "epoch": 11.070146818923329, "grad_norm": 0.4788616895675659, "learning_rate": 0.0004938929230726111, "loss": 0.1496, "num_input_tokens_seen": 146363472, "step": 67860 }, { "epoch": 11.070962479608482, "grad_norm": 0.2099599987268448, "learning_rate": 0.0004938217486217591, "loss": 0.0493, "num_input_tokens_seen": 146374128, "step": 67865 }, { "epoch": 11.071778140293638, "grad_norm": 0.6797800064086914, "learning_rate": 0.0004937505742961169, "loss": 0.1018, "num_input_tokens_seen": 146384432, "step": 67870 }, { "epoch": 11.072593800978792, "grad_norm": 0.007296163123100996, "learning_rate": 0.0004936794000971274, "loss": 0.0814, "num_input_tokens_seen": 146395024, "step": 67875 }, { "epoch": 11.073409461663948, "grad_norm": 0.006942222360521555, "learning_rate": 0.0004936082260262328, "loss": 0.0195, "num_input_tokens_seen": 146406608, "step": 67880 }, { "epoch": 11.074225122349104, "grad_norm": 0.11782301962375641, "learning_rate": 0.0004935370520848755, "loss": 0.0583, "num_input_tokens_seen": 146418480, "step": 67885 }, { "epoch": 11.075040783034257, "grad_norm": 0.05662049725651741, "learning_rate": 0.0004934658782744983, "loss": 0.0197, "num_input_tokens_seen": 146429840, "step": 67890 }, { "epoch": 11.075856443719413, "grad_norm": 0.012534605339169502, "learning_rate": 0.0004933947045965431, "loss": 0.022, "num_input_tokens_seen": 146441104, "step": 67895 }, { "epoch": 11.076672104404567, "grad_norm": 0.07312393933534622, "learning_rate": 0.0004933235310524528, "loss": 0.047, "num_input_tokens_seen": 146452752, "step": 67900 }, { "epoch": 11.077487765089723, "grad_norm": 0.25127604603767395, "learning_rate": 0.0004932523576436695, "loss": 0.0104, "num_input_tokens_seen": 146462576, "step": 67905 }, { "epoch": 11.078303425774878, "grad_norm": 0.2967259883880615, "learning_rate": 0.0004931811843716358, "loss": 0.0257, "num_input_tokens_seen": 146472976, "step": 67910 }, { "epoch": 11.079119086460032, "grad_norm": 0.08333013951778412, "learning_rate": 0.000493110011237794, "loss": 0.0419, "num_input_tokens_seen": 146484208, "step": 67915 }, { "epoch": 11.079934747145188, "grad_norm": 0.5155033469200134, "learning_rate": 0.0004930388382435866, "loss": 0.1453, "num_input_tokens_seen": 146495088, "step": 67920 }, { "epoch": 11.080750407830342, "grad_norm": 0.018722936511039734, "learning_rate": 0.0004929676653904558, "loss": 0.0302, "num_input_tokens_seen": 146506736, "step": 67925 }, { "epoch": 11.081566068515498, "grad_norm": 0.03747663274407387, "learning_rate": 0.0004928964926798445, "loss": 0.0185, "num_input_tokens_seen": 146517168, "step": 67930 }, { "epoch": 11.082381729200652, "grad_norm": 0.5583581328392029, "learning_rate": 0.0004928253201131945, "loss": 0.0388, "num_input_tokens_seen": 146526640, "step": 67935 }, { "epoch": 11.083197389885807, "grad_norm": 0.0048487442545592785, "learning_rate": 0.0004927541476919487, "loss": 0.0404, "num_input_tokens_seen": 146536496, "step": 67940 }, { "epoch": 11.084013050570963, "grad_norm": 0.30756625533103943, "learning_rate": 0.0004926829754175492, "loss": 0.0477, "num_input_tokens_seen": 146547664, "step": 67945 }, { "epoch": 11.084828711256117, "grad_norm": 0.08144348859786987, "learning_rate": 0.0004926118032914385, "loss": 0.0143, "num_input_tokens_seen": 146558256, "step": 67950 }, { "epoch": 11.085644371941273, "grad_norm": 0.00683627650141716, "learning_rate": 0.0004925406313150589, "loss": 0.0505, "num_input_tokens_seen": 146570128, "step": 67955 }, { "epoch": 11.086460032626427, "grad_norm": 0.05964602530002594, "learning_rate": 0.000492469459489853, "loss": 0.0106, "num_input_tokens_seen": 146580944, "step": 67960 }, { "epoch": 11.087275693311582, "grad_norm": 0.0028949591796845198, "learning_rate": 0.0004923982878172629, "loss": 0.0781, "num_input_tokens_seen": 146591952, "step": 67965 }, { "epoch": 11.088091353996738, "grad_norm": 0.003287223633378744, "learning_rate": 0.0004923271162987314, "loss": 0.064, "num_input_tokens_seen": 146603440, "step": 67970 }, { "epoch": 11.088907014681892, "grad_norm": 0.0037696603685617447, "learning_rate": 0.0004922559449357003, "loss": 0.0767, "num_input_tokens_seen": 146614736, "step": 67975 }, { "epoch": 11.089722675367048, "grad_norm": 0.01381500344723463, "learning_rate": 0.0004921847737296125, "loss": 0.1921, "num_input_tokens_seen": 146626576, "step": 67980 }, { "epoch": 11.090538336052202, "grad_norm": 0.027801990509033203, "learning_rate": 0.0004921136026819101, "loss": 0.025, "num_input_tokens_seen": 146637648, "step": 67985 }, { "epoch": 11.091353996737357, "grad_norm": 0.013425898738205433, "learning_rate": 0.0004920424317940355, "loss": 0.0165, "num_input_tokens_seen": 146648560, "step": 67990 }, { "epoch": 11.092169657422513, "grad_norm": 0.3328157663345337, "learning_rate": 0.0004919712610674312, "loss": 0.0903, "num_input_tokens_seen": 146660112, "step": 67995 }, { "epoch": 11.092985318107667, "grad_norm": 0.07269519567489624, "learning_rate": 0.0004919000905035394, "loss": 0.0544, "num_input_tokens_seen": 146669744, "step": 68000 }, { "epoch": 11.093800978792823, "grad_norm": 0.49982619285583496, "learning_rate": 0.0004918289201038026, "loss": 0.0244, "num_input_tokens_seen": 146681168, "step": 68005 }, { "epoch": 11.094616639477977, "grad_norm": 0.00453852117061615, "learning_rate": 0.0004917577498696631, "loss": 0.1361, "num_input_tokens_seen": 146691984, "step": 68010 }, { "epoch": 11.095432300163132, "grad_norm": 0.04261036962270737, "learning_rate": 0.0004916865798025634, "loss": 0.1085, "num_input_tokens_seen": 146701872, "step": 68015 }, { "epoch": 11.096247960848286, "grad_norm": 0.006915039382874966, "learning_rate": 0.0004916154099039455, "loss": 0.0108, "num_input_tokens_seen": 146713872, "step": 68020 }, { "epoch": 11.097063621533442, "grad_norm": 0.19862298667430878, "learning_rate": 0.000491544240175252, "loss": 0.0285, "num_input_tokens_seen": 146725360, "step": 68025 }, { "epoch": 11.097879282218598, "grad_norm": 0.2074294239282608, "learning_rate": 0.0004914730706179251, "loss": 0.0685, "num_input_tokens_seen": 146734864, "step": 68030 }, { "epoch": 11.098694942903752, "grad_norm": 0.0070910099893808365, "learning_rate": 0.0004914019012334075, "loss": 0.0477, "num_input_tokens_seen": 146746288, "step": 68035 }, { "epoch": 11.099510603588907, "grad_norm": 0.024258200079202652, "learning_rate": 0.000491330732023141, "loss": 0.0076, "num_input_tokens_seen": 146757808, "step": 68040 }, { "epoch": 11.100326264274061, "grad_norm": 0.07031665742397308, "learning_rate": 0.0004912595629885685, "loss": 0.0388, "num_input_tokens_seen": 146769520, "step": 68045 }, { "epoch": 11.101141924959217, "grad_norm": 0.14385612308979034, "learning_rate": 0.0004911883941311319, "loss": 0.0127, "num_input_tokens_seen": 146779920, "step": 68050 }, { "epoch": 11.101957585644373, "grad_norm": 0.006212405860424042, "learning_rate": 0.0004911172254522737, "loss": 0.0872, "num_input_tokens_seen": 146790768, "step": 68055 }, { "epoch": 11.102773246329527, "grad_norm": 0.6643878221511841, "learning_rate": 0.0004910460569534361, "loss": 0.0952, "num_input_tokens_seen": 146802192, "step": 68060 }, { "epoch": 11.103588907014682, "grad_norm": 0.145207479596138, "learning_rate": 0.0004909748886360617, "loss": 0.018, "num_input_tokens_seen": 146813392, "step": 68065 }, { "epoch": 11.104404567699836, "grad_norm": 0.7535772323608398, "learning_rate": 0.0004909037205015924, "loss": 0.0703, "num_input_tokens_seen": 146825040, "step": 68070 }, { "epoch": 11.105220228384992, "grad_norm": 0.013626150786876678, "learning_rate": 0.000490832552551471, "loss": 0.0087, "num_input_tokens_seen": 146836272, "step": 68075 }, { "epoch": 11.106035889070148, "grad_norm": 2.141616106033325, "learning_rate": 0.0004907613847871393, "loss": 0.1051, "num_input_tokens_seen": 146846768, "step": 68080 }, { "epoch": 11.106851549755302, "grad_norm": 0.029276562854647636, "learning_rate": 0.00049069021721004, "loss": 0.1286, "num_input_tokens_seen": 146858128, "step": 68085 }, { "epoch": 11.107667210440457, "grad_norm": 1.597647786140442, "learning_rate": 0.0004906190498216151, "loss": 0.1128, "num_input_tokens_seen": 146868944, "step": 68090 }, { "epoch": 11.108482871125611, "grad_norm": 0.012062541209161282, "learning_rate": 0.0004905478826233072, "loss": 0.1895, "num_input_tokens_seen": 146879824, "step": 68095 }, { "epoch": 11.109298531810767, "grad_norm": 0.1541140079498291, "learning_rate": 0.0004904767156165585, "loss": 0.0157, "num_input_tokens_seen": 146890288, "step": 68100 }, { "epoch": 11.11011419249592, "grad_norm": 0.017101237550377846, "learning_rate": 0.000490405548802811, "loss": 0.0112, "num_input_tokens_seen": 146900368, "step": 68105 }, { "epoch": 11.110929853181077, "grad_norm": 0.20955245196819305, "learning_rate": 0.0004903343821835075, "loss": 0.0371, "num_input_tokens_seen": 146912112, "step": 68110 }, { "epoch": 11.111745513866232, "grad_norm": 0.0646684393286705, "learning_rate": 0.0004902632157600898, "loss": 0.0599, "num_input_tokens_seen": 146923184, "step": 68115 }, { "epoch": 11.112561174551386, "grad_norm": 0.13581673800945282, "learning_rate": 0.0004901920495340007, "loss": 0.0092, "num_input_tokens_seen": 146934576, "step": 68120 }, { "epoch": 11.113376835236542, "grad_norm": 0.003637961344793439, "learning_rate": 0.0004901208835066818, "loss": 0.0031, "num_input_tokens_seen": 146944432, "step": 68125 }, { "epoch": 11.114192495921696, "grad_norm": 0.061594538390636444, "learning_rate": 0.0004900497176795759, "loss": 0.0083, "num_input_tokens_seen": 146955856, "step": 68130 }, { "epoch": 11.115008156606851, "grad_norm": 0.01214695069938898, "learning_rate": 0.000489978552054125, "loss": 0.1307, "num_input_tokens_seen": 146965712, "step": 68135 }, { "epoch": 11.115823817292007, "grad_norm": 0.1156269982457161, "learning_rate": 0.0004899073866317717, "loss": 0.0978, "num_input_tokens_seen": 146975312, "step": 68140 }, { "epoch": 11.116639477977161, "grad_norm": 0.012482743710279465, "learning_rate": 0.0004898362214139577, "loss": 0.0068, "num_input_tokens_seen": 146986384, "step": 68145 }, { "epoch": 11.117455138662317, "grad_norm": 0.07301583886146545, "learning_rate": 0.0004897650564021257, "loss": 0.0333, "num_input_tokens_seen": 146998384, "step": 68150 }, { "epoch": 11.11827079934747, "grad_norm": 0.01543382741510868, "learning_rate": 0.0004896938915977178, "loss": 0.0549, "num_input_tokens_seen": 147009840, "step": 68155 }, { "epoch": 11.119086460032626, "grad_norm": 0.058136098086833954, "learning_rate": 0.0004896227270021763, "loss": 0.0664, "num_input_tokens_seen": 147020816, "step": 68160 }, { "epoch": 11.119902120717782, "grad_norm": 0.10910902172327042, "learning_rate": 0.0004895515626169433, "loss": 0.0862, "num_input_tokens_seen": 147031120, "step": 68165 }, { "epoch": 11.120717781402936, "grad_norm": 0.25030431151390076, "learning_rate": 0.0004894803984434613, "loss": 0.0206, "num_input_tokens_seen": 147042224, "step": 68170 }, { "epoch": 11.121533442088092, "grad_norm": 0.004535115789622068, "learning_rate": 0.0004894092344831722, "loss": 0.1028, "num_input_tokens_seen": 147052144, "step": 68175 }, { "epoch": 11.122349102773246, "grad_norm": 0.023104652762413025, "learning_rate": 0.0004893380707375186, "loss": 0.0188, "num_input_tokens_seen": 147061616, "step": 68180 }, { "epoch": 11.123164763458401, "grad_norm": 0.008085733279585838, "learning_rate": 0.0004892669072079423, "loss": 0.0336, "num_input_tokens_seen": 147071728, "step": 68185 }, { "epoch": 11.123980424143557, "grad_norm": 0.0029536462388932705, "learning_rate": 0.000489195743895886, "loss": 0.112, "num_input_tokens_seen": 147083536, "step": 68190 }, { "epoch": 11.124796084828711, "grad_norm": 0.036093175411224365, "learning_rate": 0.0004891245808027913, "loss": 0.0226, "num_input_tokens_seen": 147093520, "step": 68195 }, { "epoch": 11.125611745513867, "grad_norm": 0.41371747851371765, "learning_rate": 0.0004890534179301009, "loss": 0.1245, "num_input_tokens_seen": 147104720, "step": 68200 }, { "epoch": 11.12642740619902, "grad_norm": 0.6317373514175415, "learning_rate": 0.0004889822552792572, "loss": 0.0622, "num_input_tokens_seen": 147115952, "step": 68205 }, { "epoch": 11.127243066884176, "grad_norm": 0.17889969050884247, "learning_rate": 0.0004889110928517016, "loss": 0.0273, "num_input_tokens_seen": 147127504, "step": 68210 }, { "epoch": 11.12805872756933, "grad_norm": 0.012660090811550617, "learning_rate": 0.0004888399306488771, "loss": 0.0262, "num_input_tokens_seen": 147138448, "step": 68215 }, { "epoch": 11.128874388254486, "grad_norm": 0.028991296887397766, "learning_rate": 0.0004887687686722254, "loss": 0.01, "num_input_tokens_seen": 147148560, "step": 68220 }, { "epoch": 11.129690048939642, "grad_norm": 0.09009960293769836, "learning_rate": 0.000488697606923189, "loss": 0.0502, "num_input_tokens_seen": 147159600, "step": 68225 }, { "epoch": 11.130505709624796, "grad_norm": 0.10953062027692795, "learning_rate": 0.0004886264454032097, "loss": 0.0465, "num_input_tokens_seen": 147170064, "step": 68230 }, { "epoch": 11.131321370309951, "grad_norm": 0.0044024852104485035, "learning_rate": 0.0004885552841137302, "loss": 0.0088, "num_input_tokens_seen": 147180624, "step": 68235 }, { "epoch": 11.132137030995105, "grad_norm": 0.5922150015830994, "learning_rate": 0.0004884841230561922, "loss": 0.1376, "num_input_tokens_seen": 147192464, "step": 68240 }, { "epoch": 11.132952691680261, "grad_norm": 0.043080154806375504, "learning_rate": 0.0004884129622320381, "loss": 0.1046, "num_input_tokens_seen": 147202960, "step": 68245 }, { "epoch": 11.133768352365417, "grad_norm": 0.01975761353969574, "learning_rate": 0.0004883418016427099, "loss": 0.1922, "num_input_tokens_seen": 147213168, "step": 68250 }, { "epoch": 11.13458401305057, "grad_norm": 0.02843403071165085, "learning_rate": 0.00048827064128965014, "loss": 0.0354, "num_input_tokens_seen": 147224048, "step": 68255 }, { "epoch": 11.135399673735726, "grad_norm": 0.07883605360984802, "learning_rate": 0.00048819948117430047, "loss": 0.0653, "num_input_tokens_seen": 147235696, "step": 68260 }, { "epoch": 11.13621533442088, "grad_norm": 0.004699504468590021, "learning_rate": 0.00048812832129810347, "loss": 0.0303, "num_input_tokens_seen": 147244944, "step": 68265 }, { "epoch": 11.137030995106036, "grad_norm": 0.34906715154647827, "learning_rate": 0.0004880571616625009, "loss": 0.0294, "num_input_tokens_seen": 147256112, "step": 68270 }, { "epoch": 11.137846655791192, "grad_norm": 0.016359899193048477, "learning_rate": 0.00048798600226893535, "loss": 0.2224, "num_input_tokens_seen": 147266832, "step": 68275 }, { "epoch": 11.138662316476346, "grad_norm": 0.3681524693965912, "learning_rate": 0.00048791484311884844, "loss": 0.1487, "num_input_tokens_seen": 147279120, "step": 68280 }, { "epoch": 11.139477977161501, "grad_norm": 0.6451890468597412, "learning_rate": 0.0004878436842136828, "loss": 0.0725, "num_input_tokens_seen": 147289744, "step": 68285 }, { "epoch": 11.140293637846655, "grad_norm": 0.45973724126815796, "learning_rate": 0.0004877725255548801, "loss": 0.1027, "num_input_tokens_seen": 147299760, "step": 68290 }, { "epoch": 11.141109298531811, "grad_norm": 0.3250347375869751, "learning_rate": 0.0004877013671438828, "loss": 0.0515, "num_input_tokens_seen": 147310032, "step": 68295 }, { "epoch": 11.141924959216965, "grad_norm": 0.0338030569255352, "learning_rate": 0.0004876302089821329, "loss": 0.2613, "num_input_tokens_seen": 147320624, "step": 68300 }, { "epoch": 11.14274061990212, "grad_norm": 0.013895218260586262, "learning_rate": 0.0004875590510710724, "loss": 0.0079, "num_input_tokens_seen": 147331216, "step": 68305 }, { "epoch": 11.143556280587276, "grad_norm": 0.029839709401130676, "learning_rate": 0.00048748789341214373, "loss": 0.0151, "num_input_tokens_seen": 147344240, "step": 68310 }, { "epoch": 11.14437194127243, "grad_norm": 0.05284141004085541, "learning_rate": 0.00048741673600678857, "loss": 0.0155, "num_input_tokens_seen": 147353392, "step": 68315 }, { "epoch": 11.145187601957586, "grad_norm": 0.11906466633081436, "learning_rate": 0.00048734557885644924, "loss": 0.044, "num_input_tokens_seen": 147365136, "step": 68320 }, { "epoch": 11.14600326264274, "grad_norm": 0.7328969240188599, "learning_rate": 0.00048727442196256786, "loss": 0.0982, "num_input_tokens_seen": 147375952, "step": 68325 }, { "epoch": 11.146818923327896, "grad_norm": 0.03840382769703865, "learning_rate": 0.0004872032653265865, "loss": 0.006, "num_input_tokens_seen": 147387952, "step": 68330 }, { "epoch": 11.147634584013051, "grad_norm": 0.3084070682525635, "learning_rate": 0.0004871321089499472, "loss": 0.0368, "num_input_tokens_seen": 147399120, "step": 68335 }, { "epoch": 11.148450244698205, "grad_norm": 0.04213375225663185, "learning_rate": 0.00048706095283409194, "loss": 0.0121, "num_input_tokens_seen": 147409872, "step": 68340 }, { "epoch": 11.149265905383361, "grad_norm": 0.0600258894264698, "learning_rate": 0.00048698979698046286, "loss": 0.1678, "num_input_tokens_seen": 147420976, "step": 68345 }, { "epoch": 11.150081566068515, "grad_norm": 0.1869877725839615, "learning_rate": 0.0004869186413905023, "loss": 0.0777, "num_input_tokens_seen": 147430608, "step": 68350 }, { "epoch": 11.15089722675367, "grad_norm": 0.32999178767204285, "learning_rate": 0.00048684748606565175, "loss": 0.0518, "num_input_tokens_seen": 147442960, "step": 68355 }, { "epoch": 11.151712887438826, "grad_norm": 0.08095971494913101, "learning_rate": 0.00048677633100735387, "loss": 0.0249, "num_input_tokens_seen": 147454512, "step": 68360 }, { "epoch": 11.15252854812398, "grad_norm": 0.07299808412790298, "learning_rate": 0.00048670517621705016, "loss": 0.043, "num_input_tokens_seen": 147465840, "step": 68365 }, { "epoch": 11.153344208809136, "grad_norm": 0.02825094386935234, "learning_rate": 0.0004866340216961832, "loss": 0.0885, "num_input_tokens_seen": 147476688, "step": 68370 }, { "epoch": 11.15415986949429, "grad_norm": 0.3080146312713623, "learning_rate": 0.00048656286744619447, "loss": 0.0461, "num_input_tokens_seen": 147486192, "step": 68375 }, { "epoch": 11.154975530179446, "grad_norm": 0.09759595990180969, "learning_rate": 0.0004864917134685265, "loss": 0.0481, "num_input_tokens_seen": 147498288, "step": 68380 }, { "epoch": 11.1557911908646, "grad_norm": 0.01160676870495081, "learning_rate": 0.0004864205597646209, "loss": 0.0129, "num_input_tokens_seen": 147509360, "step": 68385 }, { "epoch": 11.156606851549755, "grad_norm": 0.03141835331916809, "learning_rate": 0.00048634940633592006, "loss": 0.0511, "num_input_tokens_seen": 147520400, "step": 68390 }, { "epoch": 11.15742251223491, "grad_norm": 0.14169174432754517, "learning_rate": 0.00048627825318386567, "loss": 0.0624, "num_input_tokens_seen": 147528624, "step": 68395 }, { "epoch": 11.158238172920065, "grad_norm": 0.04841401055455208, "learning_rate": 0.00048620710030990004, "loss": 0.1047, "num_input_tokens_seen": 147539984, "step": 68400 }, { "epoch": 11.15905383360522, "grad_norm": 0.12262149155139923, "learning_rate": 0.0004861359477154648, "loss": 0.01, "num_input_tokens_seen": 147550000, "step": 68405 }, { "epoch": 11.159869494290374, "grad_norm": 0.09017926454544067, "learning_rate": 0.00048606479540200243, "loss": 0.0656, "num_input_tokens_seen": 147559920, "step": 68410 }, { "epoch": 11.16068515497553, "grad_norm": 0.21634677052497864, "learning_rate": 0.00048599364337095443, "loss": 0.0612, "num_input_tokens_seen": 147570864, "step": 68415 }, { "epoch": 11.161500815660686, "grad_norm": 0.09412604570388794, "learning_rate": 0.000485922491623763, "loss": 0.0699, "num_input_tokens_seen": 147580816, "step": 68420 }, { "epoch": 11.16231647634584, "grad_norm": 0.6064545512199402, "learning_rate": 0.0004858513401618704, "loss": 0.051, "num_input_tokens_seen": 147591216, "step": 68425 }, { "epoch": 11.163132137030995, "grad_norm": 0.728542149066925, "learning_rate": 0.00048578018898671804, "loss": 0.1015, "num_input_tokens_seen": 147601712, "step": 68430 }, { "epoch": 11.16394779771615, "grad_norm": 0.009319191798567772, "learning_rate": 0.0004857090380997484, "loss": 0.0362, "num_input_tokens_seen": 147612688, "step": 68435 }, { "epoch": 11.164763458401305, "grad_norm": 0.19620512425899506, "learning_rate": 0.00048563788750240314, "loss": 0.008, "num_input_tokens_seen": 147624368, "step": 68440 }, { "epoch": 11.16557911908646, "grad_norm": 0.08827873319387436, "learning_rate": 0.00048556673719612445, "loss": 0.0165, "num_input_tokens_seen": 147634992, "step": 68445 }, { "epoch": 11.166394779771615, "grad_norm": 0.013416807167232037, "learning_rate": 0.00048549558718235386, "loss": 0.0179, "num_input_tokens_seen": 147646256, "step": 68450 }, { "epoch": 11.16721044045677, "grad_norm": 0.0739019587635994, "learning_rate": 0.0004854244374625339, "loss": 0.0339, "num_input_tokens_seen": 147656944, "step": 68455 }, { "epoch": 11.168026101141924, "grad_norm": 0.022409504279494286, "learning_rate": 0.00048535328803810595, "loss": 0.0034, "num_input_tokens_seen": 147668144, "step": 68460 }, { "epoch": 11.16884176182708, "grad_norm": 0.009837953373789787, "learning_rate": 0.0004852821389105123, "loss": 0.0175, "num_input_tokens_seen": 147679728, "step": 68465 }, { "epoch": 11.169657422512234, "grad_norm": 0.026975117623806, "learning_rate": 0.00048521099008119484, "loss": 0.0055, "num_input_tokens_seen": 147690416, "step": 68470 }, { "epoch": 11.17047308319739, "grad_norm": 0.25027230381965637, "learning_rate": 0.0004851398415515954, "loss": 0.0202, "num_input_tokens_seen": 147701744, "step": 68475 }, { "epoch": 11.171288743882545, "grad_norm": 0.006436120253056288, "learning_rate": 0.0004850686933231559, "loss": 0.0902, "num_input_tokens_seen": 147712816, "step": 68480 }, { "epoch": 11.1721044045677, "grad_norm": 0.5884696245193481, "learning_rate": 0.00048499754539731827, "loss": 0.0675, "num_input_tokens_seen": 147724272, "step": 68485 }, { "epoch": 11.172920065252855, "grad_norm": 0.23944693803787231, "learning_rate": 0.0004849263977755243, "loss": 0.0288, "num_input_tokens_seen": 147736208, "step": 68490 }, { "epoch": 11.173735725938009, "grad_norm": 0.020794939249753952, "learning_rate": 0.00048485525045921627, "loss": 0.0135, "num_input_tokens_seen": 147746960, "step": 68495 }, { "epoch": 11.174551386623165, "grad_norm": 0.014661340042948723, "learning_rate": 0.00048478410344983554, "loss": 0.0399, "num_input_tokens_seen": 147757328, "step": 68500 }, { "epoch": 11.17536704730832, "grad_norm": 0.030835721641778946, "learning_rate": 0.00048471295674882447, "loss": 0.0098, "num_input_tokens_seen": 147767120, "step": 68505 }, { "epoch": 11.176182707993474, "grad_norm": 0.6094796061515808, "learning_rate": 0.0004846418103576245, "loss": 0.0438, "num_input_tokens_seen": 147778192, "step": 68510 }, { "epoch": 11.17699836867863, "grad_norm": 0.024500062689185143, "learning_rate": 0.000484570664277678, "loss": 0.0692, "num_input_tokens_seen": 147788304, "step": 68515 }, { "epoch": 11.177814029363784, "grad_norm": 0.11559928953647614, "learning_rate": 0.00048449951851042627, "loss": 0.0956, "num_input_tokens_seen": 147798288, "step": 68520 }, { "epoch": 11.17862969004894, "grad_norm": 0.04754786193370819, "learning_rate": 0.0004844283730573115, "loss": 0.0277, "num_input_tokens_seen": 147809776, "step": 68525 }, { "epoch": 11.179445350734095, "grad_norm": 0.006235266570001841, "learning_rate": 0.0004843572279197757, "loss": 0.0092, "num_input_tokens_seen": 147820528, "step": 68530 }, { "epoch": 11.18026101141925, "grad_norm": 0.27023226022720337, "learning_rate": 0.0004842860830992604, "loss": 0.0329, "num_input_tokens_seen": 147831728, "step": 68535 }, { "epoch": 11.181076672104405, "grad_norm": 0.04087648168206215, "learning_rate": 0.00048421493859720767, "loss": 0.0154, "num_input_tokens_seen": 147840848, "step": 68540 }, { "epoch": 11.181892332789559, "grad_norm": 0.015296157449483871, "learning_rate": 0.000484143794415059, "loss": 0.0083, "num_input_tokens_seen": 147851056, "step": 68545 }, { "epoch": 11.182707993474715, "grad_norm": 0.057186078280210495, "learning_rate": 0.00048407265055425673, "loss": 0.0422, "num_input_tokens_seen": 147862544, "step": 68550 }, { "epoch": 11.18352365415987, "grad_norm": 0.028340689837932587, "learning_rate": 0.00048400150701624216, "loss": 0.0163, "num_input_tokens_seen": 147873264, "step": 68555 }, { "epoch": 11.184339314845024, "grad_norm": 0.007408503908663988, "learning_rate": 0.0004839303638024576, "loss": 0.0118, "num_input_tokens_seen": 147885232, "step": 68560 }, { "epoch": 11.18515497553018, "grad_norm": 0.009818960912525654, "learning_rate": 0.0004838592209143444, "loss": 0.007, "num_input_tokens_seen": 147896784, "step": 68565 }, { "epoch": 11.185970636215334, "grad_norm": 0.01664491556584835, "learning_rate": 0.0004837880783533447, "loss": 0.1048, "num_input_tokens_seen": 147906704, "step": 68570 }, { "epoch": 11.18678629690049, "grad_norm": 0.002425465499982238, "learning_rate": 0.00048371693612089996, "loss": 0.0023, "num_input_tokens_seen": 147917520, "step": 68575 }, { "epoch": 11.187601957585644, "grad_norm": 0.060494519770145416, "learning_rate": 0.00048364579421845245, "loss": 0.0066, "num_input_tokens_seen": 147928336, "step": 68580 }, { "epoch": 11.1884176182708, "grad_norm": 0.007024856749922037, "learning_rate": 0.0004835746526474434, "loss": 0.0282, "num_input_tokens_seen": 147939696, "step": 68585 }, { "epoch": 11.189233278955955, "grad_norm": 0.0021165311336517334, "learning_rate": 0.00048350351140931505, "loss": 0.0091, "num_input_tokens_seen": 147950832, "step": 68590 }, { "epoch": 11.190048939641109, "grad_norm": 0.03326814994215965, "learning_rate": 0.00048343237050550876, "loss": 0.0363, "num_input_tokens_seen": 147961680, "step": 68595 }, { "epoch": 11.190864600326265, "grad_norm": 0.551831841468811, "learning_rate": 0.0004833612299374667, "loss": 0.0183, "num_input_tokens_seen": 147972560, "step": 68600 }, { "epoch": 11.191680261011419, "grad_norm": 0.013336974196135998, "learning_rate": 0.0004832900897066303, "loss": 0.0168, "num_input_tokens_seen": 147983664, "step": 68605 }, { "epoch": 11.192495921696574, "grad_norm": 0.005504608154296875, "learning_rate": 0.0004832189498144415, "loss": 0.0129, "num_input_tokens_seen": 147994288, "step": 68610 }, { "epoch": 11.19331158238173, "grad_norm": 0.001112022320739925, "learning_rate": 0.0004831478102623419, "loss": 0.1679, "num_input_tokens_seen": 148004400, "step": 68615 }, { "epoch": 11.194127243066884, "grad_norm": 0.004428057465702295, "learning_rate": 0.0004830766710517733, "loss": 0.0029, "num_input_tokens_seen": 148015504, "step": 68620 }, { "epoch": 11.19494290375204, "grad_norm": 0.031340841203927994, "learning_rate": 0.00048300553218417753, "loss": 0.1086, "num_input_tokens_seen": 148025904, "step": 68625 }, { "epoch": 11.195758564437194, "grad_norm": 0.002558451145887375, "learning_rate": 0.0004829343936609961, "loss": 0.0552, "num_input_tokens_seen": 148035920, "step": 68630 }, { "epoch": 11.19657422512235, "grad_norm": 0.030265163630247116, "learning_rate": 0.00048286325548367083, "loss": 0.0306, "num_input_tokens_seen": 148046576, "step": 68635 }, { "epoch": 11.197389885807505, "grad_norm": 1.0759570598602295, "learning_rate": 0.0004827921176536435, "loss": 0.0764, "num_input_tokens_seen": 148057456, "step": 68640 }, { "epoch": 11.198205546492659, "grad_norm": 0.0041409642435610294, "learning_rate": 0.00048272098017235573, "loss": 0.1066, "num_input_tokens_seen": 148069552, "step": 68645 }, { "epoch": 11.199021207177815, "grad_norm": 0.11377374827861786, "learning_rate": 0.0004826498430412492, "loss": 0.0173, "num_input_tokens_seen": 148080048, "step": 68650 }, { "epoch": 11.199836867862969, "grad_norm": 0.01338683906942606, "learning_rate": 0.00048257870626176565, "loss": 0.0071, "num_input_tokens_seen": 148089904, "step": 68655 }, { "epoch": 11.200652528548124, "grad_norm": 0.07801475375890732, "learning_rate": 0.00048250756983534657, "loss": 0.0232, "num_input_tokens_seen": 148101296, "step": 68660 }, { "epoch": 11.201468189233278, "grad_norm": 0.006360513158142567, "learning_rate": 0.000482436433763434, "loss": 0.0117, "num_input_tokens_seen": 148112592, "step": 68665 }, { "epoch": 11.202283849918434, "grad_norm": 0.3516812026500702, "learning_rate": 0.00048236529804746915, "loss": 0.0207, "num_input_tokens_seen": 148123888, "step": 68670 }, { "epoch": 11.20309951060359, "grad_norm": 0.013049885630607605, "learning_rate": 0.0004822941626888941, "loss": 0.0076, "num_input_tokens_seen": 148134096, "step": 68675 }, { "epoch": 11.203915171288743, "grad_norm": 0.0062715401872992516, "learning_rate": 0.0004822230276891502, "loss": 0.0122, "num_input_tokens_seen": 148145072, "step": 68680 }, { "epoch": 11.2047308319739, "grad_norm": 0.10635531693696976, "learning_rate": 0.00048215189304967934, "loss": 0.0478, "num_input_tokens_seen": 148156528, "step": 68685 }, { "epoch": 11.205546492659053, "grad_norm": 0.49549034237861633, "learning_rate": 0.00048208075877192275, "loss": 0.2572, "num_input_tokens_seen": 148166160, "step": 68690 }, { "epoch": 11.206362153344209, "grad_norm": 0.06699488312005997, "learning_rate": 0.0004820096248573226, "loss": 0.0248, "num_input_tokens_seen": 148176176, "step": 68695 }, { "epoch": 11.207177814029365, "grad_norm": 0.06875084340572357, "learning_rate": 0.00048193849130732, "loss": 0.0471, "num_input_tokens_seen": 148186608, "step": 68700 }, { "epoch": 11.207993474714518, "grad_norm": 0.005734405014663935, "learning_rate": 0.00048186735812335695, "loss": 0.0088, "num_input_tokens_seen": 148197168, "step": 68705 }, { "epoch": 11.208809135399674, "grad_norm": 0.0027683544903993607, "learning_rate": 0.0004817962253068747, "loss": 0.0088, "num_input_tokens_seen": 148207408, "step": 68710 }, { "epoch": 11.209624796084828, "grad_norm": 0.3621208369731903, "learning_rate": 0.0004817250928593153, "loss": 0.165, "num_input_tokens_seen": 148218448, "step": 68715 }, { "epoch": 11.210440456769984, "grad_norm": 0.008902602829039097, "learning_rate": 0.0004816539607821198, "loss": 0.0095, "num_input_tokens_seen": 148228656, "step": 68720 }, { "epoch": 11.21125611745514, "grad_norm": 0.14498288929462433, "learning_rate": 0.0004815828290767303, "loss": 0.1492, "num_input_tokens_seen": 148239664, "step": 68725 }, { "epoch": 11.212071778140293, "grad_norm": 0.7100898623466492, "learning_rate": 0.00048151169774458797, "loss": 0.0262, "num_input_tokens_seen": 148251344, "step": 68730 }, { "epoch": 11.21288743882545, "grad_norm": 0.010506555438041687, "learning_rate": 0.00048144056678713445, "loss": 0.0027, "num_input_tokens_seen": 148262640, "step": 68735 }, { "epoch": 11.213703099510603, "grad_norm": 0.006115366239100695, "learning_rate": 0.00048136943620581164, "loss": 0.2893, "num_input_tokens_seen": 148273872, "step": 68740 }, { "epoch": 11.214518760195759, "grad_norm": 0.20870687067508698, "learning_rate": 0.00048129830600206067, "loss": 0.0088, "num_input_tokens_seen": 148283408, "step": 68745 }, { "epoch": 11.215334420880913, "grad_norm": 0.009742727503180504, "learning_rate": 0.0004812271761773234, "loss": 0.0653, "num_input_tokens_seen": 148294672, "step": 68750 }, { "epoch": 11.216150081566068, "grad_norm": 0.02110254392027855, "learning_rate": 0.00048115604673304105, "loss": 0.0228, "num_input_tokens_seen": 148306096, "step": 68755 }, { "epoch": 11.216965742251224, "grad_norm": 0.08864295482635498, "learning_rate": 0.0004810849176706555, "loss": 0.1277, "num_input_tokens_seen": 148318320, "step": 68760 }, { "epoch": 11.217781402936378, "grad_norm": 0.07673174887895584, "learning_rate": 0.00048101378899160786, "loss": 0.0582, "num_input_tokens_seen": 148328880, "step": 68765 }, { "epoch": 11.218597063621534, "grad_norm": 0.05391445755958557, "learning_rate": 0.0004809426606973401, "loss": 0.0329, "num_input_tokens_seen": 148338672, "step": 68770 }, { "epoch": 11.219412724306688, "grad_norm": 0.021276140585541725, "learning_rate": 0.00048087153278929327, "loss": 0.0033, "num_input_tokens_seen": 148349584, "step": 68775 }, { "epoch": 11.220228384991843, "grad_norm": 0.03963439539074898, "learning_rate": 0.0004808004052689093, "loss": 0.026, "num_input_tokens_seen": 148360240, "step": 68780 }, { "epoch": 11.221044045676999, "grad_norm": 0.007854829542338848, "learning_rate": 0.0004807292781376294, "loss": 0.0116, "num_input_tokens_seen": 148370608, "step": 68785 }, { "epoch": 11.221859706362153, "grad_norm": 0.05053314194083214, "learning_rate": 0.0004806581513968951, "loss": 0.0093, "num_input_tokens_seen": 148381168, "step": 68790 }, { "epoch": 11.222675367047309, "grad_norm": 0.07876620441675186, "learning_rate": 0.00048058702504814795, "loss": 0.0654, "num_input_tokens_seen": 148392432, "step": 68795 }, { "epoch": 11.223491027732463, "grad_norm": 0.02686028555035591, "learning_rate": 0.0004805158990928293, "loss": 0.0058, "num_input_tokens_seen": 148403920, "step": 68800 }, { "epoch": 11.224306688417618, "grad_norm": 0.38140884041786194, "learning_rate": 0.0004804447735323806, "loss": 0.1742, "num_input_tokens_seen": 148414960, "step": 68805 }, { "epoch": 11.225122349102774, "grad_norm": 0.20010587573051453, "learning_rate": 0.0004803736483682436, "loss": 0.058, "num_input_tokens_seen": 148425136, "step": 68810 }, { "epoch": 11.225938009787928, "grad_norm": 0.006251391023397446, "learning_rate": 0.0004803025236018593, "loss": 0.0072, "num_input_tokens_seen": 148436592, "step": 68815 }, { "epoch": 11.226753670473084, "grad_norm": 0.24043260514736176, "learning_rate": 0.00048023139923466954, "loss": 0.1016, "num_input_tokens_seen": 148447312, "step": 68820 }, { "epoch": 11.227569331158238, "grad_norm": 0.006649219896644354, "learning_rate": 0.00048016027526811536, "loss": 0.0748, "num_input_tokens_seen": 148459600, "step": 68825 }, { "epoch": 11.228384991843393, "grad_norm": 0.01689784787595272, "learning_rate": 0.00048008915170363853, "loss": 0.1838, "num_input_tokens_seen": 148469648, "step": 68830 }, { "epoch": 11.229200652528547, "grad_norm": 0.011838949285447598, "learning_rate": 0.0004800180285426802, "loss": 0.0194, "num_input_tokens_seen": 148480560, "step": 68835 }, { "epoch": 11.230016313213703, "grad_norm": 0.16525967419147491, "learning_rate": 0.00047994690578668175, "loss": 0.1571, "num_input_tokens_seen": 148490160, "step": 68840 }, { "epoch": 11.230831973898859, "grad_norm": 0.23981080949306488, "learning_rate": 0.000479875783437085, "loss": 0.019, "num_input_tokens_seen": 148501904, "step": 68845 }, { "epoch": 11.231647634584013, "grad_norm": 0.39910152554512024, "learning_rate": 0.00047980466149533075, "loss": 0.0653, "num_input_tokens_seen": 148512432, "step": 68850 }, { "epoch": 11.232463295269168, "grad_norm": 0.02704564854502678, "learning_rate": 0.0004797335399628609, "loss": 0.0346, "num_input_tokens_seen": 148523184, "step": 68855 }, { "epoch": 11.233278955954322, "grad_norm": 0.022918429225683212, "learning_rate": 0.0004796624188411163, "loss": 0.0125, "num_input_tokens_seen": 148534000, "step": 68860 }, { "epoch": 11.234094616639478, "grad_norm": 0.017940323799848557, "learning_rate": 0.00047959129813153885, "loss": 0.083, "num_input_tokens_seen": 148544816, "step": 68865 }, { "epoch": 11.234910277324634, "grad_norm": 0.39272645115852356, "learning_rate": 0.00047952017783556945, "loss": 0.0272, "num_input_tokens_seen": 148555760, "step": 68870 }, { "epoch": 11.235725938009788, "grad_norm": 0.33247920870780945, "learning_rate": 0.00047944905795464977, "loss": 0.0377, "num_input_tokens_seen": 148566448, "step": 68875 }, { "epoch": 11.236541598694943, "grad_norm": 0.009736785665154457, "learning_rate": 0.0004793779384902208, "loss": 0.1451, "num_input_tokens_seen": 148577136, "step": 68880 }, { "epoch": 11.237357259380097, "grad_norm": 0.008996447548270226, "learning_rate": 0.00047930681944372434, "loss": 0.0148, "num_input_tokens_seen": 148587888, "step": 68885 }, { "epoch": 11.238172920065253, "grad_norm": 0.050783414393663406, "learning_rate": 0.00047923570081660115, "loss": 0.0396, "num_input_tokens_seen": 148598000, "step": 68890 }, { "epoch": 11.238988580750409, "grad_norm": 0.6130406856536865, "learning_rate": 0.0004791645826102931, "loss": 0.1657, "num_input_tokens_seen": 148608976, "step": 68895 }, { "epoch": 11.239804241435563, "grad_norm": 0.04314326122403145, "learning_rate": 0.000479093464826241, "loss": 0.0185, "num_input_tokens_seen": 148619088, "step": 68900 }, { "epoch": 11.240619902120718, "grad_norm": 0.1666412502527237, "learning_rate": 0.00047902234746588653, "loss": 0.0498, "num_input_tokens_seen": 148629328, "step": 68905 }, { "epoch": 11.241435562805872, "grad_norm": 0.013562342151999474, "learning_rate": 0.0004789512305306706, "loss": 0.01, "num_input_tokens_seen": 148640112, "step": 68910 }, { "epoch": 11.242251223491028, "grad_norm": 0.005892863031476736, "learning_rate": 0.0004788801140220349, "loss": 0.0097, "num_input_tokens_seen": 148650800, "step": 68915 }, { "epoch": 11.243066884176184, "grad_norm": 0.3721885681152344, "learning_rate": 0.00047880899794142026, "loss": 0.0645, "num_input_tokens_seen": 148661552, "step": 68920 }, { "epoch": 11.243882544861338, "grad_norm": 0.4201958179473877, "learning_rate": 0.00047873788229026826, "loss": 0.1026, "num_input_tokens_seen": 148672304, "step": 68925 }, { "epoch": 11.244698205546493, "grad_norm": 0.023103894665837288, "learning_rate": 0.0004786667670700201, "loss": 0.0071, "num_input_tokens_seen": 148681168, "step": 68930 }, { "epoch": 11.245513866231647, "grad_norm": 0.48031648993492126, "learning_rate": 0.00047859565228211695, "loss": 0.3907, "num_input_tokens_seen": 148691760, "step": 68935 }, { "epoch": 11.246329526916803, "grad_norm": 0.5803284645080566, "learning_rate": 0.00047852453792799997, "loss": 0.1295, "num_input_tokens_seen": 148702384, "step": 68940 }, { "epoch": 11.247145187601957, "grad_norm": 0.28515106439590454, "learning_rate": 0.0004784534240091105, "loss": 0.0501, "num_input_tokens_seen": 148713904, "step": 68945 }, { "epoch": 11.247960848287113, "grad_norm": 0.7548883557319641, "learning_rate": 0.00047838231052688975, "loss": 0.1983, "num_input_tokens_seen": 148725552, "step": 68950 }, { "epoch": 11.248776508972268, "grad_norm": 0.016293343156576157, "learning_rate": 0.0004783111974827789, "loss": 0.0086, "num_input_tokens_seen": 148736464, "step": 68955 }, { "epoch": 11.249592169657422, "grad_norm": 0.4095793068408966, "learning_rate": 0.0004782400848782192, "loss": 0.0316, "num_input_tokens_seen": 148745744, "step": 68960 }, { "epoch": 11.250407830342578, "grad_norm": 0.025810036808252335, "learning_rate": 0.0004781689727146517, "loss": 0.0624, "num_input_tokens_seen": 148756624, "step": 68965 }, { "epoch": 11.251223491027732, "grad_norm": 0.017938770353794098, "learning_rate": 0.0004780978609935178, "loss": 0.0229, "num_input_tokens_seen": 148768016, "step": 68970 }, { "epoch": 11.252039151712887, "grad_norm": 0.04474867880344391, "learning_rate": 0.00047802674971625825, "loss": 0.0288, "num_input_tokens_seen": 148779024, "step": 68975 }, { "epoch": 11.252854812398043, "grad_norm": 0.04219987988471985, "learning_rate": 0.0004779556388843148, "loss": 0.0253, "num_input_tokens_seen": 148788912, "step": 68980 }, { "epoch": 11.253670473083197, "grad_norm": 0.03465859219431877, "learning_rate": 0.0004778845284991281, "loss": 0.0142, "num_input_tokens_seen": 148800432, "step": 68985 }, { "epoch": 11.254486133768353, "grad_norm": 0.034955792129039764, "learning_rate": 0.00047781341856213965, "loss": 0.0176, "num_input_tokens_seen": 148811664, "step": 68990 }, { "epoch": 11.255301794453507, "grad_norm": 0.026646627113223076, "learning_rate": 0.00047774230907479025, "loss": 0.0228, "num_input_tokens_seen": 148820816, "step": 68995 }, { "epoch": 11.256117455138662, "grad_norm": 0.4035150408744812, "learning_rate": 0.0004776712000385214, "loss": 0.1172, "num_input_tokens_seen": 148831056, "step": 69000 }, { "epoch": 11.256933115823816, "grad_norm": 0.022678924724459648, "learning_rate": 0.0004776000914547738, "loss": 0.0155, "num_input_tokens_seen": 148843056, "step": 69005 }, { "epoch": 11.257748776508972, "grad_norm": 0.004987623076885939, "learning_rate": 0.00047752898332498894, "loss": 0.0215, "num_input_tokens_seen": 148854256, "step": 69010 }, { "epoch": 11.258564437194128, "grad_norm": 0.0251457579433918, "learning_rate": 0.00047745787565060756, "loss": 0.1036, "num_input_tokens_seen": 148865104, "step": 69015 }, { "epoch": 11.259380097879282, "grad_norm": 0.07361901551485062, "learning_rate": 0.0004773867684330711, "loss": 0.0156, "num_input_tokens_seen": 148876176, "step": 69020 }, { "epoch": 11.260195758564437, "grad_norm": 0.1402062624692917, "learning_rate": 0.0004773156616738203, "loss": 0.0523, "num_input_tokens_seen": 148887664, "step": 69025 }, { "epoch": 11.261011419249591, "grad_norm": 0.0819048136472702, "learning_rate": 0.00047724455537429656, "loss": 0.0402, "num_input_tokens_seen": 148898128, "step": 69030 }, { "epoch": 11.261827079934747, "grad_norm": 0.026461761444807053, "learning_rate": 0.00047717344953594054, "loss": 0.0239, "num_input_tokens_seen": 148908816, "step": 69035 }, { "epoch": 11.262642740619903, "grad_norm": 0.14239662885665894, "learning_rate": 0.0004771023441601938, "loss": 0.1348, "num_input_tokens_seen": 148919216, "step": 69040 }, { "epoch": 11.263458401305057, "grad_norm": 0.01863921619951725, "learning_rate": 0.0004770312392484968, "loss": 0.0368, "num_input_tokens_seen": 148930224, "step": 69045 }, { "epoch": 11.264274061990212, "grad_norm": 0.007354563567787409, "learning_rate": 0.000476960134802291, "loss": 0.1989, "num_input_tokens_seen": 148941584, "step": 69050 }, { "epoch": 11.265089722675366, "grad_norm": 0.009710678830742836, "learning_rate": 0.00047688903082301746, "loss": 0.0248, "num_input_tokens_seen": 148952848, "step": 69055 }, { "epoch": 11.265905383360522, "grad_norm": 0.5251419544219971, "learning_rate": 0.00047681792731211684, "loss": 0.1433, "num_input_tokens_seen": 148963536, "step": 69060 }, { "epoch": 11.266721044045678, "grad_norm": 0.06087875738739967, "learning_rate": 0.00047674682427103045, "loss": 0.0552, "num_input_tokens_seen": 148973872, "step": 69065 }, { "epoch": 11.267536704730832, "grad_norm": 0.01579585112631321, "learning_rate": 0.00047667572170119905, "loss": 0.019, "num_input_tokens_seen": 148984240, "step": 69070 }, { "epoch": 11.268352365415987, "grad_norm": 0.6888933181762695, "learning_rate": 0.00047660461960406385, "loss": 0.0612, "num_input_tokens_seen": 148994704, "step": 69075 }, { "epoch": 11.269168026101141, "grad_norm": 0.004427399020642042, "learning_rate": 0.0004765335179810656, "loss": 0.0467, "num_input_tokens_seen": 149004176, "step": 69080 }, { "epoch": 11.269983686786297, "grad_norm": 0.29330483078956604, "learning_rate": 0.00047646241683364554, "loss": 0.035, "num_input_tokens_seen": 149013808, "step": 69085 }, { "epoch": 11.270799347471453, "grad_norm": 0.012615316547453403, "learning_rate": 0.0004763913161632443, "loss": 0.0038, "num_input_tokens_seen": 149024976, "step": 69090 }, { "epoch": 11.271615008156607, "grad_norm": 0.005077663343399763, "learning_rate": 0.00047632021597130304, "loss": 0.0118, "num_input_tokens_seen": 149034128, "step": 69095 }, { "epoch": 11.272430668841762, "grad_norm": 0.025324510410428047, "learning_rate": 0.0004762491162592627, "loss": 0.0226, "num_input_tokens_seen": 149045616, "step": 69100 }, { "epoch": 11.273246329526916, "grad_norm": 0.032126978039741516, "learning_rate": 0.00047617801702856406, "loss": 0.1386, "num_input_tokens_seen": 149055216, "step": 69105 }, { "epoch": 11.274061990212072, "grad_norm": 0.026015974581241608, "learning_rate": 0.00047610691828064815, "loss": 0.0213, "num_input_tokens_seen": 149064880, "step": 69110 }, { "epoch": 11.274877650897226, "grad_norm": 0.07774662226438522, "learning_rate": 0.0004760358200169559, "loss": 0.0179, "num_input_tokens_seen": 149077456, "step": 69115 }, { "epoch": 11.275693311582382, "grad_norm": 0.016537116840481758, "learning_rate": 0.000475964722238928, "loss": 0.0183, "num_input_tokens_seen": 149088656, "step": 69120 }, { "epoch": 11.276508972267537, "grad_norm": 0.3386205732822418, "learning_rate": 0.00047589362494800574, "loss": 0.0407, "num_input_tokens_seen": 149098864, "step": 69125 }, { "epoch": 11.277324632952691, "grad_norm": 0.019488751888275146, "learning_rate": 0.00047582252814562954, "loss": 0.0375, "num_input_tokens_seen": 149110288, "step": 69130 }, { "epoch": 11.278140293637847, "grad_norm": 0.0118104862049222, "learning_rate": 0.0004757514318332407, "loss": 0.0067, "num_input_tokens_seen": 149120368, "step": 69135 }, { "epoch": 11.278955954323001, "grad_norm": 0.0036660260520875454, "learning_rate": 0.0004756803360122796, "loss": 0.0256, "num_input_tokens_seen": 149130896, "step": 69140 }, { "epoch": 11.279771615008157, "grad_norm": 0.023419532924890518, "learning_rate": 0.00047560924068418763, "loss": 0.0204, "num_input_tokens_seen": 149142000, "step": 69145 }, { "epoch": 11.280587275693312, "grad_norm": 0.009780245833098888, "learning_rate": 0.00047553814585040506, "loss": 0.0046, "num_input_tokens_seen": 149153296, "step": 69150 }, { "epoch": 11.281402936378466, "grad_norm": 0.03378021717071533, "learning_rate": 0.00047546705151237323, "loss": 0.0521, "num_input_tokens_seen": 149163056, "step": 69155 }, { "epoch": 11.282218597063622, "grad_norm": 1.0410773754119873, "learning_rate": 0.00047539595767153255, "loss": 0.0724, "num_input_tokens_seen": 149174192, "step": 69160 }, { "epoch": 11.283034257748776, "grad_norm": 0.011263465508818626, "learning_rate": 0.00047532486432932394, "loss": 0.0155, "num_input_tokens_seen": 149185456, "step": 69165 }, { "epoch": 11.283849918433932, "grad_norm": 0.009772009216248989, "learning_rate": 0.00047525377148718845, "loss": 0.0253, "num_input_tokens_seen": 149195824, "step": 69170 }, { "epoch": 11.284665579119087, "grad_norm": 0.4020741581916809, "learning_rate": 0.00047518267914656656, "loss": 0.1235, "num_input_tokens_seen": 149207536, "step": 69175 }, { "epoch": 11.285481239804241, "grad_norm": 0.4630303382873535, "learning_rate": 0.0004751115873088992, "loss": 0.1419, "num_input_tokens_seen": 149217936, "step": 69180 }, { "epoch": 11.286296900489397, "grad_norm": 0.19731587171554565, "learning_rate": 0.0004750404959756271, "loss": 0.1697, "num_input_tokens_seen": 149230032, "step": 69185 }, { "epoch": 11.28711256117455, "grad_norm": 0.02867996133863926, "learning_rate": 0.0004749694051481911, "loss": 0.029, "num_input_tokens_seen": 149240720, "step": 69190 }, { "epoch": 11.287928221859707, "grad_norm": 0.017887111753225327, "learning_rate": 0.00047489831482803167, "loss": 0.0057, "num_input_tokens_seen": 149250416, "step": 69195 }, { "epoch": 11.28874388254486, "grad_norm": 0.7879258990287781, "learning_rate": 0.00047482722501658993, "loss": 0.0363, "num_input_tokens_seen": 149262224, "step": 69200 }, { "epoch": 11.289559543230016, "grad_norm": 0.021696623414754868, "learning_rate": 0.00047475613571530624, "loss": 0.0479, "num_input_tokens_seen": 149272208, "step": 69205 }, { "epoch": 11.290375203915172, "grad_norm": 0.00980361644178629, "learning_rate": 0.0004746850469256216, "loss": 0.0334, "num_input_tokens_seen": 149283952, "step": 69210 }, { "epoch": 11.291190864600326, "grad_norm": 0.4570578336715698, "learning_rate": 0.0004746139586489765, "loss": 0.0657, "num_input_tokens_seen": 149295312, "step": 69215 }, { "epoch": 11.292006525285482, "grad_norm": 0.4115006625652313, "learning_rate": 0.00047454287088681194, "loss": 0.0463, "num_input_tokens_seen": 149305936, "step": 69220 }, { "epoch": 11.292822185970635, "grad_norm": 0.0041886623948812485, "learning_rate": 0.0004744717836405681, "loss": 0.0919, "num_input_tokens_seen": 149317680, "step": 69225 }, { "epoch": 11.293637846655791, "grad_norm": 0.0013129523722454906, "learning_rate": 0.00047440069691168617, "loss": 0.0123, "num_input_tokens_seen": 149327088, "step": 69230 }, { "epoch": 11.294453507340947, "grad_norm": 0.11184611171483994, "learning_rate": 0.0004743296107016065, "loss": 0.0303, "num_input_tokens_seen": 149337584, "step": 69235 }, { "epoch": 11.2952691680261, "grad_norm": 0.10007749497890472, "learning_rate": 0.0004742585250117698, "loss": 0.0163, "num_input_tokens_seen": 149347984, "step": 69240 }, { "epoch": 11.296084828711257, "grad_norm": 0.004105337429791689, "learning_rate": 0.00047418743984361676, "loss": 0.0388, "num_input_tokens_seen": 149358512, "step": 69245 }, { "epoch": 11.29690048939641, "grad_norm": 0.8968774676322937, "learning_rate": 0.0004741163551985881, "loss": 0.1026, "num_input_tokens_seen": 149368624, "step": 69250 }, { "epoch": 11.297716150081566, "grad_norm": 0.059237342327833176, "learning_rate": 0.00047404527107812423, "loss": 0.0635, "num_input_tokens_seen": 149379728, "step": 69255 }, { "epoch": 11.298531810766722, "grad_norm": 0.05423299968242645, "learning_rate": 0.00047397418748366596, "loss": 0.0269, "num_input_tokens_seen": 149390288, "step": 69260 }, { "epoch": 11.299347471451876, "grad_norm": 0.0032311498653143644, "learning_rate": 0.0004739031044166536, "loss": 0.0112, "num_input_tokens_seen": 149400272, "step": 69265 }, { "epoch": 11.300163132137031, "grad_norm": 0.011994333006441593, "learning_rate": 0.0004738320218785281, "loss": 0.0184, "num_input_tokens_seen": 149411024, "step": 69270 }, { "epoch": 11.300978792822185, "grad_norm": 0.10612502694129944, "learning_rate": 0.00047376093987072985, "loss": 0.1293, "num_input_tokens_seen": 149422256, "step": 69275 }, { "epoch": 11.301794453507341, "grad_norm": 0.16527323424816132, "learning_rate": 0.00047368985839469946, "loss": 0.087, "num_input_tokens_seen": 149432432, "step": 69280 }, { "epoch": 11.302610114192497, "grad_norm": 0.1097843274474144, "learning_rate": 0.00047361877745187743, "loss": 0.0137, "num_input_tokens_seen": 149443824, "step": 69285 }, { "epoch": 11.30342577487765, "grad_norm": 0.012212603352963924, "learning_rate": 0.0004735476970437043, "loss": 0.0079, "num_input_tokens_seen": 149454640, "step": 69290 }, { "epoch": 11.304241435562806, "grad_norm": 0.5215902328491211, "learning_rate": 0.0004734766171716208, "loss": 0.1397, "num_input_tokens_seen": 149466096, "step": 69295 }, { "epoch": 11.30505709624796, "grad_norm": 0.0094797657802701, "learning_rate": 0.0004734055378370671, "loss": 0.029, "num_input_tokens_seen": 149476816, "step": 69300 }, { "epoch": 11.305872756933116, "grad_norm": 0.05265314504504204, "learning_rate": 0.00047333445904148414, "loss": 0.0578, "num_input_tokens_seen": 149489232, "step": 69305 }, { "epoch": 11.30668841761827, "grad_norm": 0.027298280969262123, "learning_rate": 0.0004732633807863119, "loss": 0.0287, "num_input_tokens_seen": 149500944, "step": 69310 }, { "epoch": 11.307504078303426, "grad_norm": 0.1432339996099472, "learning_rate": 0.0004731923030729915, "loss": 0.1217, "num_input_tokens_seen": 149512752, "step": 69315 }, { "epoch": 11.308319738988581, "grad_norm": 0.07767806202173233, "learning_rate": 0.0004731212259029628, "loss": 0.018, "num_input_tokens_seen": 149524208, "step": 69320 }, { "epoch": 11.309135399673735, "grad_norm": 0.07505153864622116, "learning_rate": 0.0004730501492776668, "loss": 0.2641, "num_input_tokens_seen": 149534128, "step": 69325 }, { "epoch": 11.309951060358891, "grad_norm": 0.006197615060955286, "learning_rate": 0.00047297907319854347, "loss": 0.1902, "num_input_tokens_seen": 149546096, "step": 69330 }, { "epoch": 11.310766721044045, "grad_norm": 0.006627257913351059, "learning_rate": 0.0004729079976670338, "loss": 0.0096, "num_input_tokens_seen": 149556080, "step": 69335 }, { "epoch": 11.3115823817292, "grad_norm": 0.15297362208366394, "learning_rate": 0.00047283692268457764, "loss": 0.023, "num_input_tokens_seen": 149565488, "step": 69340 }, { "epoch": 11.312398042414356, "grad_norm": 0.017872367054224014, "learning_rate": 0.0004727658482526159, "loss": 0.0216, "num_input_tokens_seen": 149576464, "step": 69345 }, { "epoch": 11.31321370309951, "grad_norm": 0.22258399426937103, "learning_rate": 0.00047269477437258863, "loss": 0.077, "num_input_tokens_seen": 149585744, "step": 69350 }, { "epoch": 11.314029363784666, "grad_norm": 0.17419388890266418, "learning_rate": 0.0004726237010459366, "loss": 0.0783, "num_input_tokens_seen": 149596528, "step": 69355 }, { "epoch": 11.31484502446982, "grad_norm": 0.07132498174905777, "learning_rate": 0.00047255262827409974, "loss": 0.0432, "num_input_tokens_seen": 149607632, "step": 69360 }, { "epoch": 11.315660685154976, "grad_norm": 0.4194459617137909, "learning_rate": 0.00047248155605851896, "loss": 0.0426, "num_input_tokens_seen": 149619024, "step": 69365 }, { "epoch": 11.31647634584013, "grad_norm": 1.0931060314178467, "learning_rate": 0.0004724104844006341, "loss": 0.1192, "num_input_tokens_seen": 149629104, "step": 69370 }, { "epoch": 11.317292006525285, "grad_norm": 0.041385333985090256, "learning_rate": 0.0004723394133018858, "loss": 0.013, "num_input_tokens_seen": 149640592, "step": 69375 }, { "epoch": 11.318107667210441, "grad_norm": 0.3124556243419647, "learning_rate": 0.00047226834276371457, "loss": 0.0515, "num_input_tokens_seen": 149651408, "step": 69380 }, { "epoch": 11.318923327895595, "grad_norm": 0.009077414870262146, "learning_rate": 0.00047219727278756033, "loss": 0.0141, "num_input_tokens_seen": 149662192, "step": 69385 }, { "epoch": 11.31973898858075, "grad_norm": 0.9440565705299377, "learning_rate": 0.0004721262033748639, "loss": 0.0341, "num_input_tokens_seen": 149673200, "step": 69390 }, { "epoch": 11.320554649265905, "grad_norm": 0.17995211482048035, "learning_rate": 0.00047205513452706503, "loss": 0.0189, "num_input_tokens_seen": 149684208, "step": 69395 }, { "epoch": 11.32137030995106, "grad_norm": 0.6484889984130859, "learning_rate": 0.0004719840662456046, "loss": 0.248, "num_input_tokens_seen": 149695280, "step": 69400 }, { "epoch": 11.322185970636216, "grad_norm": 0.06213457137346268, "learning_rate": 0.0004719129985319223, "loss": 0.0156, "num_input_tokens_seen": 149706448, "step": 69405 }, { "epoch": 11.32300163132137, "grad_norm": 0.010141161270439625, "learning_rate": 0.0004718419313874589, "loss": 0.0141, "num_input_tokens_seen": 149717232, "step": 69410 }, { "epoch": 11.323817292006526, "grad_norm": 0.0047691017389297485, "learning_rate": 0.00047177086481365444, "loss": 0.0266, "num_input_tokens_seen": 149729232, "step": 69415 }, { "epoch": 11.32463295269168, "grad_norm": 0.48585906624794006, "learning_rate": 0.00047169979881194927, "loss": 0.1908, "num_input_tokens_seen": 149740176, "step": 69420 }, { "epoch": 11.325448613376835, "grad_norm": 0.1370762139558792, "learning_rate": 0.00047162873338378353, "loss": 0.009, "num_input_tokens_seen": 149752016, "step": 69425 }, { "epoch": 11.326264274061991, "grad_norm": 0.054075680673122406, "learning_rate": 0.0004715576685305975, "loss": 0.0086, "num_input_tokens_seen": 149761968, "step": 69430 }, { "epoch": 11.327079934747145, "grad_norm": 0.026773322373628616, "learning_rate": 0.0004714866042538313, "loss": 0.005, "num_input_tokens_seen": 149772304, "step": 69435 }, { "epoch": 11.3278955954323, "grad_norm": 0.005305178463459015, "learning_rate": 0.00047141554055492546, "loss": 0.01, "num_input_tokens_seen": 149781648, "step": 69440 }, { "epoch": 11.328711256117455, "grad_norm": 0.05895174294710159, "learning_rate": 0.0004713444774353197, "loss": 0.0163, "num_input_tokens_seen": 149791888, "step": 69445 }, { "epoch": 11.32952691680261, "grad_norm": 0.011736053042113781, "learning_rate": 0.0004712734148964547, "loss": 0.0264, "num_input_tokens_seen": 149803408, "step": 69450 }, { "epoch": 11.330342577487766, "grad_norm": 0.03290429338812828, "learning_rate": 0.00047120235293977023, "loss": 0.0618, "num_input_tokens_seen": 149813680, "step": 69455 }, { "epoch": 11.33115823817292, "grad_norm": 0.09577078372240067, "learning_rate": 0.00047113129156670677, "loss": 0.0101, "num_input_tokens_seen": 149825520, "step": 69460 }, { "epoch": 11.331973898858076, "grad_norm": 0.057766661047935486, "learning_rate": 0.00047106023077870407, "loss": 0.0086, "num_input_tokens_seen": 149835440, "step": 69465 }, { "epoch": 11.33278955954323, "grad_norm": 0.03954591229557991, "learning_rate": 0.00047098917057720275, "loss": 0.007, "num_input_tokens_seen": 149846128, "step": 69470 }, { "epoch": 11.333605220228385, "grad_norm": 0.019217824563384056, "learning_rate": 0.00047091811096364243, "loss": 0.011, "num_input_tokens_seen": 149856720, "step": 69475 }, { "epoch": 11.33442088091354, "grad_norm": 0.0015795602230355144, "learning_rate": 0.00047084705193946357, "loss": 0.0143, "num_input_tokens_seen": 149868112, "step": 69480 }, { "epoch": 11.335236541598695, "grad_norm": 0.011428968980908394, "learning_rate": 0.0004707759935061063, "loss": 0.0073, "num_input_tokens_seen": 149878640, "step": 69485 }, { "epoch": 11.33605220228385, "grad_norm": 0.7442166209220886, "learning_rate": 0.0004707049356650105, "loss": 0.158, "num_input_tokens_seen": 149889392, "step": 69490 }, { "epoch": 11.336867862969005, "grad_norm": 0.22901560366153717, "learning_rate": 0.0004706338784176165, "loss": 0.0747, "num_input_tokens_seen": 149900048, "step": 69495 }, { "epoch": 11.33768352365416, "grad_norm": 0.04122432321310043, "learning_rate": 0.000470562821765364, "loss": 0.0081, "num_input_tokens_seen": 149911024, "step": 69500 }, { "epoch": 11.338499184339314, "grad_norm": 0.006258562672883272, "learning_rate": 0.0004704917657096934, "loss": 0.0322, "num_input_tokens_seen": 149922032, "step": 69505 }, { "epoch": 11.33931484502447, "grad_norm": 0.07109250873327255, "learning_rate": 0.00047042071025204445, "loss": 0.0201, "num_input_tokens_seen": 149933584, "step": 69510 }, { "epoch": 11.340130505709626, "grad_norm": 0.008464198559522629, "learning_rate": 0.0004703496553938576, "loss": 0.0059, "num_input_tokens_seen": 149943792, "step": 69515 }, { "epoch": 11.34094616639478, "grad_norm": 0.11066413670778275, "learning_rate": 0.00047027860113657235, "loss": 0.0357, "num_input_tokens_seen": 149954960, "step": 69520 }, { "epoch": 11.341761827079935, "grad_norm": 0.122870072722435, "learning_rate": 0.00047020754748162914, "loss": 0.0458, "num_input_tokens_seen": 149965936, "step": 69525 }, { "epoch": 11.34257748776509, "grad_norm": 0.0057017747312784195, "learning_rate": 0.0004701364944304675, "loss": 0.0647, "num_input_tokens_seen": 149976400, "step": 69530 }, { "epoch": 11.343393148450245, "grad_norm": 0.13748645782470703, "learning_rate": 0.000470065441984528, "loss": 0.026, "num_input_tokens_seen": 149986992, "step": 69535 }, { "epoch": 11.3442088091354, "grad_norm": 0.027020972222089767, "learning_rate": 0.00046999439014525004, "loss": 0.0525, "num_input_tokens_seen": 149996144, "step": 69540 }, { "epoch": 11.345024469820554, "grad_norm": 0.04546087235212326, "learning_rate": 0.00046992333891407396, "loss": 0.0214, "num_input_tokens_seen": 150006768, "step": 69545 }, { "epoch": 11.34584013050571, "grad_norm": 0.014700768515467644, "learning_rate": 0.00046985228829243955, "loss": 0.0266, "num_input_tokens_seen": 150018128, "step": 69550 }, { "epoch": 11.346655791190864, "grad_norm": 0.23733387887477875, "learning_rate": 0.0004697812382817868, "loss": 0.0545, "num_input_tokens_seen": 150028656, "step": 69555 }, { "epoch": 11.34747145187602, "grad_norm": 0.016356905922293663, "learning_rate": 0.0004697101888835555, "loss": 0.0133, "num_input_tokens_seen": 150040912, "step": 69560 }, { "epoch": 11.348287112561174, "grad_norm": 0.003106578253209591, "learning_rate": 0.0004696391400991857, "loss": 0.1845, "num_input_tokens_seen": 150051856, "step": 69565 }, { "epoch": 11.34910277324633, "grad_norm": 0.004037776496261358, "learning_rate": 0.0004695680919301173, "loss": 0.0987, "num_input_tokens_seen": 150062928, "step": 69570 }, { "epoch": 11.349918433931485, "grad_norm": 0.04516978561878204, "learning_rate": 0.00046949704437779005, "loss": 0.0099, "num_input_tokens_seen": 150073552, "step": 69575 }, { "epoch": 11.350734094616639, "grad_norm": 0.005960175767540932, "learning_rate": 0.0004694259974436438, "loss": 0.0465, "num_input_tokens_seen": 150083248, "step": 69580 }, { "epoch": 11.351549755301795, "grad_norm": 0.005637995433062315, "learning_rate": 0.00046935495112911856, "loss": 0.0054, "num_input_tokens_seen": 150094992, "step": 69585 }, { "epoch": 11.352365415986949, "grad_norm": 0.017193708568811417, "learning_rate": 0.0004692839054356542, "loss": 0.0206, "num_input_tokens_seen": 150105808, "step": 69590 }, { "epoch": 11.353181076672104, "grad_norm": 0.4765262007713318, "learning_rate": 0.0004692128603646904, "loss": 0.044, "num_input_tokens_seen": 150116816, "step": 69595 }, { "epoch": 11.35399673735726, "grad_norm": 1.1909513473510742, "learning_rate": 0.0004691418159176671, "loss": 0.0467, "num_input_tokens_seen": 150127888, "step": 69600 }, { "epoch": 11.354812398042414, "grad_norm": 0.031031114980578423, "learning_rate": 0.00046907077209602387, "loss": 0.0598, "num_input_tokens_seen": 150139728, "step": 69605 }, { "epoch": 11.35562805872757, "grad_norm": 0.45044320821762085, "learning_rate": 0.0004689997289012009, "loss": 0.0935, "num_input_tokens_seen": 150150864, "step": 69610 }, { "epoch": 11.356443719412724, "grad_norm": 0.4096696376800537, "learning_rate": 0.0004689286863346376, "loss": 0.064, "num_input_tokens_seen": 150160816, "step": 69615 }, { "epoch": 11.35725938009788, "grad_norm": 0.03241623938083649, "learning_rate": 0.00046885764439777406, "loss": 0.0093, "num_input_tokens_seen": 150172176, "step": 69620 }, { "epoch": 11.358075040783035, "grad_norm": 0.015470996499061584, "learning_rate": 0.0004687866030920496, "loss": 0.1093, "num_input_tokens_seen": 150182704, "step": 69625 }, { "epoch": 11.358890701468189, "grad_norm": 0.020214637741446495, "learning_rate": 0.00046871556241890455, "loss": 0.0108, "num_input_tokens_seen": 150191696, "step": 69630 }, { "epoch": 11.359706362153345, "grad_norm": 0.013911193236708641, "learning_rate": 0.000468644522379778, "loss": 0.078, "num_input_tokens_seen": 150202896, "step": 69635 }, { "epoch": 11.360522022838499, "grad_norm": 0.027526943013072014, "learning_rate": 0.00046857348297611024, "loss": 0.007, "num_input_tokens_seen": 150215376, "step": 69640 }, { "epoch": 11.361337683523654, "grad_norm": 0.6273996829986572, "learning_rate": 0.0004685024442093405, "loss": 0.0299, "num_input_tokens_seen": 150226032, "step": 69645 }, { "epoch": 11.362153344208808, "grad_norm": 0.8465483784675598, "learning_rate": 0.00046843140608090897, "loss": 0.0336, "num_input_tokens_seen": 150237264, "step": 69650 }, { "epoch": 11.362969004893964, "grad_norm": 0.6568078398704529, "learning_rate": 0.0004683603685922547, "loss": 0.1932, "num_input_tokens_seen": 150249680, "step": 69655 }, { "epoch": 11.36378466557912, "grad_norm": 0.08765445649623871, "learning_rate": 0.00046828933174481797, "loss": 0.0213, "num_input_tokens_seen": 150260784, "step": 69660 }, { "epoch": 11.364600326264274, "grad_norm": 0.08723331242799759, "learning_rate": 0.000468218295540038, "loss": 0.019, "num_input_tokens_seen": 150271248, "step": 69665 }, { "epoch": 11.36541598694943, "grad_norm": 0.013393505476415157, "learning_rate": 0.0004681472599793547, "loss": 0.0825, "num_input_tokens_seen": 150282384, "step": 69670 }, { "epoch": 11.366231647634583, "grad_norm": 0.25501468777656555, "learning_rate": 0.00046807622506420745, "loss": 0.0282, "num_input_tokens_seen": 150292016, "step": 69675 }, { "epoch": 11.367047308319739, "grad_norm": 0.046749066561460495, "learning_rate": 0.00046800519079603616, "loss": 0.031, "num_input_tokens_seen": 150302960, "step": 69680 }, { "epoch": 11.367862969004895, "grad_norm": 0.006688144989311695, "learning_rate": 0.00046793415717628006, "loss": 0.007, "num_input_tokens_seen": 150312880, "step": 69685 }, { "epoch": 11.368678629690049, "grad_norm": 0.22902892529964447, "learning_rate": 0.000467863124206379, "loss": 0.0302, "num_input_tokens_seen": 150324592, "step": 69690 }, { "epoch": 11.369494290375204, "grad_norm": 0.2499184012413025, "learning_rate": 0.0004677920918877726, "loss": 0.0098, "num_input_tokens_seen": 150336464, "step": 69695 }, { "epoch": 11.370309951060358, "grad_norm": 0.5610254406929016, "learning_rate": 0.0004677210602219002, "loss": 0.0393, "num_input_tokens_seen": 150347536, "step": 69700 }, { "epoch": 11.371125611745514, "grad_norm": 0.003160185879096389, "learning_rate": 0.00046765002921020165, "loss": 0.0362, "num_input_tokens_seen": 150358864, "step": 69705 }, { "epoch": 11.37194127243067, "grad_norm": 0.006164680700749159, "learning_rate": 0.0004675789988541161, "loss": 0.1185, "num_input_tokens_seen": 150369776, "step": 69710 }, { "epoch": 11.372756933115824, "grad_norm": 0.01655641384422779, "learning_rate": 0.0004675079691550833, "loss": 0.041, "num_input_tokens_seen": 150380720, "step": 69715 }, { "epoch": 11.37357259380098, "grad_norm": 0.014474146999418736, "learning_rate": 0.0004674369401145428, "loss": 0.0025, "num_input_tokens_seen": 150391440, "step": 69720 }, { "epoch": 11.374388254486133, "grad_norm": 0.0037023338954895735, "learning_rate": 0.000467365911733934, "loss": 0.0074, "num_input_tokens_seen": 150402224, "step": 69725 }, { "epoch": 11.375203915171289, "grad_norm": 0.012846031226217747, "learning_rate": 0.0004672948840146964, "loss": 0.1276, "num_input_tokens_seen": 150411920, "step": 69730 }, { "epoch": 11.376019575856443, "grad_norm": 0.6107155680656433, "learning_rate": 0.0004672238569582695, "loss": 0.1042, "num_input_tokens_seen": 150422416, "step": 69735 }, { "epoch": 11.376835236541599, "grad_norm": 0.006394654046744108, "learning_rate": 0.00046715283056609255, "loss": 0.0193, "num_input_tokens_seen": 150434448, "step": 69740 }, { "epoch": 11.377650897226754, "grad_norm": 0.07789919525384903, "learning_rate": 0.0004670818048396054, "loss": 0.0088, "num_input_tokens_seen": 150444720, "step": 69745 }, { "epoch": 11.378466557911908, "grad_norm": 0.008656224235892296, "learning_rate": 0.00046701077978024695, "loss": 0.0024, "num_input_tokens_seen": 150456240, "step": 69750 }, { "epoch": 11.379282218597064, "grad_norm": 0.028518201783299446, "learning_rate": 0.0004669397553894572, "loss": 0.0951, "num_input_tokens_seen": 150466672, "step": 69755 }, { "epoch": 11.380097879282218, "grad_norm": 0.13861902058124542, "learning_rate": 0.00046686873166867503, "loss": 0.0108, "num_input_tokens_seen": 150476400, "step": 69760 }, { "epoch": 11.380913539967374, "grad_norm": 0.008120225742459297, "learning_rate": 0.00046679770861934026, "loss": 0.0033, "num_input_tokens_seen": 150486928, "step": 69765 }, { "epoch": 11.38172920065253, "grad_norm": 0.42739951610565186, "learning_rate": 0.00046672668624289177, "loss": 0.0448, "num_input_tokens_seen": 150497456, "step": 69770 }, { "epoch": 11.382544861337683, "grad_norm": 0.003582507139071822, "learning_rate": 0.0004666556645407695, "loss": 0.0253, "num_input_tokens_seen": 150508208, "step": 69775 }, { "epoch": 11.383360522022839, "grad_norm": 0.0022557235788553953, "learning_rate": 0.00046658464351441214, "loss": 0.0344, "num_input_tokens_seen": 150518544, "step": 69780 }, { "epoch": 11.384176182707993, "grad_norm": 0.17820048332214355, "learning_rate": 0.0004665136231652597, "loss": 0.0147, "num_input_tokens_seen": 150530352, "step": 69785 }, { "epoch": 11.384991843393149, "grad_norm": 0.3129952847957611, "learning_rate": 0.0004664426034947509, "loss": 0.1838, "num_input_tokens_seen": 150541552, "step": 69790 }, { "epoch": 11.385807504078304, "grad_norm": 0.15456588566303253, "learning_rate": 0.00046637158450432557, "loss": 0.0253, "num_input_tokens_seen": 150552240, "step": 69795 }, { "epoch": 11.386623164763458, "grad_norm": 0.1288246214389801, "learning_rate": 0.0004663005661954225, "loss": 0.0584, "num_input_tokens_seen": 150563632, "step": 69800 }, { "epoch": 11.387438825448614, "grad_norm": 0.022261830046772957, "learning_rate": 0.0004662295485694812, "loss": 0.0086, "num_input_tokens_seen": 150573840, "step": 69805 }, { "epoch": 11.388254486133768, "grad_norm": 0.007492858916521072, "learning_rate": 0.00046615853162794115, "loss": 0.1439, "num_input_tokens_seen": 150584528, "step": 69810 }, { "epoch": 11.389070146818923, "grad_norm": 0.04444269835948944, "learning_rate": 0.00046608751537224115, "loss": 0.0236, "num_input_tokens_seen": 150595088, "step": 69815 }, { "epoch": 11.38988580750408, "grad_norm": 0.733195960521698, "learning_rate": 0.0004660164998038209, "loss": 0.1365, "num_input_tokens_seen": 150604944, "step": 69820 }, { "epoch": 11.390701468189233, "grad_norm": 0.014957551844418049, "learning_rate": 0.0004659454849241192, "loss": 0.1484, "num_input_tokens_seen": 150615984, "step": 69825 }, { "epoch": 11.391517128874389, "grad_norm": 0.14314493536949158, "learning_rate": 0.0004658744707345757, "loss": 0.0175, "num_input_tokens_seen": 150626544, "step": 69830 }, { "epoch": 11.392332789559543, "grad_norm": 0.11421629786491394, "learning_rate": 0.000465803457236629, "loss": 0.0642, "num_input_tokens_seen": 150638160, "step": 69835 }, { "epoch": 11.393148450244698, "grad_norm": 0.011532318778336048, "learning_rate": 0.00046573244443171897, "loss": 0.0598, "num_input_tokens_seen": 150649520, "step": 69840 }, { "epoch": 11.393964110929852, "grad_norm": 0.024114692583680153, "learning_rate": 0.00046566143232128416, "loss": 0.0502, "num_input_tokens_seen": 150659920, "step": 69845 }, { "epoch": 11.394779771615008, "grad_norm": 0.040003348141908646, "learning_rate": 0.0004655904209067642, "loss": 0.0095, "num_input_tokens_seen": 150671600, "step": 69850 }, { "epoch": 11.395595432300164, "grad_norm": 0.07984590530395508, "learning_rate": 0.0004655194101895978, "loss": 0.0075, "num_input_tokens_seen": 150682608, "step": 69855 }, { "epoch": 11.396411092985318, "grad_norm": 0.010479428805410862, "learning_rate": 0.00046544840017122437, "loss": 0.0159, "num_input_tokens_seen": 150692560, "step": 69860 }, { "epoch": 11.397226753670473, "grad_norm": 1.0071278810501099, "learning_rate": 0.000465377390853083, "loss": 0.2155, "num_input_tokens_seen": 150703472, "step": 69865 }, { "epoch": 11.398042414355627, "grad_norm": 0.3687129616737366, "learning_rate": 0.0004653063822366127, "loss": 0.0278, "num_input_tokens_seen": 150713936, "step": 69870 }, { "epoch": 11.398858075040783, "grad_norm": 0.033327843993902206, "learning_rate": 0.00046523537432325256, "loss": 0.0046, "num_input_tokens_seen": 150726288, "step": 69875 }, { "epoch": 11.399673735725939, "grad_norm": 0.050787270069122314, "learning_rate": 0.00046516436711444166, "loss": 0.045, "num_input_tokens_seen": 150735184, "step": 69880 }, { "epoch": 11.400489396411093, "grad_norm": 0.03601576015353203, "learning_rate": 0.000465093360611619, "loss": 0.005, "num_input_tokens_seen": 150746352, "step": 69885 }, { "epoch": 11.401305057096248, "grad_norm": 0.24261367321014404, "learning_rate": 0.00046502235481622387, "loss": 0.0098, "num_input_tokens_seen": 150756848, "step": 69890 }, { "epoch": 11.402120717781402, "grad_norm": 0.4517940580844879, "learning_rate": 0.00046495134972969476, "loss": 0.0858, "num_input_tokens_seen": 150767472, "step": 69895 }, { "epoch": 11.402936378466558, "grad_norm": 0.012901443056762218, "learning_rate": 0.00046488034535347133, "loss": 0.0095, "num_input_tokens_seen": 150778416, "step": 69900 }, { "epoch": 11.403752039151712, "grad_norm": 0.07447502762079239, "learning_rate": 0.00046480934168899204, "loss": 0.0599, "num_input_tokens_seen": 150789520, "step": 69905 }, { "epoch": 11.404567699836868, "grad_norm": 0.3841421902179718, "learning_rate": 0.0004647383387376961, "loss": 0.0325, "num_input_tokens_seen": 150800816, "step": 69910 }, { "epoch": 11.405383360522023, "grad_norm": 0.5541864633560181, "learning_rate": 0.0004646673365010226, "loss": 0.0847, "num_input_tokens_seen": 150810928, "step": 69915 }, { "epoch": 11.406199021207177, "grad_norm": 0.002222574083134532, "learning_rate": 0.0004645963349804102, "loss": 0.0589, "num_input_tokens_seen": 150822224, "step": 69920 }, { "epoch": 11.407014681892333, "grad_norm": 0.4165201485157013, "learning_rate": 0.0004645253341772982, "loss": 0.0814, "num_input_tokens_seen": 150833808, "step": 69925 }, { "epoch": 11.407830342577487, "grad_norm": 0.006566571071743965, "learning_rate": 0.00046445433409312507, "loss": 0.0073, "num_input_tokens_seen": 150844656, "step": 69930 }, { "epoch": 11.408646003262643, "grad_norm": 0.007964256219565868, "learning_rate": 0.00046438333472933015, "loss": 0.0041, "num_input_tokens_seen": 150855568, "step": 69935 }, { "epoch": 11.409461663947798, "grad_norm": 0.08863414078950882, "learning_rate": 0.0004643123360873519, "loss": 0.0136, "num_input_tokens_seen": 150866416, "step": 69940 }, { "epoch": 11.410277324632952, "grad_norm": 0.004235348664224148, "learning_rate": 0.00046424133816862966, "loss": 0.004, "num_input_tokens_seen": 150877264, "step": 69945 }, { "epoch": 11.411092985318108, "grad_norm": 0.007877707481384277, "learning_rate": 0.00046417034097460193, "loss": 0.0108, "num_input_tokens_seen": 150886992, "step": 69950 }, { "epoch": 11.411908646003262, "grad_norm": 0.06088092923164368, "learning_rate": 0.0004640993445067078, "loss": 0.024, "num_input_tokens_seen": 150898352, "step": 69955 }, { "epoch": 11.412724306688418, "grad_norm": 0.014011125080287457, "learning_rate": 0.00046402834876638584, "loss": 0.0126, "num_input_tokens_seen": 150908592, "step": 69960 }, { "epoch": 11.413539967373573, "grad_norm": 0.011040844954550266, "learning_rate": 0.00046395735375507523, "loss": 0.0144, "num_input_tokens_seen": 150920080, "step": 69965 }, { "epoch": 11.414355628058727, "grad_norm": 0.006899704225361347, "learning_rate": 0.0004638863594742144, "loss": 0.1048, "num_input_tokens_seen": 150931216, "step": 69970 }, { "epoch": 11.415171288743883, "grad_norm": 0.010883388109505177, "learning_rate": 0.00046381536592524244, "loss": 0.0087, "num_input_tokens_seen": 150940624, "step": 69975 }, { "epoch": 11.415986949429037, "grad_norm": 0.054918061941862106, "learning_rate": 0.00046374437310959783, "loss": 0.027, "num_input_tokens_seen": 150951856, "step": 69980 }, { "epoch": 11.416802610114193, "grad_norm": 0.022111941128969193, "learning_rate": 0.0004636733810287197, "loss": 0.0506, "num_input_tokens_seen": 150962160, "step": 69985 }, { "epoch": 11.417618270799348, "grad_norm": 0.05058705434203148, "learning_rate": 0.00046360238968404634, "loss": 0.06, "num_input_tokens_seen": 150973520, "step": 69990 }, { "epoch": 11.418433931484502, "grad_norm": 0.21099309623241425, "learning_rate": 0.000463531399077017, "loss": 0.0336, "num_input_tokens_seen": 150982512, "step": 69995 }, { "epoch": 11.419249592169658, "grad_norm": 0.01692812889814377, "learning_rate": 0.00046346040920906985, "loss": 0.0519, "num_input_tokens_seen": 150993456, "step": 70000 }, { "epoch": 11.420065252854812, "grad_norm": 0.3583369851112366, "learning_rate": 0.000463389420081644, "loss": 0.1609, "num_input_tokens_seen": 151002800, "step": 70005 }, { "epoch": 11.420880913539968, "grad_norm": 0.004464812111109495, "learning_rate": 0.000463318431696178, "loss": 0.0056, "num_input_tokens_seen": 151012336, "step": 70010 }, { "epoch": 11.421696574225122, "grad_norm": 0.013724252581596375, "learning_rate": 0.00046324744405411034, "loss": 0.0078, "num_input_tokens_seen": 151024112, "step": 70015 }, { "epoch": 11.422512234910277, "grad_norm": 0.7499552369117737, "learning_rate": 0.00046317645715688015, "loss": 0.1438, "num_input_tokens_seen": 151034160, "step": 70020 }, { "epoch": 11.423327895595433, "grad_norm": 0.00220853416249156, "learning_rate": 0.00046310547100592557, "loss": 0.0141, "num_input_tokens_seen": 151044848, "step": 70025 }, { "epoch": 11.424143556280587, "grad_norm": 0.012397054582834244, "learning_rate": 0.0004630344856026855, "loss": 0.004, "num_input_tokens_seen": 151055952, "step": 70030 }, { "epoch": 11.424959216965743, "grad_norm": 0.4514055550098419, "learning_rate": 0.0004629635009485984, "loss": 0.1302, "num_input_tokens_seen": 151066320, "step": 70035 }, { "epoch": 11.425774877650896, "grad_norm": 0.0046886843629181385, "learning_rate": 0.000462892517045103, "loss": 0.0261, "num_input_tokens_seen": 151077232, "step": 70040 }, { "epoch": 11.426590538336052, "grad_norm": 0.027565719559788704, "learning_rate": 0.0004628215338936378, "loss": 0.0063, "num_input_tokens_seen": 151088592, "step": 70045 }, { "epoch": 11.427406199021208, "grad_norm": 0.08853894472122192, "learning_rate": 0.0004627505514956414, "loss": 0.0079, "num_input_tokens_seen": 151099408, "step": 70050 }, { "epoch": 11.428221859706362, "grad_norm": 0.012464499101042747, "learning_rate": 0.0004626795698525522, "loss": 0.0675, "num_input_tokens_seen": 151110352, "step": 70055 }, { "epoch": 11.429037520391518, "grad_norm": 0.706075131893158, "learning_rate": 0.00046260858896580916, "loss": 0.1218, "num_input_tokens_seen": 151120752, "step": 70060 }, { "epoch": 11.429853181076671, "grad_norm": 0.13364113867282867, "learning_rate": 0.0004625376088368502, "loss": 0.0715, "num_input_tokens_seen": 151131728, "step": 70065 }, { "epoch": 11.430668841761827, "grad_norm": 0.019462527707219124, "learning_rate": 0.0004624666294671143, "loss": 0.0047, "num_input_tokens_seen": 151142736, "step": 70070 }, { "epoch": 11.431484502446983, "grad_norm": 0.005331916734576225, "learning_rate": 0.00046239565085803966, "loss": 0.1237, "num_input_tokens_seen": 151154512, "step": 70075 }, { "epoch": 11.432300163132137, "grad_norm": 0.10158982127904892, "learning_rate": 0.000462324673011065, "loss": 0.014, "num_input_tokens_seen": 151164592, "step": 70080 }, { "epoch": 11.433115823817293, "grad_norm": 0.023551708087325096, "learning_rate": 0.00046225369592762844, "loss": 0.0095, "num_input_tokens_seen": 151175600, "step": 70085 }, { "epoch": 11.433931484502446, "grad_norm": 0.07136331498622894, "learning_rate": 0.00046218271960916886, "loss": 0.1341, "num_input_tokens_seen": 151185776, "step": 70090 }, { "epoch": 11.434747145187602, "grad_norm": 0.012324361130595207, "learning_rate": 0.0004621117440571242, "loss": 0.1015, "num_input_tokens_seen": 151196112, "step": 70095 }, { "epoch": 11.435562805872756, "grad_norm": 0.08165058493614197, "learning_rate": 0.0004620407692729333, "loss": 0.0677, "num_input_tokens_seen": 151205296, "step": 70100 }, { "epoch": 11.436378466557912, "grad_norm": 0.5726682543754578, "learning_rate": 0.0004619697952580342, "loss": 0.1298, "num_input_tokens_seen": 151215120, "step": 70105 }, { "epoch": 11.437194127243067, "grad_norm": 0.011453239247202873, "learning_rate": 0.00046189882201386564, "loss": 0.0259, "num_input_tokens_seen": 151224912, "step": 70110 }, { "epoch": 11.438009787928221, "grad_norm": 0.19847503304481506, "learning_rate": 0.0004618278495418655, "loss": 0.0802, "num_input_tokens_seen": 151235664, "step": 70115 }, { "epoch": 11.438825448613377, "grad_norm": 0.021076874807476997, "learning_rate": 0.0004617568778434725, "loss": 0.0086, "num_input_tokens_seen": 151246320, "step": 70120 }, { "epoch": 11.439641109298531, "grad_norm": 0.010452844202518463, "learning_rate": 0.0004616859069201251, "loss": 0.1402, "num_input_tokens_seen": 151256624, "step": 70125 }, { "epoch": 11.440456769983687, "grad_norm": 0.05219295620918274, "learning_rate": 0.0004616149367732612, "loss": 0.0389, "num_input_tokens_seen": 151267152, "step": 70130 }, { "epoch": 11.441272430668842, "grad_norm": 0.07638268172740936, "learning_rate": 0.0004615439674043195, "loss": 0.0231, "num_input_tokens_seen": 151276208, "step": 70135 }, { "epoch": 11.442088091353996, "grad_norm": 0.008495875634253025, "learning_rate": 0.00046147299881473783, "loss": 0.107, "num_input_tokens_seen": 151287824, "step": 70140 }, { "epoch": 11.442903752039152, "grad_norm": 0.1257314383983612, "learning_rate": 0.0004614020310059549, "loss": 0.039, "num_input_tokens_seen": 151297744, "step": 70145 }, { "epoch": 11.443719412724306, "grad_norm": 0.08938546478748322, "learning_rate": 0.0004613310639794086, "loss": 0.0404, "num_input_tokens_seen": 151308848, "step": 70150 }, { "epoch": 11.444535073409462, "grad_norm": 0.002766774967312813, "learning_rate": 0.0004612600977365376, "loss": 0.0337, "num_input_tokens_seen": 151319632, "step": 70155 }, { "epoch": 11.445350734094617, "grad_norm": 0.5758596062660217, "learning_rate": 0.0004611891322787796, "loss": 0.1288, "num_input_tokens_seen": 151330320, "step": 70160 }, { "epoch": 11.446166394779771, "grad_norm": 0.10260254144668579, "learning_rate": 0.0004611181676075734, "loss": 0.0067, "num_input_tokens_seen": 151339088, "step": 70165 }, { "epoch": 11.446982055464927, "grad_norm": 0.10541148483753204, "learning_rate": 0.00046104720372435647, "loss": 0.0139, "num_input_tokens_seen": 151349136, "step": 70170 }, { "epoch": 11.447797716150081, "grad_norm": 0.8663244843482971, "learning_rate": 0.0004609762406305676, "loss": 0.0309, "num_input_tokens_seen": 151360144, "step": 70175 }, { "epoch": 11.448613376835237, "grad_norm": 0.5425220131874084, "learning_rate": 0.0004609052783276447, "loss": 0.1497, "num_input_tokens_seen": 151371344, "step": 70180 }, { "epoch": 11.449429037520392, "grad_norm": 0.2737491726875305, "learning_rate": 0.0004608343168170259, "loss": 0.0379, "num_input_tokens_seen": 151383440, "step": 70185 }, { "epoch": 11.450244698205546, "grad_norm": 0.040290068835020065, "learning_rate": 0.0004607633561001493, "loss": 0.0215, "num_input_tokens_seen": 151393264, "step": 70190 }, { "epoch": 11.451060358890702, "grad_norm": 0.3368205428123474, "learning_rate": 0.0004606923961784532, "loss": 0.022, "num_input_tokens_seen": 151405584, "step": 70195 }, { "epoch": 11.451876019575856, "grad_norm": 0.48941439390182495, "learning_rate": 0.00046062143705337535, "loss": 0.1161, "num_input_tokens_seen": 151417840, "step": 70200 }, { "epoch": 11.452691680261012, "grad_norm": 0.011546339839696884, "learning_rate": 0.00046055047872635424, "loss": 0.0194, "num_input_tokens_seen": 151429584, "step": 70205 }, { "epoch": 11.453507340946166, "grad_norm": 0.14736422896385193, "learning_rate": 0.0004604795211988275, "loss": 0.0091, "num_input_tokens_seen": 151440240, "step": 70210 }, { "epoch": 11.454323001631321, "grad_norm": 0.022197742015123367, "learning_rate": 0.00046040856447223375, "loss": 0.0688, "num_input_tokens_seen": 151450864, "step": 70215 }, { "epoch": 11.455138662316477, "grad_norm": 0.4281180799007416, "learning_rate": 0.00046033760854801033, "loss": 0.1357, "num_input_tokens_seen": 151462384, "step": 70220 }, { "epoch": 11.455954323001631, "grad_norm": 0.012683551758527756, "learning_rate": 0.0004602666534275956, "loss": 0.0466, "num_input_tokens_seen": 151474160, "step": 70225 }, { "epoch": 11.456769983686787, "grad_norm": 0.029781511053442955, "learning_rate": 0.0004601956991124278, "loss": 0.0284, "num_input_tokens_seen": 151485680, "step": 70230 }, { "epoch": 11.45758564437194, "grad_norm": 0.565728485584259, "learning_rate": 0.00046012474560394443, "loss": 0.112, "num_input_tokens_seen": 151495824, "step": 70235 }, { "epoch": 11.458401305057096, "grad_norm": 0.009925301186740398, "learning_rate": 0.00046005379290358386, "loss": 0.0852, "num_input_tokens_seen": 151507344, "step": 70240 }, { "epoch": 11.459216965742252, "grad_norm": 0.08156031370162964, "learning_rate": 0.00045998284101278367, "loss": 0.0506, "num_input_tokens_seen": 151518736, "step": 70245 }, { "epoch": 11.460032626427406, "grad_norm": 0.023512817919254303, "learning_rate": 0.0004599118899329821, "loss": 0.0181, "num_input_tokens_seen": 151529456, "step": 70250 }, { "epoch": 11.460848287112562, "grad_norm": 0.060143209993839264, "learning_rate": 0.0004598409396656168, "loss": 0.0115, "num_input_tokens_seen": 151539984, "step": 70255 }, { "epoch": 11.461663947797716, "grad_norm": 0.13791543245315552, "learning_rate": 0.000459769990212126, "loss": 0.036, "num_input_tokens_seen": 151552112, "step": 70260 }, { "epoch": 11.462479608482871, "grad_norm": 0.07419054955244064, "learning_rate": 0.0004596990415739472, "loss": 0.0485, "num_input_tokens_seen": 151562352, "step": 70265 }, { "epoch": 11.463295269168025, "grad_norm": 0.20737969875335693, "learning_rate": 0.0004596280937525186, "loss": 0.2279, "num_input_tokens_seen": 151573552, "step": 70270 }, { "epoch": 11.464110929853181, "grad_norm": 0.25620317459106445, "learning_rate": 0.00045955714674927775, "loss": 0.0233, "num_input_tokens_seen": 151584656, "step": 70275 }, { "epoch": 11.464926590538337, "grad_norm": 0.004371915012598038, "learning_rate": 0.0004594862005656628, "loss": 0.0091, "num_input_tokens_seen": 151594960, "step": 70280 }, { "epoch": 11.46574225122349, "grad_norm": 0.0041153887286782265, "learning_rate": 0.00045941525520311116, "loss": 0.0177, "num_input_tokens_seen": 151606096, "step": 70285 }, { "epoch": 11.466557911908646, "grad_norm": 0.31976667046546936, "learning_rate": 0.0004593443106630611, "loss": 0.0278, "num_input_tokens_seen": 151617200, "step": 70290 }, { "epoch": 11.4673735725938, "grad_norm": 0.0033898057881742716, "learning_rate": 0.00045927336694695, "loss": 0.022, "num_input_tokens_seen": 151628688, "step": 70295 }, { "epoch": 11.468189233278956, "grad_norm": 0.11079110205173492, "learning_rate": 0.00045920242405621595, "loss": 0.021, "num_input_tokens_seen": 151640336, "step": 70300 }, { "epoch": 11.469004893964112, "grad_norm": 0.16678133606910706, "learning_rate": 0.0004591314819922963, "loss": 0.0246, "num_input_tokens_seen": 151651088, "step": 70305 }, { "epoch": 11.469820554649266, "grad_norm": 0.012565212324261665, "learning_rate": 0.0004590605407566292, "loss": 0.0359, "num_input_tokens_seen": 151660912, "step": 70310 }, { "epoch": 11.470636215334421, "grad_norm": 0.03168738633394241, "learning_rate": 0.00045898960035065204, "loss": 0.0096, "num_input_tokens_seen": 151671600, "step": 70315 }, { "epoch": 11.471451876019575, "grad_norm": 0.006412851624190807, "learning_rate": 0.00045891866077580267, "loss": 0.0292, "num_input_tokens_seen": 151682448, "step": 70320 }, { "epoch": 11.47226753670473, "grad_norm": 0.024964723736047745, "learning_rate": 0.0004588477220335188, "loss": 0.0079, "num_input_tokens_seen": 151693136, "step": 70325 }, { "epoch": 11.473083197389887, "grad_norm": 0.007657726760953665, "learning_rate": 0.000458776784125238, "loss": 0.0373, "num_input_tokens_seen": 151703728, "step": 70330 }, { "epoch": 11.47389885807504, "grad_norm": 0.07187838852405548, "learning_rate": 0.0004587058470523981, "loss": 0.0093, "num_input_tokens_seen": 151715248, "step": 70335 }, { "epoch": 11.474714518760196, "grad_norm": 0.011092299595475197, "learning_rate": 0.00045863491081643646, "loss": 0.0099, "num_input_tokens_seen": 151726000, "step": 70340 }, { "epoch": 11.47553017944535, "grad_norm": 0.008813002146780491, "learning_rate": 0.00045856397541879087, "loss": 0.0313, "num_input_tokens_seen": 151736368, "step": 70345 }, { "epoch": 11.476345840130506, "grad_norm": 0.04107338935136795, "learning_rate": 0.0004584930408608989, "loss": 0.0274, "num_input_tokens_seen": 151748816, "step": 70350 }, { "epoch": 11.477161500815662, "grad_norm": 0.004400517325848341, "learning_rate": 0.0004584221071441981, "loss": 0.0204, "num_input_tokens_seen": 151760240, "step": 70355 }, { "epoch": 11.477977161500815, "grad_norm": 0.006176350172609091, "learning_rate": 0.000458351174270126, "loss": 0.0071, "num_input_tokens_seen": 151770096, "step": 70360 }, { "epoch": 11.478792822185971, "grad_norm": 0.057530228048563004, "learning_rate": 0.00045828024224012025, "loss": 0.01, "num_input_tokens_seen": 151778704, "step": 70365 }, { "epoch": 11.479608482871125, "grad_norm": 0.08476022630929947, "learning_rate": 0.00045820931105561817, "loss": 0.0316, "num_input_tokens_seen": 151788624, "step": 70370 }, { "epoch": 11.48042414355628, "grad_norm": 0.05612792447209358, "learning_rate": 0.0004581383807180577, "loss": 0.0104, "num_input_tokens_seen": 151798736, "step": 70375 }, { "epoch": 11.481239804241435, "grad_norm": 0.03458394482731819, "learning_rate": 0.0004580674512288758, "loss": 0.1843, "num_input_tokens_seen": 151809840, "step": 70380 }, { "epoch": 11.48205546492659, "grad_norm": 0.004499739035964012, "learning_rate": 0.0004579965225895104, "loss": 0.007, "num_input_tokens_seen": 151821104, "step": 70385 }, { "epoch": 11.482871125611746, "grad_norm": 0.03095638006925583, "learning_rate": 0.00045792559480139854, "loss": 0.0035, "num_input_tokens_seen": 151832240, "step": 70390 }, { "epoch": 11.4836867862969, "grad_norm": 0.4860515594482422, "learning_rate": 0.0004578546678659781, "loss": 0.0831, "num_input_tokens_seen": 151842800, "step": 70395 }, { "epoch": 11.484502446982056, "grad_norm": 0.11692824214696884, "learning_rate": 0.00045778374178468605, "loss": 0.0549, "num_input_tokens_seen": 151853136, "step": 70400 }, { "epoch": 11.48531810766721, "grad_norm": 0.04792150482535362, "learning_rate": 0.0004577128165589603, "loss": 0.105, "num_input_tokens_seen": 151864240, "step": 70405 }, { "epoch": 11.486133768352365, "grad_norm": 0.010497079230844975, "learning_rate": 0.0004576418921902377, "loss": 0.0219, "num_input_tokens_seen": 151875152, "step": 70410 }, { "epoch": 11.486949429037521, "grad_norm": 0.18284708261489868, "learning_rate": 0.0004575709686799561, "loss": 0.017, "num_input_tokens_seen": 151886128, "step": 70415 }, { "epoch": 11.487765089722675, "grad_norm": 0.23983387649059296, "learning_rate": 0.00045750004602955246, "loss": 0.0093, "num_input_tokens_seen": 151897200, "step": 70420 }, { "epoch": 11.48858075040783, "grad_norm": 0.056810569018125534, "learning_rate": 0.0004574291242404645, "loss": 0.0183, "num_input_tokens_seen": 151907088, "step": 70425 }, { "epoch": 11.489396411092985, "grad_norm": 0.08919530361890793, "learning_rate": 0.00045735820331412914, "loss": 0.0117, "num_input_tokens_seen": 151917776, "step": 70430 }, { "epoch": 11.49021207177814, "grad_norm": 0.040679123252630234, "learning_rate": 0.0004572872832519839, "loss": 0.0278, "num_input_tokens_seen": 151928432, "step": 70435 }, { "epoch": 11.491027732463296, "grad_norm": 0.19265928864479065, "learning_rate": 0.0004572163640554662, "loss": 0.1516, "num_input_tokens_seen": 151940560, "step": 70440 }, { "epoch": 11.49184339314845, "grad_norm": 0.01292188186198473, "learning_rate": 0.00045714544572601296, "loss": 0.2039, "num_input_tokens_seen": 151950288, "step": 70445 }, { "epoch": 11.492659053833606, "grad_norm": 0.006297199986875057, "learning_rate": 0.0004570745282650619, "loss": 0.0027, "num_input_tokens_seen": 151959888, "step": 70450 }, { "epoch": 11.49347471451876, "grad_norm": 0.012761766090989113, "learning_rate": 0.00045700361167404967, "loss": 0.0072, "num_input_tokens_seen": 151972240, "step": 70455 }, { "epoch": 11.494290375203915, "grad_norm": 0.041415806859731674, "learning_rate": 0.0004569326959544141, "loss": 0.1221, "num_input_tokens_seen": 151982672, "step": 70460 }, { "epoch": 11.49510603588907, "grad_norm": 0.039911940693855286, "learning_rate": 0.00045686178110759183, "loss": 0.0051, "num_input_tokens_seen": 151993776, "step": 70465 }, { "epoch": 11.495921696574225, "grad_norm": 0.013242848217487335, "learning_rate": 0.0004567908671350206, "loss": 0.1071, "num_input_tokens_seen": 152003792, "step": 70470 }, { "epoch": 11.49673735725938, "grad_norm": 0.6725298166275024, "learning_rate": 0.00045671995403813686, "loss": 0.0532, "num_input_tokens_seen": 152013936, "step": 70475 }, { "epoch": 11.497553017944535, "grad_norm": 0.1235051304101944, "learning_rate": 0.0004566490418183785, "loss": 0.0938, "num_input_tokens_seen": 152024080, "step": 70480 }, { "epoch": 11.49836867862969, "grad_norm": 0.03612293303012848, "learning_rate": 0.00045657813047718203, "loss": 0.0205, "num_input_tokens_seen": 152034608, "step": 70485 }, { "epoch": 11.499184339314844, "grad_norm": 0.02391981892287731, "learning_rate": 0.000456507220015985, "loss": 0.0038, "num_input_tokens_seen": 152045840, "step": 70490 }, { "epoch": 11.5, "grad_norm": 0.01455886010080576, "learning_rate": 0.00045643631043622426, "loss": 0.0426, "num_input_tokens_seen": 152056208, "step": 70495 }, { "epoch": 11.500815660685156, "grad_norm": 0.01224453840404749, "learning_rate": 0.00045636540173933697, "loss": 0.0144, "num_input_tokens_seen": 152066416, "step": 70500 }, { "epoch": 11.50163132137031, "grad_norm": 0.2034900188446045, "learning_rate": 0.0004562944939267602, "loss": 0.1118, "num_input_tokens_seen": 152077968, "step": 70505 }, { "epoch": 11.502446982055465, "grad_norm": 0.00892978347837925, "learning_rate": 0.00045622358699993093, "loss": 0.12, "num_input_tokens_seen": 152089424, "step": 70510 }, { "epoch": 11.50326264274062, "grad_norm": 1.0323071479797363, "learning_rate": 0.00045615268096028613, "loss": 0.0809, "num_input_tokens_seen": 152099408, "step": 70515 }, { "epoch": 11.504078303425775, "grad_norm": 0.004334879107773304, "learning_rate": 0.0004560817758092631, "loss": 0.012, "num_input_tokens_seen": 152110032, "step": 70520 }, { "epoch": 11.50489396411093, "grad_norm": 0.05706854537129402, "learning_rate": 0.00045601087154829834, "loss": 0.0305, "num_input_tokens_seen": 152121392, "step": 70525 }, { "epoch": 11.505709624796085, "grad_norm": 0.04000268876552582, "learning_rate": 0.00045593996817882925, "loss": 0.027, "num_input_tokens_seen": 152131344, "step": 70530 }, { "epoch": 11.50652528548124, "grad_norm": 0.11605363339185715, "learning_rate": 0.0004558690657022925, "loss": 0.0083, "num_input_tokens_seen": 152143312, "step": 70535 }, { "epoch": 11.507340946166394, "grad_norm": 0.014390820637345314, "learning_rate": 0.0004557981641201252, "loss": 0.0153, "num_input_tokens_seen": 152153936, "step": 70540 }, { "epoch": 11.50815660685155, "grad_norm": 0.0077110701240599155, "learning_rate": 0.000455727263433764, "loss": 0.1211, "num_input_tokens_seen": 152165264, "step": 70545 }, { "epoch": 11.508972267536706, "grad_norm": 0.9520329236984253, "learning_rate": 0.000455656363644646, "loss": 0.0434, "num_input_tokens_seen": 152177520, "step": 70550 }, { "epoch": 11.50978792822186, "grad_norm": 0.2102394849061966, "learning_rate": 0.0004555854647542083, "loss": 0.0173, "num_input_tokens_seen": 152188816, "step": 70555 }, { "epoch": 11.510603588907015, "grad_norm": 0.039075057953596115, "learning_rate": 0.00045551456676388725, "loss": 0.0315, "num_input_tokens_seen": 152198896, "step": 70560 }, { "epoch": 11.51141924959217, "grad_norm": 0.27093350887298584, "learning_rate": 0.00045544366967512014, "loss": 0.1174, "num_input_tokens_seen": 152210704, "step": 70565 }, { "epoch": 11.512234910277325, "grad_norm": 0.006555273197591305, "learning_rate": 0.0004553727734893434, "loss": 0.0171, "num_input_tokens_seen": 152221904, "step": 70570 }, { "epoch": 11.513050570962479, "grad_norm": 0.5592023730278015, "learning_rate": 0.0004553018782079942, "loss": 0.0679, "num_input_tokens_seen": 152233360, "step": 70575 }, { "epoch": 11.513866231647635, "grad_norm": 0.033301740884780884, "learning_rate": 0.00045523098383250894, "loss": 0.0652, "num_input_tokens_seen": 152244496, "step": 70580 }, { "epoch": 11.51468189233279, "grad_norm": 0.00924456026405096, "learning_rate": 0.0004551600903643248, "loss": 0.0116, "num_input_tokens_seen": 152255984, "step": 70585 }, { "epoch": 11.515497553017944, "grad_norm": 0.02625368721783161, "learning_rate": 0.00045508919780487805, "loss": 0.0372, "num_input_tokens_seen": 152267504, "step": 70590 }, { "epoch": 11.5163132137031, "grad_norm": 0.007500887848436832, "learning_rate": 0.000455018306155606, "loss": 0.0431, "num_input_tokens_seen": 152278416, "step": 70595 }, { "epoch": 11.517128874388254, "grad_norm": 0.011894084513187408, "learning_rate": 0.0004549474154179447, "loss": 0.0771, "num_input_tokens_seen": 152288624, "step": 70600 }, { "epoch": 11.51794453507341, "grad_norm": 0.021677086129784584, "learning_rate": 0.0004548765255933315, "loss": 0.0191, "num_input_tokens_seen": 152300848, "step": 70605 }, { "epoch": 11.518760195758565, "grad_norm": 0.01699345000088215, "learning_rate": 0.00045480563668320244, "loss": 0.0383, "num_input_tokens_seen": 152310928, "step": 70610 }, { "epoch": 11.51957585644372, "grad_norm": 0.17724208533763885, "learning_rate": 0.0004547347486889948, "loss": 0.0863, "num_input_tokens_seen": 152323376, "step": 70615 }, { "epoch": 11.520391517128875, "grad_norm": 0.010162370279431343, "learning_rate": 0.00045466386161214465, "loss": 0.1014, "num_input_tokens_seen": 152336016, "step": 70620 }, { "epoch": 11.521207177814029, "grad_norm": 0.5940483808517456, "learning_rate": 0.00045459297545408906, "loss": 0.1022, "num_input_tokens_seen": 152346000, "step": 70625 }, { "epoch": 11.522022838499185, "grad_norm": 0.12429466098546982, "learning_rate": 0.0004545220902162642, "loss": 0.0245, "num_input_tokens_seen": 152356176, "step": 70630 }, { "epoch": 11.522838499184338, "grad_norm": 0.004564465023577213, "learning_rate": 0.000454451205900107, "loss": 0.0122, "num_input_tokens_seen": 152367312, "step": 70635 }, { "epoch": 11.523654159869494, "grad_norm": 0.006530492100864649, "learning_rate": 0.00045438032250705394, "loss": 0.0163, "num_input_tokens_seen": 152378288, "step": 70640 }, { "epoch": 11.52446982055465, "grad_norm": 0.007590336259454489, "learning_rate": 0.00045430944003854143, "loss": 0.0122, "num_input_tokens_seen": 152389520, "step": 70645 }, { "epoch": 11.525285481239804, "grad_norm": 0.012627356685698032, "learning_rate": 0.00045423855849600615, "loss": 0.0779, "num_input_tokens_seen": 152399312, "step": 70650 }, { "epoch": 11.52610114192496, "grad_norm": 0.0030343227554112673, "learning_rate": 0.00045416767788088435, "loss": 0.0211, "num_input_tokens_seen": 152411312, "step": 70655 }, { "epoch": 11.526916802610113, "grad_norm": 0.005728952120989561, "learning_rate": 0.00045409679819461286, "loss": 0.0338, "num_input_tokens_seen": 152422640, "step": 70660 }, { "epoch": 11.52773246329527, "grad_norm": 0.07106272131204605, "learning_rate": 0.000454025919438628, "loss": 0.0091, "num_input_tokens_seen": 152433456, "step": 70665 }, { "epoch": 11.528548123980425, "grad_norm": 0.4708207845687866, "learning_rate": 0.00045395504161436617, "loss": 0.0323, "num_input_tokens_seen": 152445232, "step": 70670 }, { "epoch": 11.529363784665579, "grad_norm": 0.6353825926780701, "learning_rate": 0.0004538841647232639, "loss": 0.0391, "num_input_tokens_seen": 152454896, "step": 70675 }, { "epoch": 11.530179445350734, "grad_norm": 0.3041880130767822, "learning_rate": 0.0004538132887667574, "loss": 0.0585, "num_input_tokens_seen": 152464912, "step": 70680 }, { "epoch": 11.530995106035888, "grad_norm": 0.0640154778957367, "learning_rate": 0.0004537424137462832, "loss": 0.0178, "num_input_tokens_seen": 152475856, "step": 70685 }, { "epoch": 11.531810766721044, "grad_norm": 0.4397546648979187, "learning_rate": 0.0004536715396632779, "loss": 0.0824, "num_input_tokens_seen": 152486960, "step": 70690 }, { "epoch": 11.5326264274062, "grad_norm": 0.09305725991725922, "learning_rate": 0.00045360066651917733, "loss": 0.1141, "num_input_tokens_seen": 152498192, "step": 70695 }, { "epoch": 11.533442088091354, "grad_norm": 0.0798191949725151, "learning_rate": 0.00045352979431541833, "loss": 0.0784, "num_input_tokens_seen": 152509328, "step": 70700 }, { "epoch": 11.53425774877651, "grad_norm": 0.09944009780883789, "learning_rate": 0.0004534589230534368, "loss": 0.0097, "num_input_tokens_seen": 152519408, "step": 70705 }, { "epoch": 11.535073409461663, "grad_norm": 0.599245011806488, "learning_rate": 0.00045338805273466954, "loss": 0.0317, "num_input_tokens_seen": 152529968, "step": 70710 }, { "epoch": 11.535889070146819, "grad_norm": 0.05829024314880371, "learning_rate": 0.00045331718336055223, "loss": 0.0424, "num_input_tokens_seen": 152540944, "step": 70715 }, { "epoch": 11.536704730831975, "grad_norm": 0.01722203940153122, "learning_rate": 0.0004532463149325216, "loss": 0.1414, "num_input_tokens_seen": 152552432, "step": 70720 }, { "epoch": 11.537520391517129, "grad_norm": 0.004326773341745138, "learning_rate": 0.00045317544745201354, "loss": 0.0199, "num_input_tokens_seen": 152563536, "step": 70725 }, { "epoch": 11.538336052202284, "grad_norm": 0.027077220380306244, "learning_rate": 0.00045310458092046464, "loss": 0.0116, "num_input_tokens_seen": 152572688, "step": 70730 }, { "epoch": 11.539151712887438, "grad_norm": 0.4233563542366028, "learning_rate": 0.0004530337153393107, "loss": 0.046, "num_input_tokens_seen": 152582928, "step": 70735 }, { "epoch": 11.539967373572594, "grad_norm": 0.6607957482337952, "learning_rate": 0.00045296285070998835, "loss": 0.116, "num_input_tokens_seen": 152592592, "step": 70740 }, { "epoch": 11.540783034257748, "grad_norm": 0.025212831795215607, "learning_rate": 0.0004528919870339332, "loss": 0.0187, "num_input_tokens_seen": 152602576, "step": 70745 }, { "epoch": 11.541598694942904, "grad_norm": 0.02281561866402626, "learning_rate": 0.00045282112431258194, "loss": 0.0057, "num_input_tokens_seen": 152614128, "step": 70750 }, { "epoch": 11.54241435562806, "grad_norm": 0.011622527614235878, "learning_rate": 0.00045275026254737027, "loss": 0.0101, "num_input_tokens_seen": 152625744, "step": 70755 }, { "epoch": 11.543230016313213, "grad_norm": 0.5141403675079346, "learning_rate": 0.0004526794017397344, "loss": 0.0413, "num_input_tokens_seen": 152636624, "step": 70760 }, { "epoch": 11.544045676998369, "grad_norm": 0.00718394061550498, "learning_rate": 0.0004526085418911108, "loss": 0.0118, "num_input_tokens_seen": 152648752, "step": 70765 }, { "epoch": 11.544861337683523, "grad_norm": 0.030112959444522858, "learning_rate": 0.0004525376830029349, "loss": 0.0072, "num_input_tokens_seen": 152657680, "step": 70770 }, { "epoch": 11.545676998368679, "grad_norm": 0.2331847846508026, "learning_rate": 0.00045246682507664335, "loss": 0.017, "num_input_tokens_seen": 152667600, "step": 70775 }, { "epoch": 11.546492659053834, "grad_norm": 0.018306903541088104, "learning_rate": 0.0004523959681136716, "loss": 0.0247, "num_input_tokens_seen": 152678064, "step": 70780 }, { "epoch": 11.547308319738988, "grad_norm": 0.02114875055849552, "learning_rate": 0.00045232511211545625, "loss": 0.092, "num_input_tokens_seen": 152688656, "step": 70785 }, { "epoch": 11.548123980424144, "grad_norm": 0.3934035897254944, "learning_rate": 0.0004522542570834327, "loss": 0.0218, "num_input_tokens_seen": 152697936, "step": 70790 }, { "epoch": 11.548939641109298, "grad_norm": 0.0030231100972741842, "learning_rate": 0.0004521834030190375, "loss": 0.0084, "num_input_tokens_seen": 152708368, "step": 70795 }, { "epoch": 11.549755301794454, "grad_norm": 0.21377861499786377, "learning_rate": 0.000452112549923706, "loss": 0.0274, "num_input_tokens_seen": 152719184, "step": 70800 }, { "epoch": 11.550570962479608, "grad_norm": 0.036383479833602905, "learning_rate": 0.00045204169779887454, "loss": 0.0047, "num_input_tokens_seen": 152730224, "step": 70805 }, { "epoch": 11.551386623164763, "grad_norm": 0.0021377517841756344, "learning_rate": 0.0004519708466459789, "loss": 0.0441, "num_input_tokens_seen": 152741040, "step": 70810 }, { "epoch": 11.552202283849919, "grad_norm": 0.009659466333687305, "learning_rate": 0.0004518999964664551, "loss": 0.0353, "num_input_tokens_seen": 152751728, "step": 70815 }, { "epoch": 11.553017944535073, "grad_norm": 0.5286193490028381, "learning_rate": 0.0004518291472617387, "loss": 0.0189, "num_input_tokens_seen": 152762000, "step": 70820 }, { "epoch": 11.553833605220229, "grad_norm": 0.003251942340284586, "learning_rate": 0.00045175829903326594, "loss": 0.0125, "num_input_tokens_seen": 152772272, "step": 70825 }, { "epoch": 11.554649265905383, "grad_norm": 0.013677936047315598, "learning_rate": 0.0004516874517824722, "loss": 0.0138, "num_input_tokens_seen": 152784080, "step": 70830 }, { "epoch": 11.555464926590538, "grad_norm": 0.010015873238444328, "learning_rate": 0.0004516166055107938, "loss": 0.0114, "num_input_tokens_seen": 152795184, "step": 70835 }, { "epoch": 11.556280587275694, "grad_norm": 0.011227691546082497, "learning_rate": 0.00045154576021966605, "loss": 0.0125, "num_input_tokens_seen": 152806000, "step": 70840 }, { "epoch": 11.557096247960848, "grad_norm": 0.0026050086598843336, "learning_rate": 0.00045147491591052515, "loss": 0.1293, "num_input_tokens_seen": 152815376, "step": 70845 }, { "epoch": 11.557911908646004, "grad_norm": 0.19831494987010956, "learning_rate": 0.0004514040725848064, "loss": 0.1175, "num_input_tokens_seen": 152825168, "step": 70850 }, { "epoch": 11.558727569331158, "grad_norm": 0.009871584363281727, "learning_rate": 0.0004513332302439461, "loss": 0.0026, "num_input_tokens_seen": 152835952, "step": 70855 }, { "epoch": 11.559543230016313, "grad_norm": 0.9352573752403259, "learning_rate": 0.00045126238888937927, "loss": 0.0836, "num_input_tokens_seen": 152847760, "step": 70860 }, { "epoch": 11.560358890701469, "grad_norm": 0.0021731650922447443, "learning_rate": 0.00045119154852254204, "loss": 0.0022, "num_input_tokens_seen": 152856944, "step": 70865 }, { "epoch": 11.561174551386623, "grad_norm": 0.006410792004317045, "learning_rate": 0.0004511207091448701, "loss": 0.0678, "num_input_tokens_seen": 152867216, "step": 70870 }, { "epoch": 11.561990212071779, "grad_norm": 0.03509215638041496, "learning_rate": 0.0004510498707577989, "loss": 0.0588, "num_input_tokens_seen": 152878288, "step": 70875 }, { "epoch": 11.562805872756933, "grad_norm": 0.011280583217740059, "learning_rate": 0.0004509790333627644, "loss": 0.0547, "num_input_tokens_seen": 152888816, "step": 70880 }, { "epoch": 11.563621533442088, "grad_norm": 0.08911504596471786, "learning_rate": 0.00045090819696120166, "loss": 0.0096, "num_input_tokens_seen": 152899824, "step": 70885 }, { "epoch": 11.564437194127244, "grad_norm": 0.016456160694360733, "learning_rate": 0.0004508373615545469, "loss": 0.0569, "num_input_tokens_seen": 152910320, "step": 70890 }, { "epoch": 11.565252854812398, "grad_norm": 0.0012616817839443684, "learning_rate": 0.00045076652714423507, "loss": 0.0784, "num_input_tokens_seen": 152921968, "step": 70895 }, { "epoch": 11.566068515497554, "grad_norm": 0.7363735437393188, "learning_rate": 0.00045069569373170227, "loss": 0.0203, "num_input_tokens_seen": 152933232, "step": 70900 }, { "epoch": 11.566884176182707, "grad_norm": 0.15100756287574768, "learning_rate": 0.0004506248613183836, "loss": 0.086, "num_input_tokens_seen": 152942384, "step": 70905 }, { "epoch": 11.567699836867863, "grad_norm": 0.004434991627931595, "learning_rate": 0.00045055402990571493, "loss": 0.0223, "num_input_tokens_seen": 152953904, "step": 70910 }, { "epoch": 11.568515497553017, "grad_norm": 0.003914274275302887, "learning_rate": 0.00045048319949513136, "loss": 0.0065, "num_input_tokens_seen": 152964784, "step": 70915 }, { "epoch": 11.569331158238173, "grad_norm": 0.07974116504192352, "learning_rate": 0.0004504123700880688, "loss": 0.0351, "num_input_tokens_seen": 152976176, "step": 70920 }, { "epoch": 11.570146818923329, "grad_norm": 3.0045619010925293, "learning_rate": 0.00045034154168596224, "loss": 0.0525, "num_input_tokens_seen": 152986768, "step": 70925 }, { "epoch": 11.570962479608482, "grad_norm": 0.4105585217475891, "learning_rate": 0.00045027071429024757, "loss": 0.0678, "num_input_tokens_seen": 152999408, "step": 70930 }, { "epoch": 11.571778140293638, "grad_norm": 0.02641172707080841, "learning_rate": 0.00045019988790235974, "loss": 0.2185, "num_input_tokens_seen": 153010064, "step": 70935 }, { "epoch": 11.572593800978792, "grad_norm": 0.5205081701278687, "learning_rate": 0.0004501290625237345, "loss": 0.05, "num_input_tokens_seen": 153020656, "step": 70940 }, { "epoch": 11.573409461663948, "grad_norm": 0.008849749341607094, "learning_rate": 0.00045005823815580696, "loss": 0.0034, "num_input_tokens_seen": 153030256, "step": 70945 }, { "epoch": 11.574225122349104, "grad_norm": 0.13929007947444916, "learning_rate": 0.00044998741480001264, "loss": 0.1446, "num_input_tokens_seen": 153040976, "step": 70950 }, { "epoch": 11.575040783034257, "grad_norm": 0.5661994814872742, "learning_rate": 0.00044991659245778684, "loss": 0.0279, "num_input_tokens_seen": 153051696, "step": 70955 }, { "epoch": 11.575856443719413, "grad_norm": 0.1435222178697586, "learning_rate": 0.00044984577113056477, "loss": 0.0279, "num_input_tokens_seen": 153063728, "step": 70960 }, { "epoch": 11.576672104404567, "grad_norm": 0.05052753537893295, "learning_rate": 0.0004497749508197818, "loss": 0.0069, "num_input_tokens_seen": 153074512, "step": 70965 }, { "epoch": 11.577487765089723, "grad_norm": 0.25067660212516785, "learning_rate": 0.00044970413152687304, "loss": 0.152, "num_input_tokens_seen": 153086032, "step": 70970 }, { "epoch": 11.578303425774878, "grad_norm": 0.0028679079841822386, "learning_rate": 0.000449633313253274, "loss": 0.016, "num_input_tokens_seen": 153097776, "step": 70975 }, { "epoch": 11.579119086460032, "grad_norm": 0.017701853066682816, "learning_rate": 0.00044956249600041975, "loss": 0.0689, "num_input_tokens_seen": 153107696, "step": 70980 }, { "epoch": 11.579934747145188, "grad_norm": 0.02152288146317005, "learning_rate": 0.00044949167976974553, "loss": 0.1422, "num_input_tokens_seen": 153118384, "step": 70985 }, { "epoch": 11.580750407830342, "grad_norm": 0.27247855067253113, "learning_rate": 0.00044942086456268643, "loss": 0.0594, "num_input_tokens_seen": 153130576, "step": 70990 }, { "epoch": 11.581566068515498, "grad_norm": 0.002401249250397086, "learning_rate": 0.0004493500503806777, "loss": 0.014, "num_input_tokens_seen": 153142352, "step": 70995 }, { "epoch": 11.582381729200652, "grad_norm": 0.029120981693267822, "learning_rate": 0.0004492792372251544, "loss": 0.0046, "num_input_tokens_seen": 153152720, "step": 71000 }, { "epoch": 11.583197389885807, "grad_norm": 0.10441119223833084, "learning_rate": 0.00044920842509755187, "loss": 0.0617, "num_input_tokens_seen": 153162640, "step": 71005 }, { "epoch": 11.584013050570963, "grad_norm": 0.003344520227983594, "learning_rate": 0.0004491376139993048, "loss": 0.0577, "num_input_tokens_seen": 153174512, "step": 71010 }, { "epoch": 11.584828711256117, "grad_norm": 0.020733484998345375, "learning_rate": 0.0004490668039318488, "loss": 0.0095, "num_input_tokens_seen": 153186288, "step": 71015 }, { "epoch": 11.585644371941273, "grad_norm": 0.011364296078681946, "learning_rate": 0.00044899599489661837, "loss": 0.0309, "num_input_tokens_seen": 153198096, "step": 71020 }, { "epoch": 11.586460032626427, "grad_norm": 0.07650559395551682, "learning_rate": 0.000448925186895049, "loss": 0.084, "num_input_tokens_seen": 153209968, "step": 71025 }, { "epoch": 11.587275693311582, "grad_norm": 0.32257285714149475, "learning_rate": 0.0004488543799285753, "loss": 0.1018, "num_input_tokens_seen": 153220624, "step": 71030 }, { "epoch": 11.588091353996738, "grad_norm": 0.057665303349494934, "learning_rate": 0.00044878357399863266, "loss": 0.0117, "num_input_tokens_seen": 153230992, "step": 71035 }, { "epoch": 11.588907014681892, "grad_norm": 0.00784731563180685, "learning_rate": 0.0004487127691066558, "loss": 0.0067, "num_input_tokens_seen": 153241840, "step": 71040 }, { "epoch": 11.589722675367048, "grad_norm": 0.03375278785824776, "learning_rate": 0.0004486419652540798, "loss": 0.0101, "num_input_tokens_seen": 153252912, "step": 71045 }, { "epoch": 11.590538336052202, "grad_norm": 0.006662752479314804, "learning_rate": 0.0004485711624423393, "loss": 0.035, "num_input_tokens_seen": 153263568, "step": 71050 }, { "epoch": 11.591353996737357, "grad_norm": 0.47220101952552795, "learning_rate": 0.0004485003606728698, "loss": 0.0431, "num_input_tokens_seen": 153274704, "step": 71055 }, { "epoch": 11.592169657422513, "grad_norm": 0.23632977902889252, "learning_rate": 0.0004484295599471054, "loss": 0.1074, "num_input_tokens_seen": 153286256, "step": 71060 }, { "epoch": 11.592985318107667, "grad_norm": 0.25503629446029663, "learning_rate": 0.00044835876026648176, "loss": 0.0306, "num_input_tokens_seen": 153295760, "step": 71065 }, { "epoch": 11.593800978792823, "grad_norm": 0.32955577969551086, "learning_rate": 0.00044828796163243315, "loss": 0.21, "num_input_tokens_seen": 153305456, "step": 71070 }, { "epoch": 11.594616639477977, "grad_norm": 0.06216052919626236, "learning_rate": 0.0004482171640463945, "loss": 0.0647, "num_input_tokens_seen": 153317040, "step": 71075 }, { "epoch": 11.595432300163132, "grad_norm": 0.3749069273471832, "learning_rate": 0.000448146367509801, "loss": 0.0569, "num_input_tokens_seen": 153327856, "step": 71080 }, { "epoch": 11.596247960848288, "grad_norm": 0.0028909286484122276, "learning_rate": 0.0004480755720240869, "loss": 0.0286, "num_input_tokens_seen": 153338416, "step": 71085 }, { "epoch": 11.597063621533442, "grad_norm": 0.13295872509479523, "learning_rate": 0.0004480047775906874, "loss": 0.0559, "num_input_tokens_seen": 153348528, "step": 71090 }, { "epoch": 11.597879282218598, "grad_norm": 0.02061198465526104, "learning_rate": 0.0004479339842110368, "loss": 0.0293, "num_input_tokens_seen": 153359728, "step": 71095 }, { "epoch": 11.598694942903752, "grad_norm": 0.01515294797718525, "learning_rate": 0.0004478631918865704, "loss": 0.0084, "num_input_tokens_seen": 153370576, "step": 71100 }, { "epoch": 11.599510603588907, "grad_norm": 0.10931520909070969, "learning_rate": 0.00044779240061872225, "loss": 0.0083, "num_input_tokens_seen": 153381808, "step": 71105 }, { "epoch": 11.600326264274061, "grad_norm": 0.009946693666279316, "learning_rate": 0.00044772161040892755, "loss": 0.0065, "num_input_tokens_seen": 153392752, "step": 71110 }, { "epoch": 11.601141924959217, "grad_norm": 0.006996045354753733, "learning_rate": 0.00044765082125862053, "loss": 0.0122, "num_input_tokens_seen": 153402448, "step": 71115 }, { "epoch": 11.601957585644373, "grad_norm": 0.02039383165538311, "learning_rate": 0.0004475800331692361, "loss": 0.0982, "num_input_tokens_seen": 153414448, "step": 71120 }, { "epoch": 11.602773246329527, "grad_norm": 0.0049167354591190815, "learning_rate": 0.0004475092461422089, "loss": 0.1874, "num_input_tokens_seen": 153426160, "step": 71125 }, { "epoch": 11.603588907014682, "grad_norm": 0.03064257651567459, "learning_rate": 0.0004474384601789733, "loss": 0.0167, "num_input_tokens_seen": 153437072, "step": 71130 }, { "epoch": 11.604404567699836, "grad_norm": 0.0045207832008600235, "learning_rate": 0.00044736767528096407, "loss": 0.0325, "num_input_tokens_seen": 153447792, "step": 71135 }, { "epoch": 11.605220228384992, "grad_norm": 0.008043226785957813, "learning_rate": 0.0004472968914496156, "loss": 0.0063, "num_input_tokens_seen": 153458768, "step": 71140 }, { "epoch": 11.606035889070148, "grad_norm": 0.004406215623021126, "learning_rate": 0.00044722610868636243, "loss": 0.1832, "num_input_tokens_seen": 153470352, "step": 71145 }, { "epoch": 11.606851549755302, "grad_norm": 0.07928340882062912, "learning_rate": 0.00044715532699263926, "loss": 0.0087, "num_input_tokens_seen": 153482160, "step": 71150 }, { "epoch": 11.607667210440457, "grad_norm": 0.017992712557315826, "learning_rate": 0.00044708454636988026, "loss": 0.0507, "num_input_tokens_seen": 153492880, "step": 71155 }, { "epoch": 11.608482871125611, "grad_norm": 0.5003166794776917, "learning_rate": 0.00044701376681952033, "loss": 0.2685, "num_input_tokens_seen": 153502864, "step": 71160 }, { "epoch": 11.609298531810767, "grad_norm": 0.002189287915825844, "learning_rate": 0.00044694298834299336, "loss": 0.0145, "num_input_tokens_seen": 153513936, "step": 71165 }, { "epoch": 11.61011419249592, "grad_norm": 0.015547013841569424, "learning_rate": 0.00044687221094173425, "loss": 0.0145, "num_input_tokens_seen": 153523728, "step": 71170 }, { "epoch": 11.610929853181077, "grad_norm": 0.5819246768951416, "learning_rate": 0.0004468014346171769, "loss": 0.24, "num_input_tokens_seen": 153533872, "step": 71175 }, { "epoch": 11.611745513866232, "grad_norm": 0.689093828201294, "learning_rate": 0.0004467306593707563, "loss": 0.255, "num_input_tokens_seen": 153545104, "step": 71180 }, { "epoch": 11.612561174551386, "grad_norm": 0.017829138785600662, "learning_rate": 0.00044665988520390624, "loss": 0.0344, "num_input_tokens_seen": 153556240, "step": 71185 }, { "epoch": 11.613376835236542, "grad_norm": 0.02391608990728855, "learning_rate": 0.0004465891121180612, "loss": 0.0143, "num_input_tokens_seen": 153566224, "step": 71190 }, { "epoch": 11.614192495921696, "grad_norm": 0.02853383868932724, "learning_rate": 0.0004465183401146558, "loss": 0.0898, "num_input_tokens_seen": 153577104, "step": 71195 }, { "epoch": 11.615008156606851, "grad_norm": 0.10722953081130981, "learning_rate": 0.00044644756919512386, "loss": 0.0578, "num_input_tokens_seen": 153588944, "step": 71200 }, { "epoch": 11.615823817292007, "grad_norm": 0.1208651214838028, "learning_rate": 0.00044637679936090013, "loss": 0.0294, "num_input_tokens_seen": 153599952, "step": 71205 }, { "epoch": 11.616639477977161, "grad_norm": 0.08732923865318298, "learning_rate": 0.00044630603061341837, "loss": 0.0507, "num_input_tokens_seen": 153610256, "step": 71210 }, { "epoch": 11.617455138662317, "grad_norm": 0.20653685927391052, "learning_rate": 0.00044623526295411314, "loss": 0.0409, "num_input_tokens_seen": 153621200, "step": 71215 }, { "epoch": 11.61827079934747, "grad_norm": 0.027450265362858772, "learning_rate": 0.00044616449638441836, "loss": 0.0145, "num_input_tokens_seen": 153631216, "step": 71220 }, { "epoch": 11.619086460032626, "grad_norm": 0.04462236166000366, "learning_rate": 0.0004460937309057686, "loss": 0.0265, "num_input_tokens_seen": 153642128, "step": 71225 }, { "epoch": 11.619902120717782, "grad_norm": 0.022490520030260086, "learning_rate": 0.0004460229665195975, "loss": 0.0101, "num_input_tokens_seen": 153652336, "step": 71230 }, { "epoch": 11.620717781402936, "grad_norm": 0.2652267515659332, "learning_rate": 0.0004459522032273397, "loss": 0.0517, "num_input_tokens_seen": 153662768, "step": 71235 }, { "epoch": 11.621533442088092, "grad_norm": 0.06147722527384758, "learning_rate": 0.00044588144103042883, "loss": 0.1395, "num_input_tokens_seen": 153671856, "step": 71240 }, { "epoch": 11.622349102773246, "grad_norm": 0.024843445047736168, "learning_rate": 0.00044581067993029944, "loss": 0.0848, "num_input_tokens_seen": 153683536, "step": 71245 }, { "epoch": 11.623164763458401, "grad_norm": 0.00462621683254838, "learning_rate": 0.0004457399199283852, "loss": 0.0173, "num_input_tokens_seen": 153693072, "step": 71250 }, { "epoch": 11.623980424143557, "grad_norm": 0.02184155024588108, "learning_rate": 0.00044566916102612043, "loss": 0.0139, "num_input_tokens_seen": 153704272, "step": 71255 }, { "epoch": 11.624796084828711, "grad_norm": 0.021285446360707283, "learning_rate": 0.0004455984032249389, "loss": 0.08, "num_input_tokens_seen": 153715472, "step": 71260 }, { "epoch": 11.625611745513867, "grad_norm": 0.05200935900211334, "learning_rate": 0.0004455276465262748, "loss": 0.0295, "num_input_tokens_seen": 153725776, "step": 71265 }, { "epoch": 11.62642740619902, "grad_norm": 0.026450229808688164, "learning_rate": 0.0004454568909315621, "loss": 0.0461, "num_input_tokens_seen": 153736976, "step": 71270 }, { "epoch": 11.627243066884176, "grad_norm": 0.007907722145318985, "learning_rate": 0.0004453861364422347, "loss": 0.0142, "num_input_tokens_seen": 153748624, "step": 71275 }, { "epoch": 11.62805872756933, "grad_norm": 0.35814040899276733, "learning_rate": 0.00044531538305972646, "loss": 0.0373, "num_input_tokens_seen": 153760848, "step": 71280 }, { "epoch": 11.628874388254486, "grad_norm": 0.05967801436781883, "learning_rate": 0.0004452446307854714, "loss": 0.1284, "num_input_tokens_seen": 153771760, "step": 71285 }, { "epoch": 11.629690048939642, "grad_norm": 0.0072881425730884075, "learning_rate": 0.00044517387962090323, "loss": 0.0107, "num_input_tokens_seen": 153782032, "step": 71290 }, { "epoch": 11.630505709624796, "grad_norm": 0.07540024816989899, "learning_rate": 0.00044510312956745607, "loss": 0.0617, "num_input_tokens_seen": 153791600, "step": 71295 }, { "epoch": 11.631321370309951, "grad_norm": 0.1195218488574028, "learning_rate": 0.00044503238062656357, "loss": 0.039, "num_input_tokens_seen": 153802864, "step": 71300 }, { "epoch": 11.632137030995105, "grad_norm": 0.5370811223983765, "learning_rate": 0.0004449616327996597, "loss": 0.0752, "num_input_tokens_seen": 153813552, "step": 71305 }, { "epoch": 11.632952691680261, "grad_norm": 0.04547497630119324, "learning_rate": 0.0004448908860881781, "loss": 0.0136, "num_input_tokens_seen": 153824880, "step": 71310 }, { "epoch": 11.633768352365417, "grad_norm": 0.009301722049713135, "learning_rate": 0.0004448201404935525, "loss": 0.0131, "num_input_tokens_seen": 153837328, "step": 71315 }, { "epoch": 11.63458401305057, "grad_norm": 0.013761993497610092, "learning_rate": 0.00044474939601721705, "loss": 0.0326, "num_input_tokens_seen": 153848944, "step": 71320 }, { "epoch": 11.635399673735726, "grad_norm": 0.006836900487542152, "learning_rate": 0.00044467865266060487, "loss": 0.0127, "num_input_tokens_seen": 153860080, "step": 71325 }, { "epoch": 11.63621533442088, "grad_norm": 0.01421964168548584, "learning_rate": 0.0004446079104251503, "loss": 0.0393, "num_input_tokens_seen": 153871856, "step": 71330 }, { "epoch": 11.637030995106036, "grad_norm": 0.1479797214269638, "learning_rate": 0.0004445371693122863, "loss": 0.017, "num_input_tokens_seen": 153882256, "step": 71335 }, { "epoch": 11.63784665579119, "grad_norm": 0.008084164932370186, "learning_rate": 0.00044446642932344726, "loss": 0.0331, "num_input_tokens_seen": 153892560, "step": 71340 }, { "epoch": 11.638662316476346, "grad_norm": 0.7880727648735046, "learning_rate": 0.0004443956904600663, "loss": 0.0534, "num_input_tokens_seen": 153902448, "step": 71345 }, { "epoch": 11.639477977161501, "grad_norm": 0.0531337670981884, "learning_rate": 0.00044432495272357734, "loss": 0.037, "num_input_tokens_seen": 153912688, "step": 71350 }, { "epoch": 11.640293637846655, "grad_norm": 0.08558989316225052, "learning_rate": 0.00044425421611541364, "loss": 0.0585, "num_input_tokens_seen": 153923408, "step": 71355 }, { "epoch": 11.641109298531811, "grad_norm": 0.02400355413556099, "learning_rate": 0.0004441834806370092, "loss": 0.1437, "num_input_tokens_seen": 153933712, "step": 71360 }, { "epoch": 11.641924959216965, "grad_norm": 2.037050724029541, "learning_rate": 0.00044411274628979714, "loss": 0.0583, "num_input_tokens_seen": 153945616, "step": 71365 }, { "epoch": 11.64274061990212, "grad_norm": 0.00874277576804161, "learning_rate": 0.00044404201307521134, "loss": 0.0108, "num_input_tokens_seen": 153955376, "step": 71370 }, { "epoch": 11.643556280587276, "grad_norm": 0.005710866767913103, "learning_rate": 0.00044397128099468497, "loss": 0.098, "num_input_tokens_seen": 153966384, "step": 71375 }, { "epoch": 11.64437194127243, "grad_norm": 0.012589012272655964, "learning_rate": 0.0004439005500496519, "loss": 0.1055, "num_input_tokens_seen": 153977296, "step": 71380 }, { "epoch": 11.645187601957586, "grad_norm": 0.9234338402748108, "learning_rate": 0.00044382982024154506, "loss": 0.0786, "num_input_tokens_seen": 153987920, "step": 71385 }, { "epoch": 11.64600326264274, "grad_norm": 0.6538709998130798, "learning_rate": 0.0004437590915717984, "loss": 0.1944, "num_input_tokens_seen": 153997744, "step": 71390 }, { "epoch": 11.646818923327896, "grad_norm": 0.07059884816408157, "learning_rate": 0.0004436883640418449, "loss": 0.0475, "num_input_tokens_seen": 154008784, "step": 71395 }, { "epoch": 11.647634584013051, "grad_norm": 0.009884354658424854, "learning_rate": 0.0004436176376531181, "loss": 0.0417, "num_input_tokens_seen": 154019408, "step": 71400 }, { "epoch": 11.648450244698205, "grad_norm": 0.005181219428777695, "learning_rate": 0.00044354691240705167, "loss": 0.185, "num_input_tokens_seen": 154029264, "step": 71405 }, { "epoch": 11.649265905383361, "grad_norm": 0.09308334439992905, "learning_rate": 0.00044347618830507845, "loss": 0.0248, "num_input_tokens_seen": 154038896, "step": 71410 }, { "epoch": 11.650081566068515, "grad_norm": 0.08888015896081924, "learning_rate": 0.00044340546534863226, "loss": 0.0358, "num_input_tokens_seen": 154049744, "step": 71415 }, { "epoch": 11.65089722675367, "grad_norm": 0.008099433965981007, "learning_rate": 0.00044333474353914576, "loss": 0.0057, "num_input_tokens_seen": 154060592, "step": 71420 }, { "epoch": 11.651712887438826, "grad_norm": 0.5211361050605774, "learning_rate": 0.0004432640228780529, "loss": 0.1019, "num_input_tokens_seen": 154071248, "step": 71425 }, { "epoch": 11.65252854812398, "grad_norm": 0.421138733625412, "learning_rate": 0.0004431933033667863, "loss": 0.0305, "num_input_tokens_seen": 154081968, "step": 71430 }, { "epoch": 11.653344208809136, "grad_norm": 0.02354181371629238, "learning_rate": 0.0004431225850067796, "loss": 0.0393, "num_input_tokens_seen": 154093680, "step": 71435 }, { "epoch": 11.65415986949429, "grad_norm": 0.01604207046329975, "learning_rate": 0.0004430518677994659, "loss": 0.014, "num_input_tokens_seen": 154103824, "step": 71440 }, { "epoch": 11.654975530179446, "grad_norm": 0.04721745476126671, "learning_rate": 0.0004429811517462783, "loss": 0.0106, "num_input_tokens_seen": 154113104, "step": 71445 }, { "epoch": 11.655791190864601, "grad_norm": 0.12421513348817825, "learning_rate": 0.00044291043684865, "loss": 0.0708, "num_input_tokens_seen": 154124720, "step": 71450 }, { "epoch": 11.656606851549755, "grad_norm": 0.0906495451927185, "learning_rate": 0.0004428397231080141, "loss": 0.2205, "num_input_tokens_seen": 154135760, "step": 71455 }, { "epoch": 11.65742251223491, "grad_norm": 0.05403032898902893, "learning_rate": 0.0004427690105258037, "loss": 0.0136, "num_input_tokens_seen": 154146896, "step": 71460 }, { "epoch": 11.658238172920065, "grad_norm": 0.01853824220597744, "learning_rate": 0.00044269829910345207, "loss": 0.0059, "num_input_tokens_seen": 154156336, "step": 71465 }, { "epoch": 11.65905383360522, "grad_norm": 0.03900037333369255, "learning_rate": 0.00044262758884239185, "loss": 0.0081, "num_input_tokens_seen": 154166320, "step": 71470 }, { "epoch": 11.659869494290374, "grad_norm": 0.031034687533974648, "learning_rate": 0.00044255687974405656, "loss": 0.1297, "num_input_tokens_seen": 154175888, "step": 71475 }, { "epoch": 11.66068515497553, "grad_norm": 0.15423482656478882, "learning_rate": 0.0004424861718098788, "loss": 0.0426, "num_input_tokens_seen": 154187728, "step": 71480 }, { "epoch": 11.661500815660686, "grad_norm": 0.009153451770544052, "learning_rate": 0.00044241546504129186, "loss": 0.0459, "num_input_tokens_seen": 154198736, "step": 71485 }, { "epoch": 11.66231647634584, "grad_norm": 0.37302783131599426, "learning_rate": 0.0004423447594397284, "loss": 0.0486, "num_input_tokens_seen": 154209200, "step": 71490 }, { "epoch": 11.663132137030995, "grad_norm": 0.10020475089550018, "learning_rate": 0.00044227405500662175, "loss": 0.0393, "num_input_tokens_seen": 154220496, "step": 71495 }, { "epoch": 11.66394779771615, "grad_norm": 0.028192361816763878, "learning_rate": 0.00044220335174340443, "loss": 0.0213, "num_input_tokens_seen": 154230992, "step": 71500 }, { "epoch": 11.664763458401305, "grad_norm": 0.008284736424684525, "learning_rate": 0.00044213264965150943, "loss": 0.0118, "num_input_tokens_seen": 154242352, "step": 71505 }, { "epoch": 11.66557911908646, "grad_norm": 0.27341514825820923, "learning_rate": 0.00044206194873237, "loss": 0.0444, "num_input_tokens_seen": 154254320, "step": 71510 }, { "epoch": 11.666394779771615, "grad_norm": 0.05225376412272453, "learning_rate": 0.00044199124898741844, "loss": 0.1297, "num_input_tokens_seen": 154265168, "step": 71515 }, { "epoch": 11.66721044045677, "grad_norm": 0.7110954523086548, "learning_rate": 0.000441920550418088, "loss": 0.0514, "num_input_tokens_seen": 154274736, "step": 71520 }, { "epoch": 11.668026101141924, "grad_norm": 0.30445510149002075, "learning_rate": 0.00044184985302581103, "loss": 0.2269, "num_input_tokens_seen": 154285360, "step": 71525 }, { "epoch": 11.66884176182708, "grad_norm": 0.10290344804525375, "learning_rate": 0.00044177915681202083, "loss": 0.0188, "num_input_tokens_seen": 154296528, "step": 71530 }, { "epoch": 11.669657422512234, "grad_norm": 0.8287101984024048, "learning_rate": 0.00044170846177814965, "loss": 0.0791, "num_input_tokens_seen": 154306928, "step": 71535 }, { "epoch": 11.67047308319739, "grad_norm": 0.012724130414426327, "learning_rate": 0.0004416377679256307, "loss": 0.0043, "num_input_tokens_seen": 154316624, "step": 71540 }, { "epoch": 11.671288743882545, "grad_norm": 0.07964999228715897, "learning_rate": 0.0004415670752558961, "loss": 0.0089, "num_input_tokens_seen": 154326416, "step": 71545 }, { "epoch": 11.6721044045677, "grad_norm": 0.11060158163309097, "learning_rate": 0.0004414963837703791, "loss": 0.0211, "num_input_tokens_seen": 154337392, "step": 71550 }, { "epoch": 11.672920065252855, "grad_norm": 0.26356300711631775, "learning_rate": 0.0004414256934705119, "loss": 0.0277, "num_input_tokens_seen": 154347600, "step": 71555 }, { "epoch": 11.673735725938009, "grad_norm": 0.6109563112258911, "learning_rate": 0.00044135500435772755, "loss": 0.124, "num_input_tokens_seen": 154356848, "step": 71560 }, { "epoch": 11.674551386623165, "grad_norm": 0.14865735173225403, "learning_rate": 0.0004412843164334582, "loss": 0.0147, "num_input_tokens_seen": 154368592, "step": 71565 }, { "epoch": 11.67536704730832, "grad_norm": 0.08889831602573395, "learning_rate": 0.00044121362969913683, "loss": 0.0375, "num_input_tokens_seen": 154379600, "step": 71570 }, { "epoch": 11.676182707993474, "grad_norm": 0.005796494428068399, "learning_rate": 0.00044114294415619577, "loss": 0.029, "num_input_tokens_seen": 154389776, "step": 71575 }, { "epoch": 11.67699836867863, "grad_norm": 0.5512948036193848, "learning_rate": 0.00044107225980606765, "loss": 0.043, "num_input_tokens_seen": 154400688, "step": 71580 }, { "epoch": 11.677814029363784, "grad_norm": 0.006947787944227457, "learning_rate": 0.0004410015766501849, "loss": 0.0388, "num_input_tokens_seen": 154410352, "step": 71585 }, { "epoch": 11.67862969004894, "grad_norm": 1.608058214187622, "learning_rate": 0.00044093089468998006, "loss": 0.0708, "num_input_tokens_seen": 154421392, "step": 71590 }, { "epoch": 11.679445350734095, "grad_norm": 0.005093876738101244, "learning_rate": 0.0004408602139268856, "loss": 0.087, "num_input_tokens_seen": 154432432, "step": 71595 }, { "epoch": 11.68026101141925, "grad_norm": 1.9780513048171997, "learning_rate": 0.00044078953436233387, "loss": 0.0405, "num_input_tokens_seen": 154444240, "step": 71600 }, { "epoch": 11.681076672104405, "grad_norm": 0.194422647356987, "learning_rate": 0.0004407188559977573, "loss": 0.0157, "num_input_tokens_seen": 154455248, "step": 71605 }, { "epoch": 11.681892332789559, "grad_norm": 0.036033567041158676, "learning_rate": 0.00044064817883458833, "loss": 0.0158, "num_input_tokens_seen": 154465968, "step": 71610 }, { "epoch": 11.682707993474715, "grad_norm": 0.012585141696035862, "learning_rate": 0.0004405775028742594, "loss": 0.1178, "num_input_tokens_seen": 154477872, "step": 71615 }, { "epoch": 11.68352365415987, "grad_norm": 0.09101568907499313, "learning_rate": 0.00044050682811820277, "loss": 0.0894, "num_input_tokens_seen": 154487952, "step": 71620 }, { "epoch": 11.684339314845024, "grad_norm": 1.1757590770721436, "learning_rate": 0.00044043615456785065, "loss": 0.0917, "num_input_tokens_seen": 154498576, "step": 71625 }, { "epoch": 11.68515497553018, "grad_norm": 0.1733485907316208, "learning_rate": 0.00044036548222463535, "loss": 0.0458, "num_input_tokens_seen": 154509424, "step": 71630 }, { "epoch": 11.685970636215334, "grad_norm": 0.46307262778282166, "learning_rate": 0.0004402948110899894, "loss": 0.1485, "num_input_tokens_seen": 154519984, "step": 71635 }, { "epoch": 11.68678629690049, "grad_norm": 0.044623441994190216, "learning_rate": 0.0004402241411653447, "loss": 0.0303, "num_input_tokens_seen": 154530832, "step": 71640 }, { "epoch": 11.687601957585644, "grad_norm": 0.46250560879707336, "learning_rate": 0.00044015347245213377, "loss": 0.0441, "num_input_tokens_seen": 154542128, "step": 71645 }, { "epoch": 11.6884176182708, "grad_norm": 0.16729626059532166, "learning_rate": 0.00044008280495178844, "loss": 0.0391, "num_input_tokens_seen": 154552208, "step": 71650 }, { "epoch": 11.689233278955955, "grad_norm": 0.0402241051197052, "learning_rate": 0.0004400121386657413, "loss": 0.0421, "num_input_tokens_seen": 154563152, "step": 71655 }, { "epoch": 11.690048939641109, "grad_norm": 0.004633928183466196, "learning_rate": 0.000439941473595424, "loss": 0.1642, "num_input_tokens_seen": 154573552, "step": 71660 }, { "epoch": 11.690864600326265, "grad_norm": 0.009622551500797272, "learning_rate": 0.00043987080974226925, "loss": 0.0344, "num_input_tokens_seen": 154583824, "step": 71665 }, { "epoch": 11.691680261011419, "grad_norm": 0.011338973417878151, "learning_rate": 0.00043980014710770857, "loss": 0.0173, "num_input_tokens_seen": 154594256, "step": 71670 }, { "epoch": 11.692495921696574, "grad_norm": 0.027692293748259544, "learning_rate": 0.00043972948569317446, "loss": 0.0152, "num_input_tokens_seen": 154604272, "step": 71675 }, { "epoch": 11.69331158238173, "grad_norm": 0.21677742898464203, "learning_rate": 0.00043965882550009856, "loss": 0.0124, "num_input_tokens_seen": 154615792, "step": 71680 }, { "epoch": 11.694127243066884, "grad_norm": 0.595757246017456, "learning_rate": 0.0004395881665299134, "loss": 0.0761, "num_input_tokens_seen": 154626928, "step": 71685 }, { "epoch": 11.69494290375204, "grad_norm": 0.0013340244768187404, "learning_rate": 0.0004395175087840503, "loss": 0.0129, "num_input_tokens_seen": 154637744, "step": 71690 }, { "epoch": 11.695758564437194, "grad_norm": 0.13044273853302002, "learning_rate": 0.000439446852263942, "loss": 0.0174, "num_input_tokens_seen": 154648432, "step": 71695 }, { "epoch": 11.69657422512235, "grad_norm": 0.06101664900779724, "learning_rate": 0.00043937619697101974, "loss": 0.0426, "num_input_tokens_seen": 154660784, "step": 71700 }, { "epoch": 11.697389885807503, "grad_norm": 0.013198736123740673, "learning_rate": 0.00043930554290671597, "loss": 0.0295, "num_input_tokens_seen": 154670896, "step": 71705 }, { "epoch": 11.698205546492659, "grad_norm": 0.03736181929707527, "learning_rate": 0.0004392348900724622, "loss": 0.028, "num_input_tokens_seen": 154681872, "step": 71710 }, { "epoch": 11.699021207177815, "grad_norm": 0.19524195790290833, "learning_rate": 0.00043916423846969047, "loss": 0.1192, "num_input_tokens_seen": 154692816, "step": 71715 }, { "epoch": 11.699836867862969, "grad_norm": 0.011334164999425411, "learning_rate": 0.0004390935880998329, "loss": 0.0183, "num_input_tokens_seen": 154703984, "step": 71720 }, { "epoch": 11.700652528548124, "grad_norm": 0.03926578164100647, "learning_rate": 0.00043902293896432064, "loss": 0.041, "num_input_tokens_seen": 154715792, "step": 71725 }, { "epoch": 11.701468189233278, "grad_norm": 0.023917051032185555, "learning_rate": 0.0004389522910645862, "loss": 0.009, "num_input_tokens_seen": 154727152, "step": 71730 }, { "epoch": 11.702283849918434, "grad_norm": 0.023448597639799118, "learning_rate": 0.00043888164440206086, "loss": 0.0135, "num_input_tokens_seen": 154738544, "step": 71735 }, { "epoch": 11.70309951060359, "grad_norm": 0.006107778288424015, "learning_rate": 0.0004388109989781766, "loss": 0.0168, "num_input_tokens_seen": 154748464, "step": 71740 }, { "epoch": 11.703915171288743, "grad_norm": 0.444672167301178, "learning_rate": 0.000438740354794365, "loss": 0.2574, "num_input_tokens_seen": 154759728, "step": 71745 }, { "epoch": 11.7047308319739, "grad_norm": 0.2536945343017578, "learning_rate": 0.0004386697118520579, "loss": 0.0495, "num_input_tokens_seen": 154769456, "step": 71750 }, { "epoch": 11.705546492659053, "grad_norm": 0.01962757483124733, "learning_rate": 0.00043859907015268685, "loss": 0.003, "num_input_tokens_seen": 154778640, "step": 71755 }, { "epoch": 11.706362153344209, "grad_norm": 0.025570545345544815, "learning_rate": 0.00043852842969768356, "loss": 0.0377, "num_input_tokens_seen": 154789968, "step": 71760 }, { "epoch": 11.707177814029365, "grad_norm": 0.027583960443735123, "learning_rate": 0.0004384577904884795, "loss": 0.0102, "num_input_tokens_seen": 154801168, "step": 71765 }, { "epoch": 11.707993474714518, "grad_norm": 0.4566868245601654, "learning_rate": 0.0004383871525265066, "loss": 0.1456, "num_input_tokens_seen": 154810992, "step": 71770 }, { "epoch": 11.708809135399674, "grad_norm": 0.014746258035302162, "learning_rate": 0.00043831651581319604, "loss": 0.0142, "num_input_tokens_seen": 154821488, "step": 71775 }, { "epoch": 11.709624796084828, "grad_norm": 0.05359187722206116, "learning_rate": 0.00043824588034997974, "loss": 0.0316, "num_input_tokens_seen": 154832368, "step": 71780 }, { "epoch": 11.710440456769984, "grad_norm": 0.05188470706343651, "learning_rate": 0.0004381752461382888, "loss": 0.0389, "num_input_tokens_seen": 154842512, "step": 71785 }, { "epoch": 11.71125611745514, "grad_norm": 0.0065127476118505, "learning_rate": 0.0004381046131795551, "loss": 0.0034, "num_input_tokens_seen": 154853328, "step": 71790 }, { "epoch": 11.712071778140293, "grad_norm": 0.47887152433395386, "learning_rate": 0.0004380339814752098, "loss": 0.2132, "num_input_tokens_seen": 154863152, "step": 71795 }, { "epoch": 11.71288743882545, "grad_norm": 0.006003732793033123, "learning_rate": 0.0004379633510266846, "loss": 0.0386, "num_input_tokens_seen": 154872528, "step": 71800 }, { "epoch": 11.713703099510603, "grad_norm": 0.01757998578250408, "learning_rate": 0.0004378927218354106, "loss": 0.0442, "num_input_tokens_seen": 154883920, "step": 71805 }, { "epoch": 11.714518760195759, "grad_norm": 0.14948850870132446, "learning_rate": 0.00043782209390281964, "loss": 0.0538, "num_input_tokens_seen": 154894000, "step": 71810 }, { "epoch": 11.715334420880914, "grad_norm": 1.3551521301269531, "learning_rate": 0.00043775146723034253, "loss": 0.0608, "num_input_tokens_seen": 154904240, "step": 71815 }, { "epoch": 11.716150081566068, "grad_norm": 0.23790426552295685, "learning_rate": 0.00043768084181941097, "loss": 0.0412, "num_input_tokens_seen": 154915792, "step": 71820 }, { "epoch": 11.716965742251224, "grad_norm": 0.01482490636408329, "learning_rate": 0.00043761021767145644, "loss": 0.1112, "num_input_tokens_seen": 154926672, "step": 71825 }, { "epoch": 11.717781402936378, "grad_norm": 0.028402449563145638, "learning_rate": 0.0004375395947879097, "loss": 0.0342, "num_input_tokens_seen": 154936528, "step": 71830 }, { "epoch": 11.718597063621534, "grad_norm": 0.008279485628008842, "learning_rate": 0.0004374689731702026, "loss": 0.0215, "num_input_tokens_seen": 154947280, "step": 71835 }, { "epoch": 11.719412724306688, "grad_norm": 0.08881169557571411, "learning_rate": 0.0004373983528197659, "loss": 0.0952, "num_input_tokens_seen": 154958928, "step": 71840 }, { "epoch": 11.720228384991843, "grad_norm": 0.006324186455458403, "learning_rate": 0.0004373277337380311, "loss": 0.0754, "num_input_tokens_seen": 154970992, "step": 71845 }, { "epoch": 11.721044045676999, "grad_norm": 0.005181887652724981, "learning_rate": 0.00043725711592642913, "loss": 0.2276, "num_input_tokens_seen": 154981264, "step": 71850 }, { "epoch": 11.721859706362153, "grad_norm": 0.12789687514305115, "learning_rate": 0.0004371864993863915, "loss": 0.029, "num_input_tokens_seen": 154991440, "step": 71855 }, { "epoch": 11.722675367047309, "grad_norm": 0.0778777152299881, "learning_rate": 0.00043711588411934893, "loss": 0.0134, "num_input_tokens_seen": 155001744, "step": 71860 }, { "epoch": 11.723491027732463, "grad_norm": 0.4660443365573883, "learning_rate": 0.00043704527012673294, "loss": 0.0343, "num_input_tokens_seen": 155012400, "step": 71865 }, { "epoch": 11.724306688417618, "grad_norm": 0.5151549577713013, "learning_rate": 0.00043697465740997424, "loss": 0.0834, "num_input_tokens_seen": 155023664, "step": 71870 }, { "epoch": 11.725122349102774, "grad_norm": 0.40685030817985535, "learning_rate": 0.00043690404597050426, "loss": 0.3132, "num_input_tokens_seen": 155034160, "step": 71875 }, { "epoch": 11.725938009787928, "grad_norm": 0.3028431534767151, "learning_rate": 0.0004368334358097536, "loss": 0.0273, "num_input_tokens_seen": 155044528, "step": 71880 }, { "epoch": 11.726753670473084, "grad_norm": 0.4085318148136139, "learning_rate": 0.00043676282692915367, "loss": 0.159, "num_input_tokens_seen": 155055728, "step": 71885 }, { "epoch": 11.727569331158238, "grad_norm": 0.0276857428252697, "learning_rate": 0.0004366922193301352, "loss": 0.0815, "num_input_tokens_seen": 155066192, "step": 71890 }, { "epoch": 11.728384991843393, "grad_norm": 0.11039572954177856, "learning_rate": 0.00043662161301412925, "loss": 0.0171, "num_input_tokens_seen": 155077296, "step": 71895 }, { "epoch": 11.729200652528547, "grad_norm": 0.004712986759841442, "learning_rate": 0.0004365510079825667, "loss": 0.1377, "num_input_tokens_seen": 155088240, "step": 71900 }, { "epoch": 11.730016313213703, "grad_norm": 0.013263898901641369, "learning_rate": 0.00043648040423687845, "loss": 0.0356, "num_input_tokens_seen": 155099056, "step": 71905 }, { "epoch": 11.730831973898859, "grad_norm": 0.07724368572235107, "learning_rate": 0.00043640980177849534, "loss": 0.0978, "num_input_tokens_seen": 155108944, "step": 71910 }, { "epoch": 11.731647634584013, "grad_norm": 0.030305301770567894, "learning_rate": 0.00043633920060884843, "loss": 0.0085, "num_input_tokens_seen": 155119440, "step": 71915 }, { "epoch": 11.732463295269168, "grad_norm": 0.23072130978107452, "learning_rate": 0.0004362686007293681, "loss": 0.0613, "num_input_tokens_seen": 155130928, "step": 71920 }, { "epoch": 11.733278955954322, "grad_norm": 0.01207263208925724, "learning_rate": 0.0004361980021414858, "loss": 0.1453, "num_input_tokens_seen": 155140784, "step": 71925 }, { "epoch": 11.734094616639478, "grad_norm": 0.008155466988682747, "learning_rate": 0.00043612740484663155, "loss": 0.0137, "num_input_tokens_seen": 155151472, "step": 71930 }, { "epoch": 11.734910277324634, "grad_norm": 0.025105159729719162, "learning_rate": 0.00043605680884623656, "loss": 0.1109, "num_input_tokens_seen": 155161968, "step": 71935 }, { "epoch": 11.735725938009788, "grad_norm": 0.0058405231684446335, "learning_rate": 0.00043598621414173166, "loss": 0.0724, "num_input_tokens_seen": 155173552, "step": 71940 }, { "epoch": 11.736541598694943, "grad_norm": 0.025353707373142242, "learning_rate": 0.0004359156207345471, "loss": 0.0947, "num_input_tokens_seen": 155184368, "step": 71945 }, { "epoch": 11.737357259380097, "grad_norm": 0.2284693866968155, "learning_rate": 0.00043584502862611404, "loss": 0.0243, "num_input_tokens_seen": 155195664, "step": 71950 }, { "epoch": 11.738172920065253, "grad_norm": 0.13955746591091156, "learning_rate": 0.00043577443781786263, "loss": 0.0292, "num_input_tokens_seen": 155206480, "step": 71955 }, { "epoch": 11.738988580750409, "grad_norm": 0.009471099823713303, "learning_rate": 0.0004357038483112239, "loss": 0.0168, "num_input_tokens_seen": 155217328, "step": 71960 }, { "epoch": 11.739804241435563, "grad_norm": 0.007060085888952017, "learning_rate": 0.00043563326010762803, "loss": 0.0299, "num_input_tokens_seen": 155229200, "step": 71965 }, { "epoch": 11.740619902120718, "grad_norm": 0.25914546847343445, "learning_rate": 0.00043556267320850605, "loss": 0.0711, "num_input_tokens_seen": 155240080, "step": 71970 }, { "epoch": 11.741435562805872, "grad_norm": 0.01586800254881382, "learning_rate": 0.000435492087615288, "loss": 0.0375, "num_input_tokens_seen": 155252272, "step": 71975 }, { "epoch": 11.742251223491028, "grad_norm": 0.09248818457126617, "learning_rate": 0.00043542150332940487, "loss": 0.0255, "num_input_tokens_seen": 155263632, "step": 71980 }, { "epoch": 11.743066884176184, "grad_norm": 0.12142807245254517, "learning_rate": 0.00043535092035228666, "loss": 0.0365, "num_input_tokens_seen": 155275024, "step": 71985 }, { "epoch": 11.743882544861338, "grad_norm": 0.0239220280200243, "learning_rate": 0.00043528033868536433, "loss": 0.0198, "num_input_tokens_seen": 155284784, "step": 71990 }, { "epoch": 11.744698205546493, "grad_norm": 0.2067824751138687, "learning_rate": 0.0004352097583300678, "loss": 0.034, "num_input_tokens_seen": 155295888, "step": 71995 }, { "epoch": 11.745513866231647, "grad_norm": 0.00897118542343378, "learning_rate": 0.0004351391792878279, "loss": 0.0119, "num_input_tokens_seen": 155306544, "step": 72000 }, { "epoch": 11.746329526916803, "grad_norm": 0.01250038668513298, "learning_rate": 0.00043506860156007453, "loss": 0.0917, "num_input_tokens_seen": 155316880, "step": 72005 }, { "epoch": 11.747145187601957, "grad_norm": 0.016184603795409203, "learning_rate": 0.00043499802514823866, "loss": 0.0089, "num_input_tokens_seen": 155327568, "step": 72010 }, { "epoch": 11.747960848287113, "grad_norm": 0.18714000284671783, "learning_rate": 0.00043492745005375, "loss": 0.0287, "num_input_tokens_seen": 155338416, "step": 72015 }, { "epoch": 11.748776508972268, "grad_norm": 0.028790002688765526, "learning_rate": 0.00043485687627803935, "loss": 0.1474, "num_input_tokens_seen": 155347600, "step": 72020 }, { "epoch": 11.749592169657422, "grad_norm": 0.014393490739166737, "learning_rate": 0.00043478630382253646, "loss": 0.1456, "num_input_tokens_seen": 155357072, "step": 72025 }, { "epoch": 11.750407830342578, "grad_norm": 0.17760059237480164, "learning_rate": 0.00043471573268867206, "loss": 0.1561, "num_input_tokens_seen": 155367472, "step": 72030 }, { "epoch": 11.751223491027732, "grad_norm": 0.3666444718837738, "learning_rate": 0.00043464516287787617, "loss": 0.0983, "num_input_tokens_seen": 155377904, "step": 72035 }, { "epoch": 11.752039151712887, "grad_norm": 0.020276805385947227, "learning_rate": 0.0004345745943915788, "loss": 0.0262, "num_input_tokens_seen": 155389040, "step": 72040 }, { "epoch": 11.752854812398043, "grad_norm": 0.02979442849755287, "learning_rate": 0.0004345040272312104, "loss": 0.0858, "num_input_tokens_seen": 155400304, "step": 72045 }, { "epoch": 11.753670473083197, "grad_norm": 0.04828314483165741, "learning_rate": 0.00043443346139820086, "loss": 0.0214, "num_input_tokens_seen": 155412144, "step": 72050 }, { "epoch": 11.754486133768353, "grad_norm": 0.01922396570444107, "learning_rate": 0.0004343628968939805, "loss": 0.0177, "num_input_tokens_seen": 155422864, "step": 72055 }, { "epoch": 11.755301794453507, "grad_norm": 0.010878859087824821, "learning_rate": 0.0004342923337199793, "loss": 0.2523, "num_input_tokens_seen": 155434064, "step": 72060 }, { "epoch": 11.756117455138662, "grad_norm": 0.0951152890920639, "learning_rate": 0.0004342217718776273, "loss": 0.0345, "num_input_tokens_seen": 155445552, "step": 72065 }, { "epoch": 11.756933115823816, "grad_norm": 0.021144075319170952, "learning_rate": 0.00043415121136835454, "loss": 0.0103, "num_input_tokens_seen": 155455504, "step": 72070 }, { "epoch": 11.757748776508972, "grad_norm": 0.06017545983195305, "learning_rate": 0.00043408065219359106, "loss": 0.0511, "num_input_tokens_seen": 155466384, "step": 72075 }, { "epoch": 11.758564437194128, "grad_norm": 0.024251675233244896, "learning_rate": 0.00043401009435476665, "loss": 0.0586, "num_input_tokens_seen": 155476496, "step": 72080 }, { "epoch": 11.759380097879282, "grad_norm": 0.04201215133070946, "learning_rate": 0.0004339395378533116, "loss": 0.0077, "num_input_tokens_seen": 155487952, "step": 72085 }, { "epoch": 11.760195758564437, "grad_norm": 0.006956837140023708, "learning_rate": 0.00043386898269065537, "loss": 0.0218, "num_input_tokens_seen": 155498800, "step": 72090 }, { "epoch": 11.761011419249591, "grad_norm": 0.023704318329691887, "learning_rate": 0.00043379842886822836, "loss": 0.265, "num_input_tokens_seen": 155510160, "step": 72095 }, { "epoch": 11.761827079934747, "grad_norm": 0.09176256507635117, "learning_rate": 0.0004337278763874599, "loss": 0.0519, "num_input_tokens_seen": 155520880, "step": 72100 }, { "epoch": 11.762642740619903, "grad_norm": 0.08993791043758392, "learning_rate": 0.0004336573252497804, "loss": 0.0168, "num_input_tokens_seen": 155530896, "step": 72105 }, { "epoch": 11.763458401305057, "grad_norm": 0.5440847873687744, "learning_rate": 0.00043358677545661913, "loss": 0.1305, "num_input_tokens_seen": 155542960, "step": 72110 }, { "epoch": 11.764274061990212, "grad_norm": 0.32714685797691345, "learning_rate": 0.0004335162270094063, "loss": 0.1031, "num_input_tokens_seen": 155552496, "step": 72115 }, { "epoch": 11.765089722675366, "grad_norm": 0.026951028034090996, "learning_rate": 0.0004334456799095712, "loss": 0.0372, "num_input_tokens_seen": 155563088, "step": 72120 }, { "epoch": 11.765905383360522, "grad_norm": 0.007922573015093803, "learning_rate": 0.00043337513415854414, "loss": 0.0169, "num_input_tokens_seen": 155573712, "step": 72125 }, { "epoch": 11.766721044045678, "grad_norm": 0.811251699924469, "learning_rate": 0.0004333045897577542, "loss": 0.0587, "num_input_tokens_seen": 155584144, "step": 72130 }, { "epoch": 11.767536704730832, "grad_norm": 0.10380300879478455, "learning_rate": 0.00043323404670863165, "loss": 0.0329, "num_input_tokens_seen": 155596560, "step": 72135 }, { "epoch": 11.768352365415987, "grad_norm": 0.0270412415266037, "learning_rate": 0.0004331635050126056, "loss": 0.091, "num_input_tokens_seen": 155606384, "step": 72140 }, { "epoch": 11.769168026101141, "grad_norm": 0.1447732299566269, "learning_rate": 0.0004330929646711059, "loss": 0.0125, "num_input_tokens_seen": 155617584, "step": 72145 }, { "epoch": 11.769983686786297, "grad_norm": 0.034624502062797546, "learning_rate": 0.0004330224256855624, "loss": 0.0206, "num_input_tokens_seen": 155629488, "step": 72150 }, { "epoch": 11.770799347471453, "grad_norm": 0.10095750540494919, "learning_rate": 0.00043295188805740414, "loss": 0.0401, "num_input_tokens_seen": 155641424, "step": 72155 }, { "epoch": 11.771615008156607, "grad_norm": 0.03427255153656006, "learning_rate": 0.0004328813517880612, "loss": 0.0396, "num_input_tokens_seen": 155650896, "step": 72160 }, { "epoch": 11.772430668841762, "grad_norm": 0.010121244937181473, "learning_rate": 0.00043281081687896253, "loss": 0.0529, "num_input_tokens_seen": 155661840, "step": 72165 }, { "epoch": 11.773246329526916, "grad_norm": 0.006669145543128252, "learning_rate": 0.0004327402833315381, "loss": 0.0043, "num_input_tokens_seen": 155672720, "step": 72170 }, { "epoch": 11.774061990212072, "grad_norm": 0.3694041967391968, "learning_rate": 0.000432669751147217, "loss": 0.0705, "num_input_tokens_seen": 155682960, "step": 72175 }, { "epoch": 11.774877650897226, "grad_norm": 0.03678375482559204, "learning_rate": 0.000432599220327429, "loss": 0.0225, "num_input_tokens_seen": 155694128, "step": 72180 }, { "epoch": 11.775693311582382, "grad_norm": 0.6276671886444092, "learning_rate": 0.0004325286908736031, "loss": 0.1729, "num_input_tokens_seen": 155705744, "step": 72185 }, { "epoch": 11.776508972267537, "grad_norm": 0.010160553269088268, "learning_rate": 0.0004324581627871691, "loss": 0.0304, "num_input_tokens_seen": 155715856, "step": 72190 }, { "epoch": 11.777324632952691, "grad_norm": 0.010254235938191414, "learning_rate": 0.00043238763606955586, "loss": 0.0242, "num_input_tokens_seen": 155727216, "step": 72195 }, { "epoch": 11.778140293637847, "grad_norm": 0.0373469777405262, "learning_rate": 0.00043231711072219307, "loss": 0.0371, "num_input_tokens_seen": 155738320, "step": 72200 }, { "epoch": 11.778955954323001, "grad_norm": 0.556888997554779, "learning_rate": 0.0004322465867465099, "loss": 0.0565, "num_input_tokens_seen": 155749488, "step": 72205 }, { "epoch": 11.779771615008157, "grad_norm": 0.010719962418079376, "learning_rate": 0.0004321760641439356, "loss": 0.0107, "num_input_tokens_seen": 155761168, "step": 72210 }, { "epoch": 11.780587275693312, "grad_norm": 0.007429102435708046, "learning_rate": 0.00043210554291589937, "loss": 0.0168, "num_input_tokens_seen": 155771952, "step": 72215 }, { "epoch": 11.781402936378466, "grad_norm": 0.04312760382890701, "learning_rate": 0.00043203502306383046, "loss": 0.0153, "num_input_tokens_seen": 155781200, "step": 72220 }, { "epoch": 11.782218597063622, "grad_norm": 0.20965027809143066, "learning_rate": 0.0004319645045891579, "loss": 0.0751, "num_input_tokens_seen": 155791824, "step": 72225 }, { "epoch": 11.783034257748776, "grad_norm": 0.03513307869434357, "learning_rate": 0.0004318939874933113, "loss": 0.0051, "num_input_tokens_seen": 155802864, "step": 72230 }, { "epoch": 11.783849918433932, "grad_norm": 0.11161836981773376, "learning_rate": 0.00043182347177771907, "loss": 0.071, "num_input_tokens_seen": 155813040, "step": 72235 }, { "epoch": 11.784665579119086, "grad_norm": 0.04140867665410042, "learning_rate": 0.000431752957443811, "loss": 0.101, "num_input_tokens_seen": 155824560, "step": 72240 }, { "epoch": 11.785481239804241, "grad_norm": 0.00957273505628109, "learning_rate": 0.00043168244449301555, "loss": 0.1341, "num_input_tokens_seen": 155835376, "step": 72245 }, { "epoch": 11.786296900489397, "grad_norm": 0.06566344201564789, "learning_rate": 0.00043161193292676203, "loss": 0.0105, "num_input_tokens_seen": 155846800, "step": 72250 }, { "epoch": 11.78711256117455, "grad_norm": 0.7373807430267334, "learning_rate": 0.00043154142274647966, "loss": 0.132, "num_input_tokens_seen": 155857872, "step": 72255 }, { "epoch": 11.787928221859707, "grad_norm": 0.35005736351013184, "learning_rate": 0.000431470913953597, "loss": 0.0293, "num_input_tokens_seen": 155868272, "step": 72260 }, { "epoch": 11.78874388254486, "grad_norm": 0.01026091631501913, "learning_rate": 0.00043140040654954346, "loss": 0.0194, "num_input_tokens_seen": 155877680, "step": 72265 }, { "epoch": 11.789559543230016, "grad_norm": 0.1419307142496109, "learning_rate": 0.00043132990053574747, "loss": 0.0591, "num_input_tokens_seen": 155888400, "step": 72270 }, { "epoch": 11.790375203915172, "grad_norm": 0.32013365626335144, "learning_rate": 0.0004312593959136383, "loss": 0.1404, "num_input_tokens_seen": 155898800, "step": 72275 }, { "epoch": 11.791190864600326, "grad_norm": 0.012438350357115269, "learning_rate": 0.0004311888926846445, "loss": 0.147, "num_input_tokens_seen": 155910096, "step": 72280 }, { "epoch": 11.792006525285482, "grad_norm": 0.2099284529685974, "learning_rate": 0.00043111839085019534, "loss": 0.2126, "num_input_tokens_seen": 155921552, "step": 72285 }, { "epoch": 11.792822185970635, "grad_norm": 0.8106554746627808, "learning_rate": 0.0004310478904117191, "loss": 0.0815, "num_input_tokens_seen": 155932624, "step": 72290 }, { "epoch": 11.793637846655791, "grad_norm": 0.02031088061630726, "learning_rate": 0.0004309773913706451, "loss": 0.02, "num_input_tokens_seen": 155944080, "step": 72295 }, { "epoch": 11.794453507340947, "grad_norm": 0.026069244369864464, "learning_rate": 0.00043090689372840156, "loss": 0.0846, "num_input_tokens_seen": 155955536, "step": 72300 }, { "epoch": 11.7952691680261, "grad_norm": 0.25827181339263916, "learning_rate": 0.0004308363974864178, "loss": 0.081, "num_input_tokens_seen": 155966832, "step": 72305 }, { "epoch": 11.796084828711257, "grad_norm": 0.19091357290744781, "learning_rate": 0.0004307659026461218, "loss": 0.0196, "num_input_tokens_seen": 155977840, "step": 72310 }, { "epoch": 11.79690048939641, "grad_norm": 0.060743801295757294, "learning_rate": 0.00043069540920894297, "loss": 0.0387, "num_input_tokens_seen": 155987824, "step": 72315 }, { "epoch": 11.797716150081566, "grad_norm": 0.002279407111927867, "learning_rate": 0.0004306249171763093, "loss": 0.023, "num_input_tokens_seen": 155999152, "step": 72320 }, { "epoch": 11.798531810766722, "grad_norm": 0.15106846392154694, "learning_rate": 0.0004305544265496499, "loss": 0.0635, "num_input_tokens_seen": 156009616, "step": 72325 }, { "epoch": 11.799347471451876, "grad_norm": 0.02937285229563713, "learning_rate": 0.000430483937330393, "loss": 0.0087, "num_input_tokens_seen": 156020848, "step": 72330 }, { "epoch": 11.800163132137031, "grad_norm": 0.08343897759914398, "learning_rate": 0.0004304134495199674, "loss": 0.0406, "num_input_tokens_seen": 156031440, "step": 72335 }, { "epoch": 11.800978792822185, "grad_norm": 0.01700861006975174, "learning_rate": 0.0004303429631198014, "loss": 0.0051, "num_input_tokens_seen": 156042608, "step": 72340 }, { "epoch": 11.801794453507341, "grad_norm": 0.04990555718541145, "learning_rate": 0.0004302724781313237, "loss": 0.0086, "num_input_tokens_seen": 156054992, "step": 72345 }, { "epoch": 11.802610114192497, "grad_norm": 0.7958185076713562, "learning_rate": 0.0004302019945559627, "loss": 0.079, "num_input_tokens_seen": 156066864, "step": 72350 }, { "epoch": 11.80342577487765, "grad_norm": 0.17491139471530914, "learning_rate": 0.0004301315123951467, "loss": 0.0184, "num_input_tokens_seen": 156077712, "step": 72355 }, { "epoch": 11.804241435562806, "grad_norm": 1.066481113433838, "learning_rate": 0.0004300610316503045, "loss": 0.0763, "num_input_tokens_seen": 156088080, "step": 72360 }, { "epoch": 11.80505709624796, "grad_norm": 0.018509283661842346, "learning_rate": 0.00042999055232286387, "loss": 0.1023, "num_input_tokens_seen": 156098864, "step": 72365 }, { "epoch": 11.805872756933116, "grad_norm": 0.03908360004425049, "learning_rate": 0.00042992007441425376, "loss": 0.0536, "num_input_tokens_seen": 156109360, "step": 72370 }, { "epoch": 11.80668841761827, "grad_norm": 0.029677383601665497, "learning_rate": 0.00042984959792590215, "loss": 0.1932, "num_input_tokens_seen": 156119280, "step": 72375 }, { "epoch": 11.807504078303426, "grad_norm": 0.320372998714447, "learning_rate": 0.00042977912285923747, "loss": 0.1878, "num_input_tokens_seen": 156131056, "step": 72380 }, { "epoch": 11.808319738988581, "grad_norm": 0.06528972834348679, "learning_rate": 0.000429708649215688, "loss": 0.0124, "num_input_tokens_seen": 156143120, "step": 72385 }, { "epoch": 11.809135399673735, "grad_norm": 0.026044495403766632, "learning_rate": 0.00042963817699668183, "loss": 0.013, "num_input_tokens_seen": 156152944, "step": 72390 }, { "epoch": 11.809951060358891, "grad_norm": 0.014607712626457214, "learning_rate": 0.0004295677062036472, "loss": 0.1074, "num_input_tokens_seen": 156162832, "step": 72395 }, { "epoch": 11.810766721044045, "grad_norm": 0.024609869346022606, "learning_rate": 0.00042949723683801256, "loss": 0.0923, "num_input_tokens_seen": 156173072, "step": 72400 }, { "epoch": 11.8115823817292, "grad_norm": 0.0037088543176651, "learning_rate": 0.0004294267689012057, "loss": 0.2015, "num_input_tokens_seen": 156183056, "step": 72405 }, { "epoch": 11.812398042414356, "grad_norm": 0.02093810774385929, "learning_rate": 0.000429356302394655, "loss": 0.0303, "num_input_tokens_seen": 156193840, "step": 72410 }, { "epoch": 11.81321370309951, "grad_norm": 0.13892799615859985, "learning_rate": 0.00042928583731978833, "loss": 0.0117, "num_input_tokens_seen": 156204880, "step": 72415 }, { "epoch": 11.814029363784666, "grad_norm": 0.3025413155555725, "learning_rate": 0.00042921537367803403, "loss": 0.0501, "num_input_tokens_seen": 156216112, "step": 72420 }, { "epoch": 11.81484502446982, "grad_norm": 0.08153360337018967, "learning_rate": 0.0004291449114708198, "loss": 0.0486, "num_input_tokens_seen": 156226288, "step": 72425 }, { "epoch": 11.815660685154976, "grad_norm": 0.008864806964993477, "learning_rate": 0.000429074450699574, "loss": 0.0123, "num_input_tokens_seen": 156238448, "step": 72430 }, { "epoch": 11.81647634584013, "grad_norm": 0.08628880977630615, "learning_rate": 0.0004290039913657243, "loss": 0.0978, "num_input_tokens_seen": 156249840, "step": 72435 }, { "epoch": 11.817292006525285, "grad_norm": 0.06744389981031418, "learning_rate": 0.00042893353347069887, "loss": 0.1099, "num_input_tokens_seen": 156262384, "step": 72440 }, { "epoch": 11.818107667210441, "grad_norm": 0.041163135319948196, "learning_rate": 0.0004288630770159254, "loss": 0.0183, "num_input_tokens_seen": 156274512, "step": 72445 }, { "epoch": 11.818923327895595, "grad_norm": 0.11681295931339264, "learning_rate": 0.00042879262200283216, "loss": 0.0261, "num_input_tokens_seen": 156284720, "step": 72450 }, { "epoch": 11.81973898858075, "grad_norm": 0.08443369716405869, "learning_rate": 0.0004287221684328465, "loss": 0.014, "num_input_tokens_seen": 156295184, "step": 72455 }, { "epoch": 11.820554649265905, "grad_norm": 0.006772714201360941, "learning_rate": 0.00042865171630739654, "loss": 0.0586, "num_input_tokens_seen": 156306064, "step": 72460 }, { "epoch": 11.82137030995106, "grad_norm": 0.011958517134189606, "learning_rate": 0.0004285812656279102, "loss": 0.0544, "num_input_tokens_seen": 156316496, "step": 72465 }, { "epoch": 11.822185970636216, "grad_norm": 0.47554323077201843, "learning_rate": 0.000428510816395815, "loss": 0.1173, "num_input_tokens_seen": 156327376, "step": 72470 }, { "epoch": 11.82300163132137, "grad_norm": 0.24994494020938873, "learning_rate": 0.00042844036861253897, "loss": 0.1338, "num_input_tokens_seen": 156339088, "step": 72475 }, { "epoch": 11.823817292006526, "grad_norm": 0.009234968572854996, "learning_rate": 0.00042836992227950944, "loss": 0.0123, "num_input_tokens_seen": 156350384, "step": 72480 }, { "epoch": 11.82463295269168, "grad_norm": 0.0063837142661213875, "learning_rate": 0.0004282994773981546, "loss": 0.0491, "num_input_tokens_seen": 156361872, "step": 72485 }, { "epoch": 11.825448613376835, "grad_norm": 0.18246890604496002, "learning_rate": 0.00042822903396990146, "loss": 0.1194, "num_input_tokens_seen": 156372496, "step": 72490 }, { "epoch": 11.826264274061991, "grad_norm": 0.0752144381403923, "learning_rate": 0.0004281585919961783, "loss": 0.0201, "num_input_tokens_seen": 156382608, "step": 72495 }, { "epoch": 11.827079934747145, "grad_norm": 0.2925660312175751, "learning_rate": 0.00042808815147841214, "loss": 0.051, "num_input_tokens_seen": 156393136, "step": 72500 }, { "epoch": 11.8278955954323, "grad_norm": 0.030639342963695526, "learning_rate": 0.0004280177124180311, "loss": 0.0507, "num_input_tokens_seen": 156404944, "step": 72505 }, { "epoch": 11.828711256117455, "grad_norm": 0.13607312738895416, "learning_rate": 0.0004279472748164621, "loss": 0.0636, "num_input_tokens_seen": 156416464, "step": 72510 }, { "epoch": 11.82952691680261, "grad_norm": 0.266716867685318, "learning_rate": 0.0004278768386751332, "loss": 0.027, "num_input_tokens_seen": 156426352, "step": 72515 }, { "epoch": 11.830342577487766, "grad_norm": 0.19606459140777588, "learning_rate": 0.0004278064039954716, "loss": 0.1023, "num_input_tokens_seen": 156437392, "step": 72520 }, { "epoch": 11.83115823817292, "grad_norm": 0.042617812752723694, "learning_rate": 0.00042773597077890485, "loss": 0.1439, "num_input_tokens_seen": 156448656, "step": 72525 }, { "epoch": 11.831973898858076, "grad_norm": 0.024463798850774765, "learning_rate": 0.0004276655390268603, "loss": 0.0223, "num_input_tokens_seen": 156458512, "step": 72530 }, { "epoch": 11.83278955954323, "grad_norm": 0.015081851743161678, "learning_rate": 0.0004275951087407653, "loss": 0.0065, "num_input_tokens_seen": 156469808, "step": 72535 }, { "epoch": 11.833605220228385, "grad_norm": 0.036503929644823074, "learning_rate": 0.0004275246799220473, "loss": 0.0047, "num_input_tokens_seen": 156481552, "step": 72540 }, { "epoch": 11.83442088091354, "grad_norm": 0.0026087791193276644, "learning_rate": 0.0004274542525721338, "loss": 0.1361, "num_input_tokens_seen": 156493392, "step": 72545 }, { "epoch": 11.835236541598695, "grad_norm": 0.6908515095710754, "learning_rate": 0.00042738382669245157, "loss": 0.1068, "num_input_tokens_seen": 156502960, "step": 72550 }, { "epoch": 11.83605220228385, "grad_norm": 0.005371534265577793, "learning_rate": 0.0004273134022844285, "loss": 0.0136, "num_input_tokens_seen": 156513616, "step": 72555 }, { "epoch": 11.836867862969005, "grad_norm": 0.022902632132172585, "learning_rate": 0.00042724297934949136, "loss": 0.0083, "num_input_tokens_seen": 156525072, "step": 72560 }, { "epoch": 11.83768352365416, "grad_norm": 0.013279478996992111, "learning_rate": 0.0004271725578890675, "loss": 0.1385, "num_input_tokens_seen": 156537296, "step": 72565 }, { "epoch": 11.838499184339314, "grad_norm": 0.04086068272590637, "learning_rate": 0.00042710213790458435, "loss": 0.0119, "num_input_tokens_seen": 156549232, "step": 72570 }, { "epoch": 11.83931484502447, "grad_norm": 0.017745716497302055, "learning_rate": 0.00042703171939746865, "loss": 0.0184, "num_input_tokens_seen": 156559088, "step": 72575 }, { "epoch": 11.840130505709626, "grad_norm": 0.02181595377624035, "learning_rate": 0.00042696130236914796, "loss": 0.0071, "num_input_tokens_seen": 156568784, "step": 72580 }, { "epoch": 11.84094616639478, "grad_norm": 0.23778073489665985, "learning_rate": 0.00042689088682104886, "loss": 0.0114, "num_input_tokens_seen": 156579056, "step": 72585 }, { "epoch": 11.841761827079935, "grad_norm": 0.0225570909678936, "learning_rate": 0.00042682047275459893, "loss": 0.167, "num_input_tokens_seen": 156589584, "step": 72590 }, { "epoch": 11.84257748776509, "grad_norm": 0.5410389304161072, "learning_rate": 0.00042675006017122477, "loss": 0.2216, "num_input_tokens_seen": 156600304, "step": 72595 }, { "epoch": 11.843393148450245, "grad_norm": 0.021621810272336006, "learning_rate": 0.0004266796490723538, "loss": 0.0132, "num_input_tokens_seen": 156610768, "step": 72600 }, { "epoch": 11.844208809135399, "grad_norm": 0.011670658364892006, "learning_rate": 0.0004266092394594124, "loss": 0.0586, "num_input_tokens_seen": 156621392, "step": 72605 }, { "epoch": 11.845024469820554, "grad_norm": 0.003168725874274969, "learning_rate": 0.00042653883133382824, "loss": 0.009, "num_input_tokens_seen": 156632720, "step": 72610 }, { "epoch": 11.84584013050571, "grad_norm": 0.005944069009274244, "learning_rate": 0.00042646842469702754, "loss": 0.0423, "num_input_tokens_seen": 156644368, "step": 72615 }, { "epoch": 11.846655791190864, "grad_norm": 0.14430217444896698, "learning_rate": 0.0004263980195504378, "loss": 0.0178, "num_input_tokens_seen": 156655312, "step": 72620 }, { "epoch": 11.84747145187602, "grad_norm": 0.02338322624564171, "learning_rate": 0.0004263276158954853, "loss": 0.0274, "num_input_tokens_seen": 156665040, "step": 72625 }, { "epoch": 11.848287112561174, "grad_norm": 0.21539098024368286, "learning_rate": 0.0004262572137335973, "loss": 0.015, "num_input_tokens_seen": 156675888, "step": 72630 }, { "epoch": 11.84910277324633, "grad_norm": 0.024262655526399612, "learning_rate": 0.00042618681306620025, "loss": 0.1413, "num_input_tokens_seen": 156687888, "step": 72635 }, { "epoch": 11.849918433931485, "grad_norm": 0.00680202804505825, "learning_rate": 0.00042611641389472127, "loss": 0.0088, "num_input_tokens_seen": 156699312, "step": 72640 }, { "epoch": 11.850734094616639, "grad_norm": 0.007292779162526131, "learning_rate": 0.0004260460162205867, "loss": 0.0694, "num_input_tokens_seen": 156710416, "step": 72645 }, { "epoch": 11.851549755301795, "grad_norm": 0.3180869519710541, "learning_rate": 0.0004259756200452236, "loss": 0.0145, "num_input_tokens_seen": 156721360, "step": 72650 }, { "epoch": 11.852365415986949, "grad_norm": 0.6478880643844604, "learning_rate": 0.00042590522537005825, "loss": 0.0158, "num_input_tokens_seen": 156732336, "step": 72655 }, { "epoch": 11.853181076672104, "grad_norm": 0.009379223920404911, "learning_rate": 0.00042583483219651763, "loss": 0.0156, "num_input_tokens_seen": 156743152, "step": 72660 }, { "epoch": 11.85399673735726, "grad_norm": 0.027274901047348976, "learning_rate": 0.0004257644405260282, "loss": 0.0103, "num_input_tokens_seen": 156752592, "step": 72665 }, { "epoch": 11.854812398042414, "grad_norm": 0.31555449962615967, "learning_rate": 0.0004256940503600166, "loss": 0.082, "num_input_tokens_seen": 156763920, "step": 72670 }, { "epoch": 11.85562805872757, "grad_norm": 0.4312150776386261, "learning_rate": 0.00042562366169990936, "loss": 0.2294, "num_input_tokens_seen": 156774512, "step": 72675 }, { "epoch": 11.856443719412724, "grad_norm": 0.03112630918622017, "learning_rate": 0.00042555327454713276, "loss": 0.0076, "num_input_tokens_seen": 156785968, "step": 72680 }, { "epoch": 11.85725938009788, "grad_norm": 0.010542362928390503, "learning_rate": 0.0004254828889031137, "loss": 0.0858, "num_input_tokens_seen": 156797072, "step": 72685 }, { "epoch": 11.858075040783035, "grad_norm": 0.08964621275663376, "learning_rate": 0.0004254125047692784, "loss": 0.0176, "num_input_tokens_seen": 156807600, "step": 72690 }, { "epoch": 11.858890701468189, "grad_norm": 0.08288874477148056, "learning_rate": 0.00042534212214705326, "loss": 0.0149, "num_input_tokens_seen": 156817168, "step": 72695 }, { "epoch": 11.859706362153345, "grad_norm": 0.047476813197135925, "learning_rate": 0.0004252717410378648, "loss": 0.1024, "num_input_tokens_seen": 156828368, "step": 72700 }, { "epoch": 11.860522022838499, "grad_norm": 0.4004109501838684, "learning_rate": 0.00042520136144313925, "loss": 0.0298, "num_input_tokens_seen": 156838736, "step": 72705 }, { "epoch": 11.861337683523654, "grad_norm": 0.6902902722358704, "learning_rate": 0.0004251309833643029, "loss": 0.0448, "num_input_tokens_seen": 156850480, "step": 72710 }, { "epoch": 11.86215334420881, "grad_norm": 0.04718517139554024, "learning_rate": 0.00042506060680278234, "loss": 0.0293, "num_input_tokens_seen": 156861168, "step": 72715 }, { "epoch": 11.862969004893964, "grad_norm": 0.6012787222862244, "learning_rate": 0.00042499023176000353, "loss": 0.2949, "num_input_tokens_seen": 156871568, "step": 72720 }, { "epoch": 11.86378466557912, "grad_norm": 0.4297243058681488, "learning_rate": 0.000424919858237393, "loss": 0.1866, "num_input_tokens_seen": 156881200, "step": 72725 }, { "epoch": 11.864600326264274, "grad_norm": 0.04537701606750488, "learning_rate": 0.00042484948623637656, "loss": 0.0552, "num_input_tokens_seen": 156892592, "step": 72730 }, { "epoch": 11.86541598694943, "grad_norm": 0.025109652429819107, "learning_rate": 0.0004247791157583808, "loss": 0.0368, "num_input_tokens_seen": 156903312, "step": 72735 }, { "epoch": 11.866231647634583, "grad_norm": 0.059654369950294495, "learning_rate": 0.0004247087468048315, "loss": 0.0132, "num_input_tokens_seen": 156913936, "step": 72740 }, { "epoch": 11.867047308319739, "grad_norm": 0.13610826432704926, "learning_rate": 0.00042463837937715515, "loss": 0.0212, "num_input_tokens_seen": 156925328, "step": 72745 }, { "epoch": 11.867862969004895, "grad_norm": 0.5094715356826782, "learning_rate": 0.0004245680134767775, "loss": 0.2009, "num_input_tokens_seen": 156937488, "step": 72750 }, { "epoch": 11.868678629690049, "grad_norm": 0.1867058128118515, "learning_rate": 0.0004244976491051249, "loss": 0.0309, "num_input_tokens_seen": 156949104, "step": 72755 }, { "epoch": 11.869494290375204, "grad_norm": 0.0341111458837986, "learning_rate": 0.00042442728626362306, "loss": 0.0768, "num_input_tokens_seen": 156958320, "step": 72760 }, { "epoch": 11.870309951060358, "grad_norm": 0.11354099214076996, "learning_rate": 0.00042435692495369824, "loss": 0.0742, "num_input_tokens_seen": 156969424, "step": 72765 }, { "epoch": 11.871125611745514, "grad_norm": 0.008182099089026451, "learning_rate": 0.0004242865651767762, "loss": 0.0869, "num_input_tokens_seen": 156979824, "step": 72770 }, { "epoch": 11.87194127243067, "grad_norm": 0.2776086628437042, "learning_rate": 0.0004242162069342831, "loss": 0.1502, "num_input_tokens_seen": 156991088, "step": 72775 }, { "epoch": 11.872756933115824, "grad_norm": 0.41338229179382324, "learning_rate": 0.0004241458502276446, "loss": 0.0478, "num_input_tokens_seen": 157000976, "step": 72780 }, { "epoch": 11.87357259380098, "grad_norm": 0.06075502932071686, "learning_rate": 0.00042407549505828657, "loss": 0.0457, "num_input_tokens_seen": 157012112, "step": 72785 }, { "epoch": 11.874388254486133, "grad_norm": 0.16415099799633026, "learning_rate": 0.0004240051414276352, "loss": 0.0753, "num_input_tokens_seen": 157024176, "step": 72790 }, { "epoch": 11.875203915171289, "grad_norm": 0.6772935390472412, "learning_rate": 0.00042393478933711585, "loss": 0.0645, "num_input_tokens_seen": 157035152, "step": 72795 }, { "epoch": 11.876019575856443, "grad_norm": 0.011819391511380672, "learning_rate": 0.0004238644387881546, "loss": 0.0258, "num_input_tokens_seen": 157045456, "step": 72800 }, { "epoch": 11.876835236541599, "grad_norm": 0.06904347240924835, "learning_rate": 0.000423794089782177, "loss": 0.0759, "num_input_tokens_seen": 157057232, "step": 72805 }, { "epoch": 11.877650897226754, "grad_norm": 0.0258609838783741, "learning_rate": 0.000423723742320609, "loss": 0.0746, "num_input_tokens_seen": 157068176, "step": 72810 }, { "epoch": 11.878466557911908, "grad_norm": 0.009693213738501072, "learning_rate": 0.00042365339640487596, "loss": 0.1229, "num_input_tokens_seen": 157079408, "step": 72815 }, { "epoch": 11.879282218597064, "grad_norm": 0.010637176223099232, "learning_rate": 0.0004235830520364038, "loss": 0.0146, "num_input_tokens_seen": 157088912, "step": 72820 }, { "epoch": 11.880097879282218, "grad_norm": 0.023591458797454834, "learning_rate": 0.0004235127092166179, "loss": 0.1019, "num_input_tokens_seen": 157099344, "step": 72825 }, { "epoch": 11.880913539967374, "grad_norm": 0.08307381719350815, "learning_rate": 0.0004234423679469441, "loss": 0.1259, "num_input_tokens_seen": 157110416, "step": 72830 }, { "epoch": 11.88172920065253, "grad_norm": 0.019350357353687286, "learning_rate": 0.0004233720282288078, "loss": 0.128, "num_input_tokens_seen": 157121072, "step": 72835 }, { "epoch": 11.882544861337683, "grad_norm": 0.2497057318687439, "learning_rate": 0.00042330169006363455, "loss": 0.0381, "num_input_tokens_seen": 157132816, "step": 72840 }, { "epoch": 11.883360522022839, "grad_norm": 0.24066291749477386, "learning_rate": 0.0004232313534528499, "loss": 0.0222, "num_input_tokens_seen": 157143728, "step": 72845 }, { "epoch": 11.884176182707993, "grad_norm": 0.0208736602216959, "learning_rate": 0.00042316101839787916, "loss": 0.1162, "num_input_tokens_seen": 157155312, "step": 72850 }, { "epoch": 11.884991843393149, "grad_norm": 0.13492028415203094, "learning_rate": 0.00042309068490014787, "loss": 0.0294, "num_input_tokens_seen": 157166064, "step": 72855 }, { "epoch": 11.885807504078304, "grad_norm": 0.2047741711139679, "learning_rate": 0.00042302035296108156, "loss": 0.0726, "num_input_tokens_seen": 157176624, "step": 72860 }, { "epoch": 11.886623164763458, "grad_norm": 0.03653750941157341, "learning_rate": 0.00042295002258210525, "loss": 0.0329, "num_input_tokens_seen": 157187120, "step": 72865 }, { "epoch": 11.887438825448614, "grad_norm": 0.011997684836387634, "learning_rate": 0.00042287969376464466, "loss": 0.0113, "num_input_tokens_seen": 157198800, "step": 72870 }, { "epoch": 11.888254486133768, "grad_norm": 0.014201940968632698, "learning_rate": 0.0004228093665101247, "loss": 0.0755, "num_input_tokens_seen": 157209328, "step": 72875 }, { "epoch": 11.889070146818923, "grad_norm": 0.41382813453674316, "learning_rate": 0.00042273904081997115, "loss": 0.133, "num_input_tokens_seen": 157219952, "step": 72880 }, { "epoch": 11.88988580750408, "grad_norm": 0.3977670669555664, "learning_rate": 0.0004226687166956087, "loss": 0.1357, "num_input_tokens_seen": 157231024, "step": 72885 }, { "epoch": 11.890701468189233, "grad_norm": 0.032246604561805725, "learning_rate": 0.00042259839413846275, "loss": 0.0683, "num_input_tokens_seen": 157241776, "step": 72890 }, { "epoch": 11.891517128874389, "grad_norm": 0.03838464617729187, "learning_rate": 0.0004225280731499588, "loss": 0.054, "num_input_tokens_seen": 157252304, "step": 72895 }, { "epoch": 11.892332789559543, "grad_norm": 0.36054763197898865, "learning_rate": 0.00042245775373152153, "loss": 0.0237, "num_input_tokens_seen": 157264400, "step": 72900 }, { "epoch": 11.893148450244698, "grad_norm": 0.03474174067378044, "learning_rate": 0.0004223874358845764, "loss": 0.1161, "num_input_tokens_seen": 157274160, "step": 72905 }, { "epoch": 11.893964110929852, "grad_norm": 0.013275551609694958, "learning_rate": 0.0004223171196105482, "loss": 0.014, "num_input_tokens_seen": 157285744, "step": 72910 }, { "epoch": 11.894779771615008, "grad_norm": 0.01717224158346653, "learning_rate": 0.0004222468049108623, "loss": 0.0169, "num_input_tokens_seen": 157295504, "step": 72915 }, { "epoch": 11.895595432300164, "grad_norm": 0.17278116941452026, "learning_rate": 0.00042217649178694327, "loss": 0.1191, "num_input_tokens_seen": 157306224, "step": 72920 }, { "epoch": 11.896411092985318, "grad_norm": 0.4478326141834259, "learning_rate": 0.00042210618024021663, "loss": 0.0785, "num_input_tokens_seen": 157317264, "step": 72925 }, { "epoch": 11.897226753670473, "grad_norm": 0.08212458342313766, "learning_rate": 0.00042203587027210684, "loss": 0.0742, "num_input_tokens_seen": 157327344, "step": 72930 }, { "epoch": 11.898042414355627, "grad_norm": 0.009555427357554436, "learning_rate": 0.00042196556188403924, "loss": 0.0237, "num_input_tokens_seen": 157339088, "step": 72935 }, { "epoch": 11.898858075040783, "grad_norm": 0.041616279631853104, "learning_rate": 0.0004218952550774383, "loss": 0.0235, "num_input_tokens_seen": 157350384, "step": 72940 }, { "epoch": 11.899673735725939, "grad_norm": 0.03491552546620369, "learning_rate": 0.00042182494985372937, "loss": 0.0153, "num_input_tokens_seen": 157361680, "step": 72945 }, { "epoch": 11.900489396411093, "grad_norm": 0.02246907539665699, "learning_rate": 0.0004217546462143368, "loss": 0.0734, "num_input_tokens_seen": 157372464, "step": 72950 }, { "epoch": 11.901305057096248, "grad_norm": 0.009402522817254066, "learning_rate": 0.0004216843441606857, "loss": 0.0109, "num_input_tokens_seen": 157382896, "step": 72955 }, { "epoch": 11.902120717781402, "grad_norm": 0.04543184116482735, "learning_rate": 0.0004216140436942006, "loss": 0.017, "num_input_tokens_seen": 157393616, "step": 72960 }, { "epoch": 11.902936378466558, "grad_norm": 0.026042377576231956, "learning_rate": 0.0004215437448163065, "loss": 0.0201, "num_input_tokens_seen": 157405072, "step": 72965 }, { "epoch": 11.903752039151712, "grad_norm": 0.01707102544605732, "learning_rate": 0.00042147344752842774, "loss": 0.2138, "num_input_tokens_seen": 157415120, "step": 72970 }, { "epoch": 11.904567699836868, "grad_norm": 0.20671682059764862, "learning_rate": 0.0004214031518319893, "loss": 0.0176, "num_input_tokens_seen": 157426480, "step": 72975 }, { "epoch": 11.905383360522023, "grad_norm": 0.6336327195167542, "learning_rate": 0.0004213328577284157, "loss": 0.1811, "num_input_tokens_seen": 157437264, "step": 72980 }, { "epoch": 11.906199021207177, "grad_norm": 0.011044961400330067, "learning_rate": 0.0004212625652191315, "loss": 0.1134, "num_input_tokens_seen": 157448240, "step": 72985 }, { "epoch": 11.907014681892333, "grad_norm": 0.015670204535126686, "learning_rate": 0.00042119227430556137, "loss": 0.0158, "num_input_tokens_seen": 157459952, "step": 72990 }, { "epoch": 11.907830342577487, "grad_norm": 0.023730136454105377, "learning_rate": 0.0004211219849891296, "loss": 0.0868, "num_input_tokens_seen": 157470992, "step": 72995 }, { "epoch": 11.908646003262643, "grad_norm": 0.49133771657943726, "learning_rate": 0.00042105169727126094, "loss": 0.0447, "num_input_tokens_seen": 157481104, "step": 73000 }, { "epoch": 11.909461663947798, "grad_norm": 0.043914858251810074, "learning_rate": 0.00042098141115337986, "loss": 0.0512, "num_input_tokens_seen": 157491536, "step": 73005 }, { "epoch": 11.910277324632952, "grad_norm": 0.046711187809705734, "learning_rate": 0.0004209111266369107, "loss": 0.0472, "num_input_tokens_seen": 157504048, "step": 73010 }, { "epoch": 11.911092985318108, "grad_norm": 0.017577311024069786, "learning_rate": 0.0004208408437232779, "loss": 0.0532, "num_input_tokens_seen": 157514288, "step": 73015 }, { "epoch": 11.911908646003262, "grad_norm": 0.05399191752076149, "learning_rate": 0.00042077056241390586, "loss": 0.0461, "num_input_tokens_seen": 157525456, "step": 73020 }, { "epoch": 11.912724306688418, "grad_norm": 0.12883497774600983, "learning_rate": 0.00042070028271021877, "loss": 0.0229, "num_input_tokens_seen": 157536432, "step": 73025 }, { "epoch": 11.913539967373573, "grad_norm": 0.050984837114810944, "learning_rate": 0.0004206300046136412, "loss": 0.076, "num_input_tokens_seen": 157546960, "step": 73030 }, { "epoch": 11.914355628058727, "grad_norm": 0.5117084383964539, "learning_rate": 0.00042055972812559707, "loss": 0.038, "num_input_tokens_seen": 157558288, "step": 73035 }, { "epoch": 11.915171288743883, "grad_norm": 0.05626806989312172, "learning_rate": 0.0004204894532475111, "loss": 0.0274, "num_input_tokens_seen": 157569520, "step": 73040 }, { "epoch": 11.915986949429037, "grad_norm": 0.015065483748912811, "learning_rate": 0.00042041917998080695, "loss": 0.0206, "num_input_tokens_seen": 157579888, "step": 73045 }, { "epoch": 11.916802610114193, "grad_norm": 0.022030090913176537, "learning_rate": 0.0004203489083269093, "loss": 0.0807, "num_input_tokens_seen": 157590704, "step": 73050 }, { "epoch": 11.917618270799348, "grad_norm": 0.0031868035439401865, "learning_rate": 0.0004202786382872419, "loss": 0.0724, "num_input_tokens_seen": 157600720, "step": 73055 }, { "epoch": 11.918433931484502, "grad_norm": 0.48201173543930054, "learning_rate": 0.00042020836986322917, "loss": 0.1584, "num_input_tokens_seen": 157611312, "step": 73060 }, { "epoch": 11.919249592169658, "grad_norm": 0.013275333680212498, "learning_rate": 0.0004201381030562949, "loss": 0.0095, "num_input_tokens_seen": 157621488, "step": 73065 }, { "epoch": 11.920065252854812, "grad_norm": 0.040429044514894485, "learning_rate": 0.00042006783786786346, "loss": 0.0472, "num_input_tokens_seen": 157631440, "step": 73070 }, { "epoch": 11.920880913539968, "grad_norm": 0.03655463084578514, "learning_rate": 0.0004199975742993585, "loss": 0.0894, "num_input_tokens_seen": 157641872, "step": 73075 }, { "epoch": 11.921696574225122, "grad_norm": 0.027619972825050354, "learning_rate": 0.0004199273123522044, "loss": 0.0123, "num_input_tokens_seen": 157652720, "step": 73080 }, { "epoch": 11.922512234910277, "grad_norm": 0.019114721566438675, "learning_rate": 0.00041985705202782464, "loss": 0.0071, "num_input_tokens_seen": 157663664, "step": 73085 }, { "epoch": 11.923327895595433, "grad_norm": 0.022797033190727234, "learning_rate": 0.00041978679332764366, "loss": 0.1437, "num_input_tokens_seen": 157674224, "step": 73090 }, { "epoch": 11.924143556280587, "grad_norm": 0.02315114252269268, "learning_rate": 0.0004197165362530848, "loss": 0.0155, "num_input_tokens_seen": 157685008, "step": 73095 }, { "epoch": 11.924959216965743, "grad_norm": 0.013853387907147408, "learning_rate": 0.00041964628080557224, "loss": 0.0158, "num_input_tokens_seen": 157696784, "step": 73100 }, { "epoch": 11.925774877650896, "grad_norm": 0.07218615710735321, "learning_rate": 0.0004195760269865299, "loss": 0.0931, "num_input_tokens_seen": 157706416, "step": 73105 }, { "epoch": 11.926590538336052, "grad_norm": 0.06797458231449127, "learning_rate": 0.0004195057747973812, "loss": 0.0118, "num_input_tokens_seen": 157717872, "step": 73110 }, { "epoch": 11.927406199021208, "grad_norm": 1.926020622253418, "learning_rate": 0.0004194355242395503, "loss": 0.2519, "num_input_tokens_seen": 157727984, "step": 73115 }, { "epoch": 11.928221859706362, "grad_norm": 0.022264305502176285, "learning_rate": 0.00041936527531446046, "loss": 0.0771, "num_input_tokens_seen": 157739440, "step": 73120 }, { "epoch": 11.929037520391518, "grad_norm": 0.22303183376789093, "learning_rate": 0.0004192950280235359, "loss": 0.0384, "num_input_tokens_seen": 157749872, "step": 73125 }, { "epoch": 11.929853181076671, "grad_norm": 0.08000101149082184, "learning_rate": 0.0004192247823681997, "loss": 0.0775, "num_input_tokens_seen": 157761488, "step": 73130 }, { "epoch": 11.930668841761827, "grad_norm": 0.06861213594675064, "learning_rate": 0.00041915453834987594, "loss": 0.0126, "num_input_tokens_seen": 157772560, "step": 73135 }, { "epoch": 11.931484502446983, "grad_norm": 0.14659623801708221, "learning_rate": 0.0004190842959699879, "loss": 0.0249, "num_input_tokens_seen": 157782288, "step": 73140 }, { "epoch": 11.932300163132137, "grad_norm": 0.01348864659667015, "learning_rate": 0.0004190140552299593, "loss": 0.0316, "num_input_tokens_seen": 157791792, "step": 73145 }, { "epoch": 11.933115823817293, "grad_norm": 0.3032896816730499, "learning_rate": 0.0004189438161312136, "loss": 0.0474, "num_input_tokens_seen": 157802064, "step": 73150 }, { "epoch": 11.933931484502446, "grad_norm": 0.24008546769618988, "learning_rate": 0.00041887357867517435, "loss": 0.0398, "num_input_tokens_seen": 157813456, "step": 73155 }, { "epoch": 11.934747145187602, "grad_norm": 0.042376190423965454, "learning_rate": 0.0004188033428632649, "loss": 0.022, "num_input_tokens_seen": 157823760, "step": 73160 }, { "epoch": 11.935562805872756, "grad_norm": 0.016715744510293007, "learning_rate": 0.00041873310869690875, "loss": 0.0853, "num_input_tokens_seen": 157834608, "step": 73165 }, { "epoch": 11.936378466557912, "grad_norm": 0.023077139630913734, "learning_rate": 0.00041866287617752906, "loss": 0.1376, "num_input_tokens_seen": 157845168, "step": 73170 }, { "epoch": 11.937194127243067, "grad_norm": 0.0027483149897307158, "learning_rate": 0.0004185926453065496, "loss": 0.1017, "num_input_tokens_seen": 157856336, "step": 73175 }, { "epoch": 11.938009787928221, "grad_norm": 0.43626055121421814, "learning_rate": 0.0004185224160853933, "loss": 0.0212, "num_input_tokens_seen": 157869136, "step": 73180 }, { "epoch": 11.938825448613377, "grad_norm": 0.04319378361105919, "learning_rate": 0.00041845218851548375, "loss": 0.0056, "num_input_tokens_seen": 157879312, "step": 73185 }, { "epoch": 11.939641109298531, "grad_norm": 0.053616493940353394, "learning_rate": 0.0004183819625982439, "loss": 0.0596, "num_input_tokens_seen": 157889712, "step": 73190 }, { "epoch": 11.940456769983687, "grad_norm": 0.6336898803710938, "learning_rate": 0.0004183117383350973, "loss": 0.0702, "num_input_tokens_seen": 157900624, "step": 73195 }, { "epoch": 11.941272430668842, "grad_norm": 0.03297612816095352, "learning_rate": 0.0004182415157274668, "loss": 0.011, "num_input_tokens_seen": 157911056, "step": 73200 }, { "epoch": 11.942088091353996, "grad_norm": 0.2813594341278076, "learning_rate": 0.00041817129477677564, "loss": 0.1218, "num_input_tokens_seen": 157922800, "step": 73205 }, { "epoch": 11.942903752039152, "grad_norm": 0.03954872488975525, "learning_rate": 0.0004181010754844472, "loss": 0.0244, "num_input_tokens_seen": 157932656, "step": 73210 }, { "epoch": 11.943719412724306, "grad_norm": 0.27242979407310486, "learning_rate": 0.00041803085785190416, "loss": 0.0225, "num_input_tokens_seen": 157943216, "step": 73215 }, { "epoch": 11.944535073409462, "grad_norm": 0.26992183923721313, "learning_rate": 0.00041796064188057, "loss": 0.0153, "num_input_tokens_seen": 157954544, "step": 73220 }, { "epoch": 11.945350734094617, "grad_norm": 0.06063072755932808, "learning_rate": 0.00041789042757186726, "loss": 0.068, "num_input_tokens_seen": 157964560, "step": 73225 }, { "epoch": 11.946166394779771, "grad_norm": 0.015045109204947948, "learning_rate": 0.00041782021492721937, "loss": 0.129, "num_input_tokens_seen": 157976112, "step": 73230 }, { "epoch": 11.946982055464927, "grad_norm": 0.006266028620302677, "learning_rate": 0.00041775000394804896, "loss": 0.0184, "num_input_tokens_seen": 157987216, "step": 73235 }, { "epoch": 11.947797716150081, "grad_norm": 0.09542680531740189, "learning_rate": 0.0004176797946357792, "loss": 0.028, "num_input_tokens_seen": 157997712, "step": 73240 }, { "epoch": 11.948613376835237, "grad_norm": 0.09864526242017746, "learning_rate": 0.00041760958699183263, "loss": 0.1433, "num_input_tokens_seen": 158008912, "step": 73245 }, { "epoch": 11.949429037520392, "grad_norm": 0.031732719391584396, "learning_rate": 0.0004175393810176325, "loss": 0.0634, "num_input_tokens_seen": 158019440, "step": 73250 }, { "epoch": 11.950244698205546, "grad_norm": 0.45474743843078613, "learning_rate": 0.00041746917671460124, "loss": 0.1154, "num_input_tokens_seen": 158029136, "step": 73255 }, { "epoch": 11.951060358890702, "grad_norm": 0.009881513193249702, "learning_rate": 0.000417398974084162, "loss": 0.0205, "num_input_tokens_seen": 158039664, "step": 73260 }, { "epoch": 11.951876019575856, "grad_norm": 0.04040459915995598, "learning_rate": 0.0004173287731277371, "loss": 0.0168, "num_input_tokens_seen": 158049968, "step": 73265 }, { "epoch": 11.952691680261012, "grad_norm": 0.09699574112892151, "learning_rate": 0.00041725857384674974, "loss": 0.0129, "num_input_tokens_seen": 158061360, "step": 73270 }, { "epoch": 11.953507340946166, "grad_norm": 0.009382757358253002, "learning_rate": 0.0004171883762426221, "loss": 0.0083, "num_input_tokens_seen": 158072208, "step": 73275 }, { "epoch": 11.954323001631321, "grad_norm": 0.016391951590776443, "learning_rate": 0.00041711818031677737, "loss": 0.0258, "num_input_tokens_seen": 158082352, "step": 73280 }, { "epoch": 11.955138662316477, "grad_norm": 0.007841772399842739, "learning_rate": 0.00041704798607063756, "loss": 0.0532, "num_input_tokens_seen": 158093552, "step": 73285 }, { "epoch": 11.955954323001631, "grad_norm": 0.5882191061973572, "learning_rate": 0.0004169777935056257, "loss": 0.0563, "num_input_tokens_seen": 158104912, "step": 73290 }, { "epoch": 11.956769983686787, "grad_norm": 0.03723549470305443, "learning_rate": 0.00041690760262316415, "loss": 0.0395, "num_input_tokens_seen": 158116432, "step": 73295 }, { "epoch": 11.95758564437194, "grad_norm": 0.018250921741127968, "learning_rate": 0.0004168374134246754, "loss": 0.0193, "num_input_tokens_seen": 158127120, "step": 73300 }, { "epoch": 11.958401305057096, "grad_norm": 0.3220383822917938, "learning_rate": 0.000416767225911582, "loss": 0.0632, "num_input_tokens_seen": 158137840, "step": 73305 }, { "epoch": 11.959216965742252, "grad_norm": 0.2808511555194855, "learning_rate": 0.0004166970400853064, "loss": 0.0385, "num_input_tokens_seen": 158148976, "step": 73310 }, { "epoch": 11.960032626427406, "grad_norm": 0.8117482662200928, "learning_rate": 0.00041662685594727076, "loss": 0.0258, "num_input_tokens_seen": 158159536, "step": 73315 }, { "epoch": 11.960848287112562, "grad_norm": 0.057675834745168686, "learning_rate": 0.0004165566734988979, "loss": 0.0453, "num_input_tokens_seen": 158170640, "step": 73320 }, { "epoch": 11.961663947797716, "grad_norm": 0.1492367535829544, "learning_rate": 0.00041648649274160976, "loss": 0.0648, "num_input_tokens_seen": 158180912, "step": 73325 }, { "epoch": 11.962479608482871, "grad_norm": 0.0188741572201252, "learning_rate": 0.0004164163136768289, "loss": 0.0455, "num_input_tokens_seen": 158191088, "step": 73330 }, { "epoch": 11.963295269168025, "grad_norm": 0.0173636544495821, "learning_rate": 0.0004163461363059774, "loss": 0.0172, "num_input_tokens_seen": 158200400, "step": 73335 }, { "epoch": 11.964110929853181, "grad_norm": 0.008057601749897003, "learning_rate": 0.00041627596063047753, "loss": 0.0157, "num_input_tokens_seen": 158211120, "step": 73340 }, { "epoch": 11.964926590538337, "grad_norm": 0.0047826883383095264, "learning_rate": 0.00041620578665175166, "loss": 0.1128, "num_input_tokens_seen": 158220752, "step": 73345 }, { "epoch": 11.96574225122349, "grad_norm": 0.014376291073858738, "learning_rate": 0.00041613561437122163, "loss": 0.0573, "num_input_tokens_seen": 158231984, "step": 73350 }, { "epoch": 11.966557911908646, "grad_norm": 0.03946974501013756, "learning_rate": 0.0004160654437903101, "loss": 0.0581, "num_input_tokens_seen": 158242704, "step": 73355 }, { "epoch": 11.9673735725938, "grad_norm": 0.5748576521873474, "learning_rate": 0.0004159952749104385, "loss": 0.1674, "num_input_tokens_seen": 158252528, "step": 73360 }, { "epoch": 11.968189233278956, "grad_norm": 0.11517991125583649, "learning_rate": 0.00041592510773302946, "loss": 0.0213, "num_input_tokens_seen": 158261392, "step": 73365 }, { "epoch": 11.969004893964112, "grad_norm": 0.5526224970817566, "learning_rate": 0.0004158549422595045, "loss": 0.0282, "num_input_tokens_seen": 158273296, "step": 73370 }, { "epoch": 11.969820554649266, "grad_norm": 0.13211509585380554, "learning_rate": 0.0004157847784912861, "loss": 0.0124, "num_input_tokens_seen": 158284464, "step": 73375 }, { "epoch": 11.970636215334421, "grad_norm": 0.004232446663081646, "learning_rate": 0.0004157146164297959, "loss": 0.0046, "num_input_tokens_seen": 158295088, "step": 73380 }, { "epoch": 11.971451876019575, "grad_norm": 0.5915926694869995, "learning_rate": 0.00041564445607645607, "loss": 0.0774, "num_input_tokens_seen": 158306224, "step": 73385 }, { "epoch": 11.97226753670473, "grad_norm": 0.03190236911177635, "learning_rate": 0.0004155742974326881, "loss": 0.0063, "num_input_tokens_seen": 158317296, "step": 73390 }, { "epoch": 11.973083197389887, "grad_norm": 0.06463105976581573, "learning_rate": 0.00041550414049991435, "loss": 0.0093, "num_input_tokens_seen": 158329296, "step": 73395 }, { "epoch": 11.97389885807504, "grad_norm": 0.005771547555923462, "learning_rate": 0.0004154339852795562, "loss": 0.0231, "num_input_tokens_seen": 158339952, "step": 73400 }, { "epoch": 11.974714518760196, "grad_norm": 0.47353968024253845, "learning_rate": 0.0004153638317730358, "loss": 0.0701, "num_input_tokens_seen": 158351120, "step": 73405 }, { "epoch": 11.97553017944535, "grad_norm": 0.011805918999016285, "learning_rate": 0.00041529367998177446, "loss": 0.1475, "num_input_tokens_seen": 158362192, "step": 73410 }, { "epoch": 11.976345840130506, "grad_norm": 0.05291421338915825, "learning_rate": 0.00041522352990719434, "loss": 0.0162, "num_input_tokens_seen": 158374448, "step": 73415 }, { "epoch": 11.977161500815662, "grad_norm": 0.013077180832624435, "learning_rate": 0.0004151533815507168, "loss": 0.0516, "num_input_tokens_seen": 158385648, "step": 73420 }, { "epoch": 11.977977161500815, "grad_norm": 0.01498448196798563, "learning_rate": 0.00041508323491376364, "loss": 0.0763, "num_input_tokens_seen": 158395472, "step": 73425 }, { "epoch": 11.978792822185971, "grad_norm": 0.022618168964982033, "learning_rate": 0.00041501308999775664, "loss": 0.0298, "num_input_tokens_seen": 158406416, "step": 73430 }, { "epoch": 11.979608482871125, "grad_norm": 0.04178911820054054, "learning_rate": 0.00041494294680411695, "loss": 0.0454, "num_input_tokens_seen": 158417936, "step": 73435 }, { "epoch": 11.98042414355628, "grad_norm": 0.009750105440616608, "learning_rate": 0.0004148728053342665, "loss": 0.0191, "num_input_tokens_seen": 158428464, "step": 73440 }, { "epoch": 11.981239804241435, "grad_norm": 0.017654066905379295, "learning_rate": 0.0004148026655896265, "loss": 0.0403, "num_input_tokens_seen": 158439664, "step": 73445 }, { "epoch": 11.98205546492659, "grad_norm": 0.024397999048233032, "learning_rate": 0.0004147325275716188, "loss": 0.0358, "num_input_tokens_seen": 158451024, "step": 73450 }, { "epoch": 11.982871125611746, "grad_norm": 0.581866979598999, "learning_rate": 0.00041466239128166435, "loss": 0.0422, "num_input_tokens_seen": 158461840, "step": 73455 }, { "epoch": 11.9836867862969, "grad_norm": 0.0038977544754743576, "learning_rate": 0.00041459225672118487, "loss": 0.0206, "num_input_tokens_seen": 158473296, "step": 73460 }, { "epoch": 11.984502446982056, "grad_norm": 0.3326556086540222, "learning_rate": 0.0004145221238916017, "loss": 0.094, "num_input_tokens_seen": 158483248, "step": 73465 }, { "epoch": 11.98531810766721, "grad_norm": 0.029085179790854454, "learning_rate": 0.0004144519927943361, "loss": 0.0792, "num_input_tokens_seen": 158494576, "step": 73470 }, { "epoch": 11.986133768352365, "grad_norm": 0.007555562071502209, "learning_rate": 0.0004143818634308094, "loss": 0.0047, "num_input_tokens_seen": 158505008, "step": 73475 }, { "epoch": 11.986949429037521, "grad_norm": 0.01529519259929657, "learning_rate": 0.00041431173580244284, "loss": 0.1303, "num_input_tokens_seen": 158516752, "step": 73480 }, { "epoch": 11.987765089722675, "grad_norm": 0.006972459144890308, "learning_rate": 0.0004142416099106576, "loss": 0.0123, "num_input_tokens_seen": 158526992, "step": 73485 }, { "epoch": 11.98858075040783, "grad_norm": 0.041457679122686386, "learning_rate": 0.0004141714857568751, "loss": 0.029, "num_input_tokens_seen": 158538192, "step": 73490 }, { "epoch": 11.989396411092985, "grad_norm": 0.5442999005317688, "learning_rate": 0.0004141013633425161, "loss": 0.0329, "num_input_tokens_seen": 158548784, "step": 73495 }, { "epoch": 11.99021207177814, "grad_norm": 0.011788910254836082, "learning_rate": 0.0004140312426690022, "loss": 0.0033, "num_input_tokens_seen": 158559056, "step": 73500 }, { "epoch": 11.991027732463294, "grad_norm": 0.03990941494703293, "learning_rate": 0.000413961123737754, "loss": 0.0164, "num_input_tokens_seen": 158570160, "step": 73505 }, { "epoch": 11.99184339314845, "grad_norm": 0.10911326110363007, "learning_rate": 0.00041389100655019295, "loss": 0.0316, "num_input_tokens_seen": 158579952, "step": 73510 }, { "epoch": 11.992659053833606, "grad_norm": 0.017683153972029686, "learning_rate": 0.00041382089110773975, "loss": 0.0131, "num_input_tokens_seen": 158590544, "step": 73515 }, { "epoch": 11.99347471451876, "grad_norm": 0.006566877942532301, "learning_rate": 0.00041375077741181564, "loss": 0.0028, "num_input_tokens_seen": 158600976, "step": 73520 }, { "epoch": 11.994290375203915, "grad_norm": 0.0036869095638394356, "learning_rate": 0.0004136806654638413, "loss": 0.0539, "num_input_tokens_seen": 158612464, "step": 73525 }, { "epoch": 11.99510603588907, "grad_norm": 0.08164402097463608, "learning_rate": 0.0004136105552652377, "loss": 0.0326, "num_input_tokens_seen": 158622032, "step": 73530 }, { "epoch": 11.995921696574225, "grad_norm": 0.01653778739273548, "learning_rate": 0.0004135404468174261, "loss": 0.0396, "num_input_tokens_seen": 158632752, "step": 73535 }, { "epoch": 11.99673735725938, "grad_norm": 0.02452065609395504, "learning_rate": 0.0004134703401218268, "loss": 0.049, "num_input_tokens_seen": 158644176, "step": 73540 }, { "epoch": 11.997553017944535, "grad_norm": 0.00879335030913353, "learning_rate": 0.00041340023517986096, "loss": 0.0151, "num_input_tokens_seen": 158655152, "step": 73545 }, { "epoch": 11.99836867862969, "grad_norm": 0.008840548805892467, "learning_rate": 0.00041333013199294907, "loss": 0.1269, "num_input_tokens_seen": 158665520, "step": 73550 }, { "epoch": 11.999184339314844, "grad_norm": 0.05727095901966095, "learning_rate": 0.0004132600305625122, "loss": 0.0124, "num_input_tokens_seen": 158676048, "step": 73555 }, { "epoch": 12.0, "grad_norm": 0.02370188757777214, "learning_rate": 0.0004131899308899706, "loss": 0.0337, "num_input_tokens_seen": 158686320, "step": 73560 }, { "epoch": 12.0, "eval_loss": 0.16381755471229553, "eval_runtime": 104.6798, "eval_samples_per_second": 26.032, "eval_steps_per_second": 6.515, "num_input_tokens_seen": 158686320, "step": 73560 }, { "epoch": 12.000815660685156, "grad_norm": 0.008478867821395397, "learning_rate": 0.00041311983297674545, "loss": 0.0366, "num_input_tokens_seen": 158698000, "step": 73565 }, { "epoch": 12.00163132137031, "grad_norm": 0.012445406056940556, "learning_rate": 0.00041304973682425685, "loss": 0.1194, "num_input_tokens_seen": 158708976, "step": 73570 }, { "epoch": 12.002446982055465, "grad_norm": 0.011793774552643299, "learning_rate": 0.00041297964243392583, "loss": 0.0219, "num_input_tokens_seen": 158718768, "step": 73575 }, { "epoch": 12.00326264274062, "grad_norm": 0.08531400561332703, "learning_rate": 0.0004129095498071726, "loss": 0.169, "num_input_tokens_seen": 158729232, "step": 73580 }, { "epoch": 12.004078303425775, "grad_norm": 0.2901374399662018, "learning_rate": 0.000412839458945418, "loss": 0.116, "num_input_tokens_seen": 158739952, "step": 73585 }, { "epoch": 12.00489396411093, "grad_norm": 0.09593775123357773, "learning_rate": 0.0004127693698500821, "loss": 0.0097, "num_input_tokens_seen": 158750864, "step": 73590 }, { "epoch": 12.005709624796085, "grad_norm": 0.018112797290086746, "learning_rate": 0.0004126992825225858, "loss": 0.0108, "num_input_tokens_seen": 158762352, "step": 73595 }, { "epoch": 12.00652528548124, "grad_norm": 0.051894139498472214, "learning_rate": 0.00041262919696434915, "loss": 0.0196, "num_input_tokens_seen": 158773712, "step": 73600 }, { "epoch": 12.007340946166394, "grad_norm": 0.020286327227950096, "learning_rate": 0.0004125591131767927, "loss": 0.0077, "num_input_tokens_seen": 158783504, "step": 73605 }, { "epoch": 12.00815660685155, "grad_norm": 0.12874771654605865, "learning_rate": 0.00041248903116133674, "loss": 0.1111, "num_input_tokens_seen": 158794160, "step": 73610 }, { "epoch": 12.008972267536704, "grad_norm": 0.1070413812994957, "learning_rate": 0.0004124189509194016, "loss": 0.0151, "num_input_tokens_seen": 158805712, "step": 73615 }, { "epoch": 12.00978792822186, "grad_norm": 0.009152588434517384, "learning_rate": 0.00041234887245240756, "loss": 0.0096, "num_input_tokens_seen": 158817168, "step": 73620 }, { "epoch": 12.010603588907015, "grad_norm": 0.06433572620153427, "learning_rate": 0.00041227879576177475, "loss": 0.0284, "num_input_tokens_seen": 158827184, "step": 73625 }, { "epoch": 12.01141924959217, "grad_norm": 0.2980803847312927, "learning_rate": 0.00041220872084892337, "loss": 0.0448, "num_input_tokens_seen": 158838064, "step": 73630 }, { "epoch": 12.012234910277325, "grad_norm": 0.02282801829278469, "learning_rate": 0.00041213864771527366, "loss": 0.1227, "num_input_tokens_seen": 158849232, "step": 73635 }, { "epoch": 12.013050570962479, "grad_norm": 0.004154075402766466, "learning_rate": 0.0004120685763622458, "loss": 0.0167, "num_input_tokens_seen": 158861296, "step": 73640 }, { "epoch": 12.013866231647635, "grad_norm": 0.008946571499109268, "learning_rate": 0.00041199850679125974, "loss": 0.1418, "num_input_tokens_seen": 158870992, "step": 73645 }, { "epoch": 12.01468189233279, "grad_norm": 0.016715897247195244, "learning_rate": 0.0004119284390037356, "loss": 0.0887, "num_input_tokens_seen": 158881616, "step": 73650 }, { "epoch": 12.015497553017944, "grad_norm": 0.01721912808716297, "learning_rate": 0.00041185837300109326, "loss": 0.013, "num_input_tokens_seen": 158892080, "step": 73655 }, { "epoch": 12.0163132137031, "grad_norm": 0.059352364391088486, "learning_rate": 0.00041178830878475304, "loss": 0.0348, "num_input_tokens_seen": 158902416, "step": 73660 }, { "epoch": 12.017128874388254, "grad_norm": 0.00977075845003128, "learning_rate": 0.00041171824635613443, "loss": 0.0055, "num_input_tokens_seen": 158913648, "step": 73665 }, { "epoch": 12.01794453507341, "grad_norm": 0.10423479974269867, "learning_rate": 0.00041164818571665774, "loss": 0.0738, "num_input_tokens_seen": 158923088, "step": 73670 }, { "epoch": 12.018760195758565, "grad_norm": 0.022329889237880707, "learning_rate": 0.00041157812686774245, "loss": 0.1112, "num_input_tokens_seen": 158934320, "step": 73675 }, { "epoch": 12.01957585644372, "grad_norm": 0.0147014781832695, "learning_rate": 0.0004115080698108088, "loss": 0.0136, "num_input_tokens_seen": 158944944, "step": 73680 }, { "epoch": 12.020391517128875, "grad_norm": 0.006968159694224596, "learning_rate": 0.0004114380145472761, "loss": 0.0323, "num_input_tokens_seen": 158956144, "step": 73685 }, { "epoch": 12.021207177814029, "grad_norm": 0.08070676028728485, "learning_rate": 0.00041136796107856465, "loss": 0.02, "num_input_tokens_seen": 158964880, "step": 73690 }, { "epoch": 12.022022838499185, "grad_norm": 0.020995108410716057, "learning_rate": 0.00041129790940609375, "loss": 0.0059, "num_input_tokens_seen": 158976112, "step": 73695 }, { "epoch": 12.022838499184338, "grad_norm": 0.012958096340298653, "learning_rate": 0.0004112278595312834, "loss": 0.0618, "num_input_tokens_seen": 158986896, "step": 73700 }, { "epoch": 12.023654159869494, "grad_norm": 0.015863895416259766, "learning_rate": 0.00041115781145555286, "loss": 0.0115, "num_input_tokens_seen": 158997008, "step": 73705 }, { "epoch": 12.02446982055465, "grad_norm": 0.03521093726158142, "learning_rate": 0.0004110877651803222, "loss": 0.0322, "num_input_tokens_seen": 159007216, "step": 73710 }, { "epoch": 12.025285481239804, "grad_norm": 0.06669262051582336, "learning_rate": 0.0004110177207070106, "loss": 0.1428, "num_input_tokens_seen": 159016880, "step": 73715 }, { "epoch": 12.02610114192496, "grad_norm": 0.1491347998380661, "learning_rate": 0.0004109476780370379, "loss": 0.0667, "num_input_tokens_seen": 159028464, "step": 73720 }, { "epoch": 12.026916802610113, "grad_norm": 0.0052296798676252365, "learning_rate": 0.00041087763717182336, "loss": 0.0402, "num_input_tokens_seen": 159039504, "step": 73725 }, { "epoch": 12.02773246329527, "grad_norm": 0.3088529706001282, "learning_rate": 0.00041080759811278674, "loss": 0.1038, "num_input_tokens_seen": 159049392, "step": 73730 }, { "epoch": 12.028548123980425, "grad_norm": 0.7462801337242126, "learning_rate": 0.00041073756086134705, "loss": 0.1167, "num_input_tokens_seen": 159060368, "step": 73735 }, { "epoch": 12.029363784665579, "grad_norm": 0.007433505728840828, "learning_rate": 0.00041066752541892395, "loss": 0.0226, "num_input_tokens_seen": 159071120, "step": 73740 }, { "epoch": 12.030179445350734, "grad_norm": 0.025007735937833786, "learning_rate": 0.000410597491786937, "loss": 0.024, "num_input_tokens_seen": 159080944, "step": 73745 }, { "epoch": 12.030995106035888, "grad_norm": 0.008760509081184864, "learning_rate": 0.0004105274599668051, "loss": 0.0079, "num_input_tokens_seen": 159091568, "step": 73750 }, { "epoch": 12.031810766721044, "grad_norm": 0.07756485044956207, "learning_rate": 0.00041045742995994783, "loss": 0.0207, "num_input_tokens_seen": 159101584, "step": 73755 }, { "epoch": 12.0326264274062, "grad_norm": 0.4588328003883362, "learning_rate": 0.0004103874017677842, "loss": 0.1356, "num_input_tokens_seen": 159112400, "step": 73760 }, { "epoch": 12.033442088091354, "grad_norm": 0.05563819780945778, "learning_rate": 0.0004103173753917337, "loss": 0.0084, "num_input_tokens_seen": 159122960, "step": 73765 }, { "epoch": 12.03425774877651, "grad_norm": 0.3978593349456787, "learning_rate": 0.0004102473508332153, "loss": 0.0368, "num_input_tokens_seen": 159134320, "step": 73770 }, { "epoch": 12.035073409461663, "grad_norm": 0.05461275205016136, "learning_rate": 0.00041017732809364824, "loss": 0.0291, "num_input_tokens_seen": 159145552, "step": 73775 }, { "epoch": 12.035889070146819, "grad_norm": 0.04154255986213684, "learning_rate": 0.00041010730717445156, "loss": 0.007, "num_input_tokens_seen": 159157744, "step": 73780 }, { "epoch": 12.036704730831975, "grad_norm": 0.07037521153688431, "learning_rate": 0.00041003728807704435, "loss": 0.0152, "num_input_tokens_seen": 159169456, "step": 73785 }, { "epoch": 12.037520391517129, "grad_norm": 0.0067330640740692616, "learning_rate": 0.00040996727080284555, "loss": 0.0108, "num_input_tokens_seen": 159180208, "step": 73790 }, { "epoch": 12.038336052202284, "grad_norm": 0.5390632748603821, "learning_rate": 0.0004098972553532743, "loss": 0.0128, "num_input_tokens_seen": 159191024, "step": 73795 }, { "epoch": 12.039151712887438, "grad_norm": 0.4968206286430359, "learning_rate": 0.00040982724172974926, "loss": 0.0272, "num_input_tokens_seen": 159201232, "step": 73800 }, { "epoch": 12.039967373572594, "grad_norm": 0.5082441568374634, "learning_rate": 0.0004097572299336899, "loss": 0.0717, "num_input_tokens_seen": 159212880, "step": 73805 }, { "epoch": 12.040783034257748, "grad_norm": 0.04948855936527252, "learning_rate": 0.00040968721996651445, "loss": 0.0287, "num_input_tokens_seen": 159223504, "step": 73810 }, { "epoch": 12.041598694942904, "grad_norm": 0.007955935783684254, "learning_rate": 0.00040961721182964235, "loss": 0.0576, "num_input_tokens_seen": 159234640, "step": 73815 }, { "epoch": 12.04241435562806, "grad_norm": 0.013767275027930737, "learning_rate": 0.00040954720552449186, "loss": 0.02, "num_input_tokens_seen": 159246224, "step": 73820 }, { "epoch": 12.043230016313213, "grad_norm": 0.005053498782217503, "learning_rate": 0.0004094772010524822, "loss": 0.0208, "num_input_tokens_seen": 159257680, "step": 73825 }, { "epoch": 12.044045676998369, "grad_norm": 0.22538012266159058, "learning_rate": 0.0004094071984150317, "loss": 0.1727, "num_input_tokens_seen": 159267984, "step": 73830 }, { "epoch": 12.044861337683523, "grad_norm": 0.012159282341599464, "learning_rate": 0.0004093371976135595, "loss": 0.0411, "num_input_tokens_seen": 159278352, "step": 73835 }, { "epoch": 12.045676998368679, "grad_norm": 0.1659296751022339, "learning_rate": 0.0004092671986494837, "loss": 0.0676, "num_input_tokens_seen": 159289776, "step": 73840 }, { "epoch": 12.046492659053834, "grad_norm": 0.04588691145181656, "learning_rate": 0.00040919720152422323, "loss": 0.0142, "num_input_tokens_seen": 159299440, "step": 73845 }, { "epoch": 12.047308319738988, "grad_norm": 0.041985828429460526, "learning_rate": 0.00040912720623919696, "loss": 0.0254, "num_input_tokens_seen": 159310576, "step": 73850 }, { "epoch": 12.048123980424144, "grad_norm": 0.009057646617293358, "learning_rate": 0.00040905721279582284, "loss": 0.0204, "num_input_tokens_seen": 159322064, "step": 73855 }, { "epoch": 12.048939641109298, "grad_norm": 0.2005881667137146, "learning_rate": 0.00040898722119551994, "loss": 0.014, "num_input_tokens_seen": 159333296, "step": 73860 }, { "epoch": 12.049755301794454, "grad_norm": 0.03115665540099144, "learning_rate": 0.0004089172314397063, "loss": 0.0064, "num_input_tokens_seen": 159345392, "step": 73865 }, { "epoch": 12.05057096247961, "grad_norm": 0.013190168887376785, "learning_rate": 0.00040884724352980065, "loss": 0.0184, "num_input_tokens_seen": 159355920, "step": 73870 }, { "epoch": 12.051386623164763, "grad_norm": 0.010946376249194145, "learning_rate": 0.00040877725746722097, "loss": 0.0169, "num_input_tokens_seen": 159366448, "step": 73875 }, { "epoch": 12.052202283849919, "grad_norm": 0.012062588706612587, "learning_rate": 0.0004087072732533862, "loss": 0.0125, "num_input_tokens_seen": 159377456, "step": 73880 }, { "epoch": 12.053017944535073, "grad_norm": 0.022835880517959595, "learning_rate": 0.0004086372908897141, "loss": 0.0222, "num_input_tokens_seen": 159388144, "step": 73885 }, { "epoch": 12.053833605220229, "grad_norm": 0.044996969401836395, "learning_rate": 0.0004085673103776234, "loss": 0.0064, "num_input_tokens_seen": 159399216, "step": 73890 }, { "epoch": 12.054649265905383, "grad_norm": 0.13436917960643768, "learning_rate": 0.000408497331718532, "loss": 0.0878, "num_input_tokens_seen": 159410736, "step": 73895 }, { "epoch": 12.055464926590538, "grad_norm": 0.006636430975049734, "learning_rate": 0.0004084273549138584, "loss": 0.003, "num_input_tokens_seen": 159420496, "step": 73900 }, { "epoch": 12.056280587275694, "grad_norm": 0.02601734921336174, "learning_rate": 0.0004083573799650204, "loss": 0.0153, "num_input_tokens_seen": 159431504, "step": 73905 }, { "epoch": 12.057096247960848, "grad_norm": 0.2840319275856018, "learning_rate": 0.00040828740687343654, "loss": 0.0128, "num_input_tokens_seen": 159442416, "step": 73910 }, { "epoch": 12.057911908646004, "grad_norm": 0.0029783681966364384, "learning_rate": 0.0004082174356405247, "loss": 0.005, "num_input_tokens_seen": 159453072, "step": 73915 }, { "epoch": 12.058727569331158, "grad_norm": 0.5021596550941467, "learning_rate": 0.00040814746626770287, "loss": 0.0522, "num_input_tokens_seen": 159463216, "step": 73920 }, { "epoch": 12.059543230016313, "grad_norm": 0.8189988732337952, "learning_rate": 0.0004080774987563893, "loss": 0.0637, "num_input_tokens_seen": 159472656, "step": 73925 }, { "epoch": 12.060358890701469, "grad_norm": 0.11326828598976135, "learning_rate": 0.0004080075331080017, "loss": 0.0204, "num_input_tokens_seen": 159482064, "step": 73930 }, { "epoch": 12.061174551386623, "grad_norm": 0.00767425075173378, "learning_rate": 0.0004079375693239581, "loss": 0.0267, "num_input_tokens_seen": 159493680, "step": 73935 }, { "epoch": 12.061990212071779, "grad_norm": 0.019670048728585243, "learning_rate": 0.0004078676074056766, "loss": 0.058, "num_input_tokens_seen": 159504400, "step": 73940 }, { "epoch": 12.062805872756933, "grad_norm": 0.005713650491088629, "learning_rate": 0.0004077976473545748, "loss": 0.0107, "num_input_tokens_seen": 159515888, "step": 73945 }, { "epoch": 12.063621533442088, "grad_norm": 0.2497982382774353, "learning_rate": 0.0004077276891720707, "loss": 0.1263, "num_input_tokens_seen": 159527344, "step": 73950 }, { "epoch": 12.064437194127244, "grad_norm": 0.004190264735370874, "learning_rate": 0.000407657732859582, "loss": 0.0083, "num_input_tokens_seen": 159538512, "step": 73955 }, { "epoch": 12.065252854812398, "grad_norm": 0.02354654297232628, "learning_rate": 0.00040758777841852647, "loss": 0.1425, "num_input_tokens_seen": 159548176, "step": 73960 }, { "epoch": 12.066068515497554, "grad_norm": 0.008335534483194351, "learning_rate": 0.000407517825850322, "loss": 0.0036, "num_input_tokens_seen": 159558800, "step": 73965 }, { "epoch": 12.066884176182707, "grad_norm": 0.00404638284817338, "learning_rate": 0.00040744787515638585, "loss": 0.0055, "num_input_tokens_seen": 159569776, "step": 73970 }, { "epoch": 12.067699836867863, "grad_norm": 0.005195849109441042, "learning_rate": 0.00040737792633813624, "loss": 0.3292, "num_input_tokens_seen": 159581008, "step": 73975 }, { "epoch": 12.068515497553017, "grad_norm": 0.03616528585553169, "learning_rate": 0.00040730797939699014, "loss": 0.0147, "num_input_tokens_seen": 159592304, "step": 73980 }, { "epoch": 12.069331158238173, "grad_norm": 0.013765659183263779, "learning_rate": 0.00040723803433436573, "loss": 0.0085, "num_input_tokens_seen": 159602800, "step": 73985 }, { "epoch": 12.070146818923329, "grad_norm": 0.02782883495092392, "learning_rate": 0.00040716809115167997, "loss": 0.0258, "num_input_tokens_seen": 159613104, "step": 73990 }, { "epoch": 12.070962479608482, "grad_norm": 0.10647904127836227, "learning_rate": 0.0004070981498503508, "loss": 0.0103, "num_input_tokens_seen": 159625616, "step": 73995 }, { "epoch": 12.071778140293638, "grad_norm": 0.0038456048350781202, "learning_rate": 0.0004070282104317953, "loss": 0.0091, "num_input_tokens_seen": 159637136, "step": 74000 }, { "epoch": 12.072593800978792, "grad_norm": 0.0037264381535351276, "learning_rate": 0.0004069582728974313, "loss": 0.136, "num_input_tokens_seen": 159647120, "step": 74005 }, { "epoch": 12.073409461663948, "grad_norm": 0.2349800318479538, "learning_rate": 0.00040688833724867565, "loss": 0.046, "num_input_tokens_seen": 159657232, "step": 74010 }, { "epoch": 12.074225122349104, "grad_norm": 1.2030119895935059, "learning_rate": 0.0004068184034869462, "loss": 0.0697, "num_input_tokens_seen": 159668208, "step": 74015 }, { "epoch": 12.075040783034257, "grad_norm": 0.030114563181996346, "learning_rate": 0.0004067484716136598, "loss": 0.068, "num_input_tokens_seen": 159678352, "step": 74020 }, { "epoch": 12.075856443719413, "grad_norm": 0.3572850525379181, "learning_rate": 0.00040667854163023415, "loss": 0.0543, "num_input_tokens_seen": 159689104, "step": 74025 }, { "epoch": 12.076672104404567, "grad_norm": 0.017027070745825768, "learning_rate": 0.000406608613538086, "loss": 0.0033, "num_input_tokens_seen": 159701040, "step": 74030 }, { "epoch": 12.077487765089723, "grad_norm": 0.004174274858087301, "learning_rate": 0.000406538687338633, "loss": 0.0155, "num_input_tokens_seen": 159711536, "step": 74035 }, { "epoch": 12.078303425774878, "grad_norm": 0.42760300636291504, "learning_rate": 0.0004064687630332919, "loss": 0.0494, "num_input_tokens_seen": 159722864, "step": 74040 }, { "epoch": 12.079119086460032, "grad_norm": 0.0214095376431942, "learning_rate": 0.0004063988406234801, "loss": 0.0338, "num_input_tokens_seen": 159734608, "step": 74045 }, { "epoch": 12.079934747145188, "grad_norm": 0.18593746423721313, "learning_rate": 0.0004063289201106144, "loss": 0.0101, "num_input_tokens_seen": 159745168, "step": 74050 }, { "epoch": 12.080750407830342, "grad_norm": 0.3874616026878357, "learning_rate": 0.000406259001496112, "loss": 0.0174, "num_input_tokens_seen": 159756208, "step": 74055 }, { "epoch": 12.081566068515498, "grad_norm": 0.01034525316208601, "learning_rate": 0.00040618908478138986, "loss": 0.0068, "num_input_tokens_seen": 159765872, "step": 74060 }, { "epoch": 12.082381729200652, "grad_norm": 0.022807089611887932, "learning_rate": 0.0004061191699678649, "loss": 0.1546, "num_input_tokens_seen": 159776848, "step": 74065 }, { "epoch": 12.083197389885807, "grad_norm": 0.009470158256590366, "learning_rate": 0.0004060492570569542, "loss": 0.0041, "num_input_tokens_seen": 159788432, "step": 74070 }, { "epoch": 12.084013050570963, "grad_norm": 0.08018023520708084, "learning_rate": 0.0004059793460500742, "loss": 0.0123, "num_input_tokens_seen": 159800304, "step": 74075 }, { "epoch": 12.084828711256117, "grad_norm": 0.15234047174453735, "learning_rate": 0.0004059094369486423, "loss": 0.0859, "num_input_tokens_seen": 159809296, "step": 74080 }, { "epoch": 12.085644371941273, "grad_norm": 0.413926362991333, "learning_rate": 0.00040583952975407493, "loss": 0.0663, "num_input_tokens_seen": 159818960, "step": 74085 }, { "epoch": 12.086460032626427, "grad_norm": 0.16932708024978638, "learning_rate": 0.000405769624467789, "loss": 0.0165, "num_input_tokens_seen": 159829456, "step": 74090 }, { "epoch": 12.087275693311582, "grad_norm": 0.0028401929885149, "learning_rate": 0.0004056997210912011, "loss": 0.0121, "num_input_tokens_seen": 159840944, "step": 74095 }, { "epoch": 12.088091353996738, "grad_norm": 0.03299913927912712, "learning_rate": 0.00040562981962572803, "loss": 0.1213, "num_input_tokens_seen": 159851952, "step": 74100 }, { "epoch": 12.088907014681892, "grad_norm": 0.03128942474722862, "learning_rate": 0.00040555992007278624, "loss": 0.0086, "num_input_tokens_seen": 159863248, "step": 74105 }, { "epoch": 12.089722675367048, "grad_norm": 0.004560539498925209, "learning_rate": 0.00040549002243379267, "loss": 0.011, "num_input_tokens_seen": 159872944, "step": 74110 }, { "epoch": 12.090538336052202, "grad_norm": 0.00920757744461298, "learning_rate": 0.00040542012671016355, "loss": 0.0631, "num_input_tokens_seen": 159883632, "step": 74115 }, { "epoch": 12.091353996737357, "grad_norm": 0.08857864886522293, "learning_rate": 0.00040535023290331573, "loss": 0.04, "num_input_tokens_seen": 159895056, "step": 74120 }, { "epoch": 12.092169657422513, "grad_norm": 0.002525157993659377, "learning_rate": 0.0004052803410146653, "loss": 0.0054, "num_input_tokens_seen": 159906576, "step": 74125 }, { "epoch": 12.092985318107667, "grad_norm": 0.9725717902183533, "learning_rate": 0.0004052104510456291, "loss": 0.0622, "num_input_tokens_seen": 159918672, "step": 74130 }, { "epoch": 12.093800978792823, "grad_norm": 0.008668906055390835, "learning_rate": 0.00040514056299762314, "loss": 0.0094, "num_input_tokens_seen": 159929328, "step": 74135 }, { "epoch": 12.094616639477977, "grad_norm": 0.27049708366394043, "learning_rate": 0.0004050706768720642, "loss": 0.0247, "num_input_tokens_seen": 159940368, "step": 74140 }, { "epoch": 12.095432300163132, "grad_norm": 0.010525517165660858, "learning_rate": 0.00040500079267036834, "loss": 0.0052, "num_input_tokens_seen": 159951728, "step": 74145 }, { "epoch": 12.096247960848286, "grad_norm": 0.001333979656919837, "learning_rate": 0.000404930910393952, "loss": 0.0101, "num_input_tokens_seen": 159962224, "step": 74150 }, { "epoch": 12.097063621533442, "grad_norm": 0.10761525481939316, "learning_rate": 0.0004048610300442313, "loss": 0.0466, "num_input_tokens_seen": 159972432, "step": 74155 }, { "epoch": 12.097879282218598, "grad_norm": 0.26049813628196716, "learning_rate": 0.0004047911516226226, "loss": 0.0604, "num_input_tokens_seen": 159983344, "step": 74160 }, { "epoch": 12.098694942903752, "grad_norm": 0.12951795756816864, "learning_rate": 0.0004047212751305418, "loss": 0.0099, "num_input_tokens_seen": 159994384, "step": 74165 }, { "epoch": 12.099510603588907, "grad_norm": 0.008982861414551735, "learning_rate": 0.00040465140056940524, "loss": 0.1154, "num_input_tokens_seen": 160004208, "step": 74170 }, { "epoch": 12.100326264274061, "grad_norm": 0.06853464245796204, "learning_rate": 0.00040458152794062925, "loss": 0.0232, "num_input_tokens_seen": 160016112, "step": 74175 }, { "epoch": 12.101141924959217, "grad_norm": 0.029205063357949257, "learning_rate": 0.00040451165724562937, "loss": 0.0144, "num_input_tokens_seen": 160027984, "step": 74180 }, { "epoch": 12.101957585644373, "grad_norm": 0.429021418094635, "learning_rate": 0.0004044417884858221, "loss": 0.0575, "num_input_tokens_seen": 160038928, "step": 74185 }, { "epoch": 12.102773246329527, "grad_norm": 0.027922654524445534, "learning_rate": 0.0004043719216626231, "loss": 0.0954, "num_input_tokens_seen": 160049808, "step": 74190 }, { "epoch": 12.103588907014682, "grad_norm": 0.014755363576114178, "learning_rate": 0.00040430205677744857, "loss": 0.1248, "num_input_tokens_seen": 160060080, "step": 74195 }, { "epoch": 12.104404567699836, "grad_norm": 0.169220432639122, "learning_rate": 0.00040423219383171405, "loss": 0.0255, "num_input_tokens_seen": 160071728, "step": 74200 }, { "epoch": 12.105220228384992, "grad_norm": 0.12314831465482712, "learning_rate": 0.0004041623328268358, "loss": 0.0164, "num_input_tokens_seen": 160081328, "step": 74205 }, { "epoch": 12.106035889070148, "grad_norm": 0.056490253657102585, "learning_rate": 0.0004040924737642293, "loss": 0.0136, "num_input_tokens_seen": 160091728, "step": 74210 }, { "epoch": 12.106851549755302, "grad_norm": 0.00717545673251152, "learning_rate": 0.0004040226166453107, "loss": 0.0093, "num_input_tokens_seen": 160103280, "step": 74215 }, { "epoch": 12.107667210440457, "grad_norm": 0.12026861310005188, "learning_rate": 0.00040395276147149524, "loss": 0.2314, "num_input_tokens_seen": 160112080, "step": 74220 }, { "epoch": 12.108482871125611, "grad_norm": 0.015305766835808754, "learning_rate": 0.000403882908244199, "loss": 0.019, "num_input_tokens_seen": 160123184, "step": 74225 }, { "epoch": 12.109298531810767, "grad_norm": 0.04786968231201172, "learning_rate": 0.00040381305696483773, "loss": 0.0979, "num_input_tokens_seen": 160134192, "step": 74230 }, { "epoch": 12.11011419249592, "grad_norm": 0.00884055532515049, "learning_rate": 0.00040374320763482673, "loss": 0.0256, "num_input_tokens_seen": 160143568, "step": 74235 }, { "epoch": 12.110929853181077, "grad_norm": 0.013825084082782269, "learning_rate": 0.0004036733602555818, "loss": 0.0107, "num_input_tokens_seen": 160154768, "step": 74240 }, { "epoch": 12.111745513866232, "grad_norm": 0.737095296382904, "learning_rate": 0.0004036035148285184, "loss": 0.0715, "num_input_tokens_seen": 160164848, "step": 74245 }, { "epoch": 12.112561174551386, "grad_norm": 0.26678237318992615, "learning_rate": 0.00040353367135505193, "loss": 0.0166, "num_input_tokens_seen": 160174800, "step": 74250 }, { "epoch": 12.113376835236542, "grad_norm": 0.05654589831829071, "learning_rate": 0.00040346382983659826, "loss": 0.0208, "num_input_tokens_seen": 160185968, "step": 74255 }, { "epoch": 12.114192495921696, "grad_norm": 0.04880339652299881, "learning_rate": 0.0004033939902745723, "loss": 0.0077, "num_input_tokens_seen": 160196560, "step": 74260 }, { "epoch": 12.115008156606851, "grad_norm": 0.03003457561135292, "learning_rate": 0.0004033241526703899, "loss": 0.0061, "num_input_tokens_seen": 160206384, "step": 74265 }, { "epoch": 12.115823817292007, "grad_norm": 0.050061021000146866, "learning_rate": 0.00040325431702546596, "loss": 0.0161, "num_input_tokens_seen": 160217104, "step": 74270 }, { "epoch": 12.116639477977161, "grad_norm": 0.04654416814446449, "learning_rate": 0.000403184483341216, "loss": 0.0048, "num_input_tokens_seen": 160228368, "step": 74275 }, { "epoch": 12.117455138662317, "grad_norm": 0.006389730144292116, "learning_rate": 0.0004031146516190556, "loss": 0.0411, "num_input_tokens_seen": 160238736, "step": 74280 }, { "epoch": 12.11827079934747, "grad_norm": 0.012472650967538357, "learning_rate": 0.00040304482186039937, "loss": 0.1062, "num_input_tokens_seen": 160248944, "step": 74285 }, { "epoch": 12.119086460032626, "grad_norm": 0.03390290588140488, "learning_rate": 0.0004029749940666631, "loss": 0.0134, "num_input_tokens_seen": 160260560, "step": 74290 }, { "epoch": 12.119902120717782, "grad_norm": 0.017470506951212883, "learning_rate": 0.00040290516823926145, "loss": 0.0744, "num_input_tokens_seen": 160272048, "step": 74295 }, { "epoch": 12.120717781402936, "grad_norm": 0.36640286445617676, "learning_rate": 0.0004028353443796099, "loss": 0.0407, "num_input_tokens_seen": 160282512, "step": 74300 }, { "epoch": 12.121533442088092, "grad_norm": 0.01338358037173748, "learning_rate": 0.00040276552248912317, "loss": 0.0161, "num_input_tokens_seen": 160294512, "step": 74305 }, { "epoch": 12.122349102773246, "grad_norm": 0.1521095335483551, "learning_rate": 0.00040269570256921673, "loss": 0.0082, "num_input_tokens_seen": 160304944, "step": 74310 }, { "epoch": 12.123164763458401, "grad_norm": 0.13591180741786957, "learning_rate": 0.00040262588462130507, "loss": 0.0598, "num_input_tokens_seen": 160315472, "step": 74315 }, { "epoch": 12.123980424143557, "grad_norm": 0.03177114203572273, "learning_rate": 0.0004025560686468036, "loss": 0.0388, "num_input_tokens_seen": 160326704, "step": 74320 }, { "epoch": 12.124796084828711, "grad_norm": 0.01272545475512743, "learning_rate": 0.0004024862546471268, "loss": 0.0821, "num_input_tokens_seen": 160336624, "step": 74325 }, { "epoch": 12.125611745513867, "grad_norm": 0.007154722232371569, "learning_rate": 0.00040241644262368993, "loss": 0.04, "num_input_tokens_seen": 160347856, "step": 74330 }, { "epoch": 12.12642740619902, "grad_norm": 0.006466465070843697, "learning_rate": 0.00040234663257790747, "loss": 0.0166, "num_input_tokens_seen": 160359248, "step": 74335 }, { "epoch": 12.127243066884176, "grad_norm": 0.05033848434686661, "learning_rate": 0.00040227682451119464, "loss": 0.052, "num_input_tokens_seen": 160370640, "step": 74340 }, { "epoch": 12.12805872756933, "grad_norm": 0.18795427680015564, "learning_rate": 0.0004022070184249657, "loss": 0.0111, "num_input_tokens_seen": 160380560, "step": 74345 }, { "epoch": 12.128874388254486, "grad_norm": 0.0089729567989707, "learning_rate": 0.0004021372143206358, "loss": 0.0161, "num_input_tokens_seen": 160392368, "step": 74350 }, { "epoch": 12.129690048939642, "grad_norm": 0.6816865801811218, "learning_rate": 0.0004020674121996191, "loss": 0.0695, "num_input_tokens_seen": 160403312, "step": 74355 }, { "epoch": 12.130505709624796, "grad_norm": 0.1574021577835083, "learning_rate": 0.0004019976120633308, "loss": 0.0621, "num_input_tokens_seen": 160413712, "step": 74360 }, { "epoch": 12.131321370309951, "grad_norm": 0.03588379919528961, "learning_rate": 0.000401927813913185, "loss": 0.0588, "num_input_tokens_seen": 160423120, "step": 74365 }, { "epoch": 12.132137030995105, "grad_norm": 0.01161075010895729, "learning_rate": 0.0004018580177505966, "loss": 0.0288, "num_input_tokens_seen": 160434896, "step": 74370 }, { "epoch": 12.132952691680261, "grad_norm": 0.03392313793301582, "learning_rate": 0.00040178822357698, "loss": 0.0348, "num_input_tokens_seen": 160445616, "step": 74375 }, { "epoch": 12.133768352365417, "grad_norm": 0.3975036144256592, "learning_rate": 0.0004017184313937494, "loss": 0.1331, "num_input_tokens_seen": 160455920, "step": 74380 }, { "epoch": 12.13458401305057, "grad_norm": 0.7836913466453552, "learning_rate": 0.0004016486412023198, "loss": 0.2139, "num_input_tokens_seen": 160466416, "step": 74385 }, { "epoch": 12.135399673735726, "grad_norm": 0.013092154636979103, "learning_rate": 0.000401578853004105, "loss": 0.0061, "num_input_tokens_seen": 160477904, "step": 74390 }, { "epoch": 12.13621533442088, "grad_norm": 0.010323798283934593, "learning_rate": 0.00040150906680051974, "loss": 0.0283, "num_input_tokens_seen": 160489584, "step": 74395 }, { "epoch": 12.137030995106036, "grad_norm": 0.3737667500972748, "learning_rate": 0.00040143928259297817, "loss": 0.0254, "num_input_tokens_seen": 160499952, "step": 74400 }, { "epoch": 12.137846655791192, "grad_norm": 0.019605323672294617, "learning_rate": 0.00040136950038289457, "loss": 0.007, "num_input_tokens_seen": 160510576, "step": 74405 }, { "epoch": 12.138662316476346, "grad_norm": 0.013885759748518467, "learning_rate": 0.0004012997201716831, "loss": 0.0279, "num_input_tokens_seen": 160520432, "step": 74410 }, { "epoch": 12.139477977161501, "grad_norm": 0.28042954206466675, "learning_rate": 0.0004012299419607581, "loss": 0.0224, "num_input_tokens_seen": 160532080, "step": 74415 }, { "epoch": 12.140293637846655, "grad_norm": 0.11336594820022583, "learning_rate": 0.00040116016575153344, "loss": 0.0231, "num_input_tokens_seen": 160543024, "step": 74420 }, { "epoch": 12.141109298531811, "grad_norm": 0.005463029723614454, "learning_rate": 0.0004010903915454237, "loss": 0.0105, "num_input_tokens_seen": 160554672, "step": 74425 }, { "epoch": 12.141924959216965, "grad_norm": 0.007425525691360235, "learning_rate": 0.0004010206193438424, "loss": 0.1063, "num_input_tokens_seen": 160565808, "step": 74430 }, { "epoch": 12.14274061990212, "grad_norm": 0.1310349851846695, "learning_rate": 0.0004009508491482041, "loss": 0.0732, "num_input_tokens_seen": 160576528, "step": 74435 }, { "epoch": 12.143556280587276, "grad_norm": 0.011101285926997662, "learning_rate": 0.00040088108095992216, "loss": 0.1047, "num_input_tokens_seen": 160588368, "step": 74440 }, { "epoch": 12.14437194127243, "grad_norm": 0.05059365555644035, "learning_rate": 0.00040081131478041115, "loss": 0.0549, "num_input_tokens_seen": 160598864, "step": 74445 }, { "epoch": 12.145187601957586, "grad_norm": 0.8272093534469604, "learning_rate": 0.00040074155061108443, "loss": 0.3899, "num_input_tokens_seen": 160610192, "step": 74450 }, { "epoch": 12.14600326264274, "grad_norm": 0.6162428855895996, "learning_rate": 0.00040067178845335633, "loss": 0.0983, "num_input_tokens_seen": 160621680, "step": 74455 }, { "epoch": 12.146818923327896, "grad_norm": 0.01383585948497057, "learning_rate": 0.0004006020283086402, "loss": 0.0139, "num_input_tokens_seen": 160632720, "step": 74460 }, { "epoch": 12.147634584013051, "grad_norm": 0.03568781167268753, "learning_rate": 0.00040053227017835033, "loss": 0.0248, "num_input_tokens_seen": 160642832, "step": 74465 }, { "epoch": 12.148450244698205, "grad_norm": 0.11245814710855484, "learning_rate": 0.00040046251406389993, "loss": 0.0479, "num_input_tokens_seen": 160652912, "step": 74470 }, { "epoch": 12.149265905383361, "grad_norm": 0.0046191527508199215, "learning_rate": 0.0004003927599667032, "loss": 0.0125, "num_input_tokens_seen": 160663472, "step": 74475 }, { "epoch": 12.150081566068515, "grad_norm": 0.02641601301729679, "learning_rate": 0.0004003230078881733, "loss": 0.0064, "num_input_tokens_seen": 160674544, "step": 74480 }, { "epoch": 12.15089722675367, "grad_norm": 0.00698605552315712, "learning_rate": 0.0004002532578297241, "loss": 0.0073, "num_input_tokens_seen": 160685200, "step": 74485 }, { "epoch": 12.151712887438826, "grad_norm": 0.17926287651062012, "learning_rate": 0.0004001835097927694, "loss": 0.0799, "num_input_tokens_seen": 160696208, "step": 74490 }, { "epoch": 12.15252854812398, "grad_norm": 0.040712252259254456, "learning_rate": 0.00040011376377872235, "loss": 0.0653, "num_input_tokens_seen": 160706416, "step": 74495 }, { "epoch": 12.153344208809136, "grad_norm": 0.7276735305786133, "learning_rate": 0.0004000440197889967, "loss": 0.0944, "num_input_tokens_seen": 160718416, "step": 74500 }, { "epoch": 12.15415986949429, "grad_norm": 0.3541339635848999, "learning_rate": 0.0003999742778250056, "loss": 0.033, "num_input_tokens_seen": 160730224, "step": 74505 }, { "epoch": 12.154975530179446, "grad_norm": 0.014347495511174202, "learning_rate": 0.0003999045378881629, "loss": 0.0041, "num_input_tokens_seen": 160741008, "step": 74510 }, { "epoch": 12.1557911908646, "grad_norm": 0.16176581382751465, "learning_rate": 0.0003998347999798815, "loss": 0.05, "num_input_tokens_seen": 160750736, "step": 74515 }, { "epoch": 12.156606851549755, "grad_norm": 0.028150994330644608, "learning_rate": 0.00039976506410157513, "loss": 0.0493, "num_input_tokens_seen": 160761584, "step": 74520 }, { "epoch": 12.15742251223491, "grad_norm": 0.11872951686382294, "learning_rate": 0.0003996953302546567, "loss": 0.0263, "num_input_tokens_seen": 160771856, "step": 74525 }, { "epoch": 12.158238172920065, "grad_norm": 0.01152915321290493, "learning_rate": 0.0003996255984405399, "loss": 0.0217, "num_input_tokens_seen": 160783536, "step": 74530 }, { "epoch": 12.15905383360522, "grad_norm": 0.0435757115483284, "learning_rate": 0.00039955586866063735, "loss": 0.0649, "num_input_tokens_seen": 160794128, "step": 74535 }, { "epoch": 12.159869494290374, "grad_norm": 0.08395544439554214, "learning_rate": 0.0003994861409163628, "loss": 0.0221, "num_input_tokens_seen": 160804720, "step": 74540 }, { "epoch": 12.16068515497553, "grad_norm": 0.7115342617034912, "learning_rate": 0.000399416415209129, "loss": 0.1518, "num_input_tokens_seen": 160815888, "step": 74545 }, { "epoch": 12.161500815660686, "grad_norm": 0.10519499331712723, "learning_rate": 0.0003993466915403492, "loss": 0.0969, "num_input_tokens_seen": 160827824, "step": 74550 }, { "epoch": 12.16231647634584, "grad_norm": 0.07450270652770996, "learning_rate": 0.0003992769699114364, "loss": 0.1216, "num_input_tokens_seen": 160837328, "step": 74555 }, { "epoch": 12.163132137030995, "grad_norm": 0.06041792407631874, "learning_rate": 0.0003992072503238035, "loss": 0.0908, "num_input_tokens_seen": 160847760, "step": 74560 }, { "epoch": 12.16394779771615, "grad_norm": 0.5099493265151978, "learning_rate": 0.0003991375327788635, "loss": 0.0378, "num_input_tokens_seen": 160858896, "step": 74565 }, { "epoch": 12.164763458401305, "grad_norm": 0.02467208355665207, "learning_rate": 0.00039906781727802956, "loss": 0.0147, "num_input_tokens_seen": 160868400, "step": 74570 }, { "epoch": 12.16557911908646, "grad_norm": 0.009804047644138336, "learning_rate": 0.0003989981038227141, "loss": 0.0126, "num_input_tokens_seen": 160878480, "step": 74575 }, { "epoch": 12.166394779771615, "grad_norm": 0.044133830815553665, "learning_rate": 0.0003989283924143304, "loss": 0.0066, "num_input_tokens_seen": 160887952, "step": 74580 }, { "epoch": 12.16721044045677, "grad_norm": 0.16552656888961792, "learning_rate": 0.0003988586830542909, "loss": 0.081, "num_input_tokens_seen": 160898128, "step": 74585 }, { "epoch": 12.168026101141924, "grad_norm": 0.05296900123357773, "learning_rate": 0.00039878897574400845, "loss": 0.0082, "num_input_tokens_seen": 160908784, "step": 74590 }, { "epoch": 12.16884176182708, "grad_norm": 1.0278362035751343, "learning_rate": 0.00039871927048489605, "loss": 0.0413, "num_input_tokens_seen": 160918320, "step": 74595 }, { "epoch": 12.169657422512234, "grad_norm": 0.13810297846794128, "learning_rate": 0.0003986495672783659, "loss": 0.0199, "num_input_tokens_seen": 160930096, "step": 74600 }, { "epoch": 12.17047308319739, "grad_norm": 0.018178101629018784, "learning_rate": 0.000398579866125831, "loss": 0.0534, "num_input_tokens_seen": 160940848, "step": 74605 }, { "epoch": 12.171288743882545, "grad_norm": 0.027984725311398506, "learning_rate": 0.00039851016702870356, "loss": 0.0602, "num_input_tokens_seen": 160951120, "step": 74610 }, { "epoch": 12.1721044045677, "grad_norm": 0.03416503220796585, "learning_rate": 0.0003984404699883966, "loss": 0.0251, "num_input_tokens_seen": 160961328, "step": 74615 }, { "epoch": 12.172920065252855, "grad_norm": 0.014291993342339993, "learning_rate": 0.00039837077500632213, "loss": 0.0083, "num_input_tokens_seen": 160973264, "step": 74620 }, { "epoch": 12.173735725938009, "grad_norm": 0.06815456598997116, "learning_rate": 0.00039830108208389306, "loss": 0.0696, "num_input_tokens_seen": 160982896, "step": 74625 }, { "epoch": 12.174551386623165, "grad_norm": 0.04985464736819267, "learning_rate": 0.00039823139122252126, "loss": 0.0059, "num_input_tokens_seen": 160994288, "step": 74630 }, { "epoch": 12.17536704730832, "grad_norm": 0.5972863435745239, "learning_rate": 0.0003981617024236197, "loss": 0.0834, "num_input_tokens_seen": 161004592, "step": 74635 }, { "epoch": 12.176182707993474, "grad_norm": 0.594738781452179, "learning_rate": 0.0003980920156886003, "loss": 0.0736, "num_input_tokens_seen": 161014032, "step": 74640 }, { "epoch": 12.17699836867863, "grad_norm": 0.003715520491823554, "learning_rate": 0.0003980223310188756, "loss": 0.0665, "num_input_tokens_seen": 161025520, "step": 74645 }, { "epoch": 12.177814029363784, "grad_norm": 0.08713612705469131, "learning_rate": 0.00039795264841585755, "loss": 0.0202, "num_input_tokens_seen": 161036880, "step": 74650 }, { "epoch": 12.17862969004894, "grad_norm": 0.09007509052753448, "learning_rate": 0.00039788296788095866, "loss": 0.0479, "num_input_tokens_seen": 161047024, "step": 74655 }, { "epoch": 12.179445350734095, "grad_norm": 0.023239508271217346, "learning_rate": 0.00039781328941559084, "loss": 0.0071, "num_input_tokens_seen": 161057936, "step": 74660 }, { "epoch": 12.18026101141925, "grad_norm": 0.1088428869843483, "learning_rate": 0.0003977436130211666, "loss": 0.0489, "num_input_tokens_seen": 161067824, "step": 74665 }, { "epoch": 12.181076672104405, "grad_norm": 0.0047751558013260365, "learning_rate": 0.0003976739386990975, "loss": 0.122, "num_input_tokens_seen": 161078896, "step": 74670 }, { "epoch": 12.181892332789559, "grad_norm": 0.011525565758347511, "learning_rate": 0.0003976042664507961, "loss": 0.049, "num_input_tokens_seen": 161089904, "step": 74675 }, { "epoch": 12.182707993474715, "grad_norm": 0.004288562573492527, "learning_rate": 0.0003975345962776738, "loss": 0.071, "num_input_tokens_seen": 161100976, "step": 74680 }, { "epoch": 12.18352365415987, "grad_norm": 0.058702051639556885, "learning_rate": 0.0003974649281811431, "loss": 0.0058, "num_input_tokens_seen": 161111472, "step": 74685 }, { "epoch": 12.184339314845024, "grad_norm": 0.016511529684066772, "learning_rate": 0.00039739526216261566, "loss": 0.0904, "num_input_tokens_seen": 161122000, "step": 74690 }, { "epoch": 12.18515497553018, "grad_norm": 0.829873263835907, "learning_rate": 0.00039732559822350336, "loss": 0.1686, "num_input_tokens_seen": 161132464, "step": 74695 }, { "epoch": 12.185970636215334, "grad_norm": 0.16942539811134338, "learning_rate": 0.00039725593636521817, "loss": 0.0155, "num_input_tokens_seen": 161143632, "step": 74700 }, { "epoch": 12.18678629690049, "grad_norm": 0.20983338356018066, "learning_rate": 0.0003971862765891716, "loss": 0.0284, "num_input_tokens_seen": 161153136, "step": 74705 }, { "epoch": 12.187601957585644, "grad_norm": 0.08365467935800552, "learning_rate": 0.00039711661889677577, "loss": 0.0117, "num_input_tokens_seen": 161163024, "step": 74710 }, { "epoch": 12.1884176182708, "grad_norm": 0.5235198140144348, "learning_rate": 0.00039704696328944205, "loss": 0.0627, "num_input_tokens_seen": 161174000, "step": 74715 }, { "epoch": 12.189233278955955, "grad_norm": 0.44435617327690125, "learning_rate": 0.0003969773097685823, "loss": 0.1391, "num_input_tokens_seen": 161183664, "step": 74720 }, { "epoch": 12.190048939641109, "grad_norm": 0.0413043275475502, "learning_rate": 0.000396907658335608, "loss": 0.0097, "num_input_tokens_seen": 161194224, "step": 74725 }, { "epoch": 12.190864600326265, "grad_norm": 1.2846399545669556, "learning_rate": 0.0003968380089919308, "loss": 0.0797, "num_input_tokens_seen": 161205264, "step": 74730 }, { "epoch": 12.191680261011419, "grad_norm": 0.00761906523257494, "learning_rate": 0.0003967683617389621, "loss": 0.0249, "num_input_tokens_seen": 161214800, "step": 74735 }, { "epoch": 12.192495921696574, "grad_norm": 0.004216664936393499, "learning_rate": 0.0003966987165781138, "loss": 0.0084, "num_input_tokens_seen": 161226736, "step": 74740 }, { "epoch": 12.19331158238173, "grad_norm": 0.5137479305267334, "learning_rate": 0.00039662907351079675, "loss": 0.1114, "num_input_tokens_seen": 161236048, "step": 74745 }, { "epoch": 12.194127243066884, "grad_norm": 0.0590231716632843, "learning_rate": 0.00039655943253842293, "loss": 0.0173, "num_input_tokens_seen": 161246512, "step": 74750 }, { "epoch": 12.19494290375204, "grad_norm": 0.03567468374967575, "learning_rate": 0.00039648979366240325, "loss": 0.023, "num_input_tokens_seen": 161257232, "step": 74755 }, { "epoch": 12.195758564437194, "grad_norm": 0.020210113376379013, "learning_rate": 0.00039642015688414936, "loss": 0.0252, "num_input_tokens_seen": 161268176, "step": 74760 }, { "epoch": 12.19657422512235, "grad_norm": 0.03385559096932411, "learning_rate": 0.00039635052220507216, "loss": 0.0062, "num_input_tokens_seen": 161280240, "step": 74765 }, { "epoch": 12.197389885807505, "grad_norm": 0.5141317248344421, "learning_rate": 0.0003962808896265834, "loss": 0.1177, "num_input_tokens_seen": 161291056, "step": 74770 }, { "epoch": 12.198205546492659, "grad_norm": 0.23622766137123108, "learning_rate": 0.0003962112591500937, "loss": 0.021, "num_input_tokens_seen": 161301488, "step": 74775 }, { "epoch": 12.199021207177815, "grad_norm": 0.956473708152771, "learning_rate": 0.00039614163077701474, "loss": 0.2582, "num_input_tokens_seen": 161310608, "step": 74780 }, { "epoch": 12.199836867862969, "grad_norm": 0.7048952579498291, "learning_rate": 0.00039607200450875716, "loss": 0.104, "num_input_tokens_seen": 161321680, "step": 74785 }, { "epoch": 12.200652528548124, "grad_norm": 0.11562012135982513, "learning_rate": 0.0003960023803467325, "loss": 0.0989, "num_input_tokens_seen": 161331696, "step": 74790 }, { "epoch": 12.201468189233278, "grad_norm": 0.09646695107221603, "learning_rate": 0.0003959327582923513, "loss": 0.0105, "num_input_tokens_seen": 161341200, "step": 74795 }, { "epoch": 12.202283849918434, "grad_norm": 0.011357474140822887, "learning_rate": 0.000395863138347025, "loss": 0.026, "num_input_tokens_seen": 161352976, "step": 74800 }, { "epoch": 12.20309951060359, "grad_norm": 0.014518884010612965, "learning_rate": 0.0003957935205121641, "loss": 0.0055, "num_input_tokens_seen": 161364272, "step": 74805 }, { "epoch": 12.203915171288743, "grad_norm": 0.31230592727661133, "learning_rate": 0.00039572390478917973, "loss": 0.0494, "num_input_tokens_seen": 161374544, "step": 74810 }, { "epoch": 12.2047308319739, "grad_norm": 0.012982561253011227, "learning_rate": 0.00039565429117948287, "loss": 0.0189, "num_input_tokens_seen": 161385424, "step": 74815 }, { "epoch": 12.205546492659053, "grad_norm": 0.11630299687385559, "learning_rate": 0.000395584679684484, "loss": 0.0052, "num_input_tokens_seen": 161396592, "step": 74820 }, { "epoch": 12.206362153344209, "grad_norm": 0.025221414864063263, "learning_rate": 0.00039551507030559423, "loss": 0.1796, "num_input_tokens_seen": 161407024, "step": 74825 }, { "epoch": 12.207177814029365, "grad_norm": 0.41970908641815186, "learning_rate": 0.0003954454630442239, "loss": 0.0126, "num_input_tokens_seen": 161416976, "step": 74830 }, { "epoch": 12.207993474714518, "grad_norm": 0.1963176727294922, "learning_rate": 0.0003953758579017842, "loss": 0.0311, "num_input_tokens_seen": 161428496, "step": 74835 }, { "epoch": 12.208809135399674, "grad_norm": 0.02213437855243683, "learning_rate": 0.00039530625487968507, "loss": 0.0088, "num_input_tokens_seen": 161439088, "step": 74840 }, { "epoch": 12.209624796084828, "grad_norm": 0.22383996844291687, "learning_rate": 0.00039523665397933784, "loss": 0.1341, "num_input_tokens_seen": 161450640, "step": 74845 }, { "epoch": 12.210440456769984, "grad_norm": 0.005620373412966728, "learning_rate": 0.0003951670552021525, "loss": 0.0527, "num_input_tokens_seen": 161462352, "step": 74850 }, { "epoch": 12.21125611745514, "grad_norm": 0.03383687511086464, "learning_rate": 0.0003950974585495399, "loss": 0.0767, "num_input_tokens_seen": 161473776, "step": 74855 }, { "epoch": 12.212071778140293, "grad_norm": 0.023051083087921143, "learning_rate": 0.0003950278640229103, "loss": 0.0455, "num_input_tokens_seen": 161484720, "step": 74860 }, { "epoch": 12.21288743882545, "grad_norm": 0.017782943323254585, "learning_rate": 0.0003949582716236743, "loss": 0.0489, "num_input_tokens_seen": 161494704, "step": 74865 }, { "epoch": 12.213703099510603, "grad_norm": 0.7121814489364624, "learning_rate": 0.0003948886813532421, "loss": 0.0386, "num_input_tokens_seen": 161506160, "step": 74870 }, { "epoch": 12.214518760195759, "grad_norm": 0.032098885625600815, "learning_rate": 0.00039481909321302413, "loss": 0.0938, "num_input_tokens_seen": 161516848, "step": 74875 }, { "epoch": 12.215334420880913, "grad_norm": 0.018993297591805458, "learning_rate": 0.0003947495072044306, "loss": 0.0102, "num_input_tokens_seen": 161527984, "step": 74880 }, { "epoch": 12.216150081566068, "grad_norm": 0.034090980887413025, "learning_rate": 0.00039467992332887196, "loss": 0.026, "num_input_tokens_seen": 161539952, "step": 74885 }, { "epoch": 12.216965742251224, "grad_norm": 0.9625977873802185, "learning_rate": 0.0003946103415877582, "loss": 0.0668, "num_input_tokens_seen": 161550448, "step": 74890 }, { "epoch": 12.217781402936378, "grad_norm": 0.01857658475637436, "learning_rate": 0.00039454076198249964, "loss": 0.0892, "num_input_tokens_seen": 161560400, "step": 74895 }, { "epoch": 12.218597063621534, "grad_norm": 0.032711826264858246, "learning_rate": 0.00039447118451450613, "loss": 0.0096, "num_input_tokens_seen": 161570928, "step": 74900 }, { "epoch": 12.219412724306688, "grad_norm": 0.15428206324577332, "learning_rate": 0.00039440160918518825, "loss": 0.0555, "num_input_tokens_seen": 161581104, "step": 74905 }, { "epoch": 12.220228384991843, "grad_norm": 0.547364354133606, "learning_rate": 0.00039433203599595546, "loss": 0.1193, "num_input_tokens_seen": 161593648, "step": 74910 }, { "epoch": 12.221044045676999, "grad_norm": 0.03655211627483368, "learning_rate": 0.00039426246494821793, "loss": 0.0146, "num_input_tokens_seen": 161604272, "step": 74915 }, { "epoch": 12.221859706362153, "grad_norm": 0.007028940133750439, "learning_rate": 0.000394192896043386, "loss": 0.0094, "num_input_tokens_seen": 161614192, "step": 74920 }, { "epoch": 12.222675367047309, "grad_norm": 0.0119027616456151, "learning_rate": 0.000394123329282869, "loss": 0.0152, "num_input_tokens_seen": 161623824, "step": 74925 }, { "epoch": 12.223491027732463, "grad_norm": 0.012449878267943859, "learning_rate": 0.0003940537646680773, "loss": 0.0074, "num_input_tokens_seen": 161634544, "step": 74930 }, { "epoch": 12.224306688417618, "grad_norm": 0.009560869075357914, "learning_rate": 0.0003939842022004202, "loss": 0.0114, "num_input_tokens_seen": 161644848, "step": 74935 }, { "epoch": 12.225122349102774, "grad_norm": 0.016618067398667336, "learning_rate": 0.00039391464188130796, "loss": 0.0043, "num_input_tokens_seen": 161655888, "step": 74940 }, { "epoch": 12.225938009787928, "grad_norm": 0.7725915908813477, "learning_rate": 0.0003938450837121499, "loss": 0.0545, "num_input_tokens_seen": 161667088, "step": 74945 }, { "epoch": 12.226753670473084, "grad_norm": 0.0015827735187485814, "learning_rate": 0.00039377552769435606, "loss": 0.0222, "num_input_tokens_seen": 161678416, "step": 74950 }, { "epoch": 12.227569331158238, "grad_norm": 0.455867201089859, "learning_rate": 0.0003937059738293357, "loss": 0.0819, "num_input_tokens_seen": 161689968, "step": 74955 }, { "epoch": 12.228384991843393, "grad_norm": 0.017926346510648727, "learning_rate": 0.0003936364221184988, "loss": 0.1066, "num_input_tokens_seen": 161700048, "step": 74960 }, { "epoch": 12.229200652528547, "grad_norm": 0.007473854813724756, "learning_rate": 0.00039356687256325465, "loss": 0.0393, "num_input_tokens_seen": 161710576, "step": 74965 }, { "epoch": 12.230016313213703, "grad_norm": 0.0032830722630023956, "learning_rate": 0.0003934973251650129, "loss": 0.0144, "num_input_tokens_seen": 161721168, "step": 74970 }, { "epoch": 12.230831973898859, "grad_norm": 0.007268395274877548, "learning_rate": 0.0003934277799251829, "loss": 0.0167, "num_input_tokens_seen": 161733328, "step": 74975 }, { "epoch": 12.231647634584013, "grad_norm": 0.004309126175940037, "learning_rate": 0.00039335823684517423, "loss": 0.0464, "num_input_tokens_seen": 161744016, "step": 74980 }, { "epoch": 12.232463295269168, "grad_norm": 0.008335969410836697, "learning_rate": 0.00039328869592639604, "loss": 0.0047, "num_input_tokens_seen": 161754608, "step": 74985 }, { "epoch": 12.233278955954322, "grad_norm": 0.1529814898967743, "learning_rate": 0.00039321915717025797, "loss": 0.0346, "num_input_tokens_seen": 161765328, "step": 74990 }, { "epoch": 12.234094616639478, "grad_norm": 0.039140064269304276, "learning_rate": 0.00039314962057816896, "loss": 0.0176, "num_input_tokens_seen": 161775568, "step": 74995 }, { "epoch": 12.234910277324634, "grad_norm": 0.038266103714704514, "learning_rate": 0.0003930800861515385, "loss": 0.0564, "num_input_tokens_seen": 161785296, "step": 75000 }, { "epoch": 12.235725938009788, "grad_norm": 0.17783349752426147, "learning_rate": 0.00039301055389177577, "loss": 0.0118, "num_input_tokens_seen": 161795888, "step": 75005 }, { "epoch": 12.236541598694943, "grad_norm": 0.33641305565834045, "learning_rate": 0.00039294102380028987, "loss": 0.0203, "num_input_tokens_seen": 161807472, "step": 75010 }, { "epoch": 12.237357259380097, "grad_norm": 0.38689103722572327, "learning_rate": 0.0003928714958784899, "loss": 0.0325, "num_input_tokens_seen": 161820176, "step": 75015 }, { "epoch": 12.238172920065253, "grad_norm": 0.011015702970325947, "learning_rate": 0.00039280197012778493, "loss": 0.1626, "num_input_tokens_seen": 161828912, "step": 75020 }, { "epoch": 12.238988580750409, "grad_norm": 0.015676261857151985, "learning_rate": 0.0003927324465495841, "loss": 0.0092, "num_input_tokens_seen": 161839216, "step": 75025 }, { "epoch": 12.239804241435563, "grad_norm": 0.05365163832902908, "learning_rate": 0.0003926629251452963, "loss": 0.0312, "num_input_tokens_seen": 161849424, "step": 75030 }, { "epoch": 12.240619902120718, "grad_norm": 0.06218748912215233, "learning_rate": 0.0003925934059163306, "loss": 0.0072, "num_input_tokens_seen": 161861232, "step": 75035 }, { "epoch": 12.241435562805872, "grad_norm": 0.6301116347312927, "learning_rate": 0.0003925238888640957, "loss": 0.1386, "num_input_tokens_seen": 161871888, "step": 75040 }, { "epoch": 12.242251223491028, "grad_norm": 0.003952207043766975, "learning_rate": 0.0003924543739900005, "loss": 0.0372, "num_input_tokens_seen": 161882000, "step": 75045 }, { "epoch": 12.243066884176184, "grad_norm": 0.004046841524541378, "learning_rate": 0.00039238486129545376, "loss": 0.0086, "num_input_tokens_seen": 161891632, "step": 75050 }, { "epoch": 12.243882544861338, "grad_norm": 0.3328939378261566, "learning_rate": 0.0003923153507818645, "loss": 0.1154, "num_input_tokens_seen": 161902064, "step": 75055 }, { "epoch": 12.244698205546493, "grad_norm": 0.2566170394420624, "learning_rate": 0.00039224584245064114, "loss": 0.0503, "num_input_tokens_seen": 161911568, "step": 75060 }, { "epoch": 12.245513866231647, "grad_norm": 0.005525566171854734, "learning_rate": 0.00039217633630319264, "loss": 0.0116, "num_input_tokens_seen": 161922800, "step": 75065 }, { "epoch": 12.246329526916803, "grad_norm": 0.004351208917796612, "learning_rate": 0.00039210683234092733, "loss": 0.082, "num_input_tokens_seen": 161932752, "step": 75070 }, { "epoch": 12.247145187601957, "grad_norm": 0.062235135585069656, "learning_rate": 0.000392037330565254, "loss": 0.0444, "num_input_tokens_seen": 161943920, "step": 75075 }, { "epoch": 12.247960848287113, "grad_norm": 0.46529263257980347, "learning_rate": 0.000391967830977581, "loss": 0.015, "num_input_tokens_seen": 161955376, "step": 75080 }, { "epoch": 12.248776508972268, "grad_norm": 0.004618046805262566, "learning_rate": 0.0003918983335793173, "loss": 0.0073, "num_input_tokens_seen": 161966224, "step": 75085 }, { "epoch": 12.249592169657422, "grad_norm": 0.22658365964889526, "learning_rate": 0.00039182883837187056, "loss": 0.1294, "num_input_tokens_seen": 161975888, "step": 75090 }, { "epoch": 12.250407830342578, "grad_norm": 0.015589535236358643, "learning_rate": 0.00039175934535665, "loss": 0.1552, "num_input_tokens_seen": 161987824, "step": 75095 }, { "epoch": 12.251223491027732, "grad_norm": 0.8100619316101074, "learning_rate": 0.00039168985453506334, "loss": 0.0885, "num_input_tokens_seen": 161999568, "step": 75100 }, { "epoch": 12.252039151712887, "grad_norm": 0.9661038517951965, "learning_rate": 0.0003916203659085194, "loss": 0.0707, "num_input_tokens_seen": 162010064, "step": 75105 }, { "epoch": 12.252854812398043, "grad_norm": 0.03163062781095505, "learning_rate": 0.00039155087947842607, "loss": 0.0861, "num_input_tokens_seen": 162021136, "step": 75110 }, { "epoch": 12.253670473083197, "grad_norm": 0.44362178444862366, "learning_rate": 0.00039148139524619184, "loss": 0.0242, "num_input_tokens_seen": 162032624, "step": 75115 }, { "epoch": 12.254486133768353, "grad_norm": 0.006401136517524719, "learning_rate": 0.00039141191321322464, "loss": 0.1166, "num_input_tokens_seen": 162043696, "step": 75120 }, { "epoch": 12.255301794453507, "grad_norm": 0.06302186846733093, "learning_rate": 0.00039134243338093285, "loss": 0.0128, "num_input_tokens_seen": 162055792, "step": 75125 }, { "epoch": 12.256117455138662, "grad_norm": 0.014522004872560501, "learning_rate": 0.0003912729557507246, "loss": 0.0053, "num_input_tokens_seen": 162066832, "step": 75130 }, { "epoch": 12.256933115823816, "grad_norm": 0.30135777592658997, "learning_rate": 0.0003912034803240077, "loss": 0.1501, "num_input_tokens_seen": 162078448, "step": 75135 }, { "epoch": 12.257748776508972, "grad_norm": 0.013315478339791298, "learning_rate": 0.0003911340071021905, "loss": 0.0094, "num_input_tokens_seen": 162089104, "step": 75140 }, { "epoch": 12.258564437194128, "grad_norm": 0.6932283043861389, "learning_rate": 0.00039106453608668047, "loss": 0.0765, "num_input_tokens_seen": 162100176, "step": 75145 }, { "epoch": 12.259380097879282, "grad_norm": 0.03982196003198624, "learning_rate": 0.0003909950672788861, "loss": 0.0211, "num_input_tokens_seen": 162111824, "step": 75150 }, { "epoch": 12.260195758564437, "grad_norm": 0.016217298805713654, "learning_rate": 0.0003909256006802147, "loss": 0.0129, "num_input_tokens_seen": 162123824, "step": 75155 }, { "epoch": 12.261011419249591, "grad_norm": 0.03301455080509186, "learning_rate": 0.0003908561362920746, "loss": 0.0103, "num_input_tokens_seen": 162135184, "step": 75160 }, { "epoch": 12.261827079934747, "grad_norm": 0.5239769816398621, "learning_rate": 0.00039078667411587316, "loss": 0.2072, "num_input_tokens_seen": 162144560, "step": 75165 }, { "epoch": 12.262642740619903, "grad_norm": 0.04003845155239105, "learning_rate": 0.0003907172141530184, "loss": 0.0145, "num_input_tokens_seen": 162153616, "step": 75170 }, { "epoch": 12.263458401305057, "grad_norm": 0.06474819779396057, "learning_rate": 0.00039064775640491796, "loss": 0.0382, "num_input_tokens_seen": 162164400, "step": 75175 }, { "epoch": 12.264274061990212, "grad_norm": 0.0025073750875890255, "learning_rate": 0.00039057830087297946, "loss": 0.02, "num_input_tokens_seen": 162176304, "step": 75180 }, { "epoch": 12.265089722675366, "grad_norm": 0.008581621572375298, "learning_rate": 0.0003905088475586105, "loss": 0.181, "num_input_tokens_seen": 162187792, "step": 75185 }, { "epoch": 12.265905383360522, "grad_norm": 0.006541701033711433, "learning_rate": 0.0003904393964632186, "loss": 0.0268, "num_input_tokens_seen": 162198480, "step": 75190 }, { "epoch": 12.266721044045678, "grad_norm": 0.0644923597574234, "learning_rate": 0.00039036994758821124, "loss": 0.0193, "num_input_tokens_seen": 162209488, "step": 75195 }, { "epoch": 12.267536704730832, "grad_norm": 0.03382834047079086, "learning_rate": 0.00039030050093499623, "loss": 0.0175, "num_input_tokens_seen": 162221008, "step": 75200 }, { "epoch": 12.268352365415987, "grad_norm": 0.0205338466912508, "learning_rate": 0.0003902310565049805, "loss": 0.0616, "num_input_tokens_seen": 162231952, "step": 75205 }, { "epoch": 12.269168026101141, "grad_norm": 0.004986535292118788, "learning_rate": 0.0003901616142995718, "loss": 0.0059, "num_input_tokens_seen": 162243312, "step": 75210 }, { "epoch": 12.269983686786297, "grad_norm": 0.0030673425644636154, "learning_rate": 0.0003900921743201772, "loss": 0.1736, "num_input_tokens_seen": 162253456, "step": 75215 }, { "epoch": 12.270799347471453, "grad_norm": 0.007879951037466526, "learning_rate": 0.00039002273656820423, "loss": 0.004, "num_input_tokens_seen": 162262320, "step": 75220 }, { "epoch": 12.271615008156607, "grad_norm": 0.0064381193369627, "learning_rate": 0.0003899533010450599, "loss": 0.0231, "num_input_tokens_seen": 162273008, "step": 75225 }, { "epoch": 12.272430668841762, "grad_norm": 0.0937180444598198, "learning_rate": 0.0003898838677521515, "loss": 0.014, "num_input_tokens_seen": 162283280, "step": 75230 }, { "epoch": 12.273246329526916, "grad_norm": 0.002834079787135124, "learning_rate": 0.00038981443669088646, "loss": 0.0137, "num_input_tokens_seen": 162293232, "step": 75235 }, { "epoch": 12.274061990212072, "grad_norm": 0.04539687559008598, "learning_rate": 0.0003897450078626714, "loss": 0.0119, "num_input_tokens_seen": 162304848, "step": 75240 }, { "epoch": 12.274877650897226, "grad_norm": 0.006977009121328592, "learning_rate": 0.0003896755812689138, "loss": 0.0144, "num_input_tokens_seen": 162315184, "step": 75245 }, { "epoch": 12.275693311582382, "grad_norm": 0.022313179448246956, "learning_rate": 0.0003896061569110203, "loss": 0.0048, "num_input_tokens_seen": 162325424, "step": 75250 }, { "epoch": 12.276508972267537, "grad_norm": 0.5669946074485779, "learning_rate": 0.0003895367347903983, "loss": 0.1211, "num_input_tokens_seen": 162334800, "step": 75255 }, { "epoch": 12.277324632952691, "grad_norm": 0.005156332161277533, "learning_rate": 0.0003894673149084543, "loss": 0.0112, "num_input_tokens_seen": 162345200, "step": 75260 }, { "epoch": 12.278140293637847, "grad_norm": 0.14445383846759796, "learning_rate": 0.0003893978972665956, "loss": 0.0423, "num_input_tokens_seen": 162354448, "step": 75265 }, { "epoch": 12.278955954323001, "grad_norm": 0.03495357185602188, "learning_rate": 0.0003893284818662286, "loss": 0.0042, "num_input_tokens_seen": 162363536, "step": 75270 }, { "epoch": 12.279771615008157, "grad_norm": 0.050448670983314514, "learning_rate": 0.0003892590687087605, "loss": 0.0144, "num_input_tokens_seen": 162375088, "step": 75275 }, { "epoch": 12.280587275693312, "grad_norm": 0.028258051723241806, "learning_rate": 0.0003891896577955977, "loss": 0.0203, "num_input_tokens_seen": 162385424, "step": 75280 }, { "epoch": 12.281402936378466, "grad_norm": 0.02308092638850212, "learning_rate": 0.0003891202491281472, "loss": 0.005, "num_input_tokens_seen": 162397200, "step": 75285 }, { "epoch": 12.282218597063622, "grad_norm": 0.013062660582363605, "learning_rate": 0.0003890508427078153, "loss": 0.0508, "num_input_tokens_seen": 162407088, "step": 75290 }, { "epoch": 12.283034257748776, "grad_norm": 0.2435280829668045, "learning_rate": 0.0003889814385360091, "loss": 0.0176, "num_input_tokens_seen": 162417648, "step": 75295 }, { "epoch": 12.283849918433932, "grad_norm": 0.011580669321119785, "learning_rate": 0.0003889120366141347, "loss": 0.0959, "num_input_tokens_seen": 162428752, "step": 75300 }, { "epoch": 12.284665579119087, "grad_norm": 1.026945948600769, "learning_rate": 0.0003888426369435989, "loss": 0.1741, "num_input_tokens_seen": 162439952, "step": 75305 }, { "epoch": 12.285481239804241, "grad_norm": 0.048225171864032745, "learning_rate": 0.0003887732395258079, "loss": 0.014, "num_input_tokens_seen": 162450832, "step": 75310 }, { "epoch": 12.286296900489397, "grad_norm": 0.017613038420677185, "learning_rate": 0.0003887038443621684, "loss": 0.1105, "num_input_tokens_seen": 162461168, "step": 75315 }, { "epoch": 12.28711256117455, "grad_norm": 0.6232366561889648, "learning_rate": 0.0003886344514540868, "loss": 0.1543, "num_input_tokens_seen": 162472816, "step": 75320 }, { "epoch": 12.287928221859707, "grad_norm": 0.055316805839538574, "learning_rate": 0.0003885650608029692, "loss": 0.0207, "num_input_tokens_seen": 162484144, "step": 75325 }, { "epoch": 12.28874388254486, "grad_norm": 0.0045505305752158165, "learning_rate": 0.00038849567241022205, "loss": 0.0069, "num_input_tokens_seen": 162494032, "step": 75330 }, { "epoch": 12.289559543230016, "grad_norm": 0.597123920917511, "learning_rate": 0.0003884262862772514, "loss": 0.1147, "num_input_tokens_seen": 162505520, "step": 75335 }, { "epoch": 12.290375203915172, "grad_norm": 0.46310120820999146, "learning_rate": 0.0003883569024054638, "loss": 0.0487, "num_input_tokens_seen": 162516432, "step": 75340 }, { "epoch": 12.291190864600326, "grad_norm": 0.01990097016096115, "learning_rate": 0.0003882875207962651, "loss": 0.0786, "num_input_tokens_seen": 162527216, "step": 75345 }, { "epoch": 12.292006525285482, "grad_norm": 0.007414130959659815, "learning_rate": 0.0003882181414510616, "loss": 0.0502, "num_input_tokens_seen": 162536688, "step": 75350 }, { "epoch": 12.292822185970635, "grad_norm": 0.03613249212503433, "learning_rate": 0.00038814876437125916, "loss": 0.0176, "num_input_tokens_seen": 162547632, "step": 75355 }, { "epoch": 12.293637846655791, "grad_norm": 0.04707645624876022, "learning_rate": 0.000388079389558264, "loss": 0.0077, "num_input_tokens_seen": 162558800, "step": 75360 }, { "epoch": 12.294453507340947, "grad_norm": 0.02096550539135933, "learning_rate": 0.0003880100170134818, "loss": 0.0732, "num_input_tokens_seen": 162569904, "step": 75365 }, { "epoch": 12.2952691680261, "grad_norm": 0.05317756161093712, "learning_rate": 0.00038794064673831896, "loss": 0.0108, "num_input_tokens_seen": 162581392, "step": 75370 }, { "epoch": 12.296084828711257, "grad_norm": 0.0274104755371809, "learning_rate": 0.0003878712787341809, "loss": 0.0515, "num_input_tokens_seen": 162591856, "step": 75375 }, { "epoch": 12.29690048939641, "grad_norm": 0.00704601313918829, "learning_rate": 0.0003878019130024737, "loss": 0.0105, "num_input_tokens_seen": 162603408, "step": 75380 }, { "epoch": 12.297716150081566, "grad_norm": 0.50138258934021, "learning_rate": 0.000387732549544603, "loss": 0.0179, "num_input_tokens_seen": 162614256, "step": 75385 }, { "epoch": 12.298531810766722, "grad_norm": 0.26423805952072144, "learning_rate": 0.0003876631883619747, "loss": 0.2506, "num_input_tokens_seen": 162623728, "step": 75390 }, { "epoch": 12.299347471451876, "grad_norm": 0.1356430947780609, "learning_rate": 0.0003875938294559942, "loss": 0.0135, "num_input_tokens_seen": 162633232, "step": 75395 }, { "epoch": 12.300163132137031, "grad_norm": 0.00777399493381381, "learning_rate": 0.0003875244728280676, "loss": 0.0295, "num_input_tokens_seen": 162643248, "step": 75400 }, { "epoch": 12.300978792822185, "grad_norm": 0.5313088297843933, "learning_rate": 0.00038745511847960003, "loss": 0.0526, "num_input_tokens_seen": 162654768, "step": 75405 }, { "epoch": 12.301794453507341, "grad_norm": 0.005560186691582203, "learning_rate": 0.0003873857664119974, "loss": 0.0037, "num_input_tokens_seen": 162664752, "step": 75410 }, { "epoch": 12.302610114192497, "grad_norm": 0.039127275347709656, "learning_rate": 0.00038731641662666493, "loss": 0.0277, "num_input_tokens_seen": 162675952, "step": 75415 }, { "epoch": 12.30342577487765, "grad_norm": 0.3397650122642517, "learning_rate": 0.00038724706912500847, "loss": 0.1165, "num_input_tokens_seen": 162686448, "step": 75420 }, { "epoch": 12.304241435562806, "grad_norm": 0.3268750309944153, "learning_rate": 0.0003871777239084329, "loss": 0.047, "num_input_tokens_seen": 162697136, "step": 75425 }, { "epoch": 12.30505709624796, "grad_norm": 0.06375301629304886, "learning_rate": 0.00038710838097834414, "loss": 0.042, "num_input_tokens_seen": 162708496, "step": 75430 }, { "epoch": 12.305872756933116, "grad_norm": 0.07532596588134766, "learning_rate": 0.000387039040336147, "loss": 0.0349, "num_input_tokens_seen": 162718704, "step": 75435 }, { "epoch": 12.30668841761827, "grad_norm": 0.7182519435882568, "learning_rate": 0.0003869697019832473, "loss": 0.1069, "num_input_tokens_seen": 162728752, "step": 75440 }, { "epoch": 12.307504078303426, "grad_norm": 0.005934921558946371, "learning_rate": 0.0003869003659210497, "loss": 0.0486, "num_input_tokens_seen": 162740016, "step": 75445 }, { "epoch": 12.308319738988581, "grad_norm": 0.7087914347648621, "learning_rate": 0.00038683103215095965, "loss": 0.0229, "num_input_tokens_seen": 162750192, "step": 75450 }, { "epoch": 12.309135399673735, "grad_norm": 0.03688497468829155, "learning_rate": 0.00038676170067438256, "loss": 0.0051, "num_input_tokens_seen": 162761008, "step": 75455 }, { "epoch": 12.309951060358891, "grad_norm": 0.052912306040525436, "learning_rate": 0.00038669237149272303, "loss": 0.0066, "num_input_tokens_seen": 162772944, "step": 75460 }, { "epoch": 12.310766721044045, "grad_norm": 0.0015300051309168339, "learning_rate": 0.0003866230446073865, "loss": 0.0112, "num_input_tokens_seen": 162783472, "step": 75465 }, { "epoch": 12.3115823817292, "grad_norm": 1.3775086402893066, "learning_rate": 0.0003865537200197776, "loss": 0.0234, "num_input_tokens_seen": 162793712, "step": 75470 }, { "epoch": 12.312398042414356, "grad_norm": 0.03915144503116608, "learning_rate": 0.0003864843977313017, "loss": 0.0562, "num_input_tokens_seen": 162804368, "step": 75475 }, { "epoch": 12.31321370309951, "grad_norm": 1.6702507734298706, "learning_rate": 0.0003864150777433634, "loss": 0.0757, "num_input_tokens_seen": 162815792, "step": 75480 }, { "epoch": 12.314029363784666, "grad_norm": 0.005646785721182823, "learning_rate": 0.0003863457600573676, "loss": 0.3219, "num_input_tokens_seen": 162826928, "step": 75485 }, { "epoch": 12.31484502446982, "grad_norm": 0.3302176594734192, "learning_rate": 0.00038627644467471915, "loss": 0.0373, "num_input_tokens_seen": 162838544, "step": 75490 }, { "epoch": 12.315660685154976, "grad_norm": 0.018164703622460365, "learning_rate": 0.00038620713159682286, "loss": 0.0078, "num_input_tokens_seen": 162850512, "step": 75495 }, { "epoch": 12.31647634584013, "grad_norm": 0.0666789636015892, "learning_rate": 0.0003861378208250834, "loss": 0.0118, "num_input_tokens_seen": 162860592, "step": 75500 }, { "epoch": 12.317292006525285, "grad_norm": 0.016752062365412712, "learning_rate": 0.00038606851236090543, "loss": 0.0067, "num_input_tokens_seen": 162870736, "step": 75505 }, { "epoch": 12.318107667210441, "grad_norm": 0.019690683111548424, "learning_rate": 0.00038599920620569357, "loss": 0.0108, "num_input_tokens_seen": 162880656, "step": 75510 }, { "epoch": 12.318923327895595, "grad_norm": 0.3065456449985504, "learning_rate": 0.00038592990236085257, "loss": 0.146, "num_input_tokens_seen": 162891824, "step": 75515 }, { "epoch": 12.31973898858075, "grad_norm": 0.01065349206328392, "learning_rate": 0.0003858606008277866, "loss": 0.1374, "num_input_tokens_seen": 162902736, "step": 75520 }, { "epoch": 12.320554649265905, "grad_norm": 0.029920026659965515, "learning_rate": 0.0003857913016079005, "loss": 0.1, "num_input_tokens_seen": 162913488, "step": 75525 }, { "epoch": 12.32137030995106, "grad_norm": 0.02076866291463375, "learning_rate": 0.0003857220047025984, "loss": 0.0128, "num_input_tokens_seen": 162923888, "step": 75530 }, { "epoch": 12.322185970636216, "grad_norm": 0.13742879033088684, "learning_rate": 0.00038565271011328507, "loss": 0.1342, "num_input_tokens_seen": 162935760, "step": 75535 }, { "epoch": 12.32300163132137, "grad_norm": 0.011969635263085365, "learning_rate": 0.00038558341784136437, "loss": 0.0114, "num_input_tokens_seen": 162947696, "step": 75540 }, { "epoch": 12.323817292006526, "grad_norm": 0.12961776554584503, "learning_rate": 0.00038551412788824106, "loss": 0.0375, "num_input_tokens_seen": 162958512, "step": 75545 }, { "epoch": 12.32463295269168, "grad_norm": 0.05914083868265152, "learning_rate": 0.0003854448402553191, "loss": 0.1692, "num_input_tokens_seen": 162968560, "step": 75550 }, { "epoch": 12.325448613376835, "grad_norm": 0.40248963236808777, "learning_rate": 0.0003853755549440026, "loss": 0.0335, "num_input_tokens_seen": 162979408, "step": 75555 }, { "epoch": 12.326264274061991, "grad_norm": 0.39195582270622253, "learning_rate": 0.0003853062719556962, "loss": 0.0262, "num_input_tokens_seen": 162989648, "step": 75560 }, { "epoch": 12.327079934747145, "grad_norm": 0.9844707250595093, "learning_rate": 0.0003852369912918035, "loss": 0.0819, "num_input_tokens_seen": 162999984, "step": 75565 }, { "epoch": 12.3278955954323, "grad_norm": 0.038741856813430786, "learning_rate": 0.00038516771295372894, "loss": 0.0421, "num_input_tokens_seen": 163010512, "step": 75570 }, { "epoch": 12.328711256117455, "grad_norm": 0.05136241763830185, "learning_rate": 0.00038509843694287615, "loss": 0.0081, "num_input_tokens_seen": 163020176, "step": 75575 }, { "epoch": 12.32952691680261, "grad_norm": 0.049829691648483276, "learning_rate": 0.0003850291632606495, "loss": 0.0307, "num_input_tokens_seen": 163030992, "step": 75580 }, { "epoch": 12.330342577487766, "grad_norm": 0.10264070332050323, "learning_rate": 0.00038495989190845246, "loss": 0.0082, "num_input_tokens_seen": 163041296, "step": 75585 }, { "epoch": 12.33115823817292, "grad_norm": 0.020925071090459824, "learning_rate": 0.00038489062288768944, "loss": 0.1457, "num_input_tokens_seen": 163052240, "step": 75590 }, { "epoch": 12.331973898858076, "grad_norm": 0.12010475248098373, "learning_rate": 0.00038482135619976373, "loss": 0.0196, "num_input_tokens_seen": 163062960, "step": 75595 }, { "epoch": 12.33278955954323, "grad_norm": 0.2778264880180359, "learning_rate": 0.0003847520918460795, "loss": 0.0219, "num_input_tokens_seen": 163074416, "step": 75600 }, { "epoch": 12.333605220228385, "grad_norm": 0.5912114977836609, "learning_rate": 0.00038468282982804023, "loss": 0.1299, "num_input_tokens_seen": 163084528, "step": 75605 }, { "epoch": 12.33442088091354, "grad_norm": 0.240101158618927, "learning_rate": 0.00038461357014704986, "loss": 0.0637, "num_input_tokens_seen": 163095024, "step": 75610 }, { "epoch": 12.335236541598695, "grad_norm": 0.12646017968654633, "learning_rate": 0.00038454431280451163, "loss": 0.0115, "num_input_tokens_seen": 163105328, "step": 75615 }, { "epoch": 12.33605220228385, "grad_norm": 0.165180504322052, "learning_rate": 0.00038447505780182963, "loss": 0.0197, "num_input_tokens_seen": 163115344, "step": 75620 }, { "epoch": 12.336867862969005, "grad_norm": 0.03098953142762184, "learning_rate": 0.0003844058051404069, "loss": 0.0063, "num_input_tokens_seen": 163126000, "step": 75625 }, { "epoch": 12.33768352365416, "grad_norm": 0.05831137299537659, "learning_rate": 0.00038433655482164727, "loss": 0.0057, "num_input_tokens_seen": 163135888, "step": 75630 }, { "epoch": 12.338499184339314, "grad_norm": 0.036122165620326996, "learning_rate": 0.0003842673068469541, "loss": 0.175, "num_input_tokens_seen": 163147536, "step": 75635 }, { "epoch": 12.33931484502447, "grad_norm": 0.019776033237576485, "learning_rate": 0.0003841980612177308, "loss": 0.0175, "num_input_tokens_seen": 163157904, "step": 75640 }, { "epoch": 12.340130505709626, "grad_norm": 0.010085292160511017, "learning_rate": 0.00038412881793538063, "loss": 0.0203, "num_input_tokens_seen": 163168304, "step": 75645 }, { "epoch": 12.34094616639478, "grad_norm": 0.04757087305188179, "learning_rate": 0.000384059577001307, "loss": 0.0068, "num_input_tokens_seen": 163179792, "step": 75650 }, { "epoch": 12.341761827079935, "grad_norm": 0.0319545604288578, "learning_rate": 0.000383990338416913, "loss": 0.0218, "num_input_tokens_seen": 163190608, "step": 75655 }, { "epoch": 12.34257748776509, "grad_norm": 0.020290879532694817, "learning_rate": 0.00038392110218360203, "loss": 0.0092, "num_input_tokens_seen": 163200720, "step": 75660 }, { "epoch": 12.343393148450245, "grad_norm": 0.002333683893084526, "learning_rate": 0.0003838518683027772, "loss": 0.0753, "num_input_tokens_seen": 163211856, "step": 75665 }, { "epoch": 12.3442088091354, "grad_norm": 0.028258109465241432, "learning_rate": 0.0003837826367758417, "loss": 0.0176, "num_input_tokens_seen": 163222704, "step": 75670 }, { "epoch": 12.345024469820554, "grad_norm": 0.003943005111068487, "learning_rate": 0.0003837134076041984, "loss": 0.0104, "num_input_tokens_seen": 163233168, "step": 75675 }, { "epoch": 12.34584013050571, "grad_norm": 0.018401047214865685, "learning_rate": 0.00038364418078925037, "loss": 0.0173, "num_input_tokens_seen": 163244304, "step": 75680 }, { "epoch": 12.346655791190864, "grad_norm": 0.006495045032352209, "learning_rate": 0.0003835749563324008, "loss": 0.058, "num_input_tokens_seen": 163255248, "step": 75685 }, { "epoch": 12.34747145187602, "grad_norm": 0.1586838960647583, "learning_rate": 0.0003835057342350522, "loss": 0.022, "num_input_tokens_seen": 163267088, "step": 75690 }, { "epoch": 12.348287112561174, "grad_norm": 0.14900203049182892, "learning_rate": 0.0003834365144986079, "loss": 0.0134, "num_input_tokens_seen": 163277776, "step": 75695 }, { "epoch": 12.34910277324633, "grad_norm": 0.28918543457984924, "learning_rate": 0.00038336729712447034, "loss": 0.0665, "num_input_tokens_seen": 163287632, "step": 75700 }, { "epoch": 12.349918433931485, "grad_norm": 0.09254862368106842, "learning_rate": 0.0003832980821140426, "loss": 0.0164, "num_input_tokens_seen": 163298896, "step": 75705 }, { "epoch": 12.350734094616639, "grad_norm": 0.0033889564219862223, "learning_rate": 0.00038322886946872716, "loss": 0.0321, "num_input_tokens_seen": 163309168, "step": 75710 }, { "epoch": 12.351549755301795, "grad_norm": 0.10456783324480057, "learning_rate": 0.000383159659189927, "loss": 0.0364, "num_input_tokens_seen": 163319216, "step": 75715 }, { "epoch": 12.352365415986949, "grad_norm": 0.04119764640927315, "learning_rate": 0.0003830904512790443, "loss": 0.0116, "num_input_tokens_seen": 163329232, "step": 75720 }, { "epoch": 12.353181076672104, "grad_norm": 0.007273167371749878, "learning_rate": 0.0003830212457374821, "loss": 0.0216, "num_input_tokens_seen": 163339280, "step": 75725 }, { "epoch": 12.35399673735726, "grad_norm": 0.07448381185531616, "learning_rate": 0.00038295204256664264, "loss": 0.0072, "num_input_tokens_seen": 163351504, "step": 75730 }, { "epoch": 12.354812398042414, "grad_norm": 0.8378725647926331, "learning_rate": 0.00038288284176792866, "loss": 0.0399, "num_input_tokens_seen": 163361840, "step": 75735 }, { "epoch": 12.35562805872757, "grad_norm": 0.4317421019077301, "learning_rate": 0.0003828136433427423, "loss": 0.0158, "num_input_tokens_seen": 163371504, "step": 75740 }, { "epoch": 12.356443719412724, "grad_norm": 0.041952259838581085, "learning_rate": 0.00038274444729248633, "loss": 0.0241, "num_input_tokens_seen": 163383184, "step": 75745 }, { "epoch": 12.35725938009788, "grad_norm": 0.0903538390994072, "learning_rate": 0.00038267525361856264, "loss": 0.0357, "num_input_tokens_seen": 163393744, "step": 75750 }, { "epoch": 12.358075040783035, "grad_norm": 0.25721636414527893, "learning_rate": 0.000382606062322374, "loss": 0.0187, "num_input_tokens_seen": 163404080, "step": 75755 }, { "epoch": 12.358890701468189, "grad_norm": 0.12671978771686554, "learning_rate": 0.00038253687340532224, "loss": 0.0262, "num_input_tokens_seen": 163414128, "step": 75760 }, { "epoch": 12.359706362153345, "grad_norm": 0.008647649548947811, "learning_rate": 0.0003824676868688097, "loss": 0.136, "num_input_tokens_seen": 163425040, "step": 75765 }, { "epoch": 12.360522022838499, "grad_norm": 0.007501132320612669, "learning_rate": 0.0003823985027142389, "loss": 0.0041, "num_input_tokens_seen": 163436080, "step": 75770 }, { "epoch": 12.361337683523654, "grad_norm": 0.4920627176761627, "learning_rate": 0.0003823293209430113, "loss": 0.1644, "num_input_tokens_seen": 163447472, "step": 75775 }, { "epoch": 12.362153344208808, "grad_norm": 0.0052056158892810345, "learning_rate": 0.00038226014155652956, "loss": 0.0071, "num_input_tokens_seen": 163458704, "step": 75780 }, { "epoch": 12.362969004893964, "grad_norm": 0.047712862491607666, "learning_rate": 0.0003821909645561952, "loss": 0.0114, "num_input_tokens_seen": 163469488, "step": 75785 }, { "epoch": 12.36378466557912, "grad_norm": 0.7216401696205139, "learning_rate": 0.0003821217899434106, "loss": 0.054, "num_input_tokens_seen": 163480144, "step": 75790 }, { "epoch": 12.364600326264274, "grad_norm": 0.0058282180689275265, "learning_rate": 0.0003820526177195772, "loss": 0.0043, "num_input_tokens_seen": 163491216, "step": 75795 }, { "epoch": 12.36541598694943, "grad_norm": 2.57367205619812, "learning_rate": 0.00038198344788609737, "loss": 0.0705, "num_input_tokens_seen": 163503280, "step": 75800 }, { "epoch": 12.366231647634583, "grad_norm": 0.03722761943936348, "learning_rate": 0.0003819142804443726, "loss": 0.0549, "num_input_tokens_seen": 163514320, "step": 75805 }, { "epoch": 12.367047308319739, "grad_norm": 0.009505067020654678, "learning_rate": 0.0003818451153958047, "loss": 0.0098, "num_input_tokens_seen": 163524688, "step": 75810 }, { "epoch": 12.367862969004895, "grad_norm": 0.8721757531166077, "learning_rate": 0.0003817759527417955, "loss": 0.0538, "num_input_tokens_seen": 163534640, "step": 75815 }, { "epoch": 12.368678629690049, "grad_norm": 0.00860567670315504, "learning_rate": 0.00038170679248374653, "loss": 0.0197, "num_input_tokens_seen": 163544656, "step": 75820 }, { "epoch": 12.369494290375204, "grad_norm": 0.005892583169043064, "learning_rate": 0.00038163763462305944, "loss": 0.1204, "num_input_tokens_seen": 163555632, "step": 75825 }, { "epoch": 12.370309951060358, "grad_norm": 0.1313542276620865, "learning_rate": 0.000381568479161136, "loss": 0.0865, "num_input_tokens_seen": 163565360, "step": 75830 }, { "epoch": 12.371125611745514, "grad_norm": 0.006368995178490877, "learning_rate": 0.00038149932609937736, "loss": 0.0053, "num_input_tokens_seen": 163575568, "step": 75835 }, { "epoch": 12.37194127243067, "grad_norm": 0.008557685650885105, "learning_rate": 0.00038143017543918546, "loss": 0.0173, "num_input_tokens_seen": 163587376, "step": 75840 }, { "epoch": 12.372756933115824, "grad_norm": 0.06517430394887924, "learning_rate": 0.0003813610271819612, "loss": 0.0748, "num_input_tokens_seen": 163598704, "step": 75845 }, { "epoch": 12.37357259380098, "grad_norm": 0.0021782447583973408, "learning_rate": 0.00038129188132910645, "loss": 0.005, "num_input_tokens_seen": 163609680, "step": 75850 }, { "epoch": 12.374388254486133, "grad_norm": 0.0025557097978889942, "learning_rate": 0.00038122273788202216, "loss": 0.0038, "num_input_tokens_seen": 163619952, "step": 75855 }, { "epoch": 12.375203915171289, "grad_norm": 0.017526017501950264, "learning_rate": 0.00038115359684210993, "loss": 0.2093, "num_input_tokens_seen": 163629776, "step": 75860 }, { "epoch": 12.376019575856443, "grad_norm": 0.0062386468052864075, "learning_rate": 0.00038108445821077066, "loss": 0.0979, "num_input_tokens_seen": 163639472, "step": 75865 }, { "epoch": 12.376835236541599, "grad_norm": 0.009008281864225864, "learning_rate": 0.00038101532198940563, "loss": 0.0393, "num_input_tokens_seen": 163649840, "step": 75870 }, { "epoch": 12.377650897226754, "grad_norm": 0.016464056447148323, "learning_rate": 0.0003809461881794163, "loss": 0.0266, "num_input_tokens_seen": 163661264, "step": 75875 }, { "epoch": 12.378466557911908, "grad_norm": 0.04343675449490547, "learning_rate": 0.0003808770567822033, "loss": 0.008, "num_input_tokens_seen": 163672912, "step": 75880 }, { "epoch": 12.379282218597064, "grad_norm": 0.3261345326900482, "learning_rate": 0.000380807927799168, "loss": 0.0316, "num_input_tokens_seen": 163683440, "step": 75885 }, { "epoch": 12.380097879282218, "grad_norm": 0.41077104210853577, "learning_rate": 0.0003807388012317111, "loss": 0.0166, "num_input_tokens_seen": 163694064, "step": 75890 }, { "epoch": 12.380913539967374, "grad_norm": 0.3336528539657593, "learning_rate": 0.0003806696770812339, "loss": 0.0118, "num_input_tokens_seen": 163704240, "step": 75895 }, { "epoch": 12.38172920065253, "grad_norm": 0.09843586385250092, "learning_rate": 0.00038060055534913683, "loss": 0.0738, "num_input_tokens_seen": 163715696, "step": 75900 }, { "epoch": 12.382544861337683, "grad_norm": 0.2536364495754242, "learning_rate": 0.0003805314360368212, "loss": 0.0224, "num_input_tokens_seen": 163726064, "step": 75905 }, { "epoch": 12.383360522022839, "grad_norm": 0.019220635294914246, "learning_rate": 0.0003804623191456874, "loss": 0.0309, "num_input_tokens_seen": 163735664, "step": 75910 }, { "epoch": 12.384176182707993, "grad_norm": 0.02437998354434967, "learning_rate": 0.00038039320467713654, "loss": 0.0343, "num_input_tokens_seen": 163745872, "step": 75915 }, { "epoch": 12.384991843393149, "grad_norm": 0.05894165858626366, "learning_rate": 0.0003803240926325689, "loss": 0.0142, "num_input_tokens_seen": 163756560, "step": 75920 }, { "epoch": 12.385807504078304, "grad_norm": 0.005522827617824078, "learning_rate": 0.00038025498301338554, "loss": 0.0249, "num_input_tokens_seen": 163766608, "step": 75925 }, { "epoch": 12.386623164763458, "grad_norm": 0.00622938247397542, "learning_rate": 0.00038018587582098665, "loss": 0.0963, "num_input_tokens_seen": 163777232, "step": 75930 }, { "epoch": 12.387438825448614, "grad_norm": 0.4185888469219208, "learning_rate": 0.0003801167710567731, "loss": 0.1593, "num_input_tokens_seen": 163787728, "step": 75935 }, { "epoch": 12.388254486133768, "grad_norm": 0.19428984820842743, "learning_rate": 0.00038004766872214526, "loss": 0.0176, "num_input_tokens_seen": 163798032, "step": 75940 }, { "epoch": 12.389070146818923, "grad_norm": 0.022369321435689926, "learning_rate": 0.0003799785688185036, "loss": 0.0273, "num_input_tokens_seen": 163809200, "step": 75945 }, { "epoch": 12.38988580750408, "grad_norm": 0.0038328177761286497, "learning_rate": 0.00037990947134724845, "loss": 0.0101, "num_input_tokens_seen": 163819568, "step": 75950 }, { "epoch": 12.390701468189233, "grad_norm": 0.007359956856817007, "learning_rate": 0.00037984037630978026, "loss": 0.0239, "num_input_tokens_seen": 163830640, "step": 75955 }, { "epoch": 12.391517128874389, "grad_norm": 0.010038433596491814, "learning_rate": 0.00037977128370749916, "loss": 0.0119, "num_input_tokens_seen": 163841040, "step": 75960 }, { "epoch": 12.392332789559543, "grad_norm": 0.024821912869811058, "learning_rate": 0.00037970219354180573, "loss": 0.0893, "num_input_tokens_seen": 163850512, "step": 75965 }, { "epoch": 12.393148450244698, "grad_norm": 0.01640705205500126, "learning_rate": 0.0003796331058140997, "loss": 0.0046, "num_input_tokens_seen": 163862608, "step": 75970 }, { "epoch": 12.393964110929852, "grad_norm": 0.07918771356344223, "learning_rate": 0.00037956402052578164, "loss": 0.0107, "num_input_tokens_seen": 163873296, "step": 75975 }, { "epoch": 12.394779771615008, "grad_norm": 0.0473797470331192, "learning_rate": 0.0003794949376782515, "loss": 0.0088, "num_input_tokens_seen": 163885648, "step": 75980 }, { "epoch": 12.395595432300164, "grad_norm": 0.13570450246334076, "learning_rate": 0.00037942585727290926, "loss": 0.0691, "num_input_tokens_seen": 163895696, "step": 75985 }, { "epoch": 12.396411092985318, "grad_norm": 0.023735037073493004, "learning_rate": 0.000379356779311155, "loss": 0.0139, "num_input_tokens_seen": 163907312, "step": 75990 }, { "epoch": 12.397226753670473, "grad_norm": 0.0015332981711253524, "learning_rate": 0.0003792877037943886, "loss": 0.097, "num_input_tokens_seen": 163918640, "step": 75995 }, { "epoch": 12.398042414355627, "grad_norm": 0.3860754072666168, "learning_rate": 0.0003792186307240102, "loss": 0.0383, "num_input_tokens_seen": 163929680, "step": 76000 }, { "epoch": 12.398858075040783, "grad_norm": 0.08777333050966263, "learning_rate": 0.0003791495601014192, "loss": 0.0113, "num_input_tokens_seen": 163939888, "step": 76005 }, { "epoch": 12.399673735725939, "grad_norm": 0.011628851294517517, "learning_rate": 0.00037908049192801596, "loss": 0.0262, "num_input_tokens_seen": 163950800, "step": 76010 }, { "epoch": 12.400489396411093, "grad_norm": 0.05138872191309929, "learning_rate": 0.00037901142620519967, "loss": 0.031, "num_input_tokens_seen": 163962320, "step": 76015 }, { "epoch": 12.401305057096248, "grad_norm": 0.013119551353156567, "learning_rate": 0.00037894236293437055, "loss": 0.05, "num_input_tokens_seen": 163972848, "step": 76020 }, { "epoch": 12.402120717781402, "grad_norm": 0.11848347634077072, "learning_rate": 0.00037887330211692783, "loss": 0.0069, "num_input_tokens_seen": 163983536, "step": 76025 }, { "epoch": 12.402936378466558, "grad_norm": 0.8513240218162537, "learning_rate": 0.00037880424375427154, "loss": 0.0295, "num_input_tokens_seen": 163994064, "step": 76030 }, { "epoch": 12.403752039151712, "grad_norm": 0.006382249761372805, "learning_rate": 0.00037873518784780074, "loss": 0.0119, "num_input_tokens_seen": 164003440, "step": 76035 }, { "epoch": 12.404567699836868, "grad_norm": 0.06222998723387718, "learning_rate": 0.0003786661343989154, "loss": 0.0046, "num_input_tokens_seen": 164013072, "step": 76040 }, { "epoch": 12.405383360522023, "grad_norm": 0.1773010641336441, "learning_rate": 0.00037859708340901455, "loss": 0.0322, "num_input_tokens_seen": 164023568, "step": 76045 }, { "epoch": 12.406199021207177, "grad_norm": 0.18970520794391632, "learning_rate": 0.00037852803487949804, "loss": 0.0101, "num_input_tokens_seen": 164034224, "step": 76050 }, { "epoch": 12.407014681892333, "grad_norm": 0.0214232187718153, "learning_rate": 0.0003784589888117648, "loss": 0.0078, "num_input_tokens_seen": 164045616, "step": 76055 }, { "epoch": 12.407830342577487, "grad_norm": 0.11980067938566208, "learning_rate": 0.0003783899452072146, "loss": 0.0096, "num_input_tokens_seen": 164056784, "step": 76060 }, { "epoch": 12.408646003262643, "grad_norm": 0.4310544431209564, "learning_rate": 0.00037832090406724617, "loss": 0.0808, "num_input_tokens_seen": 164067664, "step": 76065 }, { "epoch": 12.409461663947798, "grad_norm": 0.08365502953529358, "learning_rate": 0.0003782518653932592, "loss": 0.0089, "num_input_tokens_seen": 164079120, "step": 76070 }, { "epoch": 12.410277324632952, "grad_norm": 0.17131882905960083, "learning_rate": 0.00037818282918665236, "loss": 0.088, "num_input_tokens_seen": 164089552, "step": 76075 }, { "epoch": 12.411092985318108, "grad_norm": 0.002572376513853669, "learning_rate": 0.0003781137954488251, "loss": 0.0243, "num_input_tokens_seen": 164099760, "step": 76080 }, { "epoch": 12.411908646003262, "grad_norm": 0.004184895660728216, "learning_rate": 0.0003780447641811766, "loss": 0.0135, "num_input_tokens_seen": 164111024, "step": 76085 }, { "epoch": 12.412724306688418, "grad_norm": 0.026450302451848984, "learning_rate": 0.0003779757353851054, "loss": 0.0278, "num_input_tokens_seen": 164122800, "step": 76090 }, { "epoch": 12.413539967373573, "grad_norm": 0.08951037377119064, "learning_rate": 0.000377906709062011, "loss": 0.0574, "num_input_tokens_seen": 164133680, "step": 76095 }, { "epoch": 12.414355628058727, "grad_norm": 0.5194507837295532, "learning_rate": 0.00037783768521329177, "loss": 0.0089, "num_input_tokens_seen": 164144976, "step": 76100 }, { "epoch": 12.415171288743883, "grad_norm": 0.007628216873854399, "learning_rate": 0.0003777686638403469, "loss": 0.0233, "num_input_tokens_seen": 164156240, "step": 76105 }, { "epoch": 12.415986949429037, "grad_norm": 0.4641849994659424, "learning_rate": 0.0003776996449445752, "loss": 0.2303, "num_input_tokens_seen": 164166128, "step": 76110 }, { "epoch": 12.416802610114193, "grad_norm": 0.07054765522480011, "learning_rate": 0.0003776306285273753, "loss": 0.0212, "num_input_tokens_seen": 164176176, "step": 76115 }, { "epoch": 12.417618270799348, "grad_norm": 0.04297991469502449, "learning_rate": 0.0003775616145901459, "loss": 0.0079, "num_input_tokens_seen": 164186448, "step": 76120 }, { "epoch": 12.418433931484502, "grad_norm": 0.06314510107040405, "learning_rate": 0.0003774926031342858, "loss": 0.0442, "num_input_tokens_seen": 164196816, "step": 76125 }, { "epoch": 12.419249592169658, "grad_norm": 0.32981741428375244, "learning_rate": 0.0003774235941611934, "loss": 0.0473, "num_input_tokens_seen": 164209008, "step": 76130 }, { "epoch": 12.420065252854812, "grad_norm": 0.007955588400363922, "learning_rate": 0.0003773545876722675, "loss": 0.0529, "num_input_tokens_seen": 164220208, "step": 76135 }, { "epoch": 12.420880913539968, "grad_norm": 0.025882501155138016, "learning_rate": 0.00037728558366890633, "loss": 0.0088, "num_input_tokens_seen": 164231952, "step": 76140 }, { "epoch": 12.421696574225122, "grad_norm": 0.011530703864991665, "learning_rate": 0.00037721658215250864, "loss": 0.0608, "num_input_tokens_seen": 164244080, "step": 76145 }, { "epoch": 12.422512234910277, "grad_norm": 0.025699110701680183, "learning_rate": 0.00037714758312447247, "loss": 0.0205, "num_input_tokens_seen": 164254640, "step": 76150 }, { "epoch": 12.423327895595433, "grad_norm": 0.08161211758852005, "learning_rate": 0.0003770785865861966, "loss": 0.0072, "num_input_tokens_seen": 164266064, "step": 76155 }, { "epoch": 12.424143556280587, "grad_norm": 0.005202502943575382, "learning_rate": 0.0003770095925390789, "loss": 0.0392, "num_input_tokens_seen": 164277520, "step": 76160 }, { "epoch": 12.424959216965743, "grad_norm": 0.4939342141151428, "learning_rate": 0.000376940600984518, "loss": 0.015, "num_input_tokens_seen": 164289584, "step": 76165 }, { "epoch": 12.425774877650896, "grad_norm": 0.01871560886502266, "learning_rate": 0.0003768716119239118, "loss": 0.0049, "num_input_tokens_seen": 164300336, "step": 76170 }, { "epoch": 12.426590538336052, "grad_norm": 0.06212686374783516, "learning_rate": 0.0003768026253586587, "loss": 0.0394, "num_input_tokens_seen": 164310608, "step": 76175 }, { "epoch": 12.427406199021208, "grad_norm": 0.13981162011623383, "learning_rate": 0.00037673364129015653, "loss": 0.0077, "num_input_tokens_seen": 164322000, "step": 76180 }, { "epoch": 12.428221859706362, "grad_norm": 0.05254131183028221, "learning_rate": 0.0003766646597198037, "loss": 0.0078, "num_input_tokens_seen": 164334064, "step": 76185 }, { "epoch": 12.429037520391518, "grad_norm": 0.01648622751235962, "learning_rate": 0.0003765956806489978, "loss": 0.0042, "num_input_tokens_seen": 164344592, "step": 76190 }, { "epoch": 12.429853181076671, "grad_norm": 0.18884992599487305, "learning_rate": 0.00037652670407913697, "loss": 0.0089, "num_input_tokens_seen": 164354832, "step": 76195 }, { "epoch": 12.430668841761827, "grad_norm": 0.011018466204404831, "learning_rate": 0.00037645773001161937, "loss": 0.0047, "num_input_tokens_seen": 164364848, "step": 76200 }, { "epoch": 12.431484502446983, "grad_norm": 0.400225967168808, "learning_rate": 0.0003763887584478423, "loss": 0.0538, "num_input_tokens_seen": 164375536, "step": 76205 }, { "epoch": 12.432300163132137, "grad_norm": 0.05627979710698128, "learning_rate": 0.00037631978938920414, "loss": 0.0055, "num_input_tokens_seen": 164385456, "step": 76210 }, { "epoch": 12.433115823817293, "grad_norm": 0.003225720254704356, "learning_rate": 0.0003762508228371021, "loss": 0.0371, "num_input_tokens_seen": 164397104, "step": 76215 }, { "epoch": 12.433931484502446, "grad_norm": 0.6571810245513916, "learning_rate": 0.0003761818587929344, "loss": 0.0593, "num_input_tokens_seen": 164407632, "step": 76220 }, { "epoch": 12.434747145187602, "grad_norm": 0.7229149341583252, "learning_rate": 0.0003761128972580981, "loss": 0.0164, "num_input_tokens_seen": 164419472, "step": 76225 }, { "epoch": 12.435562805872756, "grad_norm": 0.02776920422911644, "learning_rate": 0.00037604393823399137, "loss": 0.0035, "num_input_tokens_seen": 164430416, "step": 76230 }, { "epoch": 12.436378466557912, "grad_norm": 0.001729966257698834, "learning_rate": 0.00037597498172201125, "loss": 0.0154, "num_input_tokens_seen": 164439920, "step": 76235 }, { "epoch": 12.437194127243067, "grad_norm": 0.02982546202838421, "learning_rate": 0.0003759060277235556, "loss": 0.106, "num_input_tokens_seen": 164451216, "step": 76240 }, { "epoch": 12.438009787928221, "grad_norm": 0.7161892056465149, "learning_rate": 0.00037583707624002163, "loss": 0.1002, "num_input_tokens_seen": 164461616, "step": 76245 }, { "epoch": 12.438825448613377, "grad_norm": 0.005013397894799709, "learning_rate": 0.00037576812727280683, "loss": 0.0063, "num_input_tokens_seen": 164472528, "step": 76250 }, { "epoch": 12.439641109298531, "grad_norm": 0.02634131722152233, "learning_rate": 0.0003756991808233086, "loss": 0.0078, "num_input_tokens_seen": 164482768, "step": 76255 }, { "epoch": 12.440456769983687, "grad_norm": 0.0071792700327932835, "learning_rate": 0.0003756302368929241, "loss": 0.0555, "num_input_tokens_seen": 164494160, "step": 76260 }, { "epoch": 12.441272430668842, "grad_norm": 0.02008189633488655, "learning_rate": 0.00037556129548305074, "loss": 0.0229, "num_input_tokens_seen": 164505872, "step": 76265 }, { "epoch": 12.442088091353996, "grad_norm": 0.5437281131744385, "learning_rate": 0.0003754923565950855, "loss": 0.0497, "num_input_tokens_seen": 164516816, "step": 76270 }, { "epoch": 12.442903752039152, "grad_norm": 0.025355523452162743, "learning_rate": 0.0003754234202304255, "loss": 0.0029, "num_input_tokens_seen": 164527536, "step": 76275 }, { "epoch": 12.443719412724306, "grad_norm": 0.027756275609135628, "learning_rate": 0.00037535448639046816, "loss": 0.0076, "num_input_tokens_seen": 164538256, "step": 76280 }, { "epoch": 12.444535073409462, "grad_norm": 0.4805568754673004, "learning_rate": 0.00037528555507661, "loss": 0.0301, "num_input_tokens_seen": 164550096, "step": 76285 }, { "epoch": 12.445350734094617, "grad_norm": 0.30364561080932617, "learning_rate": 0.00037521662629024855, "loss": 0.0278, "num_input_tokens_seen": 164560912, "step": 76290 }, { "epoch": 12.446166394779771, "grad_norm": 0.021758640184998512, "learning_rate": 0.00037514770003278027, "loss": 0.0068, "num_input_tokens_seen": 164571632, "step": 76295 }, { "epoch": 12.446982055464927, "grad_norm": 0.06099969148635864, "learning_rate": 0.00037507877630560215, "loss": 0.0359, "num_input_tokens_seen": 164582224, "step": 76300 }, { "epoch": 12.447797716150081, "grad_norm": 0.0025253682397305965, "learning_rate": 0.00037500985511011145, "loss": 0.0542, "num_input_tokens_seen": 164592912, "step": 76305 }, { "epoch": 12.448613376835237, "grad_norm": 0.046032220125198364, "learning_rate": 0.00037494093644770425, "loss": 0.0048, "num_input_tokens_seen": 164603664, "step": 76310 }, { "epoch": 12.449429037520392, "grad_norm": 1.776839017868042, "learning_rate": 0.000374872020319778, "loss": 0.2131, "num_input_tokens_seen": 164613936, "step": 76315 }, { "epoch": 12.450244698205546, "grad_norm": 0.12750113010406494, "learning_rate": 0.0003748031067277286, "loss": 0.0231, "num_input_tokens_seen": 164625392, "step": 76320 }, { "epoch": 12.451060358890702, "grad_norm": 0.4543822109699249, "learning_rate": 0.00037473419567295337, "loss": 0.0397, "num_input_tokens_seen": 164635984, "step": 76325 }, { "epoch": 12.451876019575856, "grad_norm": 0.00505569763481617, "learning_rate": 0.0003746652871568483, "loss": 0.0456, "num_input_tokens_seen": 164646032, "step": 76330 }, { "epoch": 12.452691680261012, "grad_norm": 0.04756057262420654, "learning_rate": 0.0003745963811808105, "loss": 0.005, "num_input_tokens_seen": 164656656, "step": 76335 }, { "epoch": 12.453507340946166, "grad_norm": 0.4717046320438385, "learning_rate": 0.00037452747774623584, "loss": 0.1066, "num_input_tokens_seen": 164666832, "step": 76340 }, { "epoch": 12.454323001631321, "grad_norm": 0.00734631298109889, "learning_rate": 0.0003744585768545212, "loss": 0.1629, "num_input_tokens_seen": 164677424, "step": 76345 }, { "epoch": 12.455138662316477, "grad_norm": 0.011023299768567085, "learning_rate": 0.00037438967850706264, "loss": 0.0378, "num_input_tokens_seen": 164687312, "step": 76350 }, { "epoch": 12.455954323001631, "grad_norm": 0.005511350464075804, "learning_rate": 0.0003743207827052567, "loss": 0.0077, "num_input_tokens_seen": 164698672, "step": 76355 }, { "epoch": 12.456769983686787, "grad_norm": 0.5876865983009338, "learning_rate": 0.0003742518894504994, "loss": 0.0386, "num_input_tokens_seen": 164709808, "step": 76360 }, { "epoch": 12.45758564437194, "grad_norm": 0.03303162753582001, "learning_rate": 0.00037418299874418726, "loss": 0.0039, "num_input_tokens_seen": 164722096, "step": 76365 }, { "epoch": 12.458401305057096, "grad_norm": 0.05500126630067825, "learning_rate": 0.00037411411058771606, "loss": 0.0706, "num_input_tokens_seen": 164732592, "step": 76370 }, { "epoch": 12.459216965742252, "grad_norm": 0.1445506364107132, "learning_rate": 0.00037404522498248234, "loss": 0.005, "num_input_tokens_seen": 164743472, "step": 76375 }, { "epoch": 12.460032626427406, "grad_norm": 0.4745340943336487, "learning_rate": 0.0003739763419298817, "loss": 0.0939, "num_input_tokens_seen": 164754256, "step": 76380 }, { "epoch": 12.460848287112562, "grad_norm": 0.8473243713378906, "learning_rate": 0.0003739074614313105, "loss": 0.0241, "num_input_tokens_seen": 164764752, "step": 76385 }, { "epoch": 12.461663947797716, "grad_norm": 0.019948706030845642, "learning_rate": 0.00037383858348816445, "loss": 0.1405, "num_input_tokens_seen": 164775920, "step": 76390 }, { "epoch": 12.462479608482871, "grad_norm": 0.00867717619985342, "learning_rate": 0.0003737697081018396, "loss": 0.2204, "num_input_tokens_seen": 164787408, "step": 76395 }, { "epoch": 12.463295269168025, "grad_norm": 0.012139490805566311, "learning_rate": 0.0003737008352737318, "loss": 0.0933, "num_input_tokens_seen": 164799408, "step": 76400 }, { "epoch": 12.464110929853181, "grad_norm": 0.010460463352501392, "learning_rate": 0.0003736319650052366, "loss": 0.0361, "num_input_tokens_seen": 164809520, "step": 76405 }, { "epoch": 12.464926590538337, "grad_norm": 0.0345287024974823, "learning_rate": 0.0003735630972977502, "loss": 0.0069, "num_input_tokens_seen": 164819376, "step": 76410 }, { "epoch": 12.46574225122349, "grad_norm": 0.006350891198962927, "learning_rate": 0.00037349423215266784, "loss": 0.0051, "num_input_tokens_seen": 164830064, "step": 76415 }, { "epoch": 12.466557911908646, "grad_norm": 0.4882001578807831, "learning_rate": 0.0003734253695713854, "loss": 0.1643, "num_input_tokens_seen": 164839568, "step": 76420 }, { "epoch": 12.4673735725938, "grad_norm": 0.004277059342712164, "learning_rate": 0.0003733565095552985, "loss": 0.0117, "num_input_tokens_seen": 164850992, "step": 76425 }, { "epoch": 12.468189233278956, "grad_norm": 0.06611525267362595, "learning_rate": 0.0003732876521058025, "loss": 0.1982, "num_input_tokens_seen": 164860976, "step": 76430 }, { "epoch": 12.469004893964112, "grad_norm": 0.02493908442556858, "learning_rate": 0.000373218797224293, "loss": 0.0108, "num_input_tokens_seen": 164872368, "step": 76435 }, { "epoch": 12.469820554649266, "grad_norm": 0.017577778548002243, "learning_rate": 0.00037314994491216547, "loss": 0.0156, "num_input_tokens_seen": 164883408, "step": 76440 }, { "epoch": 12.470636215334421, "grad_norm": 0.005126205738633871, "learning_rate": 0.00037308109517081506, "loss": 0.118, "num_input_tokens_seen": 164894256, "step": 76445 }, { "epoch": 12.471451876019575, "grad_norm": 0.02716851606965065, "learning_rate": 0.0003730122480016375, "loss": 0.1095, "num_input_tokens_seen": 164905200, "step": 76450 }, { "epoch": 12.47226753670473, "grad_norm": 0.1012546494603157, "learning_rate": 0.00037294340340602764, "loss": 0.0906, "num_input_tokens_seen": 164915920, "step": 76455 }, { "epoch": 12.473083197389887, "grad_norm": 0.10041335225105286, "learning_rate": 0.0003728745613853811, "loss": 0.033, "num_input_tokens_seen": 164927024, "step": 76460 }, { "epoch": 12.47389885807504, "grad_norm": 0.16399502754211426, "learning_rate": 0.00037280572194109255, "loss": 0.081, "num_input_tokens_seen": 164936240, "step": 76465 }, { "epoch": 12.474714518760196, "grad_norm": 0.32892316579818726, "learning_rate": 0.00037273688507455773, "loss": 0.0901, "num_input_tokens_seen": 164947088, "step": 76470 }, { "epoch": 12.47553017944535, "grad_norm": 0.9605597853660583, "learning_rate": 0.00037266805078717106, "loss": 0.1031, "num_input_tokens_seen": 164958064, "step": 76475 }, { "epoch": 12.476345840130506, "grad_norm": 0.09950543195009232, "learning_rate": 0.00037259921908032814, "loss": 0.0361, "num_input_tokens_seen": 164969360, "step": 76480 }, { "epoch": 12.477161500815662, "grad_norm": 0.023138023912906647, "learning_rate": 0.0003725303899554234, "loss": 0.1518, "num_input_tokens_seen": 164980400, "step": 76485 }, { "epoch": 12.477977161500815, "grad_norm": 0.0038420886266976595, "learning_rate": 0.00037246156341385234, "loss": 0.019, "num_input_tokens_seen": 164991120, "step": 76490 }, { "epoch": 12.478792822185971, "grad_norm": 0.017539070919156075, "learning_rate": 0.0003723927394570092, "loss": 0.0832, "num_input_tokens_seen": 165001584, "step": 76495 }, { "epoch": 12.479608482871125, "grad_norm": 0.013591896742582321, "learning_rate": 0.0003723239180862893, "loss": 0.0196, "num_input_tokens_seen": 165012944, "step": 76500 }, { "epoch": 12.48042414355628, "grad_norm": 0.07966836541891098, "learning_rate": 0.00037225509930308696, "loss": 0.1047, "num_input_tokens_seen": 165023504, "step": 76505 }, { "epoch": 12.481239804241435, "grad_norm": 0.007599941920489073, "learning_rate": 0.0003721862831087971, "loss": 0.07, "num_input_tokens_seen": 165034704, "step": 76510 }, { "epoch": 12.48205546492659, "grad_norm": 0.4603855013847351, "learning_rate": 0.0003721174695048145, "loss": 0.0225, "num_input_tokens_seen": 165043728, "step": 76515 }, { "epoch": 12.482871125611746, "grad_norm": 0.04861701652407646, "learning_rate": 0.0003720486584925335, "loss": 0.0247, "num_input_tokens_seen": 165054160, "step": 76520 }, { "epoch": 12.4836867862969, "grad_norm": 0.04126899689435959, "learning_rate": 0.0003719798500733489, "loss": 0.0095, "num_input_tokens_seen": 165065168, "step": 76525 }, { "epoch": 12.484502446982056, "grad_norm": 0.027260450646281242, "learning_rate": 0.00037191104424865487, "loss": 0.0823, "num_input_tokens_seen": 165076368, "step": 76530 }, { "epoch": 12.48531810766721, "grad_norm": 0.09145528078079224, "learning_rate": 0.0003718422410198462, "loss": 0.0108, "num_input_tokens_seen": 165087184, "step": 76535 }, { "epoch": 12.486133768352365, "grad_norm": 0.4693119525909424, "learning_rate": 0.0003717734403883169, "loss": 0.0557, "num_input_tokens_seen": 165099152, "step": 76540 }, { "epoch": 12.486949429037521, "grad_norm": 0.010703105479478836, "learning_rate": 0.0003717046423554617, "loss": 0.0067, "num_input_tokens_seen": 165109648, "step": 76545 }, { "epoch": 12.487765089722675, "grad_norm": 0.5575474500656128, "learning_rate": 0.0003716358469226745, "loss": 0.0839, "num_input_tokens_seen": 165119792, "step": 76550 }, { "epoch": 12.48858075040783, "grad_norm": 0.32954174280166626, "learning_rate": 0.0003715670540913499, "loss": 0.0152, "num_input_tokens_seen": 165131440, "step": 76555 }, { "epoch": 12.489396411092985, "grad_norm": 0.010479763150215149, "learning_rate": 0.0003714982638628817, "loss": 0.0235, "num_input_tokens_seen": 165141936, "step": 76560 }, { "epoch": 12.49021207177814, "grad_norm": 0.2623178958892822, "learning_rate": 0.00037142947623866417, "loss": 0.0627, "num_input_tokens_seen": 165152432, "step": 76565 }, { "epoch": 12.491027732463296, "grad_norm": 0.017723675817251205, "learning_rate": 0.0003713606912200915, "loss": 0.0507, "num_input_tokens_seen": 165162960, "step": 76570 }, { "epoch": 12.49184339314845, "grad_norm": 0.3112884759902954, "learning_rate": 0.00037129190880855764, "loss": 0.0272, "num_input_tokens_seen": 165174672, "step": 76575 }, { "epoch": 12.492659053833606, "grad_norm": 0.16416187584400177, "learning_rate": 0.00037122312900545644, "loss": 0.0381, "num_input_tokens_seen": 165184464, "step": 76580 }, { "epoch": 12.49347471451876, "grad_norm": 0.02547912299633026, "learning_rate": 0.000371154351812182, "loss": 0.0071, "num_input_tokens_seen": 165195408, "step": 76585 }, { "epoch": 12.494290375203915, "grad_norm": 0.44394010305404663, "learning_rate": 0.0003710855772301279, "loss": 0.0313, "num_input_tokens_seen": 165206000, "step": 76590 }, { "epoch": 12.49510603588907, "grad_norm": 0.7794050574302673, "learning_rate": 0.00037101680526068837, "loss": 0.0426, "num_input_tokens_seen": 165216496, "step": 76595 }, { "epoch": 12.495921696574225, "grad_norm": 0.0032726754434406757, "learning_rate": 0.0003709480359052566, "loss": 0.0133, "num_input_tokens_seen": 165227536, "step": 76600 }, { "epoch": 12.49673735725938, "grad_norm": 0.012527940794825554, "learning_rate": 0.0003708792691652269, "loss": 0.0304, "num_input_tokens_seen": 165238224, "step": 76605 }, { "epoch": 12.497553017944535, "grad_norm": 0.0065552485175430775, "learning_rate": 0.00037081050504199245, "loss": 0.0233, "num_input_tokens_seen": 165249232, "step": 76610 }, { "epoch": 12.49836867862969, "grad_norm": 0.001726039918139577, "learning_rate": 0.0003707417435369469, "loss": 0.0038, "num_input_tokens_seen": 165259600, "step": 76615 }, { "epoch": 12.499184339314844, "grad_norm": 0.009959772229194641, "learning_rate": 0.00037067298465148416, "loss": 0.2161, "num_input_tokens_seen": 165269072, "step": 76620 }, { "epoch": 12.5, "grad_norm": 1.1364222764968872, "learning_rate": 0.00037060422838699716, "loss": 0.0413, "num_input_tokens_seen": 165279024, "step": 76625 }, { "epoch": 12.500815660685156, "grad_norm": 0.006499846000224352, "learning_rate": 0.0003705354747448799, "loss": 0.0215, "num_input_tokens_seen": 165289008, "step": 76630 }, { "epoch": 12.50163132137031, "grad_norm": 0.11728450655937195, "learning_rate": 0.00037046672372652523, "loss": 0.0444, "num_input_tokens_seen": 165300272, "step": 76635 }, { "epoch": 12.502446982055465, "grad_norm": 0.043675053864717484, "learning_rate": 0.00037039797533332697, "loss": 0.0636, "num_input_tokens_seen": 165310736, "step": 76640 }, { "epoch": 12.50326264274062, "grad_norm": 0.015301533974707127, "learning_rate": 0.000370329229566678, "loss": 0.021, "num_input_tokens_seen": 165321200, "step": 76645 }, { "epoch": 12.504078303425775, "grad_norm": 0.05058901011943817, "learning_rate": 0.0003702604864279718, "loss": 0.0194, "num_input_tokens_seen": 165331824, "step": 76650 }, { "epoch": 12.50489396411093, "grad_norm": 0.17519426345825195, "learning_rate": 0.00037019174591860127, "loss": 0.0112, "num_input_tokens_seen": 165342992, "step": 76655 }, { "epoch": 12.505709624796085, "grad_norm": 0.009115289896726608, "learning_rate": 0.0003701230080399599, "loss": 0.0254, "num_input_tokens_seen": 165353168, "step": 76660 }, { "epoch": 12.50652528548124, "grad_norm": 0.04421913996338844, "learning_rate": 0.00037005427279344027, "loss": 0.0497, "num_input_tokens_seen": 165364592, "step": 76665 }, { "epoch": 12.507340946166394, "grad_norm": 0.5203434824943542, "learning_rate": 0.0003699855401804359, "loss": 0.0554, "num_input_tokens_seen": 165376272, "step": 76670 }, { "epoch": 12.50815660685155, "grad_norm": 0.02131824940443039, "learning_rate": 0.0003699168102023393, "loss": 0.008, "num_input_tokens_seen": 165387920, "step": 76675 }, { "epoch": 12.508972267536706, "grad_norm": 0.25594562292099, "learning_rate": 0.0003698480828605437, "loss": 0.0149, "num_input_tokens_seen": 165399344, "step": 76680 }, { "epoch": 12.50978792822186, "grad_norm": 0.09724727272987366, "learning_rate": 0.0003697793581564417, "loss": 0.0121, "num_input_tokens_seen": 165410576, "step": 76685 }, { "epoch": 12.510603588907015, "grad_norm": 0.36107999086380005, "learning_rate": 0.00036971063609142637, "loss": 0.0219, "num_input_tokens_seen": 165421360, "step": 76690 }, { "epoch": 12.51141924959217, "grad_norm": 0.173379048705101, "learning_rate": 0.00036964191666689005, "loss": 0.0118, "num_input_tokens_seen": 165431888, "step": 76695 }, { "epoch": 12.512234910277325, "grad_norm": 0.02285286970436573, "learning_rate": 0.00036957319988422586, "loss": 0.0581, "num_input_tokens_seen": 165442832, "step": 76700 }, { "epoch": 12.513050570962479, "grad_norm": 0.014950202777981758, "learning_rate": 0.0003695044857448261, "loss": 0.0443, "num_input_tokens_seen": 165452144, "step": 76705 }, { "epoch": 12.513866231647635, "grad_norm": 0.012982943095266819, "learning_rate": 0.0003694357742500835, "loss": 0.1431, "num_input_tokens_seen": 165462672, "step": 76710 }, { "epoch": 12.51468189233279, "grad_norm": 0.1406213790178299, "learning_rate": 0.00036936706540139063, "loss": 0.0073, "num_input_tokens_seen": 165473648, "step": 76715 }, { "epoch": 12.515497553017944, "grad_norm": 0.019735757261514664, "learning_rate": 0.0003692983592001398, "loss": 0.0156, "num_input_tokens_seen": 165485040, "step": 76720 }, { "epoch": 12.5163132137031, "grad_norm": 0.03177529573440552, "learning_rate": 0.0003692296556477237, "loss": 0.2381, "num_input_tokens_seen": 165496016, "step": 76725 }, { "epoch": 12.517128874388254, "grad_norm": 0.005518175661563873, "learning_rate": 0.0003691609547455343, "loss": 0.0045, "num_input_tokens_seen": 165507024, "step": 76730 }, { "epoch": 12.51794453507341, "grad_norm": 0.21470975875854492, "learning_rate": 0.0003690922564949643, "loss": 0.0234, "num_input_tokens_seen": 165517008, "step": 76735 }, { "epoch": 12.518760195758565, "grad_norm": 0.007764747831970453, "learning_rate": 0.0003690235608974057, "loss": 0.0024, "num_input_tokens_seen": 165528240, "step": 76740 }, { "epoch": 12.51957585644372, "grad_norm": 0.019158300012350082, "learning_rate": 0.0003689548679542508, "loss": 0.0041, "num_input_tokens_seen": 165540144, "step": 76745 }, { "epoch": 12.520391517128875, "grad_norm": 0.037602100521326065, "learning_rate": 0.0003688861776668918, "loss": 0.0117, "num_input_tokens_seen": 165551440, "step": 76750 }, { "epoch": 12.521207177814029, "grad_norm": 0.47875311970710754, "learning_rate": 0.0003688174900367207, "loss": 0.1967, "num_input_tokens_seen": 165562448, "step": 76755 }, { "epoch": 12.522022838499185, "grad_norm": 0.052018903195858, "learning_rate": 0.00036874880506512954, "loss": 0.0128, "num_input_tokens_seen": 165573872, "step": 76760 }, { "epoch": 12.522838499184338, "grad_norm": 0.027096256613731384, "learning_rate": 0.0003686801227535105, "loss": 0.0778, "num_input_tokens_seen": 165584496, "step": 76765 }, { "epoch": 12.523654159869494, "grad_norm": 0.014308972284197807, "learning_rate": 0.00036861144310325523, "loss": 0.0203, "num_input_tokens_seen": 165595056, "step": 76770 }, { "epoch": 12.52446982055465, "grad_norm": 0.5601135492324829, "learning_rate": 0.0003685427661157559, "loss": 0.0669, "num_input_tokens_seen": 165604784, "step": 76775 }, { "epoch": 12.525285481239804, "grad_norm": 0.01837463304400444, "learning_rate": 0.00036847409179240396, "loss": 0.0795, "num_input_tokens_seen": 165615600, "step": 76780 }, { "epoch": 12.52610114192496, "grad_norm": 0.028383338823914528, "learning_rate": 0.00036840542013459154, "loss": 0.102, "num_input_tokens_seen": 165624656, "step": 76785 }, { "epoch": 12.526916802610113, "grad_norm": 0.059089913964271545, "learning_rate": 0.00036833675114371014, "loss": 0.0043, "num_input_tokens_seen": 165635760, "step": 76790 }, { "epoch": 12.52773246329527, "grad_norm": 0.30823245644569397, "learning_rate": 0.00036826808482115167, "loss": 0.0276, "num_input_tokens_seen": 165647056, "step": 76795 }, { "epoch": 12.528548123980425, "grad_norm": 1.227827548980713, "learning_rate": 0.00036819942116830736, "loss": 0.0588, "num_input_tokens_seen": 165657712, "step": 76800 }, { "epoch": 12.529363784665579, "grad_norm": 0.014510038308799267, "learning_rate": 0.0003681307601865692, "loss": 0.0076, "num_input_tokens_seen": 165668240, "step": 76805 }, { "epoch": 12.530179445350734, "grad_norm": 0.120442233979702, "learning_rate": 0.00036806210187732824, "loss": 0.0153, "num_input_tokens_seen": 165678384, "step": 76810 }, { "epoch": 12.530995106035888, "grad_norm": 1.3344663381576538, "learning_rate": 0.00036799344624197637, "loss": 0.0929, "num_input_tokens_seen": 165687952, "step": 76815 }, { "epoch": 12.531810766721044, "grad_norm": 0.0637737587094307, "learning_rate": 0.00036792479328190457, "loss": 0.0065, "num_input_tokens_seen": 165698288, "step": 76820 }, { "epoch": 12.5326264274062, "grad_norm": 0.0310041606426239, "learning_rate": 0.0003678561429985044, "loss": 0.0257, "num_input_tokens_seen": 165709488, "step": 76825 }, { "epoch": 12.533442088091354, "grad_norm": 0.02272508665919304, "learning_rate": 0.00036778749539316736, "loss": 0.0244, "num_input_tokens_seen": 165719824, "step": 76830 }, { "epoch": 12.53425774877651, "grad_norm": 0.08114591985940933, "learning_rate": 0.00036771885046728417, "loss": 0.0763, "num_input_tokens_seen": 165731344, "step": 76835 }, { "epoch": 12.535073409461663, "grad_norm": 0.14975933730602264, "learning_rate": 0.00036765020822224654, "loss": 0.0226, "num_input_tokens_seen": 165741616, "step": 76840 }, { "epoch": 12.535889070146819, "grad_norm": 0.0034491028636693954, "learning_rate": 0.0003675815686594451, "loss": 0.0458, "num_input_tokens_seen": 165753072, "step": 76845 }, { "epoch": 12.536704730831975, "grad_norm": 0.0028658737428486347, "learning_rate": 0.00036751293178027144, "loss": 0.0061, "num_input_tokens_seen": 165764016, "step": 76850 }, { "epoch": 12.537520391517129, "grad_norm": 0.2617292106151581, "learning_rate": 0.000367444297586116, "loss": 0.0191, "num_input_tokens_seen": 165775056, "step": 76855 }, { "epoch": 12.538336052202284, "grad_norm": 0.3640655279159546, "learning_rate": 0.0003673756660783703, "loss": 0.0272, "num_input_tokens_seen": 165786416, "step": 76860 }, { "epoch": 12.539151712887438, "grad_norm": 0.023978717625141144, "learning_rate": 0.00036730703725842474, "loss": 0.0081, "num_input_tokens_seen": 165797616, "step": 76865 }, { "epoch": 12.539967373572594, "grad_norm": 0.03701100870966911, "learning_rate": 0.0003672384111276705, "loss": 0.0049, "num_input_tokens_seen": 165808432, "step": 76870 }, { "epoch": 12.540783034257748, "grad_norm": 0.2598564922809601, "learning_rate": 0.0003671697876874982, "loss": 0.0103, "num_input_tokens_seen": 165818288, "step": 76875 }, { "epoch": 12.541598694942904, "grad_norm": 0.0011021639220416546, "learning_rate": 0.00036710116693929875, "loss": 0.0036, "num_input_tokens_seen": 165829616, "step": 76880 }, { "epoch": 12.54241435562806, "grad_norm": 0.023621659725904465, "learning_rate": 0.0003670325488844627, "loss": 0.0275, "num_input_tokens_seen": 165838576, "step": 76885 }, { "epoch": 12.543230016313213, "grad_norm": 0.002423632889986038, "learning_rate": 0.00036696393352438083, "loss": 0.0192, "num_input_tokens_seen": 165848720, "step": 76890 }, { "epoch": 12.544045676998369, "grad_norm": 0.00472877686843276, "learning_rate": 0.0003668953208604435, "loss": 0.0983, "num_input_tokens_seen": 165859440, "step": 76895 }, { "epoch": 12.544861337683523, "grad_norm": 0.7228971123695374, "learning_rate": 0.0003668267108940414, "loss": 0.0603, "num_input_tokens_seen": 165869072, "step": 76900 }, { "epoch": 12.545676998368679, "grad_norm": 0.06407814472913742, "learning_rate": 0.00036675810362656486, "loss": 0.0407, "num_input_tokens_seen": 165881360, "step": 76905 }, { "epoch": 12.546492659053834, "grad_norm": 0.03607793152332306, "learning_rate": 0.00036668949905940455, "loss": 0.0118, "num_input_tokens_seen": 165890480, "step": 76910 }, { "epoch": 12.547308319738988, "grad_norm": 0.012747437693178654, "learning_rate": 0.0003666208971939505, "loss": 0.0083, "num_input_tokens_seen": 165900624, "step": 76915 }, { "epoch": 12.548123980424144, "grad_norm": 0.048963721841573715, "learning_rate": 0.0003665522980315933, "loss": 0.2053, "num_input_tokens_seen": 165910704, "step": 76920 }, { "epoch": 12.548939641109298, "grad_norm": 0.10132893174886703, "learning_rate": 0.0003664837015737229, "loss": 0.1154, "num_input_tokens_seen": 165920656, "step": 76925 }, { "epoch": 12.549755301794454, "grad_norm": 0.002097984543070197, "learning_rate": 0.00036641510782172993, "loss": 0.0049, "num_input_tokens_seen": 165928944, "step": 76930 }, { "epoch": 12.550570962479608, "grad_norm": 0.14129255712032318, "learning_rate": 0.0003663465167770039, "loss": 0.0134, "num_input_tokens_seen": 165940176, "step": 76935 }, { "epoch": 12.551386623164763, "grad_norm": 0.021482331678271294, "learning_rate": 0.00036627792844093544, "loss": 0.0084, "num_input_tokens_seen": 165949712, "step": 76940 }, { "epoch": 12.552202283849919, "grad_norm": 0.006481764372438192, "learning_rate": 0.0003662093428149145, "loss": 0.0688, "num_input_tokens_seen": 165960848, "step": 76945 }, { "epoch": 12.553017944535073, "grad_norm": 0.36622703075408936, "learning_rate": 0.0003661407599003308, "loss": 0.0257, "num_input_tokens_seen": 165972336, "step": 76950 }, { "epoch": 12.553833605220229, "grad_norm": 0.007416302338242531, "learning_rate": 0.0003660721796985746, "loss": 0.0055, "num_input_tokens_seen": 165982960, "step": 76955 }, { "epoch": 12.554649265905383, "grad_norm": 0.037546731531620026, "learning_rate": 0.0003660036022110353, "loss": 0.0157, "num_input_tokens_seen": 165992624, "step": 76960 }, { "epoch": 12.555464926590538, "grad_norm": 0.00819827988743782, "learning_rate": 0.00036593502743910336, "loss": 0.0046, "num_input_tokens_seen": 166004400, "step": 76965 }, { "epoch": 12.556280587275694, "grad_norm": 0.1541050672531128, "learning_rate": 0.00036586645538416783, "loss": 0.0077, "num_input_tokens_seen": 166014832, "step": 76970 }, { "epoch": 12.557096247960848, "grad_norm": 0.00421961210668087, "learning_rate": 0.00036579788604761896, "loss": 0.0374, "num_input_tokens_seen": 166025520, "step": 76975 }, { "epoch": 12.557911908646004, "grad_norm": 0.005069664679467678, "learning_rate": 0.000365729319430846, "loss": 0.0064, "num_input_tokens_seen": 166036720, "step": 76980 }, { "epoch": 12.558727569331158, "grad_norm": 0.14786280691623688, "learning_rate": 0.00036566075553523894, "loss": 0.1576, "num_input_tokens_seen": 166047664, "step": 76985 }, { "epoch": 12.559543230016313, "grad_norm": 0.02487325109541416, "learning_rate": 0.0003655921943621868, "loss": 0.0194, "num_input_tokens_seen": 166058736, "step": 76990 }, { "epoch": 12.560358890701469, "grad_norm": 0.07015492767095566, "learning_rate": 0.0003655236359130796, "loss": 0.0501, "num_input_tokens_seen": 166069584, "step": 76995 }, { "epoch": 12.561174551386623, "grad_norm": 0.017592210322618484, "learning_rate": 0.0003654550801893063, "loss": 0.043, "num_input_tokens_seen": 166079408, "step": 77000 }, { "epoch": 12.561990212071779, "grad_norm": 0.179296612739563, "learning_rate": 0.00036538652719225674, "loss": 0.0078, "num_input_tokens_seen": 166090256, "step": 77005 }, { "epoch": 12.562805872756933, "grad_norm": 0.6274123191833496, "learning_rate": 0.0003653179769233197, "loss": 0.0169, "num_input_tokens_seen": 166101328, "step": 77010 }, { "epoch": 12.563621533442088, "grad_norm": 0.16428276896476746, "learning_rate": 0.00036524942938388495, "loss": 0.0167, "num_input_tokens_seen": 166111952, "step": 77015 }, { "epoch": 12.564437194127244, "grad_norm": 0.3482365608215332, "learning_rate": 0.00036518088457534125, "loss": 0.0773, "num_input_tokens_seen": 166123184, "step": 77020 }, { "epoch": 12.565252854812398, "grad_norm": 0.150766059756279, "learning_rate": 0.0003651123424990781, "loss": 0.0218, "num_input_tokens_seen": 166135248, "step": 77025 }, { "epoch": 12.566068515497554, "grad_norm": 0.6309272050857544, "learning_rate": 0.00036504380315648447, "loss": 0.0743, "num_input_tokens_seen": 166145360, "step": 77030 }, { "epoch": 12.566884176182707, "grad_norm": 0.005227636080235243, "learning_rate": 0.0003649752665489492, "loss": 0.0042, "num_input_tokens_seen": 166156720, "step": 77035 }, { "epoch": 12.567699836867863, "grad_norm": 0.007594875991344452, "learning_rate": 0.00036490673267786154, "loss": 0.0013, "num_input_tokens_seen": 166166704, "step": 77040 }, { "epoch": 12.568515497553017, "grad_norm": 0.19046859443187714, "learning_rate": 0.0003648382015446103, "loss": 0.015, "num_input_tokens_seen": 166177904, "step": 77045 }, { "epoch": 12.569331158238173, "grad_norm": 0.5553524494171143, "learning_rate": 0.0003647696731505844, "loss": 0.0393, "num_input_tokens_seen": 166188528, "step": 77050 }, { "epoch": 12.570146818923329, "grad_norm": 0.2943854331970215, "learning_rate": 0.00036470114749717267, "loss": 0.0305, "num_input_tokens_seen": 166200016, "step": 77055 }, { "epoch": 12.570962479608482, "grad_norm": 0.360281765460968, "learning_rate": 0.00036463262458576374, "loss": 0.085, "num_input_tokens_seen": 166210160, "step": 77060 }, { "epoch": 12.571778140293638, "grad_norm": 0.012036622501909733, "learning_rate": 0.0003645641044177465, "loss": 0.0176, "num_input_tokens_seen": 166220880, "step": 77065 }, { "epoch": 12.572593800978792, "grad_norm": 0.0007670011837035418, "learning_rate": 0.00036449558699450937, "loss": 0.0092, "num_input_tokens_seen": 166232016, "step": 77070 }, { "epoch": 12.573409461663948, "grad_norm": 0.004217095207422972, "learning_rate": 0.0003644270723174411, "loss": 0.0192, "num_input_tokens_seen": 166244880, "step": 77075 }, { "epoch": 12.574225122349104, "grad_norm": 0.004208546597510576, "learning_rate": 0.0003643585603879303, "loss": 0.0034, "num_input_tokens_seen": 166256688, "step": 77080 }, { "epoch": 12.575040783034257, "grad_norm": 0.004553171806037426, "learning_rate": 0.0003642900512073652, "loss": 0.0146, "num_input_tokens_seen": 166267472, "step": 77085 }, { "epoch": 12.575856443719413, "grad_norm": 0.02926795929670334, "learning_rate": 0.00036422154477713456, "loss": 0.1151, "num_input_tokens_seen": 166278704, "step": 77090 }, { "epoch": 12.576672104404567, "grad_norm": 0.002491420367732644, "learning_rate": 0.00036415304109862633, "loss": 0.0192, "num_input_tokens_seen": 166288784, "step": 77095 }, { "epoch": 12.577487765089723, "grad_norm": 0.0018360574031248689, "learning_rate": 0.0003640845401732293, "loss": 0.0075, "num_input_tokens_seen": 166299888, "step": 77100 }, { "epoch": 12.578303425774878, "grad_norm": 0.010764651000499725, "learning_rate": 0.0003640160420023313, "loss": 0.0182, "num_input_tokens_seen": 166309648, "step": 77105 }, { "epoch": 12.579119086460032, "grad_norm": 0.0739232525229454, "learning_rate": 0.00036394754658732086, "loss": 0.0189, "num_input_tokens_seen": 166321616, "step": 77110 }, { "epoch": 12.579934747145188, "grad_norm": 0.7674230933189392, "learning_rate": 0.00036387905392958574, "loss": 0.1367, "num_input_tokens_seen": 166332112, "step": 77115 }, { "epoch": 12.580750407830342, "grad_norm": 0.005439311265945435, "learning_rate": 0.0003638105640305146, "loss": 0.0269, "num_input_tokens_seen": 166342480, "step": 77120 }, { "epoch": 12.581566068515498, "grad_norm": 0.0017219950677827, "learning_rate": 0.00036374207689149487, "loss": 0.0018, "num_input_tokens_seen": 166353040, "step": 77125 }, { "epoch": 12.582381729200652, "grad_norm": 0.7359156012535095, "learning_rate": 0.00036367359251391506, "loss": 0.0729, "num_input_tokens_seen": 166363664, "step": 77130 }, { "epoch": 12.583197389885807, "grad_norm": 0.5555078983306885, "learning_rate": 0.0003636051108991626, "loss": 0.0403, "num_input_tokens_seen": 166374480, "step": 77135 }, { "epoch": 12.584013050570963, "grad_norm": 0.5471969246864319, "learning_rate": 0.0003635366320486258, "loss": 0.0217, "num_input_tokens_seen": 166385936, "step": 77140 }, { "epoch": 12.584828711256117, "grad_norm": 0.043274782598018646, "learning_rate": 0.0003634681559636921, "loss": 0.0087, "num_input_tokens_seen": 166397264, "step": 77145 }, { "epoch": 12.585644371941273, "grad_norm": 0.025361627340316772, "learning_rate": 0.0003633996826457494, "loss": 0.0827, "num_input_tokens_seen": 166408176, "step": 77150 }, { "epoch": 12.586460032626427, "grad_norm": 0.006511182989925146, "learning_rate": 0.0003633312120961856, "loss": 0.03, "num_input_tokens_seen": 166417840, "step": 77155 }, { "epoch": 12.587275693311582, "grad_norm": 0.144336998462677, "learning_rate": 0.000363262744316388, "loss": 0.1232, "num_input_tokens_seen": 166428272, "step": 77160 }, { "epoch": 12.588091353996738, "grad_norm": 0.009770065546035767, "learning_rate": 0.00036319427930774453, "loss": 0.0026, "num_input_tokens_seen": 166439600, "step": 77165 }, { "epoch": 12.588907014681892, "grad_norm": 0.018513932824134827, "learning_rate": 0.0003631258170716423, "loss": 0.0506, "num_input_tokens_seen": 166451248, "step": 77170 }, { "epoch": 12.589722675367048, "grad_norm": 0.07371993362903595, "learning_rate": 0.0003630573576094693, "loss": 0.0072, "num_input_tokens_seen": 166462896, "step": 77175 }, { "epoch": 12.590538336052202, "grad_norm": 0.03559550270438194, "learning_rate": 0.0003629889009226124, "loss": 0.0173, "num_input_tokens_seen": 166472432, "step": 77180 }, { "epoch": 12.591353996737357, "grad_norm": 0.00308591197244823, "learning_rate": 0.0003629204470124595, "loss": 0.0384, "num_input_tokens_seen": 166484048, "step": 77185 }, { "epoch": 12.592169657422513, "grad_norm": 0.02178574725985527, "learning_rate": 0.00036285199588039743, "loss": 0.0136, "num_input_tokens_seen": 166495312, "step": 77190 }, { "epoch": 12.592985318107667, "grad_norm": 0.009114828892052174, "learning_rate": 0.0003627835475278137, "loss": 0.0206, "num_input_tokens_seen": 166506224, "step": 77195 }, { "epoch": 12.593800978792823, "grad_norm": 0.007106289733201265, "learning_rate": 0.0003627151019560955, "loss": 0.0036, "num_input_tokens_seen": 166517360, "step": 77200 }, { "epoch": 12.594616639477977, "grad_norm": 0.014718030579388142, "learning_rate": 0.00036264665916662986, "loss": 0.0042, "num_input_tokens_seen": 166528880, "step": 77205 }, { "epoch": 12.595432300163132, "grad_norm": 0.006239260546863079, "learning_rate": 0.000362578219160804, "loss": 0.0208, "num_input_tokens_seen": 166539024, "step": 77210 }, { "epoch": 12.596247960848288, "grad_norm": 0.006533618550747633, "learning_rate": 0.0003625097819400048, "loss": 0.0494, "num_input_tokens_seen": 166548816, "step": 77215 }, { "epoch": 12.597063621533442, "grad_norm": 0.011455407366156578, "learning_rate": 0.0003624413475056192, "loss": 0.0039, "num_input_tokens_seen": 166559280, "step": 77220 }, { "epoch": 12.597879282218598, "grad_norm": 0.2145671397447586, "learning_rate": 0.00036237291585903436, "loss": 0.0854, "num_input_tokens_seen": 166569744, "step": 77225 }, { "epoch": 12.598694942903752, "grad_norm": 0.007200933992862701, "learning_rate": 0.0003623044870016368, "loss": 0.1888, "num_input_tokens_seen": 166580720, "step": 77230 }, { "epoch": 12.599510603588907, "grad_norm": 0.06514982879161835, "learning_rate": 0.0003622360609348138, "loss": 0.0057, "num_input_tokens_seen": 166591216, "step": 77235 }, { "epoch": 12.600326264274061, "grad_norm": 0.020682508125901222, "learning_rate": 0.0003621676376599514, "loss": 0.0154, "num_input_tokens_seen": 166601104, "step": 77240 }, { "epoch": 12.601141924959217, "grad_norm": 0.37414079904556274, "learning_rate": 0.00036209921717843697, "loss": 0.0123, "num_input_tokens_seen": 166612528, "step": 77245 }, { "epoch": 12.601957585644373, "grad_norm": 0.005562840029597282, "learning_rate": 0.00036203079949165664, "loss": 0.1035, "num_input_tokens_seen": 166622768, "step": 77250 }, { "epoch": 12.602773246329527, "grad_norm": 2.242063522338867, "learning_rate": 0.00036196238460099717, "loss": 0.21, "num_input_tokens_seen": 166633488, "step": 77255 }, { "epoch": 12.603588907014682, "grad_norm": 0.053151700645685196, "learning_rate": 0.0003618939725078453, "loss": 0.1555, "num_input_tokens_seen": 166644080, "step": 77260 }, { "epoch": 12.604404567699836, "grad_norm": 0.03219044208526611, "learning_rate": 0.0003618255632135871, "loss": 0.0052, "num_input_tokens_seen": 166654768, "step": 77265 }, { "epoch": 12.605220228384992, "grad_norm": 0.055317021906375885, "learning_rate": 0.00036175715671960934, "loss": 0.0131, "num_input_tokens_seen": 166666256, "step": 77270 }, { "epoch": 12.606035889070148, "grad_norm": 0.0985916331410408, "learning_rate": 0.000361688753027298, "loss": 0.0175, "num_input_tokens_seen": 166676592, "step": 77275 }, { "epoch": 12.606851549755302, "grad_norm": 0.012617107480764389, "learning_rate": 0.0003616203521380397, "loss": 0.0073, "num_input_tokens_seen": 166687888, "step": 77280 }, { "epoch": 12.607667210440457, "grad_norm": 0.7373045086860657, "learning_rate": 0.00036155195405322026, "loss": 0.2017, "num_input_tokens_seen": 166698032, "step": 77285 }, { "epoch": 12.608482871125611, "grad_norm": 0.027318118140101433, "learning_rate": 0.0003614835587742264, "loss": 0.0326, "num_input_tokens_seen": 166708560, "step": 77290 }, { "epoch": 12.609298531810767, "grad_norm": 0.006077687256038189, "learning_rate": 0.0003614151663024436, "loss": 0.0329, "num_input_tokens_seen": 166718992, "step": 77295 }, { "epoch": 12.61011419249592, "grad_norm": 0.7640355229377747, "learning_rate": 0.0003613467766392586, "loss": 0.0397, "num_input_tokens_seen": 166728624, "step": 77300 }, { "epoch": 12.610929853181077, "grad_norm": 0.04322412610054016, "learning_rate": 0.00036127838978605687, "loss": 0.1842, "num_input_tokens_seen": 166740368, "step": 77305 }, { "epoch": 12.611745513866232, "grad_norm": 0.3347030282020569, "learning_rate": 0.0003612100057442247, "loss": 0.0103, "num_input_tokens_seen": 166751760, "step": 77310 }, { "epoch": 12.612561174551386, "grad_norm": 0.0040704915300011635, "learning_rate": 0.00036114162451514765, "loss": 0.0043, "num_input_tokens_seen": 166762672, "step": 77315 }, { "epoch": 12.613376835236542, "grad_norm": 0.08733467757701874, "learning_rate": 0.000361073246100212, "loss": 0.0081, "num_input_tokens_seen": 166772624, "step": 77320 }, { "epoch": 12.614192495921696, "grad_norm": 0.00571870943531394, "learning_rate": 0.0003610048705008029, "loss": 0.1087, "num_input_tokens_seen": 166783216, "step": 77325 }, { "epoch": 12.615008156606851, "grad_norm": 0.23487912118434906, "learning_rate": 0.00036093649771830674, "loss": 0.0062, "num_input_tokens_seen": 166794768, "step": 77330 }, { "epoch": 12.615823817292007, "grad_norm": 0.11222836375236511, "learning_rate": 0.0003608681277541086, "loss": 0.0104, "num_input_tokens_seen": 166805872, "step": 77335 }, { "epoch": 12.616639477977161, "grad_norm": 0.03115290030837059, "learning_rate": 0.00036079976060959454, "loss": 0.0142, "num_input_tokens_seen": 166817296, "step": 77340 }, { "epoch": 12.617455138662317, "grad_norm": 0.1498255878686905, "learning_rate": 0.0003607313962861499, "loss": 0.0585, "num_input_tokens_seen": 166828176, "step": 77345 }, { "epoch": 12.61827079934747, "grad_norm": 0.09293574839830399, "learning_rate": 0.00036066303478516016, "loss": 0.0077, "num_input_tokens_seen": 166839568, "step": 77350 }, { "epoch": 12.619086460032626, "grad_norm": 0.008245404809713364, "learning_rate": 0.0003605946761080108, "loss": 0.0058, "num_input_tokens_seen": 166850992, "step": 77355 }, { "epoch": 12.619902120717782, "grad_norm": 0.020306089892983437, "learning_rate": 0.000360526320256087, "loss": 0.0303, "num_input_tokens_seen": 166861136, "step": 77360 }, { "epoch": 12.620717781402936, "grad_norm": 0.009491086937487125, "learning_rate": 0.0003604579672307744, "loss": 0.0111, "num_input_tokens_seen": 166871600, "step": 77365 }, { "epoch": 12.621533442088092, "grad_norm": 0.025295045226812363, "learning_rate": 0.00036038961703345815, "loss": 0.0642, "num_input_tokens_seen": 166881776, "step": 77370 }, { "epoch": 12.622349102773246, "grad_norm": 0.044267889112234116, "learning_rate": 0.00036032126966552335, "loss": 0.0141, "num_input_tokens_seen": 166892368, "step": 77375 }, { "epoch": 12.623164763458401, "grad_norm": 2.560544729232788, "learning_rate": 0.0003602529251283553, "loss": 0.0961, "num_input_tokens_seen": 166902352, "step": 77380 }, { "epoch": 12.623980424143557, "grad_norm": 0.008362163789570332, "learning_rate": 0.000360184583423339, "loss": 0.0039, "num_input_tokens_seen": 166912848, "step": 77385 }, { "epoch": 12.624796084828711, "grad_norm": 0.01620294526219368, "learning_rate": 0.0003601162445518593, "loss": 0.0042, "num_input_tokens_seen": 166923088, "step": 77390 }, { "epoch": 12.625611745513867, "grad_norm": 0.026325814425945282, "learning_rate": 0.0003600479085153017, "loss": 0.0029, "num_input_tokens_seen": 166935536, "step": 77395 }, { "epoch": 12.62642740619902, "grad_norm": 1.1352559328079224, "learning_rate": 0.00035997957531505045, "loss": 0.0557, "num_input_tokens_seen": 166947248, "step": 77400 }, { "epoch": 12.627243066884176, "grad_norm": 0.00667258445173502, "learning_rate": 0.00035991124495249094, "loss": 0.0439, "num_input_tokens_seen": 166957808, "step": 77405 }, { "epoch": 12.62805872756933, "grad_norm": 0.0152681153267622, "learning_rate": 0.0003598429174290076, "loss": 0.1441, "num_input_tokens_seen": 166968528, "step": 77410 }, { "epoch": 12.628874388254486, "grad_norm": 0.06274442374706268, "learning_rate": 0.0003597745927459856, "loss": 0.0088, "num_input_tokens_seen": 166979888, "step": 77415 }, { "epoch": 12.629690048939642, "grad_norm": 0.006630095187574625, "learning_rate": 0.00035970627090480906, "loss": 0.021, "num_input_tokens_seen": 166991120, "step": 77420 }, { "epoch": 12.630505709624796, "grad_norm": 0.013409950770437717, "learning_rate": 0.0003596379519068632, "loss": 0.0054, "num_input_tokens_seen": 167003632, "step": 77425 }, { "epoch": 12.631321370309951, "grad_norm": 0.0021637564059346914, "learning_rate": 0.000359569635753532, "loss": 0.0398, "num_input_tokens_seen": 167015248, "step": 77430 }, { "epoch": 12.632137030995105, "grad_norm": 0.0157206729054451, "learning_rate": 0.00035950132244620057, "loss": 0.0099, "num_input_tokens_seen": 167024752, "step": 77435 }, { "epoch": 12.632952691680261, "grad_norm": 0.014077788218855858, "learning_rate": 0.0003594330119862529, "loss": 0.0317, "num_input_tokens_seen": 167033744, "step": 77440 }, { "epoch": 12.633768352365417, "grad_norm": 0.7440185546875, "learning_rate": 0.00035936470437507366, "loss": 0.21, "num_input_tokens_seen": 167044400, "step": 77445 }, { "epoch": 12.63458401305057, "grad_norm": 0.036778777837753296, "learning_rate": 0.000359296399614047, "loss": 0.0363, "num_input_tokens_seen": 167054640, "step": 77450 }, { "epoch": 12.635399673735726, "grad_norm": 0.009116832166910172, "learning_rate": 0.00035922809770455745, "loss": 0.0282, "num_input_tokens_seen": 167065456, "step": 77455 }, { "epoch": 12.63621533442088, "grad_norm": 0.05394542217254639, "learning_rate": 0.00035915979864798884, "loss": 0.0148, "num_input_tokens_seen": 167076176, "step": 77460 }, { "epoch": 12.637030995106036, "grad_norm": 0.007149635348469019, "learning_rate": 0.0003590915024457256, "loss": 0.0059, "num_input_tokens_seen": 167087824, "step": 77465 }, { "epoch": 12.63784665579119, "grad_norm": 0.11396108567714691, "learning_rate": 0.0003590232090991521, "loss": 0.0285, "num_input_tokens_seen": 167097616, "step": 77470 }, { "epoch": 12.638662316476346, "grad_norm": 0.04461178928613663, "learning_rate": 0.0003589549186096518, "loss": 0.033, "num_input_tokens_seen": 167108016, "step": 77475 }, { "epoch": 12.639477977161501, "grad_norm": 0.01348812784999609, "learning_rate": 0.0003588866309786093, "loss": 0.0272, "num_input_tokens_seen": 167118960, "step": 77480 }, { "epoch": 12.640293637846655, "grad_norm": 0.12076926231384277, "learning_rate": 0.00035881834620740796, "loss": 0.0236, "num_input_tokens_seen": 167128912, "step": 77485 }, { "epoch": 12.641109298531811, "grad_norm": 0.09061788022518158, "learning_rate": 0.0003587500642974322, "loss": 0.0095, "num_input_tokens_seen": 167140304, "step": 77490 }, { "epoch": 12.641924959216965, "grad_norm": 0.038287460803985596, "learning_rate": 0.0003586817852500653, "loss": 0.0051, "num_input_tokens_seen": 167150320, "step": 77495 }, { "epoch": 12.64274061990212, "grad_norm": 0.011597780510783195, "learning_rate": 0.00035861350906669156, "loss": 0.0236, "num_input_tokens_seen": 167161680, "step": 77500 }, { "epoch": 12.643556280587276, "grad_norm": 0.007960635237395763, "learning_rate": 0.00035854523574869416, "loss": 0.0162, "num_input_tokens_seen": 167172784, "step": 77505 }, { "epoch": 12.64437194127243, "grad_norm": 0.2139074057340622, "learning_rate": 0.00035847696529745714, "loss": 0.0126, "num_input_tokens_seen": 167183792, "step": 77510 }, { "epoch": 12.645187601957586, "grad_norm": 0.002732169581577182, "learning_rate": 0.000358408697714364, "loss": 0.0184, "num_input_tokens_seen": 167194640, "step": 77515 }, { "epoch": 12.64600326264274, "grad_norm": 0.008221602998673916, "learning_rate": 0.0003583404330007981, "loss": 0.0077, "num_input_tokens_seen": 167204880, "step": 77520 }, { "epoch": 12.646818923327896, "grad_norm": 0.007143992464989424, "learning_rate": 0.00035827217115814313, "loss": 0.0055, "num_input_tokens_seen": 167215888, "step": 77525 }, { "epoch": 12.647634584013051, "grad_norm": 0.00794088002294302, "learning_rate": 0.0003582039121877824, "loss": 0.0033, "num_input_tokens_seen": 167226384, "step": 77530 }, { "epoch": 12.648450244698205, "grad_norm": 0.1341748833656311, "learning_rate": 0.0003581356560910992, "loss": 0.0215, "num_input_tokens_seen": 167236944, "step": 77535 }, { "epoch": 12.649265905383361, "grad_norm": 0.004294542595744133, "learning_rate": 0.00035806740286947704, "loss": 0.1384, "num_input_tokens_seen": 167248272, "step": 77540 }, { "epoch": 12.650081566068515, "grad_norm": 0.6940644383430481, "learning_rate": 0.0003579991525242988, "loss": 0.0785, "num_input_tokens_seen": 167259664, "step": 77545 }, { "epoch": 12.65089722675367, "grad_norm": 0.19397184252738953, "learning_rate": 0.0003579309050569481, "loss": 0.0356, "num_input_tokens_seen": 167270320, "step": 77550 }, { "epoch": 12.651712887438826, "grad_norm": 0.009966906160116196, "learning_rate": 0.00035786266046880765, "loss": 0.0038, "num_input_tokens_seen": 167280880, "step": 77555 }, { "epoch": 12.65252854812398, "grad_norm": 0.02695809118449688, "learning_rate": 0.0003577944187612609, "loss": 0.02, "num_input_tokens_seen": 167292304, "step": 77560 }, { "epoch": 12.653344208809136, "grad_norm": 0.01831822656095028, "learning_rate": 0.0003577261799356905, "loss": 0.1107, "num_input_tokens_seen": 167303472, "step": 77565 }, { "epoch": 12.65415986949429, "grad_norm": 0.01194059569388628, "learning_rate": 0.0003576579439934796, "loss": 0.005, "num_input_tokens_seen": 167312944, "step": 77570 }, { "epoch": 12.654975530179446, "grad_norm": 0.0070284027606248856, "learning_rate": 0.000357589710936011, "loss": 0.0046, "num_input_tokens_seen": 167323120, "step": 77575 }, { "epoch": 12.655791190864601, "grad_norm": 1.255548119544983, "learning_rate": 0.0003575214807646675, "loss": 0.0573, "num_input_tokens_seen": 167333648, "step": 77580 }, { "epoch": 12.656606851549755, "grad_norm": 0.07463961094617844, "learning_rate": 0.0003574532534808321, "loss": 0.0102, "num_input_tokens_seen": 167343376, "step": 77585 }, { "epoch": 12.65742251223491, "grad_norm": 0.4005419611930847, "learning_rate": 0.00035738502908588723, "loss": 0.0766, "num_input_tokens_seen": 167353552, "step": 77590 }, { "epoch": 12.658238172920065, "grad_norm": 0.016932399943470955, "learning_rate": 0.0003573168075812158, "loss": 0.0052, "num_input_tokens_seen": 167364656, "step": 77595 }, { "epoch": 12.65905383360522, "grad_norm": 0.014514518901705742, "learning_rate": 0.0003572485889682001, "loss": 0.1124, "num_input_tokens_seen": 167375248, "step": 77600 }, { "epoch": 12.659869494290374, "grad_norm": 0.008210406638681889, "learning_rate": 0.00035718037324822304, "loss": 0.0117, "num_input_tokens_seen": 167387216, "step": 77605 }, { "epoch": 12.66068515497553, "grad_norm": 0.7704275250434875, "learning_rate": 0.0003571121604226667, "loss": 0.0607, "num_input_tokens_seen": 167397808, "step": 77610 }, { "epoch": 12.661500815660686, "grad_norm": 0.028577029705047607, "learning_rate": 0.0003570439504929139, "loss": 0.0047, "num_input_tokens_seen": 167408816, "step": 77615 }, { "epoch": 12.66231647634584, "grad_norm": 0.022977739572525024, "learning_rate": 0.00035697574346034655, "loss": 0.0954, "num_input_tokens_seen": 167420400, "step": 77620 }, { "epoch": 12.663132137030995, "grad_norm": 0.03689314052462578, "learning_rate": 0.0003569075393263475, "loss": 0.0167, "num_input_tokens_seen": 167431248, "step": 77625 }, { "epoch": 12.66394779771615, "grad_norm": 0.0038340999744832516, "learning_rate": 0.0003568393380922984, "loss": 0.0098, "num_input_tokens_seen": 167442352, "step": 77630 }, { "epoch": 12.664763458401305, "grad_norm": 0.195373997092247, "learning_rate": 0.0003567711397595819, "loss": 0.0426, "num_input_tokens_seen": 167453104, "step": 77635 }, { "epoch": 12.66557911908646, "grad_norm": 0.014324640855193138, "learning_rate": 0.00035670294432957984, "loss": 0.0849, "num_input_tokens_seen": 167463696, "step": 77640 }, { "epoch": 12.666394779771615, "grad_norm": 0.00381258106790483, "learning_rate": 0.00035663475180367453, "loss": 0.1548, "num_input_tokens_seen": 167474480, "step": 77645 }, { "epoch": 12.66721044045677, "grad_norm": 0.6546381115913391, "learning_rate": 0.00035656656218324765, "loss": 0.104, "num_input_tokens_seen": 167484912, "step": 77650 }, { "epoch": 12.668026101141924, "grad_norm": 0.005321448668837547, "learning_rate": 0.0003564983754696815, "loss": 0.0181, "num_input_tokens_seen": 167496336, "step": 77655 }, { "epoch": 12.66884176182708, "grad_norm": 0.6135666966438293, "learning_rate": 0.00035643019166435775, "loss": 0.1109, "num_input_tokens_seen": 167505520, "step": 77660 }, { "epoch": 12.669657422512234, "grad_norm": 0.34050193428993225, "learning_rate": 0.00035636201076865836, "loss": 0.0123, "num_input_tokens_seen": 167516912, "step": 77665 }, { "epoch": 12.67047308319739, "grad_norm": 0.034731779247522354, "learning_rate": 0.000356293832783965, "loss": 0.0068, "num_input_tokens_seen": 167527696, "step": 77670 }, { "epoch": 12.671288743882545, "grad_norm": 0.016032949090003967, "learning_rate": 0.0003562256577116595, "loss": 0.0056, "num_input_tokens_seen": 167538864, "step": 77675 }, { "epoch": 12.6721044045677, "grad_norm": 0.22366826236248016, "learning_rate": 0.0003561574855531232, "loss": 0.0166, "num_input_tokens_seen": 167548976, "step": 77680 }, { "epoch": 12.672920065252855, "grad_norm": 0.5759492516517639, "learning_rate": 0.00035608931630973814, "loss": 0.0799, "num_input_tokens_seen": 167559792, "step": 77685 }, { "epoch": 12.673735725938009, "grad_norm": 0.018790217116475105, "learning_rate": 0.0003560211499828856, "loss": 0.0102, "num_input_tokens_seen": 167571344, "step": 77690 }, { "epoch": 12.674551386623165, "grad_norm": 0.015331006608903408, "learning_rate": 0.00035595298657394714, "loss": 0.032, "num_input_tokens_seen": 167581904, "step": 77695 }, { "epoch": 12.67536704730832, "grad_norm": 0.007580565754324198, "learning_rate": 0.0003558848260843041, "loss": 0.0063, "num_input_tokens_seen": 167592912, "step": 77700 }, { "epoch": 12.676182707993474, "grad_norm": 0.021139413118362427, "learning_rate": 0.00035581666851533777, "loss": 0.0104, "num_input_tokens_seen": 167605104, "step": 77705 }, { "epoch": 12.67699836867863, "grad_norm": 0.026527609676122665, "learning_rate": 0.0003557485138684299, "loss": 0.0118, "num_input_tokens_seen": 167614704, "step": 77710 }, { "epoch": 12.677814029363784, "grad_norm": 0.02175075374543667, "learning_rate": 0.00035568036214496103, "loss": 0.0037, "num_input_tokens_seen": 167624496, "step": 77715 }, { "epoch": 12.67862969004894, "grad_norm": 0.09360365569591522, "learning_rate": 0.000355612213346313, "loss": 0.05, "num_input_tokens_seen": 167636336, "step": 77720 }, { "epoch": 12.679445350734095, "grad_norm": 0.006980986800044775, "learning_rate": 0.00035554406747386635, "loss": 0.0352, "num_input_tokens_seen": 167647792, "step": 77725 }, { "epoch": 12.68026101141925, "grad_norm": 1.5654058456420898, "learning_rate": 0.0003554759245290027, "loss": 0.0425, "num_input_tokens_seen": 167659472, "step": 77730 }, { "epoch": 12.681076672104405, "grad_norm": 0.0024384637363255024, "learning_rate": 0.0003554077845131025, "loss": 0.0087, "num_input_tokens_seen": 167670608, "step": 77735 }, { "epoch": 12.681892332789559, "grad_norm": 0.004831403493881226, "learning_rate": 0.0003553396474275473, "loss": 0.0076, "num_input_tokens_seen": 167681904, "step": 77740 }, { "epoch": 12.682707993474715, "grad_norm": 0.009864117950201035, "learning_rate": 0.00035527151327371736, "loss": 0.0037, "num_input_tokens_seen": 167692560, "step": 77745 }, { "epoch": 12.68352365415987, "grad_norm": 0.09383443742990494, "learning_rate": 0.00035520338205299407, "loss": 0.0486, "num_input_tokens_seen": 167703600, "step": 77750 }, { "epoch": 12.684339314845024, "grad_norm": 0.013466471806168556, "learning_rate": 0.0003551352537667577, "loss": 0.0061, "num_input_tokens_seen": 167714512, "step": 77755 }, { "epoch": 12.68515497553018, "grad_norm": 0.001126780523918569, "learning_rate": 0.0003550671284163894, "loss": 0.0073, "num_input_tokens_seen": 167725008, "step": 77760 }, { "epoch": 12.685970636215334, "grad_norm": 0.014836586080491543, "learning_rate": 0.00035499900600326933, "loss": 0.0082, "num_input_tokens_seen": 167736112, "step": 77765 }, { "epoch": 12.68678629690049, "grad_norm": 0.006593931466341019, "learning_rate": 0.00035493088652877866, "loss": 0.0021, "num_input_tokens_seen": 167745456, "step": 77770 }, { "epoch": 12.687601957585644, "grad_norm": 0.008998274803161621, "learning_rate": 0.00035486276999429733, "loss": 0.0037, "num_input_tokens_seen": 167755792, "step": 77775 }, { "epoch": 12.6884176182708, "grad_norm": 1.2066569328308105, "learning_rate": 0.00035479465640120636, "loss": 0.2008, "num_input_tokens_seen": 167766928, "step": 77780 }, { "epoch": 12.689233278955955, "grad_norm": 0.7440032362937927, "learning_rate": 0.0003547265457508856, "loss": 0.2876, "num_input_tokens_seen": 167777968, "step": 77785 }, { "epoch": 12.690048939641109, "grad_norm": 0.1267242580652237, "learning_rate": 0.0003546584380447157, "loss": 0.0254, "num_input_tokens_seen": 167789328, "step": 77790 }, { "epoch": 12.690864600326265, "grad_norm": 0.00688669690862298, "learning_rate": 0.0003545903332840772, "loss": 0.0148, "num_input_tokens_seen": 167799664, "step": 77795 }, { "epoch": 12.691680261011419, "grad_norm": 0.007077553775161505, "learning_rate": 0.0003545222314703498, "loss": 0.0114, "num_input_tokens_seen": 167810832, "step": 77800 }, { "epoch": 12.692495921696574, "grad_norm": 0.017146464437246323, "learning_rate": 0.0003544541326049141, "loss": 0.0052, "num_input_tokens_seen": 167822352, "step": 77805 }, { "epoch": 12.69331158238173, "grad_norm": 0.09376560151576996, "learning_rate": 0.0003543860366891499, "loss": 0.0048, "num_input_tokens_seen": 167833392, "step": 77810 }, { "epoch": 12.694127243066884, "grad_norm": 0.12058135122060776, "learning_rate": 0.0003543179437244376, "loss": 0.0093, "num_input_tokens_seen": 167844880, "step": 77815 }, { "epoch": 12.69494290375204, "grad_norm": 0.523445725440979, "learning_rate": 0.0003542498537121567, "loss": 0.097, "num_input_tokens_seen": 167854576, "step": 77820 }, { "epoch": 12.695758564437194, "grad_norm": 0.04401801526546478, "learning_rate": 0.0003541817666536876, "loss": 0.0122, "num_input_tokens_seen": 167864272, "step": 77825 }, { "epoch": 12.69657422512235, "grad_norm": 0.0018799330573529005, "learning_rate": 0.00035411368255040994, "loss": 0.1147, "num_input_tokens_seen": 167875088, "step": 77830 }, { "epoch": 12.697389885807503, "grad_norm": 0.8557748198509216, "learning_rate": 0.0003540456014037036, "loss": 0.0663, "num_input_tokens_seen": 167885424, "step": 77835 }, { "epoch": 12.698205546492659, "grad_norm": 0.045516569167375565, "learning_rate": 0.00035397752321494826, "loss": 0.0616, "num_input_tokens_seen": 167896720, "step": 77840 }, { "epoch": 12.699021207177815, "grad_norm": 0.01768627017736435, "learning_rate": 0.0003539094479855237, "loss": 0.0047, "num_input_tokens_seen": 167907152, "step": 77845 }, { "epoch": 12.699836867862969, "grad_norm": 0.6987684369087219, "learning_rate": 0.00035384137571680936, "loss": 0.0529, "num_input_tokens_seen": 167919056, "step": 77850 }, { "epoch": 12.700652528548124, "grad_norm": 0.48806625604629517, "learning_rate": 0.0003537733064101852, "loss": 0.1027, "num_input_tokens_seen": 167930704, "step": 77855 }, { "epoch": 12.701468189233278, "grad_norm": 0.036979787051677704, "learning_rate": 0.0003537052400670303, "loss": 0.0103, "num_input_tokens_seen": 167942096, "step": 77860 }, { "epoch": 12.702283849918434, "grad_norm": 0.0051063536666333675, "learning_rate": 0.00035363717668872443, "loss": 0.1218, "num_input_tokens_seen": 167952912, "step": 77865 }, { "epoch": 12.70309951060359, "grad_norm": 0.006220867857336998, "learning_rate": 0.00035356911627664665, "loss": 0.1276, "num_input_tokens_seen": 167964272, "step": 77870 }, { "epoch": 12.703915171288743, "grad_norm": 0.00961611233651638, "learning_rate": 0.00035350105883217675, "loss": 0.076, "num_input_tokens_seen": 167975152, "step": 77875 }, { "epoch": 12.7047308319739, "grad_norm": 0.01176263764500618, "learning_rate": 0.00035343300435669356, "loss": 0.0087, "num_input_tokens_seen": 167985936, "step": 77880 }, { "epoch": 12.705546492659053, "grad_norm": 0.024660272523760796, "learning_rate": 0.0003533649528515766, "loss": 0.0235, "num_input_tokens_seen": 167996976, "step": 77885 }, { "epoch": 12.706362153344209, "grad_norm": 0.03467089310288429, "learning_rate": 0.0003532969043182047, "loss": 0.0107, "num_input_tokens_seen": 168008016, "step": 77890 }, { "epoch": 12.707177814029365, "grad_norm": 0.013338250108063221, "learning_rate": 0.0003532288587579572, "loss": 0.01, "num_input_tokens_seen": 168019312, "step": 77895 }, { "epoch": 12.707993474714518, "grad_norm": 0.018884364515542984, "learning_rate": 0.0003531608161722132, "loss": 0.04, "num_input_tokens_seen": 168030320, "step": 77900 }, { "epoch": 12.708809135399674, "grad_norm": 0.010697862133383751, "learning_rate": 0.00035309277656235137, "loss": 0.1184, "num_input_tokens_seen": 168040304, "step": 77905 }, { "epoch": 12.709624796084828, "grad_norm": 0.0402478463947773, "learning_rate": 0.000353024739929751, "loss": 0.1476, "num_input_tokens_seen": 168051408, "step": 77910 }, { "epoch": 12.710440456769984, "grad_norm": 0.004462614189833403, "learning_rate": 0.0003529567062757905, "loss": 0.1138, "num_input_tokens_seen": 168060624, "step": 77915 }, { "epoch": 12.71125611745514, "grad_norm": 0.44581490755081177, "learning_rate": 0.0003528886756018491, "loss": 0.0621, "num_input_tokens_seen": 168070800, "step": 77920 }, { "epoch": 12.712071778140293, "grad_norm": 0.030489355325698853, "learning_rate": 0.0003528206479093051, "loss": 0.1466, "num_input_tokens_seen": 168081264, "step": 77925 }, { "epoch": 12.71288743882545, "grad_norm": 0.17076869308948517, "learning_rate": 0.0003527526231995376, "loss": 0.0171, "num_input_tokens_seen": 168092592, "step": 77930 }, { "epoch": 12.713703099510603, "grad_norm": 0.0538906492292881, "learning_rate": 0.0003526846014739248, "loss": 0.0093, "num_input_tokens_seen": 168103216, "step": 77935 }, { "epoch": 12.714518760195759, "grad_norm": 0.017880670726299286, "learning_rate": 0.00035261658273384554, "loss": 0.0416, "num_input_tokens_seen": 168114576, "step": 77940 }, { "epoch": 12.715334420880914, "grad_norm": 0.01100874599069357, "learning_rate": 0.00035254856698067806, "loss": 0.0062, "num_input_tokens_seen": 168126032, "step": 77945 }, { "epoch": 12.716150081566068, "grad_norm": 0.014228198677301407, "learning_rate": 0.00035248055421580114, "loss": 0.0312, "num_input_tokens_seen": 168137552, "step": 77950 }, { "epoch": 12.716965742251224, "grad_norm": 0.018051210790872574, "learning_rate": 0.0003524125444405928, "loss": 0.1309, "num_input_tokens_seen": 168146608, "step": 77955 }, { "epoch": 12.717781402936378, "grad_norm": 0.7434613704681396, "learning_rate": 0.00035234453765643146, "loss": 0.1553, "num_input_tokens_seen": 168156336, "step": 77960 }, { "epoch": 12.718597063621534, "grad_norm": 0.007628794759511948, "learning_rate": 0.0003522765338646954, "loss": 0.0076, "num_input_tokens_seen": 168167024, "step": 77965 }, { "epoch": 12.719412724306688, "grad_norm": 0.00992138497531414, "learning_rate": 0.00035220853306676284, "loss": 0.0068, "num_input_tokens_seen": 168177200, "step": 77970 }, { "epoch": 12.720228384991843, "grad_norm": 0.1759665608406067, "learning_rate": 0.0003521405352640118, "loss": 0.0185, "num_input_tokens_seen": 168187600, "step": 77975 }, { "epoch": 12.721044045676999, "grad_norm": 0.057746753096580505, "learning_rate": 0.00035207254045782036, "loss": 0.0109, "num_input_tokens_seen": 168199472, "step": 77980 }, { "epoch": 12.721859706362153, "grad_norm": 0.24444794654846191, "learning_rate": 0.00035200454864956653, "loss": 0.0503, "num_input_tokens_seen": 168211600, "step": 77985 }, { "epoch": 12.722675367047309, "grad_norm": 0.007334553170949221, "learning_rate": 0.00035193655984062835, "loss": 0.0102, "num_input_tokens_seen": 168221456, "step": 77990 }, { "epoch": 12.723491027732463, "grad_norm": 0.5446126461029053, "learning_rate": 0.0003518685740323835, "loss": 0.0848, "num_input_tokens_seen": 168232624, "step": 77995 }, { "epoch": 12.724306688417618, "grad_norm": 0.0525912381708622, "learning_rate": 0.00035180059122621, "loss": 0.0125, "num_input_tokens_seen": 168243952, "step": 78000 }, { "epoch": 12.725122349102774, "grad_norm": 0.014477339573204517, "learning_rate": 0.0003517326114234855, "loss": 0.181, "num_input_tokens_seen": 168252784, "step": 78005 }, { "epoch": 12.725938009787928, "grad_norm": 0.269758939743042, "learning_rate": 0.0003516646346255877, "loss": 0.0554, "num_input_tokens_seen": 168264016, "step": 78010 }, { "epoch": 12.726753670473084, "grad_norm": 0.08979897946119308, "learning_rate": 0.00035159666083389436, "loss": 0.121, "num_input_tokens_seen": 168275728, "step": 78015 }, { "epoch": 12.727569331158238, "grad_norm": 0.02093280479311943, "learning_rate": 0.00035152869004978276, "loss": 0.011, "num_input_tokens_seen": 168286800, "step": 78020 }, { "epoch": 12.728384991843393, "grad_norm": 0.03764188289642334, "learning_rate": 0.0003514607222746309, "loss": 0.0067, "num_input_tokens_seen": 168298032, "step": 78025 }, { "epoch": 12.729200652528547, "grad_norm": 0.034607868641614914, "learning_rate": 0.0003513927575098156, "loss": 0.0083, "num_input_tokens_seen": 168308944, "step": 78030 }, { "epoch": 12.730016313213703, "grad_norm": 0.04364435374736786, "learning_rate": 0.0003513247957567149, "loss": 0.1055, "num_input_tokens_seen": 168319440, "step": 78035 }, { "epoch": 12.730831973898859, "grad_norm": 0.19797900319099426, "learning_rate": 0.0003512568370167055, "loss": 0.0135, "num_input_tokens_seen": 168330256, "step": 78040 }, { "epoch": 12.731647634584013, "grad_norm": 0.042147159576416016, "learning_rate": 0.0003511888812911653, "loss": 0.0069, "num_input_tokens_seen": 168341488, "step": 78045 }, { "epoch": 12.732463295269168, "grad_norm": 0.11463060975074768, "learning_rate": 0.00035112092858147106, "loss": 0.0171, "num_input_tokens_seen": 168352752, "step": 78050 }, { "epoch": 12.733278955954322, "grad_norm": 0.13228359818458557, "learning_rate": 0.0003510529788890001, "loss": 0.0423, "num_input_tokens_seen": 168362960, "step": 78055 }, { "epoch": 12.734094616639478, "grad_norm": 0.01215174701064825, "learning_rate": 0.0003509850322151294, "loss": 0.0165, "num_input_tokens_seen": 168372976, "step": 78060 }, { "epoch": 12.734910277324634, "grad_norm": 0.03226783871650696, "learning_rate": 0.0003509170885612362, "loss": 0.0306, "num_input_tokens_seen": 168384528, "step": 78065 }, { "epoch": 12.735725938009788, "grad_norm": 0.5375596284866333, "learning_rate": 0.00035084914792869715, "loss": 0.0789, "num_input_tokens_seen": 168394864, "step": 78070 }, { "epoch": 12.736541598694943, "grad_norm": 0.28148171305656433, "learning_rate": 0.0003507812103188895, "loss": 0.0326, "num_input_tokens_seen": 168406448, "step": 78075 }, { "epoch": 12.737357259380097, "grad_norm": 0.0795280933380127, "learning_rate": 0.0003507132757331898, "loss": 0.0165, "num_input_tokens_seen": 168417648, "step": 78080 }, { "epoch": 12.738172920065253, "grad_norm": 0.010512933135032654, "learning_rate": 0.00035064534417297513, "loss": 0.0819, "num_input_tokens_seen": 168428848, "step": 78085 }, { "epoch": 12.738988580750409, "grad_norm": 0.08219227194786072, "learning_rate": 0.00035057741563962176, "loss": 0.0276, "num_input_tokens_seen": 168439984, "step": 78090 }, { "epoch": 12.739804241435563, "grad_norm": 0.045009661465883255, "learning_rate": 0.00035050949013450686, "loss": 0.0091, "num_input_tokens_seen": 168451504, "step": 78095 }, { "epoch": 12.740619902120718, "grad_norm": 0.03985137119889259, "learning_rate": 0.0003504415676590066, "loss": 0.0122, "num_input_tokens_seen": 168461968, "step": 78100 }, { "epoch": 12.741435562805872, "grad_norm": 0.10806810110807419, "learning_rate": 0.00035037364821449766, "loss": 0.0214, "num_input_tokens_seen": 168473008, "step": 78105 }, { "epoch": 12.742251223491028, "grad_norm": 0.6831148266792297, "learning_rate": 0.0003503057318023568, "loss": 0.033, "num_input_tokens_seen": 168483920, "step": 78110 }, { "epoch": 12.743066884176184, "grad_norm": 0.040956661105155945, "learning_rate": 0.00035023781842395994, "loss": 0.0091, "num_input_tokens_seen": 168496464, "step": 78115 }, { "epoch": 12.743882544861338, "grad_norm": 0.008608592674136162, "learning_rate": 0.0003501699080806839, "loss": 0.2962, "num_input_tokens_seen": 168507056, "step": 78120 }, { "epoch": 12.744698205546493, "grad_norm": 0.025838438421487808, "learning_rate": 0.0003501020007739045, "loss": 0.0058, "num_input_tokens_seen": 168518160, "step": 78125 }, { "epoch": 12.745513866231647, "grad_norm": 0.08618154376745224, "learning_rate": 0.0003500340965049984, "loss": 0.0432, "num_input_tokens_seen": 168529392, "step": 78130 }, { "epoch": 12.746329526916803, "grad_norm": 0.3332953155040741, "learning_rate": 0.00034996619527534153, "loss": 0.0833, "num_input_tokens_seen": 168539824, "step": 78135 }, { "epoch": 12.747145187601957, "grad_norm": 0.025852497667074203, "learning_rate": 0.00034989829708631005, "loss": 0.0081, "num_input_tokens_seen": 168551600, "step": 78140 }, { "epoch": 12.747960848287113, "grad_norm": 0.00783397164195776, "learning_rate": 0.00034983040193927996, "loss": 0.0074, "num_input_tokens_seen": 168562416, "step": 78145 }, { "epoch": 12.748776508972268, "grad_norm": 0.014943626709282398, "learning_rate": 0.0003497625098356273, "loss": 0.0793, "num_input_tokens_seen": 168572848, "step": 78150 }, { "epoch": 12.749592169657422, "grad_norm": 0.6502835750579834, "learning_rate": 0.00034969462077672793, "loss": 0.2822, "num_input_tokens_seen": 168583024, "step": 78155 }, { "epoch": 12.750407830342578, "grad_norm": 0.36432957649230957, "learning_rate": 0.0003496267347639579, "loss": 0.0441, "num_input_tokens_seen": 168594480, "step": 78160 }, { "epoch": 12.751223491027732, "grad_norm": 0.08001220971345901, "learning_rate": 0.00034955885179869265, "loss": 0.0103, "num_input_tokens_seen": 168605424, "step": 78165 }, { "epoch": 12.752039151712887, "grad_norm": 0.026702601462602615, "learning_rate": 0.0003494909718823083, "loss": 0.0124, "num_input_tokens_seen": 168616208, "step": 78170 }, { "epoch": 12.752854812398043, "grad_norm": 0.013944551348686218, "learning_rate": 0.00034942309501618016, "loss": 0.0544, "num_input_tokens_seen": 168626992, "step": 78175 }, { "epoch": 12.753670473083197, "grad_norm": 0.0551200732588768, "learning_rate": 0.00034935522120168417, "loss": 0.027, "num_input_tokens_seen": 168637840, "step": 78180 }, { "epoch": 12.754486133768353, "grad_norm": 0.009341658093035221, "learning_rate": 0.0003492873504401956, "loss": 0.0631, "num_input_tokens_seen": 168647536, "step": 78185 }, { "epoch": 12.755301794453507, "grad_norm": 0.004269873257726431, "learning_rate": 0.0003492194827330902, "loss": 0.026, "num_input_tokens_seen": 168657616, "step": 78190 }, { "epoch": 12.756117455138662, "grad_norm": 0.03290918096899986, "learning_rate": 0.00034915161808174314, "loss": 0.0036, "num_input_tokens_seen": 168669296, "step": 78195 }, { "epoch": 12.756933115823816, "grad_norm": 0.012617192231118679, "learning_rate": 0.0003490837564875301, "loss": 0.0063, "num_input_tokens_seen": 168681072, "step": 78200 }, { "epoch": 12.757748776508972, "grad_norm": 0.0807793065905571, "learning_rate": 0.0003490158979518259, "loss": 0.0153, "num_input_tokens_seen": 168692176, "step": 78205 }, { "epoch": 12.758564437194128, "grad_norm": 0.015366978012025356, "learning_rate": 0.00034894804247600613, "loss": 0.0068, "num_input_tokens_seen": 168704400, "step": 78210 }, { "epoch": 12.759380097879282, "grad_norm": 0.028537947684526443, "learning_rate": 0.0003488801900614461, "loss": 0.015, "num_input_tokens_seen": 168715312, "step": 78215 }, { "epoch": 12.760195758564437, "grad_norm": 0.116228848695755, "learning_rate": 0.0003488123407095205, "loss": 0.0078, "num_input_tokens_seen": 168725744, "step": 78220 }, { "epoch": 12.761011419249591, "grad_norm": 0.03881040960550308, "learning_rate": 0.00034874449442160485, "loss": 0.1063, "num_input_tokens_seen": 168737296, "step": 78225 }, { "epoch": 12.761827079934747, "grad_norm": 0.04761180654168129, "learning_rate": 0.00034867665119907363, "loss": 0.0354, "num_input_tokens_seen": 168749104, "step": 78230 }, { "epoch": 12.762642740619903, "grad_norm": 0.009827990084886551, "learning_rate": 0.0003486088110433023, "loss": 0.0192, "num_input_tokens_seen": 168759728, "step": 78235 }, { "epoch": 12.763458401305057, "grad_norm": 1.3155925273895264, "learning_rate": 0.0003485409739556653, "loss": 0.074, "num_input_tokens_seen": 168770768, "step": 78240 }, { "epoch": 12.764274061990212, "grad_norm": 0.16011884808540344, "learning_rate": 0.0003484731399375377, "loss": 0.0482, "num_input_tokens_seen": 168782064, "step": 78245 }, { "epoch": 12.765089722675366, "grad_norm": 0.5158717036247253, "learning_rate": 0.00034840530899029405, "loss": 0.2659, "num_input_tokens_seen": 168791984, "step": 78250 }, { "epoch": 12.765905383360522, "grad_norm": 0.023392386734485626, "learning_rate": 0.00034833748111530926, "loss": 0.1037, "num_input_tokens_seen": 168803216, "step": 78255 }, { "epoch": 12.766721044045678, "grad_norm": 0.21962271630764008, "learning_rate": 0.00034826965631395767, "loss": 0.0188, "num_input_tokens_seen": 168814064, "step": 78260 }, { "epoch": 12.767536704730832, "grad_norm": 0.06946704536676407, "learning_rate": 0.0003482018345876141, "loss": 0.0496, "num_input_tokens_seen": 168824976, "step": 78265 }, { "epoch": 12.768352365415987, "grad_norm": 0.005544033832848072, "learning_rate": 0.0003481340159376528, "loss": 0.0505, "num_input_tokens_seen": 168835312, "step": 78270 }, { "epoch": 12.769168026101141, "grad_norm": 0.07650532573461533, "learning_rate": 0.0003480662003654483, "loss": 0.1035, "num_input_tokens_seen": 168847408, "step": 78275 }, { "epoch": 12.769983686786297, "grad_norm": 0.024768883362412453, "learning_rate": 0.00034799838787237514, "loss": 0.0144, "num_input_tokens_seen": 168857968, "step": 78280 }, { "epoch": 12.770799347471453, "grad_norm": 0.011166704818606377, "learning_rate": 0.00034793057845980744, "loss": 0.1141, "num_input_tokens_seen": 168869072, "step": 78285 }, { "epoch": 12.771615008156607, "grad_norm": 0.019548188894987106, "learning_rate": 0.00034786277212911943, "loss": 0.0103, "num_input_tokens_seen": 168879056, "step": 78290 }, { "epoch": 12.772430668841762, "grad_norm": 0.6319789290428162, "learning_rate": 0.0003477949688816854, "loss": 0.1861, "num_input_tokens_seen": 168890928, "step": 78295 }, { "epoch": 12.773246329526916, "grad_norm": 0.6219123005867004, "learning_rate": 0.00034772716871887924, "loss": 0.0684, "num_input_tokens_seen": 168901456, "step": 78300 }, { "epoch": 12.774061990212072, "grad_norm": 0.008974830619990826, "learning_rate": 0.0003476593716420754, "loss": 0.0051, "num_input_tokens_seen": 168910320, "step": 78305 }, { "epoch": 12.774877650897226, "grad_norm": 0.13511916995048523, "learning_rate": 0.00034759157765264746, "loss": 0.0296, "num_input_tokens_seen": 168921360, "step": 78310 }, { "epoch": 12.775693311582382, "grad_norm": 0.16989202797412872, "learning_rate": 0.00034752378675196975, "loss": 0.0323, "num_input_tokens_seen": 168932560, "step": 78315 }, { "epoch": 12.776508972267537, "grad_norm": 0.14921464025974274, "learning_rate": 0.0003474559989414158, "loss": 0.0214, "num_input_tokens_seen": 168942864, "step": 78320 }, { "epoch": 12.777324632952691, "grad_norm": 0.11094070971012115, "learning_rate": 0.00034738821422235943, "loss": 0.0129, "num_input_tokens_seen": 168953072, "step": 78325 }, { "epoch": 12.778140293637847, "grad_norm": 0.18795067071914673, "learning_rate": 0.00034732043259617473, "loss": 0.0718, "num_input_tokens_seen": 168963856, "step": 78330 }, { "epoch": 12.778955954323001, "grad_norm": 0.0050772796384990215, "learning_rate": 0.000347252654064235, "loss": 0.0206, "num_input_tokens_seen": 168973616, "step": 78335 }, { "epoch": 12.779771615008157, "grad_norm": 0.19655804336071014, "learning_rate": 0.00034718487862791413, "loss": 0.0822, "num_input_tokens_seen": 168985520, "step": 78340 }, { "epoch": 12.780587275693312, "grad_norm": 0.5080746412277222, "learning_rate": 0.0003471171062885854, "loss": 0.0357, "num_input_tokens_seen": 168996496, "step": 78345 }, { "epoch": 12.781402936378466, "grad_norm": 0.22758749127388, "learning_rate": 0.00034704933704762266, "loss": 0.011, "num_input_tokens_seen": 169006864, "step": 78350 }, { "epoch": 12.782218597063622, "grad_norm": 0.1189570352435112, "learning_rate": 0.00034698157090639893, "loss": 0.0307, "num_input_tokens_seen": 169017296, "step": 78355 }, { "epoch": 12.783034257748776, "grad_norm": 0.06258486211299896, "learning_rate": 0.000346913807866288, "loss": 0.0179, "num_input_tokens_seen": 169027696, "step": 78360 }, { "epoch": 12.783849918433932, "grad_norm": 0.025525525212287903, "learning_rate": 0.00034684604792866277, "loss": 0.0185, "num_input_tokens_seen": 169039280, "step": 78365 }, { "epoch": 12.784665579119086, "grad_norm": 0.015964709222316742, "learning_rate": 0.00034677829109489684, "loss": 0.0092, "num_input_tokens_seen": 169050160, "step": 78370 }, { "epoch": 12.785481239804241, "grad_norm": 0.3753780126571655, "learning_rate": 0.00034671053736636307, "loss": 0.02, "num_input_tokens_seen": 169061840, "step": 78375 }, { "epoch": 12.786296900489397, "grad_norm": 0.02786896750330925, "learning_rate": 0.0003466427867444348, "loss": 0.0318, "num_input_tokens_seen": 169073552, "step": 78380 }, { "epoch": 12.78711256117455, "grad_norm": 1.252168893814087, "learning_rate": 0.00034657503923048497, "loss": 0.0474, "num_input_tokens_seen": 169084304, "step": 78385 }, { "epoch": 12.787928221859707, "grad_norm": 0.08398839086294174, "learning_rate": 0.00034650729482588665, "loss": 0.0504, "num_input_tokens_seen": 169095696, "step": 78390 }, { "epoch": 12.78874388254486, "grad_norm": 0.01031500194221735, "learning_rate": 0.0003464395535320126, "loss": 0.0613, "num_input_tokens_seen": 169106640, "step": 78395 }, { "epoch": 12.789559543230016, "grad_norm": 0.01703634485602379, "learning_rate": 0.000346371815350236, "loss": 0.0069, "num_input_tokens_seen": 169118576, "step": 78400 }, { "epoch": 12.790375203915172, "grad_norm": 0.04676428437232971, "learning_rate": 0.0003463040802819292, "loss": 0.0509, "num_input_tokens_seen": 169130448, "step": 78405 }, { "epoch": 12.791190864600326, "grad_norm": 0.012886070646345615, "learning_rate": 0.0003462363483284654, "loss": 0.0204, "num_input_tokens_seen": 169141104, "step": 78410 }, { "epoch": 12.792006525285482, "grad_norm": 0.04591165855526924, "learning_rate": 0.0003461686194912169, "loss": 0.1212, "num_input_tokens_seen": 169151824, "step": 78415 }, { "epoch": 12.792822185970635, "grad_norm": 0.03464460000395775, "learning_rate": 0.00034610089377155656, "loss": 0.0086, "num_input_tokens_seen": 169160752, "step": 78420 }, { "epoch": 12.793637846655791, "grad_norm": 0.012382515706121922, "learning_rate": 0.0003460331711708569, "loss": 0.0053, "num_input_tokens_seen": 169172048, "step": 78425 }, { "epoch": 12.794453507340947, "grad_norm": 0.012393874116241932, "learning_rate": 0.00034596545169049013, "loss": 0.0164, "num_input_tokens_seen": 169181328, "step": 78430 }, { "epoch": 12.7952691680261, "grad_norm": 0.01892450451850891, "learning_rate": 0.00034589773533182924, "loss": 0.0755, "num_input_tokens_seen": 169191568, "step": 78435 }, { "epoch": 12.796084828711257, "grad_norm": 0.02035718597471714, "learning_rate": 0.00034583002209624594, "loss": 0.0065, "num_input_tokens_seen": 169202768, "step": 78440 }, { "epoch": 12.79690048939641, "grad_norm": 0.3571002781391144, "learning_rate": 0.0003457623119851129, "loss": 0.0433, "num_input_tokens_seen": 169212560, "step": 78445 }, { "epoch": 12.797716150081566, "grad_norm": 0.600735604763031, "learning_rate": 0.00034569460499980233, "loss": 0.0416, "num_input_tokens_seen": 169224080, "step": 78450 }, { "epoch": 12.798531810766722, "grad_norm": 0.06275240331888199, "learning_rate": 0.00034562690114168626, "loss": 0.0701, "num_input_tokens_seen": 169235440, "step": 78455 }, { "epoch": 12.799347471451876, "grad_norm": 1.8597105741500854, "learning_rate": 0.000345559200412137, "loss": 0.0482, "num_input_tokens_seen": 169246128, "step": 78460 }, { "epoch": 12.800163132137031, "grad_norm": 0.013815524987876415, "learning_rate": 0.00034549150281252633, "loss": 0.0136, "num_input_tokens_seen": 169256624, "step": 78465 }, { "epoch": 12.800978792822185, "grad_norm": 0.04091867431998253, "learning_rate": 0.00034542380834422633, "loss": 0.006, "num_input_tokens_seen": 169267152, "step": 78470 }, { "epoch": 12.801794453507341, "grad_norm": 0.09216175973415375, "learning_rate": 0.00034535611700860913, "loss": 0.0073, "num_input_tokens_seen": 169277264, "step": 78475 }, { "epoch": 12.802610114192497, "grad_norm": 0.013031466864049435, "learning_rate": 0.00034528842880704626, "loss": 0.0992, "num_input_tokens_seen": 169288176, "step": 78480 }, { "epoch": 12.80342577487765, "grad_norm": 0.062341488897800446, "learning_rate": 0.0003452207437409097, "loss": 0.0053, "num_input_tokens_seen": 169299920, "step": 78485 }, { "epoch": 12.804241435562806, "grad_norm": 0.0295098964124918, "learning_rate": 0.00034515306181157106, "loss": 0.0124, "num_input_tokens_seen": 169309744, "step": 78490 }, { "epoch": 12.80505709624796, "grad_norm": 0.06527496129274368, "learning_rate": 0.00034508538302040225, "loss": 0.0729, "num_input_tokens_seen": 169321552, "step": 78495 }, { "epoch": 12.805872756933116, "grad_norm": 0.028638366609811783, "learning_rate": 0.00034501770736877443, "loss": 0.0335, "num_input_tokens_seen": 169332496, "step": 78500 }, { "epoch": 12.80668841761827, "grad_norm": 0.8673416376113892, "learning_rate": 0.0003449500348580596, "loss": 0.1428, "num_input_tokens_seen": 169342768, "step": 78505 }, { "epoch": 12.807504078303426, "grad_norm": 0.03205728903412819, "learning_rate": 0.0003448823654896288, "loss": 0.0268, "num_input_tokens_seen": 169353264, "step": 78510 }, { "epoch": 12.808319738988581, "grad_norm": 0.01410661544650793, "learning_rate": 0.00034481469926485385, "loss": 0.2618, "num_input_tokens_seen": 169362544, "step": 78515 }, { "epoch": 12.809135399673735, "grad_norm": 0.04922281578183174, "learning_rate": 0.00034474703618510565, "loss": 0.0311, "num_input_tokens_seen": 169373104, "step": 78520 }, { "epoch": 12.809951060358891, "grad_norm": 0.16217413544654846, "learning_rate": 0.00034467937625175596, "loss": 0.0458, "num_input_tokens_seen": 169383536, "step": 78525 }, { "epoch": 12.810766721044045, "grad_norm": 0.9458820819854736, "learning_rate": 0.00034461171946617553, "loss": 0.0813, "num_input_tokens_seen": 169393648, "step": 78530 }, { "epoch": 12.8115823817292, "grad_norm": 0.03830963000655174, "learning_rate": 0.0003445440658297357, "loss": 0.0319, "num_input_tokens_seen": 169403760, "step": 78535 }, { "epoch": 12.812398042414356, "grad_norm": 0.7959111928939819, "learning_rate": 0.0003444764153438079, "loss": 0.1092, "num_input_tokens_seen": 169414032, "step": 78540 }, { "epoch": 12.81321370309951, "grad_norm": 0.01901678740978241, "learning_rate": 0.0003444087680097625, "loss": 0.0824, "num_input_tokens_seen": 169423856, "step": 78545 }, { "epoch": 12.814029363784666, "grad_norm": 0.06620033830404282, "learning_rate": 0.00034434112382897107, "loss": 0.0389, "num_input_tokens_seen": 169434224, "step": 78550 }, { "epoch": 12.81484502446982, "grad_norm": 0.21150067448616028, "learning_rate": 0.000344273482802804, "loss": 0.0309, "num_input_tokens_seen": 169445392, "step": 78555 }, { "epoch": 12.815660685154976, "grad_norm": 0.011069171130657196, "learning_rate": 0.00034420584493263264, "loss": 0.0797, "num_input_tokens_seen": 169455888, "step": 78560 }, { "epoch": 12.81647634584013, "grad_norm": 0.05038369446992874, "learning_rate": 0.0003441382102198272, "loss": 0.0142, "num_input_tokens_seen": 169467152, "step": 78565 }, { "epoch": 12.817292006525285, "grad_norm": 0.014211660251021385, "learning_rate": 0.0003440705786657588, "loss": 0.0065, "num_input_tokens_seen": 169478960, "step": 78570 }, { "epoch": 12.818107667210441, "grad_norm": 0.005085754208266735, "learning_rate": 0.00034400295027179776, "loss": 0.0425, "num_input_tokens_seen": 169489904, "step": 78575 }, { "epoch": 12.818923327895595, "grad_norm": 0.16913136839866638, "learning_rate": 0.00034393532503931514, "loss": 0.0066, "num_input_tokens_seen": 169500816, "step": 78580 }, { "epoch": 12.81973898858075, "grad_norm": 0.012777515687048435, "learning_rate": 0.0003438677029696808, "loss": 0.0038, "num_input_tokens_seen": 169512368, "step": 78585 }, { "epoch": 12.820554649265905, "grad_norm": 0.014333324506878853, "learning_rate": 0.0003438000840642657, "loss": 0.0135, "num_input_tokens_seen": 169522288, "step": 78590 }, { "epoch": 12.82137030995106, "grad_norm": 0.004052395932376385, "learning_rate": 0.00034373246832444007, "loss": 0.017, "num_input_tokens_seen": 169532592, "step": 78595 }, { "epoch": 12.822185970636216, "grad_norm": 0.00997909251600504, "learning_rate": 0.00034366485575157413, "loss": 0.0067, "num_input_tokens_seen": 169544176, "step": 78600 }, { "epoch": 12.82300163132137, "grad_norm": 0.008722551167011261, "learning_rate": 0.00034359724634703827, "loss": 0.0307, "num_input_tokens_seen": 169555568, "step": 78605 }, { "epoch": 12.823817292006526, "grad_norm": 0.14296135306358337, "learning_rate": 0.0003435296401122027, "loss": 0.0557, "num_input_tokens_seen": 169565296, "step": 78610 }, { "epoch": 12.82463295269168, "grad_norm": 0.009762870147824287, "learning_rate": 0.0003434620370484372, "loss": 0.0465, "num_input_tokens_seen": 169575408, "step": 78615 }, { "epoch": 12.825448613376835, "grad_norm": 0.5977383852005005, "learning_rate": 0.0003433944371571124, "loss": 0.0297, "num_input_tokens_seen": 169587664, "step": 78620 }, { "epoch": 12.826264274061991, "grad_norm": 0.022896917536854744, "learning_rate": 0.00034332684043959777, "loss": 0.0044, "num_input_tokens_seen": 169597840, "step": 78625 }, { "epoch": 12.827079934747145, "grad_norm": 0.22046716511249542, "learning_rate": 0.00034325924689726376, "loss": 0.0075, "num_input_tokens_seen": 169609264, "step": 78630 }, { "epoch": 12.8278955954323, "grad_norm": 0.029716283082962036, "learning_rate": 0.00034319165653147964, "loss": 0.0096, "num_input_tokens_seen": 169619184, "step": 78635 }, { "epoch": 12.828711256117455, "grad_norm": 0.5353707671165466, "learning_rate": 0.00034312406934361553, "loss": 0.1013, "num_input_tokens_seen": 169629744, "step": 78640 }, { "epoch": 12.82952691680261, "grad_norm": 0.01486015971750021, "learning_rate": 0.0003430564853350414, "loss": 0.0756, "num_input_tokens_seen": 169640752, "step": 78645 }, { "epoch": 12.830342577487766, "grad_norm": 0.5186010003089905, "learning_rate": 0.0003429889045071265, "loss": 0.0255, "num_input_tokens_seen": 169650224, "step": 78650 }, { "epoch": 12.83115823817292, "grad_norm": 0.022123832255601883, "learning_rate": 0.0003429213268612408, "loss": 0.0178, "num_input_tokens_seen": 169660976, "step": 78655 }, { "epoch": 12.831973898858076, "grad_norm": 0.1695677936077118, "learning_rate": 0.0003428537523987535, "loss": 0.0083, "num_input_tokens_seen": 169671984, "step": 78660 }, { "epoch": 12.83278955954323, "grad_norm": 0.048943858593702316, "learning_rate": 0.0003427861811210345, "loss": 0.2455, "num_input_tokens_seen": 169681520, "step": 78665 }, { "epoch": 12.833605220228385, "grad_norm": 0.007220787461847067, "learning_rate": 0.0003427186130294527, "loss": 0.0162, "num_input_tokens_seen": 169692976, "step": 78670 }, { "epoch": 12.83442088091354, "grad_norm": 0.02041354402899742, "learning_rate": 0.00034265104812537805, "loss": 0.0082, "num_input_tokens_seen": 169704592, "step": 78675 }, { "epoch": 12.835236541598695, "grad_norm": 0.028711974620819092, "learning_rate": 0.0003425834864101792, "loss": 0.0123, "num_input_tokens_seen": 169714640, "step": 78680 }, { "epoch": 12.83605220228385, "grad_norm": 0.011021840386092663, "learning_rate": 0.000342515927885226, "loss": 0.1286, "num_input_tokens_seen": 169726000, "step": 78685 }, { "epoch": 12.836867862969005, "grad_norm": 0.5748363733291626, "learning_rate": 0.000342448372551887, "loss": 0.0781, "num_input_tokens_seen": 169736144, "step": 78690 }, { "epoch": 12.83768352365416, "grad_norm": 0.01698858104646206, "learning_rate": 0.0003423808204115318, "loss": 0.0858, "num_input_tokens_seen": 169747504, "step": 78695 }, { "epoch": 12.838499184339314, "grad_norm": 0.6056413650512695, "learning_rate": 0.00034231327146552916, "loss": 0.0808, "num_input_tokens_seen": 169757360, "step": 78700 }, { "epoch": 12.83931484502447, "grad_norm": 1.1371183395385742, "learning_rate": 0.00034224572571524823, "loss": 0.0873, "num_input_tokens_seen": 169768272, "step": 78705 }, { "epoch": 12.840130505709626, "grad_norm": 0.040265634655952454, "learning_rate": 0.00034217818316205757, "loss": 0.0079, "num_input_tokens_seen": 169779696, "step": 78710 }, { "epoch": 12.84094616639478, "grad_norm": 0.036794502288103104, "learning_rate": 0.0003421106438073265, "loss": 0.0167, "num_input_tokens_seen": 169790288, "step": 78715 }, { "epoch": 12.841761827079935, "grad_norm": 0.053528182208538055, "learning_rate": 0.0003420431076524233, "loss": 0.0676, "num_input_tokens_seen": 169801040, "step": 78720 }, { "epoch": 12.84257748776509, "grad_norm": 0.023284804075956345, "learning_rate": 0.0003419755746987171, "loss": 0.0096, "num_input_tokens_seen": 169811216, "step": 78725 }, { "epoch": 12.843393148450245, "grad_norm": 0.001369150006212294, "learning_rate": 0.0003419080449475761, "loss": 0.0075, "num_input_tokens_seen": 169821680, "step": 78730 }, { "epoch": 12.844208809135399, "grad_norm": 0.14650027453899384, "learning_rate": 0.0003418405184003693, "loss": 0.1093, "num_input_tokens_seen": 169832656, "step": 78735 }, { "epoch": 12.845024469820554, "grad_norm": 0.5554248690605164, "learning_rate": 0.000341772995058465, "loss": 0.1009, "num_input_tokens_seen": 169842544, "step": 78740 }, { "epoch": 12.84584013050571, "grad_norm": 0.034579500555992126, "learning_rate": 0.0003417054749232316, "loss": 0.0031, "num_input_tokens_seen": 169854384, "step": 78745 }, { "epoch": 12.846655791190864, "grad_norm": 0.07825126498937607, "learning_rate": 0.0003416379579960377, "loss": 0.006, "num_input_tokens_seen": 169864944, "step": 78750 }, { "epoch": 12.84747145187602, "grad_norm": 0.04348042979836464, "learning_rate": 0.00034157044427825137, "loss": 0.0106, "num_input_tokens_seen": 169875568, "step": 78755 }, { "epoch": 12.848287112561174, "grad_norm": 0.010604463517665863, "learning_rate": 0.000341502933771241, "loss": 0.0961, "num_input_tokens_seen": 169887344, "step": 78760 }, { "epoch": 12.84910277324633, "grad_norm": 0.004719897639006376, "learning_rate": 0.00034143542647637474, "loss": 0.1644, "num_input_tokens_seen": 169898000, "step": 78765 }, { "epoch": 12.849918433931485, "grad_norm": 1.0269463062286377, "learning_rate": 0.00034136792239502074, "loss": 0.0427, "num_input_tokens_seen": 169908656, "step": 78770 }, { "epoch": 12.850734094616639, "grad_norm": 0.11115413904190063, "learning_rate": 0.000341300421528547, "loss": 0.118, "num_input_tokens_seen": 169919248, "step": 78775 }, { "epoch": 12.851549755301795, "grad_norm": 0.1936875879764557, "learning_rate": 0.0003412329238783216, "loss": 0.1018, "num_input_tokens_seen": 169930768, "step": 78780 }, { "epoch": 12.852365415986949, "grad_norm": 0.04439888149499893, "learning_rate": 0.00034116542944571227, "loss": 0.0398, "num_input_tokens_seen": 169941680, "step": 78785 }, { "epoch": 12.853181076672104, "grad_norm": 0.015593971125781536, "learning_rate": 0.00034109793823208724, "loss": 0.0145, "num_input_tokens_seen": 169953008, "step": 78790 }, { "epoch": 12.85399673735726, "grad_norm": 0.008271503262221813, "learning_rate": 0.0003410304502388139, "loss": 0.0107, "num_input_tokens_seen": 169963056, "step": 78795 }, { "epoch": 12.854812398042414, "grad_norm": 0.032415036112070084, "learning_rate": 0.0003409629654672602, "loss": 0.0358, "num_input_tokens_seen": 169974896, "step": 78800 }, { "epoch": 12.85562805872757, "grad_norm": 0.5965150594711304, "learning_rate": 0.0003408954839187938, "loss": 0.0193, "num_input_tokens_seen": 169985808, "step": 78805 }, { "epoch": 12.856443719412724, "grad_norm": 1.1522592306137085, "learning_rate": 0.0003408280055947823, "loss": 0.0308, "num_input_tokens_seen": 169995824, "step": 78810 }, { "epoch": 12.85725938009788, "grad_norm": 0.013181619346141815, "learning_rate": 0.00034076053049659295, "loss": 0.0033, "num_input_tokens_seen": 170006480, "step": 78815 }, { "epoch": 12.858075040783035, "grad_norm": 0.00971638597548008, "learning_rate": 0.00034069305862559373, "loss": 0.0121, "num_input_tokens_seen": 170018128, "step": 78820 }, { "epoch": 12.858890701468189, "grad_norm": 0.014647018164396286, "learning_rate": 0.00034062558998315163, "loss": 0.0225, "num_input_tokens_seen": 170028432, "step": 78825 }, { "epoch": 12.859706362153345, "grad_norm": 0.004390793386846781, "learning_rate": 0.0003405581245706342, "loss": 0.0158, "num_input_tokens_seen": 170039824, "step": 78830 }, { "epoch": 12.860522022838499, "grad_norm": 0.006895644124597311, "learning_rate": 0.0003404906623894085, "loss": 0.005, "num_input_tokens_seen": 170051408, "step": 78835 }, { "epoch": 12.861337683523654, "grad_norm": 0.004343315493315458, "learning_rate": 0.0003404232034408421, "loss": 0.0062, "num_input_tokens_seen": 170060880, "step": 78840 }, { "epoch": 12.86215334420881, "grad_norm": 0.19200146198272705, "learning_rate": 0.00034035574772630175, "loss": 0.0135, "num_input_tokens_seen": 170072752, "step": 78845 }, { "epoch": 12.862969004893964, "grad_norm": 0.02898036688566208, "learning_rate": 0.00034028829524715464, "loss": 0.0306, "num_input_tokens_seen": 170081424, "step": 78850 }, { "epoch": 12.86378466557912, "grad_norm": 0.23168791830539703, "learning_rate": 0.000340220846004768, "loss": 0.0201, "num_input_tokens_seen": 170091920, "step": 78855 }, { "epoch": 12.864600326264274, "grad_norm": 0.4599772095680237, "learning_rate": 0.00034015340000050846, "loss": 0.049, "num_input_tokens_seen": 170101680, "step": 78860 }, { "epoch": 12.86541598694943, "grad_norm": 0.009353806264698505, "learning_rate": 0.00034008595723574326, "loss": 0.053, "num_input_tokens_seen": 170112880, "step": 78865 }, { "epoch": 12.866231647634583, "grad_norm": 0.0034097321331501007, "learning_rate": 0.00034001851771183877, "loss": 0.0096, "num_input_tokens_seen": 170122928, "step": 78870 }, { "epoch": 12.867047308319739, "grad_norm": 0.005829621106386185, "learning_rate": 0.00033995108143016216, "loss": 0.0205, "num_input_tokens_seen": 170134864, "step": 78875 }, { "epoch": 12.867862969004895, "grad_norm": 0.04024157300591469, "learning_rate": 0.0003398836483920798, "loss": 0.0087, "num_input_tokens_seen": 170145840, "step": 78880 }, { "epoch": 12.868678629690049, "grad_norm": 0.006794747896492481, "learning_rate": 0.0003398162185989586, "loss": 0.0201, "num_input_tokens_seen": 170156976, "step": 78885 }, { "epoch": 12.869494290375204, "grad_norm": 0.014841032214462757, "learning_rate": 0.0003397487920521647, "loss": 0.2228, "num_input_tokens_seen": 170169232, "step": 78890 }, { "epoch": 12.870309951060358, "grad_norm": 0.007755957078188658, "learning_rate": 0.00033968136875306496, "loss": 0.0279, "num_input_tokens_seen": 170179984, "step": 78895 }, { "epoch": 12.871125611745514, "grad_norm": 0.049557603895664215, "learning_rate": 0.0003396139487030256, "loss": 0.0384, "num_input_tokens_seen": 170190640, "step": 78900 }, { "epoch": 12.87194127243067, "grad_norm": 0.41948795318603516, "learning_rate": 0.00033954653190341306, "loss": 0.1503, "num_input_tokens_seen": 170201296, "step": 78905 }, { "epoch": 12.872756933115824, "grad_norm": 0.31551995873451233, "learning_rate": 0.0003394791183555936, "loss": 0.1664, "num_input_tokens_seen": 170211760, "step": 78910 }, { "epoch": 12.87357259380098, "grad_norm": 0.10964018106460571, "learning_rate": 0.0003394117080609335, "loss": 0.0106, "num_input_tokens_seen": 170223024, "step": 78915 }, { "epoch": 12.874388254486133, "grad_norm": 0.4047240614891052, "learning_rate": 0.0003393443010207988, "loss": 0.0552, "num_input_tokens_seen": 170234608, "step": 78920 }, { "epoch": 12.875203915171289, "grad_norm": 0.00837387703359127, "learning_rate": 0.0003392768972365556, "loss": 0.0353, "num_input_tokens_seen": 170245360, "step": 78925 }, { "epoch": 12.876019575856443, "grad_norm": 0.0154897291213274, "learning_rate": 0.00033920949670956994, "loss": 0.0154, "num_input_tokens_seen": 170255888, "step": 78930 }, { "epoch": 12.876835236541599, "grad_norm": 0.003944879397749901, "learning_rate": 0.000339142099441208, "loss": 0.172, "num_input_tokens_seen": 170265712, "step": 78935 }, { "epoch": 12.877650897226754, "grad_norm": 0.008585168048739433, "learning_rate": 0.0003390747054328353, "loss": 0.0885, "num_input_tokens_seen": 170276208, "step": 78940 }, { "epoch": 12.878466557911908, "grad_norm": 0.12104292958974838, "learning_rate": 0.00033900731468581804, "loss": 0.1016, "num_input_tokens_seen": 170286704, "step": 78945 }, { "epoch": 12.879282218597064, "grad_norm": 0.18715766072273254, "learning_rate": 0.0003389399272015215, "loss": 0.1196, "num_input_tokens_seen": 170297808, "step": 78950 }, { "epoch": 12.880097879282218, "grad_norm": 0.13791930675506592, "learning_rate": 0.0003388725429813117, "loss": 0.0202, "num_input_tokens_seen": 170309520, "step": 78955 }, { "epoch": 12.880913539967374, "grad_norm": 0.01562865637242794, "learning_rate": 0.0003388051620265544, "loss": 0.0097, "num_input_tokens_seen": 170319760, "step": 78960 }, { "epoch": 12.88172920065253, "grad_norm": 0.4073830246925354, "learning_rate": 0.0003387377843386148, "loss": 0.1465, "num_input_tokens_seen": 170330608, "step": 78965 }, { "epoch": 12.882544861337683, "grad_norm": 0.10770117491483688, "learning_rate": 0.00033867040991885885, "loss": 0.0988, "num_input_tokens_seen": 170342384, "step": 78970 }, { "epoch": 12.883360522022839, "grad_norm": 0.7565659880638123, "learning_rate": 0.0003386030387686514, "loss": 0.0234, "num_input_tokens_seen": 170354096, "step": 78975 }, { "epoch": 12.884176182707993, "grad_norm": 0.00691474974155426, "learning_rate": 0.0003385356708893584, "loss": 0.0097, "num_input_tokens_seen": 170365232, "step": 78980 }, { "epoch": 12.884991843393149, "grad_norm": 0.00527843926101923, "learning_rate": 0.0003384683062823446, "loss": 0.1797, "num_input_tokens_seen": 170376400, "step": 78985 }, { "epoch": 12.885807504078304, "grad_norm": 0.3936546742916107, "learning_rate": 0.00033840094494897566, "loss": 0.0639, "num_input_tokens_seen": 170386608, "step": 78990 }, { "epoch": 12.886623164763458, "grad_norm": 0.12531837821006775, "learning_rate": 0.0003383335868906164, "loss": 0.0343, "num_input_tokens_seen": 170396912, "step": 78995 }, { "epoch": 12.887438825448614, "grad_norm": 0.01980554684996605, "learning_rate": 0.0003382662321086324, "loss": 0.0052, "num_input_tokens_seen": 170408144, "step": 79000 }, { "epoch": 12.888254486133768, "grad_norm": 0.1694885939359665, "learning_rate": 0.0003381988806043881, "loss": 0.0386, "num_input_tokens_seen": 170419216, "step": 79005 }, { "epoch": 12.889070146818923, "grad_norm": 0.10888317227363586, "learning_rate": 0.0003381315323792489, "loss": 0.0123, "num_input_tokens_seen": 170429520, "step": 79010 }, { "epoch": 12.88988580750408, "grad_norm": 0.014914930798113346, "learning_rate": 0.00033806418743457937, "loss": 0.0085, "num_input_tokens_seen": 170440240, "step": 79015 }, { "epoch": 12.890701468189233, "grad_norm": 0.0625443160533905, "learning_rate": 0.0003379968457717447, "loss": 0.0148, "num_input_tokens_seen": 170451120, "step": 79020 }, { "epoch": 12.891517128874389, "grad_norm": 0.01875525340437889, "learning_rate": 0.00033792950739210934, "loss": 0.0065, "num_input_tokens_seen": 170461936, "step": 79025 }, { "epoch": 12.892332789559543, "grad_norm": 0.2683972120285034, "learning_rate": 0.0003378621722970382, "loss": 0.0153, "num_input_tokens_seen": 170473040, "step": 79030 }, { "epoch": 12.893148450244698, "grad_norm": 0.03685545548796654, "learning_rate": 0.00033779484048789574, "loss": 0.0324, "num_input_tokens_seen": 170484048, "step": 79035 }, { "epoch": 12.893964110929852, "grad_norm": 0.008368806913495064, "learning_rate": 0.0003377275119660467, "loss": 0.0316, "num_input_tokens_seen": 170495536, "step": 79040 }, { "epoch": 12.894779771615008, "grad_norm": 0.4539031386375427, "learning_rate": 0.00033766018673285535, "loss": 0.0189, "num_input_tokens_seen": 170506928, "step": 79045 }, { "epoch": 12.895595432300164, "grad_norm": 0.06189935654401779, "learning_rate": 0.0003375928647896863, "loss": 0.0342, "num_input_tokens_seen": 170517488, "step": 79050 }, { "epoch": 12.896411092985318, "grad_norm": 0.007135889492928982, "learning_rate": 0.000337525546137904, "loss": 0.0966, "num_input_tokens_seen": 170527824, "step": 79055 }, { "epoch": 12.897226753670473, "grad_norm": 0.3545204699039459, "learning_rate": 0.0003374582307788725, "loss": 0.0082, "num_input_tokens_seen": 170537328, "step": 79060 }, { "epoch": 12.898042414355627, "grad_norm": 0.014589983969926834, "learning_rate": 0.0003373909187139562, "loss": 0.1158, "num_input_tokens_seen": 170547056, "step": 79065 }, { "epoch": 12.898858075040783, "grad_norm": 0.7361128926277161, "learning_rate": 0.0003373236099445191, "loss": 0.0375, "num_input_tokens_seen": 170558800, "step": 79070 }, { "epoch": 12.899673735725939, "grad_norm": 0.16518612205982208, "learning_rate": 0.00033725630447192556, "loss": 0.0365, "num_input_tokens_seen": 170569968, "step": 79075 }, { "epoch": 12.900489396411093, "grad_norm": 0.004852434154599905, "learning_rate": 0.0003371890022975394, "loss": 0.0067, "num_input_tokens_seen": 170580336, "step": 79080 }, { "epoch": 12.901305057096248, "grad_norm": 0.00913508702069521, "learning_rate": 0.0003371217034227247, "loss": 0.0162, "num_input_tokens_seen": 170592976, "step": 79085 }, { "epoch": 12.902120717781402, "grad_norm": 0.005937790032476187, "learning_rate": 0.0003370544078488453, "loss": 0.1059, "num_input_tokens_seen": 170604208, "step": 79090 }, { "epoch": 12.902936378466558, "grad_norm": 0.5919108390808105, "learning_rate": 0.000336987115577265, "loss": 0.0672, "num_input_tokens_seen": 170616112, "step": 79095 }, { "epoch": 12.903752039151712, "grad_norm": 0.5019410252571106, "learning_rate": 0.0003369198266093475, "loss": 0.1659, "num_input_tokens_seen": 170626256, "step": 79100 }, { "epoch": 12.904567699836868, "grad_norm": 0.006555736996233463, "learning_rate": 0.00033685254094645685, "loss": 0.0041, "num_input_tokens_seen": 170636176, "step": 79105 }, { "epoch": 12.905383360522023, "grad_norm": 0.03407563641667366, "learning_rate": 0.0003367852585899562, "loss": 0.0289, "num_input_tokens_seen": 170646992, "step": 79110 }, { "epoch": 12.906199021207177, "grad_norm": 0.08035101741552353, "learning_rate": 0.00033671797954120953, "loss": 0.034, "num_input_tokens_seen": 170658448, "step": 79115 }, { "epoch": 12.907014681892333, "grad_norm": 0.015437953174114227, "learning_rate": 0.0003366507038015799, "loss": 0.1114, "num_input_tokens_seen": 170669520, "step": 79120 }, { "epoch": 12.907830342577487, "grad_norm": 0.13704386353492737, "learning_rate": 0.0003365834313724312, "loss": 0.0119, "num_input_tokens_seen": 170679792, "step": 79125 }, { "epoch": 12.908646003262643, "grad_norm": 0.01451383251696825, "learning_rate": 0.00033651616225512636, "loss": 0.007, "num_input_tokens_seen": 170692144, "step": 79130 }, { "epoch": 12.909461663947798, "grad_norm": 0.010049179196357727, "learning_rate": 0.0003364488964510292, "loss": 0.0339, "num_input_tokens_seen": 170703984, "step": 79135 }, { "epoch": 12.910277324632952, "grad_norm": 0.009471967816352844, "learning_rate": 0.00033638163396150234, "loss": 0.0165, "num_input_tokens_seen": 170715536, "step": 79140 }, { "epoch": 12.911092985318108, "grad_norm": 0.015848377719521523, "learning_rate": 0.0003363143747879094, "loss": 0.0109, "num_input_tokens_seen": 170726640, "step": 79145 }, { "epoch": 12.911908646003262, "grad_norm": 0.008597053587436676, "learning_rate": 0.00033624711893161317, "loss": 0.0345, "num_input_tokens_seen": 170737040, "step": 79150 }, { "epoch": 12.912724306688418, "grad_norm": 0.005537273827940226, "learning_rate": 0.000336179866393977, "loss": 0.0678, "num_input_tokens_seen": 170747312, "step": 79155 }, { "epoch": 12.913539967373573, "grad_norm": 0.5180066823959351, "learning_rate": 0.0003361126171763634, "loss": 0.0995, "num_input_tokens_seen": 170758064, "step": 79160 }, { "epoch": 12.914355628058727, "grad_norm": 0.04791230708360672, "learning_rate": 0.0003360453712801358, "loss": 0.0226, "num_input_tokens_seen": 170768912, "step": 79165 }, { "epoch": 12.915171288743883, "grad_norm": 0.27734121680259705, "learning_rate": 0.00033597812870665657, "loss": 0.0325, "num_input_tokens_seen": 170779536, "step": 79170 }, { "epoch": 12.915986949429037, "grad_norm": 0.009608741849660873, "learning_rate": 0.00033591088945728856, "loss": 0.0069, "num_input_tokens_seen": 170790704, "step": 79175 }, { "epoch": 12.916802610114193, "grad_norm": 0.018391478806734085, "learning_rate": 0.0003358436535333947, "loss": 0.012, "num_input_tokens_seen": 170801232, "step": 79180 }, { "epoch": 12.917618270799348, "grad_norm": 1.1276603937149048, "learning_rate": 0.0003357764209363373, "loss": 0.0717, "num_input_tokens_seen": 170812080, "step": 79185 }, { "epoch": 12.918433931484502, "grad_norm": 0.004836163017898798, "learning_rate": 0.00033570919166747926, "loss": 0.0411, "num_input_tokens_seen": 170823152, "step": 79190 }, { "epoch": 12.919249592169658, "grad_norm": 0.013786256313323975, "learning_rate": 0.0003356419657281827, "loss": 0.0146, "num_input_tokens_seen": 170833872, "step": 79195 }, { "epoch": 12.920065252854812, "grad_norm": 0.9194090366363525, "learning_rate": 0.0003355747431198104, "loss": 0.0901, "num_input_tokens_seen": 170845200, "step": 79200 }, { "epoch": 12.920880913539968, "grad_norm": 1.007218837738037, "learning_rate": 0.0003355075238437243, "loss": 0.1285, "num_input_tokens_seen": 170854928, "step": 79205 }, { "epoch": 12.921696574225122, "grad_norm": 0.37598174810409546, "learning_rate": 0.0003354403079012871, "loss": 0.021, "num_input_tokens_seen": 170865584, "step": 79210 }, { "epoch": 12.922512234910277, "grad_norm": 0.003041264833882451, "learning_rate": 0.0003353730952938606, "loss": 0.0307, "num_input_tokens_seen": 170876560, "step": 79215 }, { "epoch": 12.923327895595433, "grad_norm": 0.04956516623497009, "learning_rate": 0.0003353058860228073, "loss": 0.1322, "num_input_tokens_seen": 170888080, "step": 79220 }, { "epoch": 12.924143556280587, "grad_norm": 0.008220975287258625, "learning_rate": 0.0003352386800894891, "loss": 0.01, "num_input_tokens_seen": 170899696, "step": 79225 }, { "epoch": 12.924959216965743, "grad_norm": 0.25454649329185486, "learning_rate": 0.0003351714774952681, "loss": 0.0137, "num_input_tokens_seen": 170909968, "step": 79230 }, { "epoch": 12.925774877650896, "grad_norm": 0.017737364396452904, "learning_rate": 0.00033510427824150625, "loss": 0.0317, "num_input_tokens_seen": 170920304, "step": 79235 }, { "epoch": 12.926590538336052, "grad_norm": 0.0035439101047813892, "learning_rate": 0.0003350370823295653, "loss": 0.0042, "num_input_tokens_seen": 170930960, "step": 79240 }, { "epoch": 12.927406199021208, "grad_norm": 0.5477140545845032, "learning_rate": 0.0003349698897608071, "loss": 0.0874, "num_input_tokens_seen": 170941648, "step": 79245 }, { "epoch": 12.928221859706362, "grad_norm": 0.025673111900687218, "learning_rate": 0.00033490270053659367, "loss": 0.0246, "num_input_tokens_seen": 170953168, "step": 79250 }, { "epoch": 12.929037520391518, "grad_norm": 0.057056862860918045, "learning_rate": 0.0003348355146582862, "loss": 0.0035, "num_input_tokens_seen": 170964336, "step": 79255 }, { "epoch": 12.929853181076671, "grad_norm": 0.11867150664329529, "learning_rate": 0.00033476833212724676, "loss": 0.0355, "num_input_tokens_seen": 170974960, "step": 79260 }, { "epoch": 12.930668841761827, "grad_norm": 0.0031172670423984528, "learning_rate": 0.0003347011529448365, "loss": 0.0126, "num_input_tokens_seen": 170986064, "step": 79265 }, { "epoch": 12.931484502446983, "grad_norm": 0.012266564182937145, "learning_rate": 0.00033463397711241727, "loss": 0.0031, "num_input_tokens_seen": 170996464, "step": 79270 }, { "epoch": 12.932300163132137, "grad_norm": 0.05255286023020744, "learning_rate": 0.00033456680463135006, "loss": 0.0932, "num_input_tokens_seen": 171007216, "step": 79275 }, { "epoch": 12.933115823817293, "grad_norm": 0.012688565999269485, "learning_rate": 0.00033449963550299646, "loss": 0.2102, "num_input_tokens_seen": 171016720, "step": 79280 }, { "epoch": 12.933931484502446, "grad_norm": 0.08412957191467285, "learning_rate": 0.00033443246972871785, "loss": 0.0176, "num_input_tokens_seen": 171028144, "step": 79285 }, { "epoch": 12.934747145187602, "grad_norm": 0.6191343069076538, "learning_rate": 0.000334365307309875, "loss": 0.0617, "num_input_tokens_seen": 171039152, "step": 79290 }, { "epoch": 12.935562805872756, "grad_norm": 0.03664500638842583, "learning_rate": 0.00033429814824782967, "loss": 0.0039, "num_input_tokens_seen": 171048784, "step": 79295 }, { "epoch": 12.936378466557912, "grad_norm": 0.1302722841501236, "learning_rate": 0.0003342309925439423, "loss": 0.0676, "num_input_tokens_seen": 171059440, "step": 79300 }, { "epoch": 12.937194127243067, "grad_norm": 0.07879389077425003, "learning_rate": 0.0003341638401995744, "loss": 0.0094, "num_input_tokens_seen": 171071152, "step": 79305 }, { "epoch": 12.938009787928221, "grad_norm": 0.22878162562847137, "learning_rate": 0.0003340966912160864, "loss": 0.0311, "num_input_tokens_seen": 171082192, "step": 79310 }, { "epoch": 12.938825448613377, "grad_norm": 0.060266438871622086, "learning_rate": 0.00033402954559483966, "loss": 0.0752, "num_input_tokens_seen": 171093264, "step": 79315 }, { "epoch": 12.939641109298531, "grad_norm": 0.02961350604891777, "learning_rate": 0.0003339624033371945, "loss": 0.0166, "num_input_tokens_seen": 171104080, "step": 79320 }, { "epoch": 12.940456769983687, "grad_norm": 0.04618978872895241, "learning_rate": 0.00033389526444451215, "loss": 0.0043, "num_input_tokens_seen": 171114800, "step": 79325 }, { "epoch": 12.941272430668842, "grad_norm": 0.07385611534118652, "learning_rate": 0.00033382812891815267, "loss": 0.1389, "num_input_tokens_seen": 171126160, "step": 79330 }, { "epoch": 12.942088091353996, "grad_norm": 0.09406785666942596, "learning_rate": 0.00033376099675947726, "loss": 0.0365, "num_input_tokens_seen": 171136048, "step": 79335 }, { "epoch": 12.942903752039152, "grad_norm": 0.007662178482860327, "learning_rate": 0.0003336938679698459, "loss": 0.0324, "num_input_tokens_seen": 171147120, "step": 79340 }, { "epoch": 12.943719412724306, "grad_norm": 0.44092661142349243, "learning_rate": 0.0003336267425506194, "loss": 0.0303, "num_input_tokens_seen": 171156944, "step": 79345 }, { "epoch": 12.944535073409462, "grad_norm": 0.00914795696735382, "learning_rate": 0.0003335596205031579, "loss": 0.1013, "num_input_tokens_seen": 171167728, "step": 79350 }, { "epoch": 12.945350734094617, "grad_norm": 0.004262133035808802, "learning_rate": 0.00033349250182882205, "loss": 0.0026, "num_input_tokens_seen": 171179216, "step": 79355 }, { "epoch": 12.946166394779771, "grad_norm": 0.018488585948944092, "learning_rate": 0.0003334253865289717, "loss": 0.1226, "num_input_tokens_seen": 171190640, "step": 79360 }, { "epoch": 12.946982055464927, "grad_norm": 0.00538907153531909, "learning_rate": 0.00033335827460496725, "loss": 0.0043, "num_input_tokens_seen": 171202064, "step": 79365 }, { "epoch": 12.947797716150081, "grad_norm": 0.01199566200375557, "learning_rate": 0.0003332911660581688, "loss": 0.1162, "num_input_tokens_seen": 171212336, "step": 79370 }, { "epoch": 12.948613376835237, "grad_norm": 0.10808078199625015, "learning_rate": 0.0003332240608899363, "loss": 0.0197, "num_input_tokens_seen": 171223248, "step": 79375 }, { "epoch": 12.949429037520392, "grad_norm": 0.0030730434227734804, "learning_rate": 0.0003331569591016298, "loss": 0.0241, "num_input_tokens_seen": 171234992, "step": 79380 }, { "epoch": 12.950244698205546, "grad_norm": 0.01343297678977251, "learning_rate": 0.0003330898606946091, "loss": 0.0142, "num_input_tokens_seen": 171247312, "step": 79385 }, { "epoch": 12.951060358890702, "grad_norm": 0.46936339139938354, "learning_rate": 0.0003330227656702342, "loss": 0.196, "num_input_tokens_seen": 171257840, "step": 79390 }, { "epoch": 12.951876019575856, "grad_norm": 0.004872238729149103, "learning_rate": 0.00033295567402986476, "loss": 0.0044, "num_input_tokens_seen": 171267856, "step": 79395 }, { "epoch": 12.952691680261012, "grad_norm": 0.048237286508083344, "learning_rate": 0.0003328885857748605, "loss": 0.1432, "num_input_tokens_seen": 171278064, "step": 79400 }, { "epoch": 12.953507340946166, "grad_norm": 0.3840959072113037, "learning_rate": 0.00033282150090658115, "loss": 0.0238, "num_input_tokens_seen": 171289232, "step": 79405 }, { "epoch": 12.954323001631321, "grad_norm": 0.8429303765296936, "learning_rate": 0.0003327544194263861, "loss": 0.0837, "num_input_tokens_seen": 171300880, "step": 79410 }, { "epoch": 12.955138662316477, "grad_norm": 0.3362564146518707, "learning_rate": 0.0003326873413356347, "loss": 0.0209, "num_input_tokens_seen": 171313168, "step": 79415 }, { "epoch": 12.955954323001631, "grad_norm": 0.01283212099224329, "learning_rate": 0.0003326202666356869, "loss": 0.0049, "num_input_tokens_seen": 171323440, "step": 79420 }, { "epoch": 12.956769983686787, "grad_norm": 0.0036190415266901255, "learning_rate": 0.0003325531953279015, "loss": 0.0088, "num_input_tokens_seen": 171334640, "step": 79425 }, { "epoch": 12.95758564437194, "grad_norm": 0.9581454992294312, "learning_rate": 0.0003324861274136382, "loss": 0.0511, "num_input_tokens_seen": 171344944, "step": 79430 }, { "epoch": 12.958401305057096, "grad_norm": 0.07185550034046173, "learning_rate": 0.0003324190628942558, "loss": 0.1366, "num_input_tokens_seen": 171355792, "step": 79435 }, { "epoch": 12.959216965742252, "grad_norm": 0.44730597734451294, "learning_rate": 0.000332352001771114, "loss": 0.0542, "num_input_tokens_seen": 171365680, "step": 79440 }, { "epoch": 12.960032626427406, "grad_norm": 0.10291837900876999, "learning_rate": 0.0003322849440455713, "loss": 0.0863, "num_input_tokens_seen": 171377200, "step": 79445 }, { "epoch": 12.960848287112562, "grad_norm": 0.5152474045753479, "learning_rate": 0.0003322178897189871, "loss": 0.0207, "num_input_tokens_seen": 171388752, "step": 79450 }, { "epoch": 12.961663947797716, "grad_norm": 0.04963094741106033, "learning_rate": 0.00033215083879272015, "loss": 0.0083, "num_input_tokens_seen": 171399408, "step": 79455 }, { "epoch": 12.962479608482871, "grad_norm": 0.0158648993819952, "learning_rate": 0.00033208379126812947, "loss": 0.0094, "num_input_tokens_seen": 171409104, "step": 79460 }, { "epoch": 12.963295269168025, "grad_norm": 0.01095091924071312, "learning_rate": 0.0003320167471465736, "loss": 0.1254, "num_input_tokens_seen": 171420528, "step": 79465 }, { "epoch": 12.964110929853181, "grad_norm": 0.007327871862798929, "learning_rate": 0.0003319497064294117, "loss": 0.1179, "num_input_tokens_seen": 171432720, "step": 79470 }, { "epoch": 12.964926590538337, "grad_norm": 0.01704256609082222, "learning_rate": 0.0003318826691180019, "loss": 0.0094, "num_input_tokens_seen": 171444816, "step": 79475 }, { "epoch": 12.96574225122349, "grad_norm": 0.6040958762168884, "learning_rate": 0.00033181563521370337, "loss": 0.107, "num_input_tokens_seen": 171455728, "step": 79480 }, { "epoch": 12.966557911908646, "grad_norm": 0.007908456027507782, "learning_rate": 0.0003317486047178742, "loss": 0.0568, "num_input_tokens_seen": 171467408, "step": 79485 }, { "epoch": 12.9673735725938, "grad_norm": 0.07975002378225327, "learning_rate": 0.00033168157763187285, "loss": 0.0171, "num_input_tokens_seen": 171478448, "step": 79490 }, { "epoch": 12.968189233278956, "grad_norm": 0.27502283453941345, "learning_rate": 0.0003316145539570581, "loss": 0.0761, "num_input_tokens_seen": 171487856, "step": 79495 }, { "epoch": 12.969004893964112, "grad_norm": 0.02959425374865532, "learning_rate": 0.00033154753369478787, "loss": 0.2078, "num_input_tokens_seen": 171499056, "step": 79500 }, { "epoch": 12.969820554649266, "grad_norm": 0.009458215907216072, "learning_rate": 0.00033148051684642074, "loss": 0.0089, "num_input_tokens_seen": 171510320, "step": 79505 }, { "epoch": 12.970636215334421, "grad_norm": 0.015142896212637424, "learning_rate": 0.00033141350341331447, "loss": 0.0384, "num_input_tokens_seen": 171521808, "step": 79510 }, { "epoch": 12.971451876019575, "grad_norm": 0.8065319657325745, "learning_rate": 0.00033134649339682773, "loss": 0.0636, "num_input_tokens_seen": 171531216, "step": 79515 }, { "epoch": 12.97226753670473, "grad_norm": 0.05723694711923599, "learning_rate": 0.000331279486798318, "loss": 0.0874, "num_input_tokens_seen": 171542352, "step": 79520 }, { "epoch": 12.973083197389887, "grad_norm": 0.5046406984329224, "learning_rate": 0.0003312124836191437, "loss": 0.1469, "num_input_tokens_seen": 171553104, "step": 79525 }, { "epoch": 12.97389885807504, "grad_norm": 0.3942175507545471, "learning_rate": 0.00033114548386066234, "loss": 0.0452, "num_input_tokens_seen": 171564752, "step": 79530 }, { "epoch": 12.974714518760196, "grad_norm": 0.08305969089269638, "learning_rate": 0.00033107848752423203, "loss": 0.0256, "num_input_tokens_seen": 171575568, "step": 79535 }, { "epoch": 12.97553017944535, "grad_norm": 0.021487191319465637, "learning_rate": 0.0003310114946112105, "loss": 0.0232, "num_input_tokens_seen": 171586512, "step": 79540 }, { "epoch": 12.976345840130506, "grad_norm": 0.05888400971889496, "learning_rate": 0.00033094450512295535, "loss": 0.0717, "num_input_tokens_seen": 171597552, "step": 79545 }, { "epoch": 12.977161500815662, "grad_norm": 0.042984455823898315, "learning_rate": 0.00033087751906082436, "loss": 0.1781, "num_input_tokens_seen": 171608528, "step": 79550 }, { "epoch": 12.977977161500815, "grad_norm": 0.06600980460643768, "learning_rate": 0.000330810536426175, "loss": 0.0238, "num_input_tokens_seen": 171620176, "step": 79555 }, { "epoch": 12.978792822185971, "grad_norm": 0.10764613747596741, "learning_rate": 0.0003307435572203645, "loss": 0.0137, "num_input_tokens_seen": 171630896, "step": 79560 }, { "epoch": 12.979608482871125, "grad_norm": 0.01029970869421959, "learning_rate": 0.00033067658144475087, "loss": 0.1448, "num_input_tokens_seen": 171641360, "step": 79565 }, { "epoch": 12.98042414355628, "grad_norm": 0.005701141897588968, "learning_rate": 0.0003306096091006909, "loss": 0.0161, "num_input_tokens_seen": 171651696, "step": 79570 }, { "epoch": 12.981239804241435, "grad_norm": 0.08851869404315948, "learning_rate": 0.0003305426401895423, "loss": 0.071, "num_input_tokens_seen": 171662736, "step": 79575 }, { "epoch": 12.98205546492659, "grad_norm": 0.312248557806015, "learning_rate": 0.0003304756747126618, "loss": 0.0373, "num_input_tokens_seen": 171673616, "step": 79580 }, { "epoch": 12.982871125611746, "grad_norm": 0.021762264892458916, "learning_rate": 0.00033040871267140705, "loss": 0.0129, "num_input_tokens_seen": 171682160, "step": 79585 }, { "epoch": 12.9836867862969, "grad_norm": 0.17285288870334625, "learning_rate": 0.00033034175406713464, "loss": 0.1281, "num_input_tokens_seen": 171693456, "step": 79590 }, { "epoch": 12.984502446982056, "grad_norm": 0.2014392763376236, "learning_rate": 0.0003302747989012019, "loss": 0.0189, "num_input_tokens_seen": 171705200, "step": 79595 }, { "epoch": 12.98531810766721, "grad_norm": 0.008954297751188278, "learning_rate": 0.00033020784717496576, "loss": 0.0436, "num_input_tokens_seen": 171715824, "step": 79600 }, { "epoch": 12.986133768352365, "grad_norm": 0.04858795925974846, "learning_rate": 0.0003301408988897829, "loss": 0.0111, "num_input_tokens_seen": 171726896, "step": 79605 }, { "epoch": 12.986949429037521, "grad_norm": 0.012820033356547356, "learning_rate": 0.00033007395404701035, "loss": 0.0258, "num_input_tokens_seen": 171738640, "step": 79610 }, { "epoch": 12.987765089722675, "grad_norm": 0.12385153770446777, "learning_rate": 0.0003300070126480045, "loss": 0.0214, "num_input_tokens_seen": 171749712, "step": 79615 }, { "epoch": 12.98858075040783, "grad_norm": 0.1571178138256073, "learning_rate": 0.00032994007469412234, "loss": 0.0226, "num_input_tokens_seen": 171759600, "step": 79620 }, { "epoch": 12.989396411092985, "grad_norm": 0.04709313064813614, "learning_rate": 0.0003298731401867202, "loss": 0.0395, "num_input_tokens_seen": 171770768, "step": 79625 }, { "epoch": 12.99021207177814, "grad_norm": 0.0487193688750267, "learning_rate": 0.0003298062091271548, "loss": 0.0062, "num_input_tokens_seen": 171781712, "step": 79630 }, { "epoch": 12.991027732463294, "grad_norm": 0.5086286067962646, "learning_rate": 0.00032973928151678233, "loss": 0.0689, "num_input_tokens_seen": 171791376, "step": 79635 }, { "epoch": 12.99184339314845, "grad_norm": 1.2341604232788086, "learning_rate": 0.00032967235735695955, "loss": 0.0341, "num_input_tokens_seen": 171801968, "step": 79640 }, { "epoch": 12.992659053833606, "grad_norm": 0.12874674797058105, "learning_rate": 0.00032960543664904224, "loss": 0.0519, "num_input_tokens_seen": 171812240, "step": 79645 }, { "epoch": 12.99347471451876, "grad_norm": 0.9838602542877197, "learning_rate": 0.0003295385193943872, "loss": 0.0374, "num_input_tokens_seen": 171822192, "step": 79650 }, { "epoch": 12.994290375203915, "grad_norm": 0.18078124523162842, "learning_rate": 0.00032947160559435, "loss": 0.1003, "num_input_tokens_seen": 171832752, "step": 79655 }, { "epoch": 12.99510603588907, "grad_norm": 0.005862295161932707, "learning_rate": 0.00032940469525028735, "loss": 0.0191, "num_input_tokens_seen": 171843568, "step": 79660 }, { "epoch": 12.995921696574225, "grad_norm": 0.03426656126976013, "learning_rate": 0.0003293377883635547, "loss": 0.0098, "num_input_tokens_seen": 171855408, "step": 79665 }, { "epoch": 12.99673735725938, "grad_norm": 0.16880154609680176, "learning_rate": 0.0003292708849355085, "loss": 0.0146, "num_input_tokens_seen": 171866864, "step": 79670 }, { "epoch": 12.997553017944535, "grad_norm": 0.24113263189792633, "learning_rate": 0.0003292039849675042, "loss": 0.0152, "num_input_tokens_seen": 171878096, "step": 79675 }, { "epoch": 12.99836867862969, "grad_norm": 0.10400758683681488, "learning_rate": 0.0003291370884608979, "loss": 0.0263, "num_input_tokens_seen": 171889712, "step": 79680 }, { "epoch": 12.999184339314844, "grad_norm": 0.23988863825798035, "learning_rate": 0.00032907019541704533, "loss": 0.0299, "num_input_tokens_seen": 171901200, "step": 79685 }, { "epoch": 13.0, "grad_norm": 0.029159465804696083, "learning_rate": 0.00032900330583730196, "loss": 0.0081, "num_input_tokens_seen": 171910720, "step": 79690 }, { "epoch": 13.0, "eval_loss": 0.16136515140533447, "eval_runtime": 104.6903, "eval_samples_per_second": 26.029, "eval_steps_per_second": 6.514, "num_input_tokens_seen": 171910720, "step": 79690 }, { "epoch": 13.000815660685156, "grad_norm": 0.08558525890111923, "learning_rate": 0.0003289364197230236, "loss": 0.0387, "num_input_tokens_seen": 171922432, "step": 79695 }, { "epoch": 13.00163132137031, "grad_norm": 0.0034907974768429995, "learning_rate": 0.0003288695370755657, "loss": 0.0099, "num_input_tokens_seen": 171933536, "step": 79700 }, { "epoch": 13.002446982055465, "grad_norm": 0.16311341524124146, "learning_rate": 0.0003288026578962836, "loss": 0.0087, "num_input_tokens_seen": 171945408, "step": 79705 }, { "epoch": 13.00326264274062, "grad_norm": 0.010812400840222836, "learning_rate": 0.0003287357821865329, "loss": 0.0643, "num_input_tokens_seen": 171955264, "step": 79710 }, { "epoch": 13.004078303425775, "grad_norm": 0.04773535206913948, "learning_rate": 0.0003286689099476689, "loss": 0.0115, "num_input_tokens_seen": 171965760, "step": 79715 }, { "epoch": 13.00489396411093, "grad_norm": 0.00682013388723135, "learning_rate": 0.00032860204118104674, "loss": 0.0104, "num_input_tokens_seen": 171976960, "step": 79720 }, { "epoch": 13.005709624796085, "grad_norm": 0.6423061490058899, "learning_rate": 0.00032853517588802173, "loss": 0.1678, "num_input_tokens_seen": 171988384, "step": 79725 }, { "epoch": 13.00652528548124, "grad_norm": 0.12096865475177765, "learning_rate": 0.0003284683140699487, "loss": 0.0102, "num_input_tokens_seen": 171999680, "step": 79730 }, { "epoch": 13.007340946166394, "grad_norm": 0.03548979014158249, "learning_rate": 0.00032840145572818314, "loss": 0.0077, "num_input_tokens_seen": 172009824, "step": 79735 }, { "epoch": 13.00815660685155, "grad_norm": 0.3079151511192322, "learning_rate": 0.0003283346008640795, "loss": 0.0821, "num_input_tokens_seen": 172020704, "step": 79740 }, { "epoch": 13.008972267536704, "grad_norm": 0.020223397761583328, "learning_rate": 0.0003282677494789933, "loss": 0.0436, "num_input_tokens_seen": 172032416, "step": 79745 }, { "epoch": 13.00978792822186, "grad_norm": 0.2932673692703247, "learning_rate": 0.0003282009015742787, "loss": 0.0219, "num_input_tokens_seen": 172043328, "step": 79750 }, { "epoch": 13.010603588907015, "grad_norm": 0.14482398331165314, "learning_rate": 0.00032813405715129097, "loss": 0.0157, "num_input_tokens_seen": 172054080, "step": 79755 }, { "epoch": 13.01141924959217, "grad_norm": 0.004827328957617283, "learning_rate": 0.00032806721621138444, "loss": 0.0129, "num_input_tokens_seen": 172064736, "step": 79760 }, { "epoch": 13.012234910277325, "grad_norm": 0.15541167557239532, "learning_rate": 0.00032800037875591406, "loss": 0.1297, "num_input_tokens_seen": 172075072, "step": 79765 }, { "epoch": 13.013050570962479, "grad_norm": 0.01347341202199459, "learning_rate": 0.000327933544786234, "loss": 0.0483, "num_input_tokens_seen": 172085120, "step": 79770 }, { "epoch": 13.013866231647635, "grad_norm": 0.1046624481678009, "learning_rate": 0.00032786671430369915, "loss": 0.0087, "num_input_tokens_seen": 172095296, "step": 79775 }, { "epoch": 13.01468189233279, "grad_norm": 0.47281670570373535, "learning_rate": 0.0003277998873096635, "loss": 0.0237, "num_input_tokens_seen": 172106144, "step": 79780 }, { "epoch": 13.015497553017944, "grad_norm": 0.44485732913017273, "learning_rate": 0.00032773306380548176, "loss": 0.0193, "num_input_tokens_seen": 172116800, "step": 79785 }, { "epoch": 13.0163132137031, "grad_norm": 0.442548930644989, "learning_rate": 0.0003276662437925079, "loss": 0.0143, "num_input_tokens_seen": 172128928, "step": 79790 }, { "epoch": 13.017128874388254, "grad_norm": 0.021406574174761772, "learning_rate": 0.0003275994272720963, "loss": 0.003, "num_input_tokens_seen": 172141440, "step": 79795 }, { "epoch": 13.01794453507341, "grad_norm": 0.1656419038772583, "learning_rate": 0.0003275326142456009, "loss": 0.0236, "num_input_tokens_seen": 172153056, "step": 79800 }, { "epoch": 13.018760195758565, "grad_norm": 0.018963899463415146, "learning_rate": 0.00032746580471437606, "loss": 0.02, "num_input_tokens_seen": 172162848, "step": 79805 }, { "epoch": 13.01957585644372, "grad_norm": 0.002809838391840458, "learning_rate": 0.0003273989986797753, "loss": 0.0047, "num_input_tokens_seen": 172172032, "step": 79810 }, { "epoch": 13.020391517128875, "grad_norm": 0.005966924596577883, "learning_rate": 0.00032733219614315283, "loss": 0.0105, "num_input_tokens_seen": 172182496, "step": 79815 }, { "epoch": 13.021207177814029, "grad_norm": 0.0044192238710820675, "learning_rate": 0.00032726539710586266, "loss": 0.0064, "num_input_tokens_seen": 172194368, "step": 79820 }, { "epoch": 13.022022838499185, "grad_norm": 0.010571627877652645, "learning_rate": 0.0003271986015692582, "loss": 0.0691, "num_input_tokens_seen": 172205376, "step": 79825 }, { "epoch": 13.022838499184338, "grad_norm": 0.010511090978980064, "learning_rate": 0.0003271318095346934, "loss": 0.0529, "num_input_tokens_seen": 172216896, "step": 79830 }, { "epoch": 13.023654159869494, "grad_norm": 0.13437996804714203, "learning_rate": 0.00032706502100352165, "loss": 0.012, "num_input_tokens_seen": 172228288, "step": 79835 }, { "epoch": 13.02446982055465, "grad_norm": 0.13945025205612183, "learning_rate": 0.00032699823597709675, "loss": 0.0121, "num_input_tokens_seen": 172238336, "step": 79840 }, { "epoch": 13.025285481239804, "grad_norm": 0.41707009077072144, "learning_rate": 0.00032693145445677194, "loss": 0.0113, "num_input_tokens_seen": 172249536, "step": 79845 }, { "epoch": 13.02610114192496, "grad_norm": 0.05036475881934166, "learning_rate": 0.00032686467644390085, "loss": 0.0087, "num_input_tokens_seen": 172261152, "step": 79850 }, { "epoch": 13.026916802610113, "grad_norm": 0.5649818778038025, "learning_rate": 0.00032679790193983666, "loss": 0.2842, "num_input_tokens_seen": 172271616, "step": 79855 }, { "epoch": 13.02773246329527, "grad_norm": 0.00434821005910635, "learning_rate": 0.0003267311309459328, "loss": 0.011, "num_input_tokens_seen": 172283744, "step": 79860 }, { "epoch": 13.028548123980425, "grad_norm": 0.007520576473325491, "learning_rate": 0.00032666436346354236, "loss": 0.041, "num_input_tokens_seen": 172295680, "step": 79865 }, { "epoch": 13.029363784665579, "grad_norm": 0.047947678714990616, "learning_rate": 0.0003265975994940185, "loss": 0.0073, "num_input_tokens_seen": 172306432, "step": 79870 }, { "epoch": 13.030179445350734, "grad_norm": 0.37860891222953796, "learning_rate": 0.00032653083903871406, "loss": 0.153, "num_input_tokens_seen": 172318688, "step": 79875 }, { "epoch": 13.030995106035888, "grad_norm": 0.00768206175416708, "learning_rate": 0.0003264640820989825, "loss": 0.0034, "num_input_tokens_seen": 172327904, "step": 79880 }, { "epoch": 13.031810766721044, "grad_norm": 0.6924547553062439, "learning_rate": 0.0003263973286761762, "loss": 0.1393, "num_input_tokens_seen": 172338560, "step": 79885 }, { "epoch": 13.0326264274062, "grad_norm": 0.009162414819002151, "learning_rate": 0.0003263305787716486, "loss": 0.0078, "num_input_tokens_seen": 172348320, "step": 79890 }, { "epoch": 13.033442088091354, "grad_norm": 0.4425072968006134, "learning_rate": 0.00032626383238675184, "loss": 0.0578, "num_input_tokens_seen": 172359008, "step": 79895 }, { "epoch": 13.03425774877651, "grad_norm": 0.006265557836741209, "learning_rate": 0.0003261970895228391, "loss": 0.0072, "num_input_tokens_seen": 172369536, "step": 79900 }, { "epoch": 13.035073409461663, "grad_norm": 0.008879105560481548, "learning_rate": 0.00032613035018126267, "loss": 0.0071, "num_input_tokens_seen": 172381344, "step": 79905 }, { "epoch": 13.035889070146819, "grad_norm": 0.032495710998773575, "learning_rate": 0.0003260636143633755, "loss": 0.0118, "num_input_tokens_seen": 172391424, "step": 79910 }, { "epoch": 13.036704730831975, "grad_norm": 0.036246247589588165, "learning_rate": 0.0003259968820705296, "loss": 0.0616, "num_input_tokens_seen": 172401888, "step": 79915 }, { "epoch": 13.037520391517129, "grad_norm": 0.04756350442767143, "learning_rate": 0.0003259301533040776, "loss": 0.0442, "num_input_tokens_seen": 172412864, "step": 79920 }, { "epoch": 13.038336052202284, "grad_norm": 0.014818107709288597, "learning_rate": 0.00032586342806537207, "loss": 0.012, "num_input_tokens_seen": 172421312, "step": 79925 }, { "epoch": 13.039151712887438, "grad_norm": 0.29139894247055054, "learning_rate": 0.0003257967063557649, "loss": 0.0175, "num_input_tokens_seen": 172431872, "step": 79930 }, { "epoch": 13.039967373572594, "grad_norm": 0.02991260215640068, "learning_rate": 0.0003257299881766087, "loss": 0.0123, "num_input_tokens_seen": 172443488, "step": 79935 }, { "epoch": 13.040783034257748, "grad_norm": 0.009305900894105434, "learning_rate": 0.0003256632735292551, "loss": 0.051, "num_input_tokens_seen": 172453888, "step": 79940 }, { "epoch": 13.041598694942904, "grad_norm": 0.11447829753160477, "learning_rate": 0.00032559656241505663, "loss": 0.0161, "num_input_tokens_seen": 172464352, "step": 79945 }, { "epoch": 13.04241435562806, "grad_norm": 0.07972276955842972, "learning_rate": 0.0003255298548353649, "loss": 0.0068, "num_input_tokens_seen": 172475360, "step": 79950 }, { "epoch": 13.043230016313213, "grad_norm": 1.1518919467926025, "learning_rate": 0.0003254631507915322, "loss": 0.0977, "num_input_tokens_seen": 172485792, "step": 79955 }, { "epoch": 13.044045676998369, "grad_norm": 0.07391713559627533, "learning_rate": 0.00032539645028490993, "loss": 0.106, "num_input_tokens_seen": 172495744, "step": 79960 }, { "epoch": 13.044861337683523, "grad_norm": 0.012322865426540375, "learning_rate": 0.0003253297533168503, "loss": 0.0081, "num_input_tokens_seen": 172506624, "step": 79965 }, { "epoch": 13.045676998368679, "grad_norm": 0.01859993301331997, "learning_rate": 0.0003252630598887046, "loss": 0.0484, "num_input_tokens_seen": 172516896, "step": 79970 }, { "epoch": 13.046492659053834, "grad_norm": 0.15613065659999847, "learning_rate": 0.00032519637000182495, "loss": 0.0172, "num_input_tokens_seen": 172527904, "step": 79975 }, { "epoch": 13.047308319738988, "grad_norm": 0.030830714851617813, "learning_rate": 0.0003251296836575623, "loss": 0.0259, "num_input_tokens_seen": 172539584, "step": 79980 }, { "epoch": 13.048123980424144, "grad_norm": 0.010481961071491241, "learning_rate": 0.00032506300085726874, "loss": 0.0028, "num_input_tokens_seen": 172550432, "step": 79985 }, { "epoch": 13.048939641109298, "grad_norm": 0.35440269112586975, "learning_rate": 0.0003249963216022951, "loss": 0.0368, "num_input_tokens_seen": 172561248, "step": 79990 }, { "epoch": 13.049755301794454, "grad_norm": 0.023927956819534302, "learning_rate": 0.0003249296458939932, "loss": 0.0166, "num_input_tokens_seen": 172571584, "step": 79995 }, { "epoch": 13.05057096247961, "grad_norm": 0.005211255047470331, "learning_rate": 0.0003248629737337141, "loss": 0.0099, "num_input_tokens_seen": 172582144, "step": 80000 }, { "epoch": 13.051386623164763, "grad_norm": 0.46662262082099915, "learning_rate": 0.000324796305122809, "loss": 0.023, "num_input_tokens_seen": 172593056, "step": 80005 }, { "epoch": 13.052202283849919, "grad_norm": 0.02291063591837883, "learning_rate": 0.000324729640062629, "loss": 0.0027, "num_input_tokens_seen": 172604736, "step": 80010 }, { "epoch": 13.053017944535073, "grad_norm": 0.03768923506140709, "learning_rate": 0.0003246629785545252, "loss": 0.098, "num_input_tokens_seen": 172616896, "step": 80015 }, { "epoch": 13.053833605220229, "grad_norm": 0.03661172837018967, "learning_rate": 0.0003245963205998485, "loss": 0.0044, "num_input_tokens_seen": 172627584, "step": 80020 }, { "epoch": 13.054649265905383, "grad_norm": 0.0713721439242363, "learning_rate": 0.00032452966619994997, "loss": 0.022, "num_input_tokens_seen": 172639360, "step": 80025 }, { "epoch": 13.055464926590538, "grad_norm": 0.003793990006670356, "learning_rate": 0.00032446301535618034, "loss": 0.0109, "num_input_tokens_seen": 172650880, "step": 80030 }, { "epoch": 13.056280587275694, "grad_norm": 0.006750556640326977, "learning_rate": 0.0003243963680698904, "loss": 0.1085, "num_input_tokens_seen": 172661888, "step": 80035 }, { "epoch": 13.057096247960848, "grad_norm": 0.07947990298271179, "learning_rate": 0.0003243297243424308, "loss": 0.0302, "num_input_tokens_seen": 172673056, "step": 80040 }, { "epoch": 13.057911908646004, "grad_norm": 0.4710869789123535, "learning_rate": 0.0003242630841751522, "loss": 0.0472, "num_input_tokens_seen": 172684832, "step": 80045 }, { "epoch": 13.058727569331158, "grad_norm": 0.5151106119155884, "learning_rate": 0.00032419644756940527, "loss": 0.0458, "num_input_tokens_seen": 172695872, "step": 80050 }, { "epoch": 13.059543230016313, "grad_norm": 0.01815764419734478, "learning_rate": 0.0003241298145265401, "loss": 0.0315, "num_input_tokens_seen": 172706880, "step": 80055 }, { "epoch": 13.060358890701469, "grad_norm": 0.08889348804950714, "learning_rate": 0.00032406318504790753, "loss": 0.003, "num_input_tokens_seen": 172718304, "step": 80060 }, { "epoch": 13.061174551386623, "grad_norm": 0.017449025064706802, "learning_rate": 0.0003239965591348576, "loss": 0.0041, "num_input_tokens_seen": 172730048, "step": 80065 }, { "epoch": 13.061990212071779, "grad_norm": 0.045721568167209625, "learning_rate": 0.00032392993678874085, "loss": 0.0054, "num_input_tokens_seen": 172742368, "step": 80070 }, { "epoch": 13.062805872756933, "grad_norm": 0.15640510618686676, "learning_rate": 0.0003238633180109071, "loss": 0.0143, "num_input_tokens_seen": 172753184, "step": 80075 }, { "epoch": 13.063621533442088, "grad_norm": 0.45091527700424194, "learning_rate": 0.00032379670280270677, "loss": 0.0185, "num_input_tokens_seen": 172764736, "step": 80080 }, { "epoch": 13.064437194127244, "grad_norm": 0.021592259407043457, "learning_rate": 0.0003237300911654897, "loss": 0.0027, "num_input_tokens_seen": 172775936, "step": 80085 }, { "epoch": 13.065252854812398, "grad_norm": 0.0025286313612014055, "learning_rate": 0.0003236634831006061, "loss": 0.0945, "num_input_tokens_seen": 172787648, "step": 80090 }, { "epoch": 13.066068515497554, "grad_norm": 0.06105487048625946, "learning_rate": 0.0003235968786094055, "loss": 0.0294, "num_input_tokens_seen": 172798752, "step": 80095 }, { "epoch": 13.066884176182707, "grad_norm": 0.1832054853439331, "learning_rate": 0.0003235302776932382, "loss": 0.0244, "num_input_tokens_seen": 172809856, "step": 80100 }, { "epoch": 13.067699836867863, "grad_norm": 0.05857066065073013, "learning_rate": 0.00032346368035345344, "loss": 0.0197, "num_input_tokens_seen": 172821216, "step": 80105 }, { "epoch": 13.068515497553017, "grad_norm": 0.27611711621284485, "learning_rate": 0.0003233970865914013, "loss": 0.0149, "num_input_tokens_seen": 172831616, "step": 80110 }, { "epoch": 13.069331158238173, "grad_norm": 0.054643433541059494, "learning_rate": 0.0003233304964084311, "loss": 0.0053, "num_input_tokens_seen": 172842464, "step": 80115 }, { "epoch": 13.070146818923329, "grad_norm": 0.010734038427472115, "learning_rate": 0.0003232639098058927, "loss": 0.0019, "num_input_tokens_seen": 172852544, "step": 80120 }, { "epoch": 13.070962479608482, "grad_norm": 0.00719423359259963, "learning_rate": 0.00032319732678513514, "loss": 0.0043, "num_input_tokens_seen": 172862656, "step": 80125 }, { "epoch": 13.071778140293638, "grad_norm": 0.8305765390396118, "learning_rate": 0.00032313074734750813, "loss": 0.0452, "num_input_tokens_seen": 172874592, "step": 80130 }, { "epoch": 13.072593800978792, "grad_norm": 0.010506076738238335, "learning_rate": 0.000323064171494361, "loss": 0.0101, "num_input_tokens_seen": 172886272, "step": 80135 }, { "epoch": 13.073409461663948, "grad_norm": 0.02238144353032112, "learning_rate": 0.00032299759922704277, "loss": 0.0029, "num_input_tokens_seen": 172896096, "step": 80140 }, { "epoch": 13.074225122349104, "grad_norm": 0.9596978425979614, "learning_rate": 0.0003229310305469029, "loss": 0.0851, "num_input_tokens_seen": 172908128, "step": 80145 }, { "epoch": 13.075040783034257, "grad_norm": 0.003242769278585911, "learning_rate": 0.00032286446545529016, "loss": 0.035, "num_input_tokens_seen": 172918496, "step": 80150 }, { "epoch": 13.075856443719413, "grad_norm": 0.037393562495708466, "learning_rate": 0.0003227979039535538, "loss": 0.0074, "num_input_tokens_seen": 172929504, "step": 80155 }, { "epoch": 13.076672104404567, "grad_norm": 0.004145515151321888, "learning_rate": 0.0003227313460430427, "loss": 0.0458, "num_input_tokens_seen": 172939424, "step": 80160 }, { "epoch": 13.077487765089723, "grad_norm": 0.018017858266830444, "learning_rate": 0.0003226647917251058, "loss": 0.0096, "num_input_tokens_seen": 172950208, "step": 80165 }, { "epoch": 13.078303425774878, "grad_norm": 0.02409268729388714, "learning_rate": 0.0003225982410010918, "loss": 0.0287, "num_input_tokens_seen": 172962144, "step": 80170 }, { "epoch": 13.079119086460032, "grad_norm": 0.06826374679803848, "learning_rate": 0.00032253169387234953, "loss": 0.0051, "num_input_tokens_seen": 172973184, "step": 80175 }, { "epoch": 13.079934747145188, "grad_norm": 0.20117846131324768, "learning_rate": 0.0003224651503402276, "loss": 0.0269, "num_input_tokens_seen": 172983392, "step": 80180 }, { "epoch": 13.080750407830342, "grad_norm": 0.004088573157787323, "learning_rate": 0.00032239861040607464, "loss": 0.0082, "num_input_tokens_seen": 172993472, "step": 80185 }, { "epoch": 13.081566068515498, "grad_norm": 0.03030865453183651, "learning_rate": 0.0003223320740712391, "loss": 0.0034, "num_input_tokens_seen": 173004768, "step": 80190 }, { "epoch": 13.082381729200652, "grad_norm": 0.5057169198989868, "learning_rate": 0.0003222655413370696, "loss": 0.0149, "num_input_tokens_seen": 173014688, "step": 80195 }, { "epoch": 13.083197389885807, "grad_norm": 0.004440369550138712, "learning_rate": 0.00032219901220491417, "loss": 0.0022, "num_input_tokens_seen": 173025920, "step": 80200 }, { "epoch": 13.084013050570963, "grad_norm": 0.03785277530550957, "learning_rate": 0.0003221324866761215, "loss": 0.0113, "num_input_tokens_seen": 173036992, "step": 80205 }, { "epoch": 13.084828711256117, "grad_norm": 0.03577620908617973, "learning_rate": 0.0003220659647520395, "loss": 0.0132, "num_input_tokens_seen": 173047360, "step": 80210 }, { "epoch": 13.085644371941273, "grad_norm": 0.15735945105552673, "learning_rate": 0.00032199944643401655, "loss": 0.0128, "num_input_tokens_seen": 173059360, "step": 80215 }, { "epoch": 13.086460032626427, "grad_norm": 0.0050867050886154175, "learning_rate": 0.00032193293172340056, "loss": 0.0026, "num_input_tokens_seen": 173070336, "step": 80220 }, { "epoch": 13.087275693311582, "grad_norm": 0.726453423500061, "learning_rate": 0.0003218664206215397, "loss": 0.0961, "num_input_tokens_seen": 173082400, "step": 80225 }, { "epoch": 13.088091353996738, "grad_norm": 0.05104609206318855, "learning_rate": 0.00032179991312978164, "loss": 0.0092, "num_input_tokens_seen": 173094368, "step": 80230 }, { "epoch": 13.088907014681892, "grad_norm": 0.054848913103342056, "learning_rate": 0.00032173340924947436, "loss": 0.0075, "num_input_tokens_seen": 173104448, "step": 80235 }, { "epoch": 13.089722675367048, "grad_norm": 0.09938617050647736, "learning_rate": 0.00032166690898196594, "loss": 0.1573, "num_input_tokens_seen": 173114912, "step": 80240 }, { "epoch": 13.090538336052202, "grad_norm": 0.01833474077284336, "learning_rate": 0.0003216004123286036, "loss": 0.0187, "num_input_tokens_seen": 173126880, "step": 80245 }, { "epoch": 13.091353996737357, "grad_norm": 0.0031096080783754587, "learning_rate": 0.0003215339192907355, "loss": 0.0017, "num_input_tokens_seen": 173137600, "step": 80250 }, { "epoch": 13.092169657422513, "grad_norm": 0.026395712047815323, "learning_rate": 0.00032146742986970865, "loss": 0.0059, "num_input_tokens_seen": 173149248, "step": 80255 }, { "epoch": 13.092985318107667, "grad_norm": 0.015693750232458115, "learning_rate": 0.000321400944066871, "loss": 0.0027, "num_input_tokens_seen": 173159968, "step": 80260 }, { "epoch": 13.093800978792823, "grad_norm": 0.14889781177043915, "learning_rate": 0.00032133446188356964, "loss": 0.0426, "num_input_tokens_seen": 173170496, "step": 80265 }, { "epoch": 13.094616639477977, "grad_norm": 0.0023413198068737984, "learning_rate": 0.00032126798332115223, "loss": 0.0044, "num_input_tokens_seen": 173181376, "step": 80270 }, { "epoch": 13.095432300163132, "grad_norm": 0.016972091048955917, "learning_rate": 0.00032120150838096576, "loss": 0.002, "num_input_tokens_seen": 173192640, "step": 80275 }, { "epoch": 13.096247960848286, "grad_norm": 0.00946488231420517, "learning_rate": 0.00032113503706435767, "loss": 0.0028, "num_input_tokens_seen": 173204096, "step": 80280 }, { "epoch": 13.097063621533442, "grad_norm": 0.00266844080761075, "learning_rate": 0.00032106856937267475, "loss": 0.0448, "num_input_tokens_seen": 173214848, "step": 80285 }, { "epoch": 13.097879282218598, "grad_norm": 0.015972357243299484, "learning_rate": 0.00032100210530726446, "loss": 0.005, "num_input_tokens_seen": 173225952, "step": 80290 }, { "epoch": 13.098694942903752, "grad_norm": 0.0007245544111356139, "learning_rate": 0.00032093564486947347, "loss": 0.0382, "num_input_tokens_seen": 173237408, "step": 80295 }, { "epoch": 13.099510603588907, "grad_norm": 0.01721024699509144, "learning_rate": 0.0003208691880606488, "loss": 0.0082, "num_input_tokens_seen": 173248512, "step": 80300 }, { "epoch": 13.100326264274061, "grad_norm": 0.01830303855240345, "learning_rate": 0.0003208027348821373, "loss": 0.056, "num_input_tokens_seen": 173258592, "step": 80305 }, { "epoch": 13.101141924959217, "grad_norm": 0.0024359319359064102, "learning_rate": 0.00032073628533528574, "loss": 0.0382, "num_input_tokens_seen": 173269888, "step": 80310 }, { "epoch": 13.101957585644373, "grad_norm": 0.001466614892706275, "learning_rate": 0.0003206698394214407, "loss": 0.004, "num_input_tokens_seen": 173280512, "step": 80315 }, { "epoch": 13.102773246329527, "grad_norm": 0.0017021127277985215, "learning_rate": 0.00032060339714194897, "loss": 0.0897, "num_input_tokens_seen": 173291200, "step": 80320 }, { "epoch": 13.103588907014682, "grad_norm": 0.1483052670955658, "learning_rate": 0.0003205369584981568, "loss": 0.0093, "num_input_tokens_seen": 173302048, "step": 80325 }, { "epoch": 13.104404567699836, "grad_norm": 0.061912551522254944, "learning_rate": 0.000320470523491411, "loss": 0.0097, "num_input_tokens_seen": 173311616, "step": 80330 }, { "epoch": 13.105220228384992, "grad_norm": 0.0009525975910946727, "learning_rate": 0.00032040409212305765, "loss": 0.0186, "num_input_tokens_seen": 173323168, "step": 80335 }, { "epoch": 13.106035889070148, "grad_norm": 0.010273335501551628, "learning_rate": 0.0003203376643944433, "loss": 0.0377, "num_input_tokens_seen": 173333728, "step": 80340 }, { "epoch": 13.106851549755302, "grad_norm": 0.07906752824783325, "learning_rate": 0.0003202712403069141, "loss": 0.0031, "num_input_tokens_seen": 173345088, "step": 80345 }, { "epoch": 13.107667210440457, "grad_norm": 0.002880327869206667, "learning_rate": 0.00032020481986181606, "loss": 0.0015, "num_input_tokens_seen": 173355200, "step": 80350 }, { "epoch": 13.108482871125611, "grad_norm": 0.005732477176934481, "learning_rate": 0.0003201384030604957, "loss": 0.0339, "num_input_tokens_seen": 173366112, "step": 80355 }, { "epoch": 13.109298531810767, "grad_norm": 0.004370862152427435, "learning_rate": 0.0003200719899042985, "loss": 0.0283, "num_input_tokens_seen": 173377632, "step": 80360 }, { "epoch": 13.11011419249592, "grad_norm": 0.008742867037653923, "learning_rate": 0.00032000558039457094, "loss": 0.0224, "num_input_tokens_seen": 173388448, "step": 80365 }, { "epoch": 13.110929853181077, "grad_norm": 0.012412240728735924, "learning_rate": 0.0003199391745326585, "loss": 0.0062, "num_input_tokens_seen": 173399168, "step": 80370 }, { "epoch": 13.111745513866232, "grad_norm": 0.0009243670501746237, "learning_rate": 0.0003198727723199072, "loss": 0.0727, "num_input_tokens_seen": 173408544, "step": 80375 }, { "epoch": 13.112561174551386, "grad_norm": 0.015289409086108208, "learning_rate": 0.0003198063737576625, "loss": 0.0108, "num_input_tokens_seen": 173419104, "step": 80380 }, { "epoch": 13.113376835236542, "grad_norm": 0.950347363948822, "learning_rate": 0.0003197399788472705, "loss": 0.1616, "num_input_tokens_seen": 173429088, "step": 80385 }, { "epoch": 13.114192495921696, "grad_norm": 0.003356748493388295, "learning_rate": 0.0003196735875900762, "loss": 0.0015, "num_input_tokens_seen": 173440512, "step": 80390 }, { "epoch": 13.115008156606851, "grad_norm": 0.20721390843391418, "learning_rate": 0.00031960719998742567, "loss": 0.0207, "num_input_tokens_seen": 173452864, "step": 80395 }, { "epoch": 13.115823817292007, "grad_norm": 0.7187097072601318, "learning_rate": 0.0003195408160406638, "loss": 0.0289, "num_input_tokens_seen": 173463584, "step": 80400 }, { "epoch": 13.116639477977161, "grad_norm": 0.11548445373773575, "learning_rate": 0.00031947443575113655, "loss": 0.0185, "num_input_tokens_seen": 173475008, "step": 80405 }, { "epoch": 13.117455138662317, "grad_norm": 0.013471146114170551, "learning_rate": 0.00031940805912018854, "loss": 0.0095, "num_input_tokens_seen": 173485312, "step": 80410 }, { "epoch": 13.11827079934747, "grad_norm": 0.5451833009719849, "learning_rate": 0.0003193416861491656, "loss": 0.0156, "num_input_tokens_seen": 173495744, "step": 80415 }, { "epoch": 13.119086460032626, "grad_norm": 0.004572047386318445, "learning_rate": 0.00031927531683941234, "loss": 0.0301, "num_input_tokens_seen": 173505984, "step": 80420 }, { "epoch": 13.119902120717782, "grad_norm": 0.007854897528886795, "learning_rate": 0.0003192089511922742, "loss": 0.0036, "num_input_tokens_seen": 173517024, "step": 80425 }, { "epoch": 13.120717781402936, "grad_norm": 0.4270484745502472, "learning_rate": 0.0003191425892090959, "loss": 0.1302, "num_input_tokens_seen": 173527520, "step": 80430 }, { "epoch": 13.121533442088092, "grad_norm": 0.4104847013950348, "learning_rate": 0.0003190762308912226, "loss": 0.0162, "num_input_tokens_seen": 173537952, "step": 80435 }, { "epoch": 13.122349102773246, "grad_norm": 0.013403626158833504, "learning_rate": 0.0003190098762399989, "loss": 0.0031, "num_input_tokens_seen": 173547712, "step": 80440 }, { "epoch": 13.123164763458401, "grad_norm": 0.7732028961181641, "learning_rate": 0.0003189435252567697, "loss": 0.0655, "num_input_tokens_seen": 173557568, "step": 80445 }, { "epoch": 13.123980424143557, "grad_norm": 0.04544537141919136, "learning_rate": 0.00031887717794287963, "loss": 0.1375, "num_input_tokens_seen": 173567776, "step": 80450 }, { "epoch": 13.124796084828711, "grad_norm": 0.009737315587699413, "learning_rate": 0.0003188108342996732, "loss": 0.0123, "num_input_tokens_seen": 173580032, "step": 80455 }, { "epoch": 13.125611745513867, "grad_norm": 0.0023570540361106396, "learning_rate": 0.0003187444943284953, "loss": 0.016, "num_input_tokens_seen": 173590176, "step": 80460 }, { "epoch": 13.12642740619902, "grad_norm": 0.014799975790083408, "learning_rate": 0.00031867815803068996, "loss": 0.0062, "num_input_tokens_seen": 173601376, "step": 80465 }, { "epoch": 13.127243066884176, "grad_norm": 0.01263999193906784, "learning_rate": 0.0003186118254076018, "loss": 0.0042, "num_input_tokens_seen": 173613504, "step": 80470 }, { "epoch": 13.12805872756933, "grad_norm": 0.022360246628522873, "learning_rate": 0.00031854549646057517, "loss": 0.0064, "num_input_tokens_seen": 173623840, "step": 80475 }, { "epoch": 13.128874388254486, "grad_norm": 0.02558664232492447, "learning_rate": 0.00031847917119095425, "loss": 0.1383, "num_input_tokens_seen": 173633280, "step": 80480 }, { "epoch": 13.129690048939642, "grad_norm": 0.20103636384010315, "learning_rate": 0.0003184128496000832, "loss": 0.0148, "num_input_tokens_seen": 173644640, "step": 80485 }, { "epoch": 13.130505709624796, "grad_norm": 0.23378853499889374, "learning_rate": 0.00031834653168930614, "loss": 0.1249, "num_input_tokens_seen": 173656000, "step": 80490 }, { "epoch": 13.131321370309951, "grad_norm": 0.023417634889483452, "learning_rate": 0.0003182802174599669, "loss": 0.0192, "num_input_tokens_seen": 173665568, "step": 80495 }, { "epoch": 13.132137030995105, "grad_norm": 0.003572846995666623, "learning_rate": 0.00031821390691340985, "loss": 0.0017, "num_input_tokens_seen": 173674944, "step": 80500 }, { "epoch": 13.132952691680261, "grad_norm": 0.0035275681875646114, "learning_rate": 0.0003181476000509783, "loss": 0.0245, "num_input_tokens_seen": 173686304, "step": 80505 }, { "epoch": 13.133768352365417, "grad_norm": 0.18566931784152985, "learning_rate": 0.00031808129687401664, "loss": 0.0104, "num_input_tokens_seen": 173697824, "step": 80510 }, { "epoch": 13.13458401305057, "grad_norm": 0.3707401156425476, "learning_rate": 0.00031801499738386797, "loss": 0.023, "num_input_tokens_seen": 173709184, "step": 80515 }, { "epoch": 13.135399673735726, "grad_norm": 0.024199439212679863, "learning_rate": 0.0003179487015818765, "loss": 0.0102, "num_input_tokens_seen": 173719168, "step": 80520 }, { "epoch": 13.13621533442088, "grad_norm": 0.007694084197282791, "learning_rate": 0.00031788240946938534, "loss": 0.0151, "num_input_tokens_seen": 173729888, "step": 80525 }, { "epoch": 13.137030995106036, "grad_norm": 0.008018743246793747, "learning_rate": 0.00031781612104773836, "loss": 0.0052, "num_input_tokens_seen": 173739904, "step": 80530 }, { "epoch": 13.137846655791192, "grad_norm": 0.0028908755630254745, "learning_rate": 0.00031774983631827866, "loss": 0.0095, "num_input_tokens_seen": 173749472, "step": 80535 }, { "epoch": 13.138662316476346, "grad_norm": 0.013667856343090534, "learning_rate": 0.00031768355528234986, "loss": 0.0069, "num_input_tokens_seen": 173760160, "step": 80540 }, { "epoch": 13.139477977161501, "grad_norm": 0.05139991641044617, "learning_rate": 0.0003176172779412949, "loss": 0.0032, "num_input_tokens_seen": 173770880, "step": 80545 }, { "epoch": 13.140293637846655, "grad_norm": 0.010604379698634148, "learning_rate": 0.00031755100429645746, "loss": 0.0077, "num_input_tokens_seen": 173781600, "step": 80550 }, { "epoch": 13.141109298531811, "grad_norm": 0.001072026090696454, "learning_rate": 0.00031748473434918014, "loss": 0.0109, "num_input_tokens_seen": 173792256, "step": 80555 }, { "epoch": 13.141924959216965, "grad_norm": 0.004004660062491894, "learning_rate": 0.0003174184681008061, "loss": 0.0036, "num_input_tokens_seen": 173803744, "step": 80560 }, { "epoch": 13.14274061990212, "grad_norm": 0.09035896509885788, "learning_rate": 0.00031735220555267874, "loss": 0.0597, "num_input_tokens_seen": 173816064, "step": 80565 }, { "epoch": 13.143556280587276, "grad_norm": 0.005286915227770805, "learning_rate": 0.0003172859467061404, "loss": 0.0449, "num_input_tokens_seen": 173827168, "step": 80570 }, { "epoch": 13.14437194127243, "grad_norm": 0.008808581158518791, "learning_rate": 0.0003172196915625344, "loss": 0.0092, "num_input_tokens_seen": 173837248, "step": 80575 }, { "epoch": 13.145187601957586, "grad_norm": 0.0073167867958545685, "learning_rate": 0.0003171534401232029, "loss": 0.1121, "num_input_tokens_seen": 173849696, "step": 80580 }, { "epoch": 13.14600326264274, "grad_norm": 0.003934409469366074, "learning_rate": 0.0003170871923894892, "loss": 0.0059, "num_input_tokens_seen": 173858752, "step": 80585 }, { "epoch": 13.146818923327896, "grad_norm": 0.25876080989837646, "learning_rate": 0.0003170209483627353, "loss": 0.0209, "num_input_tokens_seen": 173871360, "step": 80590 }, { "epoch": 13.147634584013051, "grad_norm": 0.07866032421588898, "learning_rate": 0.00031695470804428427, "loss": 0.0028, "num_input_tokens_seen": 173882016, "step": 80595 }, { "epoch": 13.148450244698205, "grad_norm": 0.012794878333806992, "learning_rate": 0.0003168884714354781, "loss": 0.0451, "num_input_tokens_seen": 173893280, "step": 80600 }, { "epoch": 13.149265905383361, "grad_norm": 0.35433799028396606, "learning_rate": 0.0003168222385376596, "loss": 0.0039, "num_input_tokens_seen": 173904000, "step": 80605 }, { "epoch": 13.150081566068515, "grad_norm": 0.012600569985806942, "learning_rate": 0.0003167560093521705, "loss": 0.0042, "num_input_tokens_seen": 173915424, "step": 80610 }, { "epoch": 13.15089722675367, "grad_norm": 0.0026430252473801374, "learning_rate": 0.00031668978388035347, "loss": 0.0034, "num_input_tokens_seen": 173927328, "step": 80615 }, { "epoch": 13.151712887438826, "grad_norm": 0.040123261511325836, "learning_rate": 0.0003166235621235505, "loss": 0.1254, "num_input_tokens_seen": 173938080, "step": 80620 }, { "epoch": 13.15252854812398, "grad_norm": 0.16726359724998474, "learning_rate": 0.00031655734408310367, "loss": 0.0037, "num_input_tokens_seen": 173948000, "step": 80625 }, { "epoch": 13.153344208809136, "grad_norm": 0.005281352903693914, "learning_rate": 0.000316491129760355, "loss": 0.0143, "num_input_tokens_seen": 173959040, "step": 80630 }, { "epoch": 13.15415986949429, "grad_norm": 0.0031237660441547632, "learning_rate": 0.0003164249191566464, "loss": 0.0535, "num_input_tokens_seen": 173967936, "step": 80635 }, { "epoch": 13.154975530179446, "grad_norm": 0.4133281111717224, "learning_rate": 0.00031635871227331957, "loss": 0.0465, "num_input_tokens_seen": 173978624, "step": 80640 }, { "epoch": 13.1557911908646, "grad_norm": 0.001849912921898067, "learning_rate": 0.00031629250911171657, "loss": 0.0091, "num_input_tokens_seen": 173989632, "step": 80645 }, { "epoch": 13.156606851549755, "grad_norm": 0.042521920055150986, "learning_rate": 0.0003162263096731788, "loss": 0.0216, "num_input_tokens_seen": 174001408, "step": 80650 }, { "epoch": 13.15742251223491, "grad_norm": 0.00928044505417347, "learning_rate": 0.0003161601139590482, "loss": 0.0208, "num_input_tokens_seen": 174012512, "step": 80655 }, { "epoch": 13.158238172920065, "grad_norm": 0.015057997778058052, "learning_rate": 0.0003160939219706658, "loss": 0.0022, "num_input_tokens_seen": 174023840, "step": 80660 }, { "epoch": 13.15905383360522, "grad_norm": 0.006567085161805153, "learning_rate": 0.00031602773370937345, "loss": 0.0029, "num_input_tokens_seen": 174033856, "step": 80665 }, { "epoch": 13.159869494290374, "grad_norm": 0.0021416512317955494, "learning_rate": 0.00031596154917651266, "loss": 0.0042, "num_input_tokens_seen": 174044576, "step": 80670 }, { "epoch": 13.16068515497553, "grad_norm": 0.001120802597142756, "learning_rate": 0.0003158953683734244, "loss": 0.0172, "num_input_tokens_seen": 174056096, "step": 80675 }, { "epoch": 13.161500815660686, "grad_norm": 0.3797813951969147, "learning_rate": 0.00031582919130145016, "loss": 0.0759, "num_input_tokens_seen": 174066176, "step": 80680 }, { "epoch": 13.16231647634584, "grad_norm": 0.015251987613737583, "learning_rate": 0.0003157630179619308, "loss": 0.0981, "num_input_tokens_seen": 174076160, "step": 80685 }, { "epoch": 13.163132137030995, "grad_norm": 0.010434516705572605, "learning_rate": 0.00031569684835620784, "loss": 0.1129, "num_input_tokens_seen": 174087136, "step": 80690 }, { "epoch": 13.16394779771615, "grad_norm": 0.03177884966135025, "learning_rate": 0.00031563068248562185, "loss": 0.0146, "num_input_tokens_seen": 174097344, "step": 80695 }, { "epoch": 13.164763458401305, "grad_norm": 0.010107730515301228, "learning_rate": 0.00031556452035151416, "loss": 0.0071, "num_input_tokens_seen": 174107744, "step": 80700 }, { "epoch": 13.16557911908646, "grad_norm": 0.015693768858909607, "learning_rate": 0.00031549836195522517, "loss": 0.0491, "num_input_tokens_seen": 174118368, "step": 80705 }, { "epoch": 13.166394779771615, "grad_norm": 0.012724840082228184, "learning_rate": 0.00031543220729809626, "loss": 0.0037, "num_input_tokens_seen": 174129312, "step": 80710 }, { "epoch": 13.16721044045677, "grad_norm": 1.1048831939697266, "learning_rate": 0.00031536605638146756, "loss": 0.1844, "num_input_tokens_seen": 174139008, "step": 80715 }, { "epoch": 13.168026101141924, "grad_norm": 0.007694819942116737, "learning_rate": 0.0003152999092066801, "loss": 0.0265, "num_input_tokens_seen": 174150208, "step": 80720 }, { "epoch": 13.16884176182708, "grad_norm": 0.05875613912940025, "learning_rate": 0.0003152337657750741, "loss": 0.0049, "num_input_tokens_seen": 174159712, "step": 80725 }, { "epoch": 13.169657422512234, "grad_norm": 0.03571310639381409, "learning_rate": 0.00031516762608799047, "loss": 0.1439, "num_input_tokens_seen": 174169440, "step": 80730 }, { "epoch": 13.17047308319739, "grad_norm": 0.6607456207275391, "learning_rate": 0.0003151014901467691, "loss": 0.1336, "num_input_tokens_seen": 174180608, "step": 80735 }, { "epoch": 13.171288743882545, "grad_norm": 0.0349813811480999, "learning_rate": 0.00031503535795275096, "loss": 0.1877, "num_input_tokens_seen": 174190816, "step": 80740 }, { "epoch": 13.1721044045677, "grad_norm": 0.011462538503110409, "learning_rate": 0.00031496922950727556, "loss": 0.1036, "num_input_tokens_seen": 174202496, "step": 80745 }, { "epoch": 13.172920065252855, "grad_norm": 0.04959709197282791, "learning_rate": 0.00031490310481168375, "loss": 0.0122, "num_input_tokens_seen": 174214496, "step": 80750 }, { "epoch": 13.173735725938009, "grad_norm": 0.008310262113809586, "learning_rate": 0.0003148369838673151, "loss": 0.0158, "num_input_tokens_seen": 174225056, "step": 80755 }, { "epoch": 13.174551386623165, "grad_norm": 0.018154103308916092, "learning_rate": 0.00031477086667551003, "loss": 0.0061, "num_input_tokens_seen": 174235904, "step": 80760 }, { "epoch": 13.17536704730832, "grad_norm": 1.0546104907989502, "learning_rate": 0.00031470475323760826, "loss": 0.0425, "num_input_tokens_seen": 174245696, "step": 80765 }, { "epoch": 13.176182707993474, "grad_norm": 0.08382266759872437, "learning_rate": 0.0003146386435549496, "loss": 0.1056, "num_input_tokens_seen": 174255264, "step": 80770 }, { "epoch": 13.17699836867863, "grad_norm": 1.1403367519378662, "learning_rate": 0.0003145725376288742, "loss": 0.0809, "num_input_tokens_seen": 174266720, "step": 80775 }, { "epoch": 13.177814029363784, "grad_norm": 0.005978901404887438, "learning_rate": 0.00031450643546072145, "loss": 0.0153, "num_input_tokens_seen": 174274176, "step": 80780 }, { "epoch": 13.17862969004894, "grad_norm": 0.0186043418943882, "learning_rate": 0.0003144403370518311, "loss": 0.0098, "num_input_tokens_seen": 174285952, "step": 80785 }, { "epoch": 13.179445350734095, "grad_norm": 0.010959894396364689, "learning_rate": 0.00031437424240354274, "loss": 0.0101, "num_input_tokens_seen": 174298144, "step": 80790 }, { "epoch": 13.18026101141925, "grad_norm": 0.04258236661553383, "learning_rate": 0.00031430815151719583, "loss": 0.0111, "num_input_tokens_seen": 174308032, "step": 80795 }, { "epoch": 13.181076672104405, "grad_norm": 0.029093148186802864, "learning_rate": 0.00031424206439412984, "loss": 0.0061, "num_input_tokens_seen": 174317984, "step": 80800 }, { "epoch": 13.181892332789559, "grad_norm": 0.017018483951687813, "learning_rate": 0.00031417598103568404, "loss": 0.0037, "num_input_tokens_seen": 174328928, "step": 80805 }, { "epoch": 13.182707993474715, "grad_norm": 0.016081376001238823, "learning_rate": 0.00031410990144319756, "loss": 0.0094, "num_input_tokens_seen": 174340448, "step": 80810 }, { "epoch": 13.18352365415987, "grad_norm": 0.0018330741440877318, "learning_rate": 0.00031404382561801006, "loss": 0.0038, "num_input_tokens_seen": 174351488, "step": 80815 }, { "epoch": 13.184339314845024, "grad_norm": 0.04379534348845482, "learning_rate": 0.00031397775356146004, "loss": 0.1479, "num_input_tokens_seen": 174362816, "step": 80820 }, { "epoch": 13.18515497553018, "grad_norm": 0.03306197375059128, "learning_rate": 0.000313911685274887, "loss": 0.0166, "num_input_tokens_seen": 174373696, "step": 80825 }, { "epoch": 13.185970636215334, "grad_norm": 0.13473236560821533, "learning_rate": 0.0003138456207596296, "loss": 0.0314, "num_input_tokens_seen": 174383744, "step": 80830 }, { "epoch": 13.18678629690049, "grad_norm": 0.2564341127872467, "learning_rate": 0.0003137795600170271, "loss": 0.0147, "num_input_tokens_seen": 174394592, "step": 80835 }, { "epoch": 13.187601957585644, "grad_norm": 0.029348919168114662, "learning_rate": 0.0003137135030484177, "loss": 0.0077, "num_input_tokens_seen": 174404128, "step": 80840 }, { "epoch": 13.1884176182708, "grad_norm": 0.2112913429737091, "learning_rate": 0.00031364744985514084, "loss": 0.0277, "num_input_tokens_seen": 174415552, "step": 80845 }, { "epoch": 13.189233278955955, "grad_norm": 0.013795565813779831, "learning_rate": 0.00031358140043853455, "loss": 0.1234, "num_input_tokens_seen": 174426112, "step": 80850 }, { "epoch": 13.190048939641109, "grad_norm": 0.6724637746810913, "learning_rate": 0.00031351535479993785, "loss": 0.1164, "num_input_tokens_seen": 174436032, "step": 80855 }, { "epoch": 13.190864600326265, "grad_norm": 0.020665759220719337, "learning_rate": 0.0003134493129406889, "loss": 0.0053, "num_input_tokens_seen": 174445376, "step": 80860 }, { "epoch": 13.191680261011419, "grad_norm": 0.02903015911579132, "learning_rate": 0.00031338327486212647, "loss": 0.0121, "num_input_tokens_seen": 174456448, "step": 80865 }, { "epoch": 13.192495921696574, "grad_norm": 0.0185124259442091, "learning_rate": 0.00031331724056558847, "loss": 0.0068, "num_input_tokens_seen": 174468160, "step": 80870 }, { "epoch": 13.19331158238173, "grad_norm": 0.6788234710693359, "learning_rate": 0.0003132512100524134, "loss": 0.1327, "num_input_tokens_seen": 174477888, "step": 80875 }, { "epoch": 13.194127243066884, "grad_norm": 0.6895075440406799, "learning_rate": 0.00031318518332393975, "loss": 0.0911, "num_input_tokens_seen": 174489184, "step": 80880 }, { "epoch": 13.19494290375204, "grad_norm": 0.012661950662732124, "learning_rate": 0.0003131191603815051, "loss": 0.0099, "num_input_tokens_seen": 174500000, "step": 80885 }, { "epoch": 13.195758564437194, "grad_norm": 0.011952362023293972, "learning_rate": 0.000313053141226448, "loss": 0.0355, "num_input_tokens_seen": 174510176, "step": 80890 }, { "epoch": 13.19657422512235, "grad_norm": 0.403862327337265, "learning_rate": 0.0003129871258601059, "loss": 0.0506, "num_input_tokens_seen": 174522464, "step": 80895 }, { "epoch": 13.197389885807505, "grad_norm": 0.010853935964405537, "learning_rate": 0.0003129211142838171, "loss": 0.0092, "num_input_tokens_seen": 174533952, "step": 80900 }, { "epoch": 13.198205546492659, "grad_norm": 0.6797612309455872, "learning_rate": 0.0003128551064989191, "loss": 0.106, "num_input_tokens_seen": 174544544, "step": 80905 }, { "epoch": 13.199021207177815, "grad_norm": 0.020436374470591545, "learning_rate": 0.00031278910250674994, "loss": 0.0763, "num_input_tokens_seen": 174556160, "step": 80910 }, { "epoch": 13.199836867862969, "grad_norm": 0.1086232140660286, "learning_rate": 0.00031272310230864695, "loss": 0.0568, "num_input_tokens_seen": 174566016, "step": 80915 }, { "epoch": 13.200652528548124, "grad_norm": 0.10061995685100555, "learning_rate": 0.0003126571059059481, "loss": 0.0203, "num_input_tokens_seen": 174576896, "step": 80920 }, { "epoch": 13.201468189233278, "grad_norm": 0.07237649708986282, "learning_rate": 0.00031259111329999035, "loss": 0.0156, "num_input_tokens_seen": 174589056, "step": 80925 }, { "epoch": 13.202283849918434, "grad_norm": 0.028159143403172493, "learning_rate": 0.00031252512449211163, "loss": 0.042, "num_input_tokens_seen": 174598752, "step": 80930 }, { "epoch": 13.20309951060359, "grad_norm": 0.03933137655258179, "learning_rate": 0.0003124591394836491, "loss": 0.031, "num_input_tokens_seen": 174610304, "step": 80935 }, { "epoch": 13.203915171288743, "grad_norm": 0.022296667098999023, "learning_rate": 0.00031239315827593994, "loss": 0.0123, "num_input_tokens_seen": 174620896, "step": 80940 }, { "epoch": 13.2047308319739, "grad_norm": 0.020430929958820343, "learning_rate": 0.0003123271808703215, "loss": 0.0842, "num_input_tokens_seen": 174631680, "step": 80945 }, { "epoch": 13.205546492659053, "grad_norm": 0.04694078862667084, "learning_rate": 0.0003122612072681308, "loss": 0.0834, "num_input_tokens_seen": 174642240, "step": 80950 }, { "epoch": 13.206362153344209, "grad_norm": 0.024211278185248375, "learning_rate": 0.00031219523747070475, "loss": 0.0224, "num_input_tokens_seen": 174652480, "step": 80955 }, { "epoch": 13.207177814029365, "grad_norm": 0.014290661551058292, "learning_rate": 0.00031212927147938066, "loss": 0.0044, "num_input_tokens_seen": 174663264, "step": 80960 }, { "epoch": 13.207993474714518, "grad_norm": 0.01665358990430832, "learning_rate": 0.0003120633092954951, "loss": 0.0142, "num_input_tokens_seen": 174674720, "step": 80965 }, { "epoch": 13.208809135399674, "grad_norm": 0.13216033577919006, "learning_rate": 0.0003119973509203851, "loss": 0.0144, "num_input_tokens_seen": 174684448, "step": 80970 }, { "epoch": 13.209624796084828, "grad_norm": 0.03964455425739288, "learning_rate": 0.00031193139635538714, "loss": 0.0112, "num_input_tokens_seen": 174695968, "step": 80975 }, { "epoch": 13.210440456769984, "grad_norm": 0.08269073069095612, "learning_rate": 0.00031186544560183796, "loss": 0.0152, "num_input_tokens_seen": 174707904, "step": 80980 }, { "epoch": 13.21125611745514, "grad_norm": 0.01101865153759718, "learning_rate": 0.00031179949866107443, "loss": 0.0123, "num_input_tokens_seen": 174718752, "step": 80985 }, { "epoch": 13.212071778140293, "grad_norm": 0.7104308009147644, "learning_rate": 0.0003117335555344326, "loss": 0.1477, "num_input_tokens_seen": 174729728, "step": 80990 }, { "epoch": 13.21288743882545, "grad_norm": 0.006669707130640745, "learning_rate": 0.00031166761622324936, "loss": 0.0133, "num_input_tokens_seen": 174741088, "step": 80995 }, { "epoch": 13.213703099510603, "grad_norm": 0.04025764390826225, "learning_rate": 0.00031160168072886054, "loss": 0.0149, "num_input_tokens_seen": 174751456, "step": 81000 }, { "epoch": 13.214518760195759, "grad_norm": 0.0031098683830350637, "learning_rate": 0.00031153574905260287, "loss": 0.0139, "num_input_tokens_seen": 174761888, "step": 81005 }, { "epoch": 13.215334420880913, "grad_norm": 0.07051649689674377, "learning_rate": 0.000311469821195812, "loss": 0.0229, "num_input_tokens_seen": 174773280, "step": 81010 }, { "epoch": 13.216150081566068, "grad_norm": 0.007362148724496365, "learning_rate": 0.00031140389715982476, "loss": 0.0534, "num_input_tokens_seen": 174784576, "step": 81015 }, { "epoch": 13.216965742251224, "grad_norm": 0.3993504047393799, "learning_rate": 0.00031133797694597655, "loss": 0.0333, "num_input_tokens_seen": 174794240, "step": 81020 }, { "epoch": 13.217781402936378, "grad_norm": 0.036044374108314514, "learning_rate": 0.0003112720605556037, "loss": 0.0109, "num_input_tokens_seen": 174805024, "step": 81025 }, { "epoch": 13.218597063621534, "grad_norm": 0.08121154457330704, "learning_rate": 0.00031120614799004184, "loss": 0.0183, "num_input_tokens_seen": 174815840, "step": 81030 }, { "epoch": 13.219412724306688, "grad_norm": 0.011304754763841629, "learning_rate": 0.0003111402392506271, "loss": 0.1752, "num_input_tokens_seen": 174826304, "step": 81035 }, { "epoch": 13.220228384991843, "grad_norm": 0.7776373028755188, "learning_rate": 0.0003110743343386947, "loss": 0.1095, "num_input_tokens_seen": 174838144, "step": 81040 }, { "epoch": 13.221044045676999, "grad_norm": 0.01038753055036068, "learning_rate": 0.0003110084332555808, "loss": 0.0641, "num_input_tokens_seen": 174848480, "step": 81045 }, { "epoch": 13.221859706362153, "grad_norm": 0.008876294828951359, "learning_rate": 0.00031094253600262063, "loss": 0.0052, "num_input_tokens_seen": 174858880, "step": 81050 }, { "epoch": 13.222675367047309, "grad_norm": 0.11714474856853485, "learning_rate": 0.00031087664258115, "loss": 0.0354, "num_input_tokens_seen": 174869792, "step": 81055 }, { "epoch": 13.223491027732463, "grad_norm": 0.06667176634073257, "learning_rate": 0.0003108107529925038, "loss": 0.019, "num_input_tokens_seen": 174881568, "step": 81060 }, { "epoch": 13.224306688417618, "grad_norm": 0.043160580098629, "learning_rate": 0.0003107448672380181, "loss": 0.0058, "num_input_tokens_seen": 174892096, "step": 81065 }, { "epoch": 13.225122349102774, "grad_norm": 0.08003632724285126, "learning_rate": 0.0003106789853190274, "loss": 0.0364, "num_input_tokens_seen": 174902528, "step": 81070 }, { "epoch": 13.225938009787928, "grad_norm": 0.0033619715832173824, "learning_rate": 0.0003106131072368674, "loss": 0.0154, "num_input_tokens_seen": 174913376, "step": 81075 }, { "epoch": 13.226753670473084, "grad_norm": 0.021919123828411102, "learning_rate": 0.00031054723299287303, "loss": 0.0164, "num_input_tokens_seen": 174924800, "step": 81080 }, { "epoch": 13.227569331158238, "grad_norm": 0.01958613656461239, "learning_rate": 0.00031048136258837923, "loss": 0.007, "num_input_tokens_seen": 174936480, "step": 81085 }, { "epoch": 13.228384991843393, "grad_norm": 0.028771450743079185, "learning_rate": 0.0003104154960247211, "loss": 0.0034, "num_input_tokens_seen": 174947104, "step": 81090 }, { "epoch": 13.229200652528547, "grad_norm": 0.02282165177166462, "learning_rate": 0.0003103496333032334, "loss": 0.2098, "num_input_tokens_seen": 174958272, "step": 81095 }, { "epoch": 13.230016313213703, "grad_norm": 0.139828160405159, "learning_rate": 0.00031028377442525104, "loss": 0.0207, "num_input_tokens_seen": 174968128, "step": 81100 }, { "epoch": 13.230831973898859, "grad_norm": 0.04515887051820755, "learning_rate": 0.0003102179193921086, "loss": 0.0389, "num_input_tokens_seen": 174978176, "step": 81105 }, { "epoch": 13.231647634584013, "grad_norm": 0.5662334561347961, "learning_rate": 0.00031015206820514087, "loss": 0.0681, "num_input_tokens_seen": 174987936, "step": 81110 }, { "epoch": 13.232463295269168, "grad_norm": 0.0116050373762846, "learning_rate": 0.0003100862208656823, "loss": 0.0317, "num_input_tokens_seen": 174998016, "step": 81115 }, { "epoch": 13.233278955954322, "grad_norm": 0.005107648205012083, "learning_rate": 0.0003100203773750674, "loss": 0.025, "num_input_tokens_seen": 175008864, "step": 81120 }, { "epoch": 13.234094616639478, "grad_norm": 0.16748400032520294, "learning_rate": 0.00030995453773463035, "loss": 0.0401, "num_input_tokens_seen": 175018528, "step": 81125 }, { "epoch": 13.234910277324634, "grad_norm": 0.3414371609687805, "learning_rate": 0.00030988870194570596, "loss": 0.0144, "num_input_tokens_seen": 175028928, "step": 81130 }, { "epoch": 13.235725938009788, "grad_norm": 0.3373274803161621, "learning_rate": 0.00030982287000962805, "loss": 0.025, "num_input_tokens_seen": 175041440, "step": 81135 }, { "epoch": 13.236541598694943, "grad_norm": 0.011394086293876171, "learning_rate": 0.000309757041927731, "loss": 0.0132, "num_input_tokens_seen": 175052576, "step": 81140 }, { "epoch": 13.237357259380097, "grad_norm": 0.43421101570129395, "learning_rate": 0.00030969121770134877, "loss": 0.0328, "num_input_tokens_seen": 175063136, "step": 81145 }, { "epoch": 13.238172920065253, "grad_norm": 0.004969931207597256, "learning_rate": 0.0003096253973318156, "loss": 0.0085, "num_input_tokens_seen": 175074112, "step": 81150 }, { "epoch": 13.238988580750409, "grad_norm": 0.1451379805803299, "learning_rate": 0.000309559580820465, "loss": 0.0083, "num_input_tokens_seen": 175085376, "step": 81155 }, { "epoch": 13.239804241435563, "grad_norm": 0.11048691719770432, "learning_rate": 0.0003094937681686314, "loss": 0.0059, "num_input_tokens_seen": 175096000, "step": 81160 }, { "epoch": 13.240619902120718, "grad_norm": 0.0023107551969587803, "learning_rate": 0.00030942795937764794, "loss": 0.0145, "num_input_tokens_seen": 175106688, "step": 81165 }, { "epoch": 13.241435562805872, "grad_norm": 0.6982203722000122, "learning_rate": 0.00030936215444884893, "loss": 0.1513, "num_input_tokens_seen": 175118080, "step": 81170 }, { "epoch": 13.242251223491028, "grad_norm": 0.012159997597336769, "learning_rate": 0.00030929635338356745, "loss": 0.0288, "num_input_tokens_seen": 175129312, "step": 81175 }, { "epoch": 13.243066884176184, "grad_norm": 0.2737849950790405, "learning_rate": 0.0003092305561831375, "loss": 0.011, "num_input_tokens_seen": 175139808, "step": 81180 }, { "epoch": 13.243882544861338, "grad_norm": 0.11475928872823715, "learning_rate": 0.0003091647628488922, "loss": 0.0072, "num_input_tokens_seen": 175149472, "step": 81185 }, { "epoch": 13.244698205546493, "grad_norm": 0.031855836510658264, "learning_rate": 0.0003090989733821652, "loss": 0.0079, "num_input_tokens_seen": 175159648, "step": 81190 }, { "epoch": 13.245513866231647, "grad_norm": 0.05392378941178322, "learning_rate": 0.0003090331877842895, "loss": 0.0112, "num_input_tokens_seen": 175169280, "step": 81195 }, { "epoch": 13.246329526916803, "grad_norm": 0.0016745877219364047, "learning_rate": 0.00030896740605659845, "loss": 0.0329, "num_input_tokens_seen": 175180672, "step": 81200 }, { "epoch": 13.247145187601957, "grad_norm": 0.04936029016971588, "learning_rate": 0.00030890162820042553, "loss": 0.009, "num_input_tokens_seen": 175190432, "step": 81205 }, { "epoch": 13.247960848287113, "grad_norm": 0.9666657447814941, "learning_rate": 0.00030883585421710334, "loss": 0.068, "num_input_tokens_seen": 175201408, "step": 81210 }, { "epoch": 13.248776508972268, "grad_norm": 0.006816585548222065, "learning_rate": 0.00030877008410796526, "loss": 0.0499, "num_input_tokens_seen": 175211872, "step": 81215 }, { "epoch": 13.249592169657422, "grad_norm": 0.34660062193870544, "learning_rate": 0.00030870431787434385, "loss": 0.0348, "num_input_tokens_seen": 175222208, "step": 81220 }, { "epoch": 13.250407830342578, "grad_norm": 0.008313002996146679, "learning_rate": 0.00030863855551757223, "loss": 0.0276, "num_input_tokens_seen": 175233280, "step": 81225 }, { "epoch": 13.251223491027732, "grad_norm": 0.7288128137588501, "learning_rate": 0.0003085727970389829, "loss": 0.098, "num_input_tokens_seen": 175243648, "step": 81230 }, { "epoch": 13.252039151712887, "grad_norm": 0.012408953160047531, "learning_rate": 0.0003085070424399089, "loss": 0.0078, "num_input_tokens_seen": 175255616, "step": 81235 }, { "epoch": 13.252854812398043, "grad_norm": 0.04503495246171951, "learning_rate": 0.00030844129172168236, "loss": 0.022, "num_input_tokens_seen": 175266016, "step": 81240 }, { "epoch": 13.253670473083197, "grad_norm": 0.5441933870315552, "learning_rate": 0.0003083755448856361, "loss": 0.115, "num_input_tokens_seen": 175277856, "step": 81245 }, { "epoch": 13.254486133768353, "grad_norm": 0.6115848422050476, "learning_rate": 0.00030830980193310265, "loss": 0.0567, "num_input_tokens_seen": 175288384, "step": 81250 }, { "epoch": 13.255301794453507, "grad_norm": 0.02535507082939148, "learning_rate": 0.00030824406286541415, "loss": 0.1025, "num_input_tokens_seen": 175299488, "step": 81255 }, { "epoch": 13.256117455138662, "grad_norm": 0.015904977917671204, "learning_rate": 0.00030817832768390306, "loss": 0.0068, "num_input_tokens_seen": 175311840, "step": 81260 }, { "epoch": 13.256933115823816, "grad_norm": 0.004766124300658703, "learning_rate": 0.0003081125963899014, "loss": 0.0093, "num_input_tokens_seen": 175322464, "step": 81265 }, { "epoch": 13.257748776508972, "grad_norm": 0.09649891406297684, "learning_rate": 0.0003080468689847414, "loss": 0.036, "num_input_tokens_seen": 175333696, "step": 81270 }, { "epoch": 13.258564437194128, "grad_norm": 0.012715374119579792, "learning_rate": 0.00030798114546975525, "loss": 0.0161, "num_input_tokens_seen": 175345408, "step": 81275 }, { "epoch": 13.259380097879282, "grad_norm": 0.005811099894344807, "learning_rate": 0.00030791542584627455, "loss": 0.0089, "num_input_tokens_seen": 175357696, "step": 81280 }, { "epoch": 13.260195758564437, "grad_norm": 0.01558210514485836, "learning_rate": 0.0003078497101156317, "loss": 0.0047, "num_input_tokens_seen": 175368160, "step": 81285 }, { "epoch": 13.261011419249591, "grad_norm": 0.0440271832048893, "learning_rate": 0.00030778399827915796, "loss": 0.0146, "num_input_tokens_seen": 175379072, "step": 81290 }, { "epoch": 13.261827079934747, "grad_norm": 0.0681568905711174, "learning_rate": 0.0003077182903381856, "loss": 0.0196, "num_input_tokens_seen": 175389088, "step": 81295 }, { "epoch": 13.262642740619903, "grad_norm": 0.005766595713794231, "learning_rate": 0.0003076525862940458, "loss": 0.004, "num_input_tokens_seen": 175399200, "step": 81300 }, { "epoch": 13.263458401305057, "grad_norm": 0.019221173599362373, "learning_rate": 0.00030758688614807033, "loss": 0.0183, "num_input_tokens_seen": 175411136, "step": 81305 }, { "epoch": 13.264274061990212, "grad_norm": 0.0062010763213038445, "learning_rate": 0.0003075211899015909, "loss": 0.0766, "num_input_tokens_seen": 175420928, "step": 81310 }, { "epoch": 13.265089722675366, "grad_norm": 0.059920456260442734, "learning_rate": 0.0003074554975559386, "loss": 0.1071, "num_input_tokens_seen": 175431296, "step": 81315 }, { "epoch": 13.265905383360522, "grad_norm": 0.039908383041620255, "learning_rate": 0.000307389809112445, "loss": 0.0067, "num_input_tokens_seen": 175441568, "step": 81320 }, { "epoch": 13.266721044045678, "grad_norm": 0.20128975808620453, "learning_rate": 0.0003073241245724411, "loss": 0.0222, "num_input_tokens_seen": 175451584, "step": 81325 }, { "epoch": 13.267536704730832, "grad_norm": 0.06668713688850403, "learning_rate": 0.00030725844393725846, "loss": 0.0162, "num_input_tokens_seen": 175462816, "step": 81330 }, { "epoch": 13.268352365415987, "grad_norm": 0.1463806927204132, "learning_rate": 0.00030719276720822774, "loss": 0.0196, "num_input_tokens_seen": 175474016, "step": 81335 }, { "epoch": 13.269168026101141, "grad_norm": 0.6071243286132812, "learning_rate": 0.0003071270943866804, "loss": 0.1144, "num_input_tokens_seen": 175485760, "step": 81340 }, { "epoch": 13.269983686786297, "grad_norm": 0.07866228371858597, "learning_rate": 0.000307061425473947, "loss": 0.0119, "num_input_tokens_seen": 175496864, "step": 81345 }, { "epoch": 13.270799347471453, "grad_norm": 0.11383318156003952, "learning_rate": 0.00030699576047135875, "loss": 0.0378, "num_input_tokens_seen": 175508416, "step": 81350 }, { "epoch": 13.271615008156607, "grad_norm": 0.2868669927120209, "learning_rate": 0.0003069300993802461, "loss": 0.0337, "num_input_tokens_seen": 175518880, "step": 81355 }, { "epoch": 13.272430668841762, "grad_norm": 0.16831383109092712, "learning_rate": 0.00030686444220194, "loss": 0.0214, "num_input_tokens_seen": 175528512, "step": 81360 }, { "epoch": 13.273246329526916, "grad_norm": 0.008366957306861877, "learning_rate": 0.00030679878893777085, "loss": 0.0552, "num_input_tokens_seen": 175539456, "step": 81365 }, { "epoch": 13.274061990212072, "grad_norm": 0.06718295067548752, "learning_rate": 0.0003067331395890696, "loss": 0.0074, "num_input_tokens_seen": 175549472, "step": 81370 }, { "epoch": 13.274877650897226, "grad_norm": 0.7432044148445129, "learning_rate": 0.0003066674941571661, "loss": 0.0469, "num_input_tokens_seen": 175560352, "step": 81375 }, { "epoch": 13.275693311582382, "grad_norm": 0.005605136044323444, "learning_rate": 0.0003066018526433914, "loss": 0.0131, "num_input_tokens_seen": 175571296, "step": 81380 }, { "epoch": 13.276508972267537, "grad_norm": 0.011409245431423187, "learning_rate": 0.00030653621504907533, "loss": 0.0609, "num_input_tokens_seen": 175581248, "step": 81385 }, { "epoch": 13.277324632952691, "grad_norm": 0.0656249150633812, "learning_rate": 0.0003064705813755483, "loss": 0.0062, "num_input_tokens_seen": 175592032, "step": 81390 }, { "epoch": 13.278140293637847, "grad_norm": 0.013315918855369091, "learning_rate": 0.0003064049516241405, "loss": 0.0562, "num_input_tokens_seen": 175603040, "step": 81395 }, { "epoch": 13.278955954323001, "grad_norm": 0.774718701839447, "learning_rate": 0.00030633932579618195, "loss": 0.0209, "num_input_tokens_seen": 175612480, "step": 81400 }, { "epoch": 13.279771615008157, "grad_norm": 0.001465021283365786, "learning_rate": 0.00030627370389300256, "loss": 0.0026, "num_input_tokens_seen": 175624416, "step": 81405 }, { "epoch": 13.280587275693312, "grad_norm": 0.0014322602655738592, "learning_rate": 0.0003062080859159323, "loss": 0.0824, "num_input_tokens_seen": 175634720, "step": 81410 }, { "epoch": 13.281402936378466, "grad_norm": 0.022499917075037956, "learning_rate": 0.0003061424718663011, "loss": 0.0262, "num_input_tokens_seen": 175645696, "step": 81415 }, { "epoch": 13.282218597063622, "grad_norm": 0.007681040093302727, "learning_rate": 0.00030607686174543864, "loss": 0.0191, "num_input_tokens_seen": 175655904, "step": 81420 }, { "epoch": 13.283034257748776, "grad_norm": 0.005519915837794542, "learning_rate": 0.00030601125555467456, "loss": 0.1113, "num_input_tokens_seen": 175666912, "step": 81425 }, { "epoch": 13.283849918433932, "grad_norm": 0.005758213810622692, "learning_rate": 0.0003059456532953385, "loss": 0.0163, "num_input_tokens_seen": 175676864, "step": 81430 }, { "epoch": 13.284665579119087, "grad_norm": 0.019506895914673805, "learning_rate": 0.00030588005496876, "loss": 0.0505, "num_input_tokens_seen": 175687936, "step": 81435 }, { "epoch": 13.285481239804241, "grad_norm": 0.02553323470056057, "learning_rate": 0.00030581446057626827, "loss": 0.0088, "num_input_tokens_seen": 175698368, "step": 81440 }, { "epoch": 13.286296900489397, "grad_norm": 0.08491257578134537, "learning_rate": 0.00030574887011919306, "loss": 0.0214, "num_input_tokens_seen": 175708992, "step": 81445 }, { "epoch": 13.28711256117455, "grad_norm": 0.03230208903551102, "learning_rate": 0.0003056832835988632, "loss": 0.044, "num_input_tokens_seen": 175719616, "step": 81450 }, { "epoch": 13.287928221859707, "grad_norm": 1.4422619342803955, "learning_rate": 0.00030561770101660837, "loss": 0.0463, "num_input_tokens_seen": 175730912, "step": 81455 }, { "epoch": 13.28874388254486, "grad_norm": 0.003273512702435255, "learning_rate": 0.0003055521223737572, "loss": 0.003, "num_input_tokens_seen": 175741536, "step": 81460 }, { "epoch": 13.289559543230016, "grad_norm": 0.20246542990207672, "learning_rate": 0.0003054865476716391, "loss": 0.0146, "num_input_tokens_seen": 175752512, "step": 81465 }, { "epoch": 13.290375203915172, "grad_norm": 0.15982434153556824, "learning_rate": 0.0003054209769115827, "loss": 0.0171, "num_input_tokens_seen": 175764448, "step": 81470 }, { "epoch": 13.291190864600326, "grad_norm": 0.009563048370182514, "learning_rate": 0.0003053554100949173, "loss": 0.0676, "num_input_tokens_seen": 175776896, "step": 81475 }, { "epoch": 13.292006525285482, "grad_norm": 0.35862571001052856, "learning_rate": 0.0003052898472229711, "loss": 0.0103, "num_input_tokens_seen": 175787968, "step": 81480 }, { "epoch": 13.292822185970635, "grad_norm": 0.04893267899751663, "learning_rate": 0.0003052242882970735, "loss": 0.0269, "num_input_tokens_seen": 175798336, "step": 81485 }, { "epoch": 13.293637846655791, "grad_norm": 0.08362024277448654, "learning_rate": 0.0003051587333185525, "loss": 0.0077, "num_input_tokens_seen": 175809696, "step": 81490 }, { "epoch": 13.294453507340947, "grad_norm": 0.06968243420124054, "learning_rate": 0.00030509318228873715, "loss": 0.0434, "num_input_tokens_seen": 175820832, "step": 81495 }, { "epoch": 13.2952691680261, "grad_norm": 0.001856321468949318, "learning_rate": 0.00030502763520895556, "loss": 0.0536, "num_input_tokens_seen": 175829376, "step": 81500 }, { "epoch": 13.296084828711257, "grad_norm": 0.11197815835475922, "learning_rate": 0.00030496209208053643, "loss": 0.0143, "num_input_tokens_seen": 175841888, "step": 81505 }, { "epoch": 13.29690048939641, "grad_norm": 0.12286759167909622, "learning_rate": 0.0003048965529048078, "loss": 0.015, "num_input_tokens_seen": 175852192, "step": 81510 }, { "epoch": 13.297716150081566, "grad_norm": 0.019069164991378784, "learning_rate": 0.00030483101768309797, "loss": 0.0067, "num_input_tokens_seen": 175862240, "step": 81515 }, { "epoch": 13.298531810766722, "grad_norm": 0.0016377100255340338, "learning_rate": 0.00030476548641673537, "loss": 0.0027, "num_input_tokens_seen": 175872864, "step": 81520 }, { "epoch": 13.299347471451876, "grad_norm": 0.04974628612399101, "learning_rate": 0.0003046999591070476, "loss": 0.0275, "num_input_tokens_seen": 175884992, "step": 81525 }, { "epoch": 13.300163132137031, "grad_norm": 0.033164240419864655, "learning_rate": 0.0003046344357553632, "loss": 0.0118, "num_input_tokens_seen": 175896192, "step": 81530 }, { "epoch": 13.300978792822185, "grad_norm": 0.05046248063445091, "learning_rate": 0.0003045689163630095, "loss": 0.0226, "num_input_tokens_seen": 175906944, "step": 81535 }, { "epoch": 13.301794453507341, "grad_norm": 0.009083160199224949, "learning_rate": 0.000304503400931315, "loss": 0.007, "num_input_tokens_seen": 175917888, "step": 81540 }, { "epoch": 13.302610114192497, "grad_norm": 0.014595329761505127, "learning_rate": 0.00030443788946160676, "loss": 0.0157, "num_input_tokens_seen": 175927104, "step": 81545 }, { "epoch": 13.30342577487765, "grad_norm": 0.01882917620241642, "learning_rate": 0.000304372381955213, "loss": 0.0038, "num_input_tokens_seen": 175936832, "step": 81550 }, { "epoch": 13.304241435562806, "grad_norm": 0.04029667750000954, "learning_rate": 0.00030430687841346096, "loss": 0.0037, "num_input_tokens_seen": 175947264, "step": 81555 }, { "epoch": 13.30505709624796, "grad_norm": 0.0568963885307312, "learning_rate": 0.00030424137883767826, "loss": 0.0145, "num_input_tokens_seen": 175957856, "step": 81560 }, { "epoch": 13.305872756933116, "grad_norm": 0.237632617354393, "learning_rate": 0.00030417588322919243, "loss": 0.0104, "num_input_tokens_seen": 175968544, "step": 81565 }, { "epoch": 13.30668841761827, "grad_norm": 0.008709804154932499, "learning_rate": 0.00030411039158933075, "loss": 0.0227, "num_input_tokens_seen": 175978720, "step": 81570 }, { "epoch": 13.307504078303426, "grad_norm": 0.003906225087121129, "learning_rate": 0.0003040449039194205, "loss": 0.0024, "num_input_tokens_seen": 175989152, "step": 81575 }, { "epoch": 13.308319738988581, "grad_norm": 0.004074087832123041, "learning_rate": 0.00030397942022078884, "loss": 0.0159, "num_input_tokens_seen": 176000608, "step": 81580 }, { "epoch": 13.309135399673735, "grad_norm": 0.006301126908510923, "learning_rate": 0.00030391394049476275, "loss": 0.0037, "num_input_tokens_seen": 176011424, "step": 81585 }, { "epoch": 13.309951060358891, "grad_norm": 0.015403126366436481, "learning_rate": 0.00030384846474266965, "loss": 0.0033, "num_input_tokens_seen": 176020544, "step": 81590 }, { "epoch": 13.310766721044045, "grad_norm": 0.25952592492103577, "learning_rate": 0.0003037829929658361, "loss": 0.0243, "num_input_tokens_seen": 176032384, "step": 81595 }, { "epoch": 13.3115823817292, "grad_norm": 0.06939897686243057, "learning_rate": 0.0003037175251655892, "loss": 0.1508, "num_input_tokens_seen": 176042720, "step": 81600 }, { "epoch": 13.312398042414356, "grad_norm": 0.0021613403223454952, "learning_rate": 0.0003036520613432555, "loss": 0.0804, "num_input_tokens_seen": 176053248, "step": 81605 }, { "epoch": 13.31321370309951, "grad_norm": 0.00468669505789876, "learning_rate": 0.0003035866015001621, "loss": 0.0045, "num_input_tokens_seen": 176063680, "step": 81610 }, { "epoch": 13.314029363784666, "grad_norm": 0.02339099533855915, "learning_rate": 0.00030352114563763515, "loss": 0.0053, "num_input_tokens_seen": 176073888, "step": 81615 }, { "epoch": 13.31484502446982, "grad_norm": 0.051926396787166595, "learning_rate": 0.00030345569375700145, "loss": 0.0038, "num_input_tokens_seen": 176084096, "step": 81620 }, { "epoch": 13.315660685154976, "grad_norm": 0.003986321855336428, "learning_rate": 0.0003033902458595877, "loss": 0.1531, "num_input_tokens_seen": 176093696, "step": 81625 }, { "epoch": 13.31647634584013, "grad_norm": 0.05474114418029785, "learning_rate": 0.00030332480194671975, "loss": 0.0096, "num_input_tokens_seen": 176104544, "step": 81630 }, { "epoch": 13.317292006525285, "grad_norm": 0.036133963614702225, "learning_rate": 0.0003032593620197245, "loss": 0.0996, "num_input_tokens_seen": 176115616, "step": 81635 }, { "epoch": 13.318107667210441, "grad_norm": 0.23172801733016968, "learning_rate": 0.0003031939260799276, "loss": 0.0184, "num_input_tokens_seen": 176125664, "step": 81640 }, { "epoch": 13.318923327895595, "grad_norm": 0.03937000408768654, "learning_rate": 0.00030312849412865564, "loss": 0.0149, "num_input_tokens_seen": 176135840, "step": 81645 }, { "epoch": 13.31973898858075, "grad_norm": 0.005249245557934046, "learning_rate": 0.00030306306616723424, "loss": 0.015, "num_input_tokens_seen": 176146848, "step": 81650 }, { "epoch": 13.320554649265905, "grad_norm": 0.021601341664791107, "learning_rate": 0.00030299764219698987, "loss": 0.0079, "num_input_tokens_seen": 176155872, "step": 81655 }, { "epoch": 13.32137030995106, "grad_norm": 0.04131583124399185, "learning_rate": 0.00030293222221924805, "loss": 0.0157, "num_input_tokens_seen": 176166688, "step": 81660 }, { "epoch": 13.322185970636216, "grad_norm": 0.05202984809875488, "learning_rate": 0.0003028668062353349, "loss": 0.0187, "num_input_tokens_seen": 176178624, "step": 81665 }, { "epoch": 13.32300163132137, "grad_norm": 0.005935819819569588, "learning_rate": 0.0003028013942465758, "loss": 0.023, "num_input_tokens_seen": 176188704, "step": 81670 }, { "epoch": 13.323817292006526, "grad_norm": 0.01653580367565155, "learning_rate": 0.00030273598625429687, "loss": 0.0061, "num_input_tokens_seen": 176199488, "step": 81675 }, { "epoch": 13.32463295269168, "grad_norm": 0.7669261693954468, "learning_rate": 0.00030267058225982315, "loss": 0.2039, "num_input_tokens_seen": 176209696, "step": 81680 }, { "epoch": 13.325448613376835, "grad_norm": 0.01939382590353489, "learning_rate": 0.00030260518226448064, "loss": 0.0866, "num_input_tokens_seen": 176219136, "step": 81685 }, { "epoch": 13.326264274061991, "grad_norm": 0.6556524038314819, "learning_rate": 0.00030253978626959435, "loss": 0.0302, "num_input_tokens_seen": 176229024, "step": 81690 }, { "epoch": 13.327079934747145, "grad_norm": 0.5081639289855957, "learning_rate": 0.00030247439427649, "loss": 0.0247, "num_input_tokens_seen": 176239136, "step": 81695 }, { "epoch": 13.3278955954323, "grad_norm": 0.0708695724606514, "learning_rate": 0.0003024090062864924, "loss": 0.0047, "num_input_tokens_seen": 176250016, "step": 81700 }, { "epoch": 13.328711256117455, "grad_norm": 0.3880775570869446, "learning_rate": 0.00030234362230092705, "loss": 0.0096, "num_input_tokens_seen": 176261376, "step": 81705 }, { "epoch": 13.32952691680261, "grad_norm": 0.010771960020065308, "learning_rate": 0.0003022782423211189, "loss": 0.0024, "num_input_tokens_seen": 176272384, "step": 81710 }, { "epoch": 13.330342577487766, "grad_norm": 0.006712086033076048, "learning_rate": 0.0003022128663483931, "loss": 0.0059, "num_input_tokens_seen": 176283712, "step": 81715 }, { "epoch": 13.33115823817292, "grad_norm": 0.002526842290535569, "learning_rate": 0.0003021474943840743, "loss": 0.1008, "num_input_tokens_seen": 176294944, "step": 81720 }, { "epoch": 13.331973898858076, "grad_norm": 0.003371078986674547, "learning_rate": 0.00030208212642948755, "loss": 0.0433, "num_input_tokens_seen": 176307264, "step": 81725 }, { "epoch": 13.33278955954323, "grad_norm": 0.015998700633645058, "learning_rate": 0.0003020167624859577, "loss": 0.077, "num_input_tokens_seen": 176318624, "step": 81730 }, { "epoch": 13.333605220228385, "grad_norm": 0.15391777455806732, "learning_rate": 0.00030195140255480927, "loss": 0.0454, "num_input_tokens_seen": 176330208, "step": 81735 }, { "epoch": 13.33442088091354, "grad_norm": 0.00808068085461855, "learning_rate": 0.0003018860466373669, "loss": 0.056, "num_input_tokens_seen": 176340896, "step": 81740 }, { "epoch": 13.335236541598695, "grad_norm": 0.2969469726085663, "learning_rate": 0.0003018206947349551, "loss": 0.0276, "num_input_tokens_seen": 176351488, "step": 81745 }, { "epoch": 13.33605220228385, "grad_norm": 0.006972472183406353, "learning_rate": 0.00030175534684889836, "loss": 0.0071, "num_input_tokens_seen": 176362848, "step": 81750 }, { "epoch": 13.336867862969005, "grad_norm": 0.00534628052264452, "learning_rate": 0.00030169000298052096, "loss": 0.0043, "num_input_tokens_seen": 176372512, "step": 81755 }, { "epoch": 13.33768352365416, "grad_norm": 0.010491895489394665, "learning_rate": 0.00030162466313114734, "loss": 0.0055, "num_input_tokens_seen": 176383360, "step": 81760 }, { "epoch": 13.338499184339314, "grad_norm": 0.4070037603378296, "learning_rate": 0.00030155932730210145, "loss": 0.028, "num_input_tokens_seen": 176393696, "step": 81765 }, { "epoch": 13.33931484502447, "grad_norm": 0.06323857605457306, "learning_rate": 0.00030149399549470767, "loss": 0.0037, "num_input_tokens_seen": 176405440, "step": 81770 }, { "epoch": 13.340130505709626, "grad_norm": 0.09557635337114334, "learning_rate": 0.00030142866771028974, "loss": 0.0289, "num_input_tokens_seen": 176416576, "step": 81775 }, { "epoch": 13.34094616639478, "grad_norm": 0.007165431510657072, "learning_rate": 0.00030136334395017197, "loss": 0.0106, "num_input_tokens_seen": 176428128, "step": 81780 }, { "epoch": 13.341761827079935, "grad_norm": 0.011670003645122051, "learning_rate": 0.0003012980242156778, "loss": 0.0039, "num_input_tokens_seen": 176439328, "step": 81785 }, { "epoch": 13.34257748776509, "grad_norm": 0.018175851553678513, "learning_rate": 0.00030123270850813147, "loss": 0.012, "num_input_tokens_seen": 176450592, "step": 81790 }, { "epoch": 13.343393148450245, "grad_norm": 0.005962160415947437, "learning_rate": 0.0003011673968288562, "loss": 0.0599, "num_input_tokens_seen": 176461984, "step": 81795 }, { "epoch": 13.3442088091354, "grad_norm": 0.03631450608372688, "learning_rate": 0.00030110208917917607, "loss": 0.0882, "num_input_tokens_seen": 176472000, "step": 81800 }, { "epoch": 13.345024469820554, "grad_norm": 0.09758579730987549, "learning_rate": 0.00030103678556041427, "loss": 0.1377, "num_input_tokens_seen": 176481696, "step": 81805 }, { "epoch": 13.34584013050571, "grad_norm": 0.2948659062385559, "learning_rate": 0.00030097148597389456, "loss": 0.0132, "num_input_tokens_seen": 176492928, "step": 81810 }, { "epoch": 13.346655791190864, "grad_norm": 0.0010999692603945732, "learning_rate": 0.00030090619042094, "loss": 0.0044, "num_input_tokens_seen": 176503520, "step": 81815 }, { "epoch": 13.34747145187602, "grad_norm": 0.0018294668989256024, "learning_rate": 0.0003008408989028743, "loss": 0.0099, "num_input_tokens_seen": 176515328, "step": 81820 }, { "epoch": 13.348287112561174, "grad_norm": 0.057277776300907135, "learning_rate": 0.00030077561142102024, "loss": 0.0712, "num_input_tokens_seen": 176525728, "step": 81825 }, { "epoch": 13.34910277324633, "grad_norm": 0.005031734239310026, "learning_rate": 0.0003007103279767013, "loss": 0.0434, "num_input_tokens_seen": 176537152, "step": 81830 }, { "epoch": 13.349918433931485, "grad_norm": 0.01764173060655594, "learning_rate": 0.0003006450485712402, "loss": 0.1253, "num_input_tokens_seen": 176548704, "step": 81835 }, { "epoch": 13.350734094616639, "grad_norm": 0.0026120776310563087, "learning_rate": 0.00030057977320596007, "loss": 0.0663, "num_input_tokens_seen": 176558656, "step": 81840 }, { "epoch": 13.351549755301795, "grad_norm": 0.010992269963026047, "learning_rate": 0.00030051450188218397, "loss": 0.0738, "num_input_tokens_seen": 176570624, "step": 81845 }, { "epoch": 13.352365415986949, "grad_norm": 0.4562952518463135, "learning_rate": 0.0003004492346012345, "loss": 0.1024, "num_input_tokens_seen": 176582112, "step": 81850 }, { "epoch": 13.353181076672104, "grad_norm": 0.26493075489997864, "learning_rate": 0.0003003839713644345, "loss": 0.0132, "num_input_tokens_seen": 176592224, "step": 81855 }, { "epoch": 13.35399673735726, "grad_norm": 0.010200293734669685, "learning_rate": 0.0003003187121731064, "loss": 0.0096, "num_input_tokens_seen": 176603040, "step": 81860 }, { "epoch": 13.354812398042414, "grad_norm": 0.013799619860947132, "learning_rate": 0.0003002534570285731, "loss": 0.0327, "num_input_tokens_seen": 176613440, "step": 81865 }, { "epoch": 13.35562805872757, "grad_norm": 0.1671331375837326, "learning_rate": 0.00030018820593215675, "loss": 0.0457, "num_input_tokens_seen": 176625216, "step": 81870 }, { "epoch": 13.356443719412724, "grad_norm": 0.856156051158905, "learning_rate": 0.0003001229588851799, "loss": 0.0732, "num_input_tokens_seen": 176635488, "step": 81875 }, { "epoch": 13.35725938009788, "grad_norm": 0.09807083010673523, "learning_rate": 0.0003000577158889649, "loss": 0.0219, "num_input_tokens_seen": 176645824, "step": 81880 }, { "epoch": 13.358075040783035, "grad_norm": 0.00861416570842266, "learning_rate": 0.00029999247694483395, "loss": 0.0125, "num_input_tokens_seen": 176656704, "step": 81885 }, { "epoch": 13.358890701468189, "grad_norm": 0.006893214304000139, "learning_rate": 0.00029992724205410914, "loss": 0.005, "num_input_tokens_seen": 176666976, "step": 81890 }, { "epoch": 13.359706362153345, "grad_norm": 0.04398215189576149, "learning_rate": 0.0002998620112181126, "loss": 0.0494, "num_input_tokens_seen": 176678784, "step": 81895 }, { "epoch": 13.360522022838499, "grad_norm": 0.7824010252952576, "learning_rate": 0.0002997967844381662, "loss": 0.0368, "num_input_tokens_seen": 176690112, "step": 81900 }, { "epoch": 13.361337683523654, "grad_norm": 0.04750139266252518, "learning_rate": 0.00029973156171559214, "loss": 0.0051, "num_input_tokens_seen": 176702336, "step": 81905 }, { "epoch": 13.362153344208808, "grad_norm": 0.0009298750665038824, "learning_rate": 0.0002996663430517118, "loss": 0.0363, "num_input_tokens_seen": 176712896, "step": 81910 }, { "epoch": 13.362969004893964, "grad_norm": 0.01916741207242012, "learning_rate": 0.0002996011284478474, "loss": 0.0148, "num_input_tokens_seen": 176723808, "step": 81915 }, { "epoch": 13.36378466557912, "grad_norm": 0.024953672662377357, "learning_rate": 0.00029953591790532014, "loss": 0.0824, "num_input_tokens_seen": 176735168, "step": 81920 }, { "epoch": 13.364600326264274, "grad_norm": 0.2585717439651489, "learning_rate": 0.000299470711425452, "loss": 0.0288, "num_input_tokens_seen": 176746272, "step": 81925 }, { "epoch": 13.36541598694943, "grad_norm": 0.021771715953946114, "learning_rate": 0.0002994055090095641, "loss": 0.0759, "num_input_tokens_seen": 176756992, "step": 81930 }, { "epoch": 13.366231647634583, "grad_norm": 0.021774988621473312, "learning_rate": 0.00029934031065897824, "loss": 0.0111, "num_input_tokens_seen": 176768064, "step": 81935 }, { "epoch": 13.367047308319739, "grad_norm": 0.0468316525220871, "learning_rate": 0.00029927511637501536, "loss": 0.0496, "num_input_tokens_seen": 176778656, "step": 81940 }, { "epoch": 13.367862969004895, "grad_norm": 0.3672531843185425, "learning_rate": 0.0002992099261589968, "loss": 0.0181, "num_input_tokens_seen": 176789536, "step": 81945 }, { "epoch": 13.368678629690049, "grad_norm": 0.4604007303714752, "learning_rate": 0.00029914474001224413, "loss": 0.016, "num_input_tokens_seen": 176799904, "step": 81950 }, { "epoch": 13.369494290375204, "grad_norm": 0.011184503324329853, "learning_rate": 0.0002990795579360778, "loss": 0.0506, "num_input_tokens_seen": 176809920, "step": 81955 }, { "epoch": 13.370309951060358, "grad_norm": 0.0027907113544642925, "learning_rate": 0.00029901437993181936, "loss": 0.0117, "num_input_tokens_seen": 176822208, "step": 81960 }, { "epoch": 13.371125611745514, "grad_norm": 0.0403558723628521, "learning_rate": 0.0002989492060007893, "loss": 0.0111, "num_input_tokens_seen": 176832928, "step": 81965 }, { "epoch": 13.37194127243067, "grad_norm": 0.045787643641233444, "learning_rate": 0.0002988840361443088, "loss": 0.0088, "num_input_tokens_seen": 176842176, "step": 81970 }, { "epoch": 13.372756933115824, "grad_norm": 0.005155434366315603, "learning_rate": 0.0002988188703636983, "loss": 0.0092, "num_input_tokens_seen": 176852896, "step": 81975 }, { "epoch": 13.37357259380098, "grad_norm": 0.05229124799370766, "learning_rate": 0.0002987537086602787, "loss": 0.0285, "num_input_tokens_seen": 176863648, "step": 81980 }, { "epoch": 13.374388254486133, "grad_norm": 0.16170242428779602, "learning_rate": 0.0002986885510353703, "loss": 0.0065, "num_input_tokens_seen": 176874784, "step": 81985 }, { "epoch": 13.375203915171289, "grad_norm": 0.9189999103546143, "learning_rate": 0.00029862339749029413, "loss": 0.0237, "num_input_tokens_seen": 176885824, "step": 81990 }, { "epoch": 13.376019575856443, "grad_norm": 0.0013213105266913772, "learning_rate": 0.0002985582480263699, "loss": 0.0041, "num_input_tokens_seen": 176896928, "step": 81995 }, { "epoch": 13.376835236541599, "grad_norm": 0.05405731871724129, "learning_rate": 0.00029849310264491865, "loss": 0.0202, "num_input_tokens_seen": 176907072, "step": 82000 }, { "epoch": 13.377650897226754, "grad_norm": 0.016624340787529945, "learning_rate": 0.00029842796134726, "loss": 0.1109, "num_input_tokens_seen": 176917088, "step": 82005 }, { "epoch": 13.378466557911908, "grad_norm": 0.0017868182621896267, "learning_rate": 0.0002983628241347147, "loss": 0.0049, "num_input_tokens_seen": 176927552, "step": 82010 }, { "epoch": 13.379282218597064, "grad_norm": 0.0013543954119086266, "learning_rate": 0.0002982976910086024, "loss": 0.0775, "num_input_tokens_seen": 176937408, "step": 82015 }, { "epoch": 13.380097879282218, "grad_norm": 0.8427207469940186, "learning_rate": 0.0002982325619702433, "loss": 0.0693, "num_input_tokens_seen": 176948192, "step": 82020 }, { "epoch": 13.380913539967374, "grad_norm": 0.03291767090559006, "learning_rate": 0.0002981674370209573, "loss": 0.003, "num_input_tokens_seen": 176960032, "step": 82025 }, { "epoch": 13.38172920065253, "grad_norm": 0.6110392212867737, "learning_rate": 0.00029810231616206426, "loss": 0.0319, "num_input_tokens_seen": 176971328, "step": 82030 }, { "epoch": 13.382544861337683, "grad_norm": 0.0094646867364645, "learning_rate": 0.00029803719939488387, "loss": 0.047, "num_input_tokens_seen": 176983104, "step": 82035 }, { "epoch": 13.383360522022839, "grad_norm": 0.004146335646510124, "learning_rate": 0.0002979720867207358, "loss": 0.0329, "num_input_tokens_seen": 176994080, "step": 82040 }, { "epoch": 13.384176182707993, "grad_norm": 0.0040112389251589775, "learning_rate": 0.0002979069781409397, "loss": 0.0028, "num_input_tokens_seen": 177004032, "step": 82045 }, { "epoch": 13.384991843393149, "grad_norm": 0.003283149329945445, "learning_rate": 0.00029784187365681516, "loss": 0.002, "num_input_tokens_seen": 177016032, "step": 82050 }, { "epoch": 13.385807504078304, "grad_norm": 0.01805758848786354, "learning_rate": 0.00029777677326968144, "loss": 0.0036, "num_input_tokens_seen": 177026784, "step": 82055 }, { "epoch": 13.386623164763458, "grad_norm": 0.0044534639455378056, "learning_rate": 0.0002977116769808579, "loss": 0.0069, "num_input_tokens_seen": 177038112, "step": 82060 }, { "epoch": 13.387438825448614, "grad_norm": 0.0018602694617584348, "learning_rate": 0.000297646584791664, "loss": 0.0384, "num_input_tokens_seen": 177047328, "step": 82065 }, { "epoch": 13.388254486133768, "grad_norm": 0.010855947621166706, "learning_rate": 0.0002975814967034185, "loss": 0.0029, "num_input_tokens_seen": 177057824, "step": 82070 }, { "epoch": 13.389070146818923, "grad_norm": 0.017619602382183075, "learning_rate": 0.000297516412717441, "loss": 0.0066, "num_input_tokens_seen": 177068800, "step": 82075 }, { "epoch": 13.38988580750408, "grad_norm": 0.0543130598962307, "learning_rate": 0.0002974513328350501, "loss": 0.0116, "num_input_tokens_seen": 177079168, "step": 82080 }, { "epoch": 13.390701468189233, "grad_norm": 0.02179442159831524, "learning_rate": 0.00029738625705756514, "loss": 0.0283, "num_input_tokens_seen": 177089952, "step": 82085 }, { "epoch": 13.391517128874389, "grad_norm": 0.046086978167295456, "learning_rate": 0.0002973211853863044, "loss": 0.0553, "num_input_tokens_seen": 177100448, "step": 82090 }, { "epoch": 13.392332789559543, "grad_norm": 0.0045068650506436825, "learning_rate": 0.0002972561178225872, "loss": 0.0301, "num_input_tokens_seen": 177110016, "step": 82095 }, { "epoch": 13.393148450244698, "grad_norm": 0.0047156778164207935, "learning_rate": 0.00029719105436773187, "loss": 0.0024, "num_input_tokens_seen": 177119776, "step": 82100 }, { "epoch": 13.393964110929852, "grad_norm": 0.010665412060916424, "learning_rate": 0.00029712599502305714, "loss": 0.0793, "num_input_tokens_seen": 177130848, "step": 82105 }, { "epoch": 13.394779771615008, "grad_norm": 0.004095626063644886, "learning_rate": 0.0002970609397898814, "loss": 0.003, "num_input_tokens_seen": 177141152, "step": 82110 }, { "epoch": 13.395595432300164, "grad_norm": 0.7827159762382507, "learning_rate": 0.0002969958886695233, "loss": 0.1948, "num_input_tokens_seen": 177153088, "step": 82115 }, { "epoch": 13.396411092985318, "grad_norm": 0.008659974671900272, "learning_rate": 0.00029693084166330084, "loss": 0.1495, "num_input_tokens_seen": 177163328, "step": 82120 }, { "epoch": 13.397226753670473, "grad_norm": 0.02870609052479267, "learning_rate": 0.00029686579877253276, "loss": 0.0104, "num_input_tokens_seen": 177174496, "step": 82125 }, { "epoch": 13.398042414355627, "grad_norm": 0.0063386717811226845, "learning_rate": 0.0002968007599985367, "loss": 0.0414, "num_input_tokens_seen": 177185408, "step": 82130 }, { "epoch": 13.398858075040783, "grad_norm": 0.18878276646137238, "learning_rate": 0.0002967357253426313, "loss": 0.0122, "num_input_tokens_seen": 177196672, "step": 82135 }, { "epoch": 13.399673735725939, "grad_norm": 0.025802595540881157, "learning_rate": 0.000296670694806134, "loss": 0.0132, "num_input_tokens_seen": 177207840, "step": 82140 }, { "epoch": 13.400489396411093, "grad_norm": 0.05117020383477211, "learning_rate": 0.00029660566839036315, "loss": 0.01, "num_input_tokens_seen": 177218688, "step": 82145 }, { "epoch": 13.401305057096248, "grad_norm": 0.007018150761723518, "learning_rate": 0.0002965406460966364, "loss": 0.043, "num_input_tokens_seen": 177228768, "step": 82150 }, { "epoch": 13.402120717781402, "grad_norm": 0.6997339725494385, "learning_rate": 0.00029647562792627145, "loss": 0.1527, "num_input_tokens_seen": 177240064, "step": 82155 }, { "epoch": 13.402936378466558, "grad_norm": 0.03459867089986801, "learning_rate": 0.0002964106138805864, "loss": 0.009, "num_input_tokens_seen": 177250432, "step": 82160 }, { "epoch": 13.403752039151712, "grad_norm": 0.013162773102521896, "learning_rate": 0.00029634560396089827, "loss": 0.0029, "num_input_tokens_seen": 177261632, "step": 82165 }, { "epoch": 13.404567699836868, "grad_norm": 0.016635790467262268, "learning_rate": 0.00029628059816852497, "loss": 0.0719, "num_input_tokens_seen": 177271456, "step": 82170 }, { "epoch": 13.405383360522023, "grad_norm": 0.007559285033494234, "learning_rate": 0.0002962155965047837, "loss": 0.0159, "num_input_tokens_seen": 177282944, "step": 82175 }, { "epoch": 13.406199021207177, "grad_norm": 0.0022939289920032024, "learning_rate": 0.00029615059897099196, "loss": 0.0812, "num_input_tokens_seen": 177293632, "step": 82180 }, { "epoch": 13.407014681892333, "grad_norm": 0.013195296749472618, "learning_rate": 0.0002960856055684668, "loss": 0.0074, "num_input_tokens_seen": 177303744, "step": 82185 }, { "epoch": 13.407830342577487, "grad_norm": 0.8358994722366333, "learning_rate": 0.0002960206162985256, "loss": 0.1123, "num_input_tokens_seen": 177314080, "step": 82190 }, { "epoch": 13.408646003262643, "grad_norm": 0.051133617758750916, "learning_rate": 0.0002959556311624855, "loss": 0.0068, "num_input_tokens_seen": 177323904, "step": 82195 }, { "epoch": 13.409461663947798, "grad_norm": 0.16921986639499664, "learning_rate": 0.0002958906501616632, "loss": 0.0115, "num_input_tokens_seen": 177334528, "step": 82200 }, { "epoch": 13.410277324632952, "grad_norm": 0.037415504455566406, "learning_rate": 0.0002958256732973759, "loss": 0.0297, "num_input_tokens_seen": 177344512, "step": 82205 }, { "epoch": 13.411092985318108, "grad_norm": 0.005634044762700796, "learning_rate": 0.00029576070057094034, "loss": 0.0025, "num_input_tokens_seen": 177353440, "step": 82210 }, { "epoch": 13.411908646003262, "grad_norm": 0.10265978425741196, "learning_rate": 0.00029569573198367317, "loss": 0.1036, "num_input_tokens_seen": 177364416, "step": 82215 }, { "epoch": 13.412724306688418, "grad_norm": 1.1324052810668945, "learning_rate": 0.00029563076753689137, "loss": 0.032, "num_input_tokens_seen": 177375968, "step": 82220 }, { "epoch": 13.413539967373573, "grad_norm": 0.002063976600766182, "learning_rate": 0.00029556580723191116, "loss": 0.1618, "num_input_tokens_seen": 177385888, "step": 82225 }, { "epoch": 13.414355628058727, "grad_norm": 0.01265017595142126, "learning_rate": 0.00029550085107004937, "loss": 0.0108, "num_input_tokens_seen": 177396448, "step": 82230 }, { "epoch": 13.415171288743883, "grad_norm": 1.0285362005233765, "learning_rate": 0.0002954358990526221, "loss": 0.0999, "num_input_tokens_seen": 177408000, "step": 82235 }, { "epoch": 13.415986949429037, "grad_norm": 0.019042037427425385, "learning_rate": 0.000295370951180946, "loss": 0.0268, "num_input_tokens_seen": 177417536, "step": 82240 }, { "epoch": 13.416802610114193, "grad_norm": 0.007670939434319735, "learning_rate": 0.00029530600745633693, "loss": 0.0061, "num_input_tokens_seen": 177428448, "step": 82245 }, { "epoch": 13.417618270799348, "grad_norm": 0.07422017306089401, "learning_rate": 0.0002952410678801116, "loss": 0.0762, "num_input_tokens_seen": 177438048, "step": 82250 }, { "epoch": 13.418433931484502, "grad_norm": 0.04997723177075386, "learning_rate": 0.0002951761324535855, "loss": 0.0167, "num_input_tokens_seen": 177449344, "step": 82255 }, { "epoch": 13.419249592169658, "grad_norm": 0.041262220591306686, "learning_rate": 0.00029511120117807493, "loss": 0.0313, "num_input_tokens_seen": 177459200, "step": 82260 }, { "epoch": 13.420065252854812, "grad_norm": 0.0014246198115870357, "learning_rate": 0.00029504627405489605, "loss": 0.0023, "num_input_tokens_seen": 177469888, "step": 82265 }, { "epoch": 13.420880913539968, "grad_norm": 0.0174840297549963, "learning_rate": 0.0002949813510853641, "loss": 0.0036, "num_input_tokens_seen": 177480448, "step": 82270 }, { "epoch": 13.421696574225122, "grad_norm": 0.013062510639429092, "learning_rate": 0.00029491643227079543, "loss": 0.0177, "num_input_tokens_seen": 177491072, "step": 82275 }, { "epoch": 13.422512234910277, "grad_norm": 0.014931593090295792, "learning_rate": 0.00029485151761250527, "loss": 0.0177, "num_input_tokens_seen": 177502400, "step": 82280 }, { "epoch": 13.423327895595433, "grad_norm": 0.5624591112136841, "learning_rate": 0.0002947866071118095, "loss": 0.0438, "num_input_tokens_seen": 177514336, "step": 82285 }, { "epoch": 13.424143556280587, "grad_norm": 0.01380282361060381, "learning_rate": 0.00029472170077002324, "loss": 0.0028, "num_input_tokens_seen": 177523744, "step": 82290 }, { "epoch": 13.424959216965743, "grad_norm": 0.006004599388688803, "learning_rate": 0.0002946567985884624, "loss": 0.0021, "num_input_tokens_seen": 177534848, "step": 82295 }, { "epoch": 13.425774877650896, "grad_norm": 0.02517123706638813, "learning_rate": 0.0002945919005684418, "loss": 0.0075, "num_input_tokens_seen": 177545248, "step": 82300 }, { "epoch": 13.426590538336052, "grad_norm": 0.10805921256542206, "learning_rate": 0.0002945270067112771, "loss": 0.0285, "num_input_tokens_seen": 177554784, "step": 82305 }, { "epoch": 13.427406199021208, "grad_norm": 0.06230425834655762, "learning_rate": 0.0002944621170182831, "loss": 0.0042, "num_input_tokens_seen": 177565376, "step": 82310 }, { "epoch": 13.428221859706362, "grad_norm": 0.20056845247745514, "learning_rate": 0.00029439723149077523, "loss": 0.0049, "num_input_tokens_seen": 177575136, "step": 82315 }, { "epoch": 13.429037520391518, "grad_norm": 0.01482043880969286, "learning_rate": 0.0002943323501300681, "loss": 0.0223, "num_input_tokens_seen": 177586208, "step": 82320 }, { "epoch": 13.429853181076671, "grad_norm": 0.06956825405359268, "learning_rate": 0.00029426747293747685, "loss": 0.0037, "num_input_tokens_seen": 177596576, "step": 82325 }, { "epoch": 13.430668841761827, "grad_norm": 0.009223024360835552, "learning_rate": 0.00029420259991431633, "loss": 0.0061, "num_input_tokens_seen": 177608320, "step": 82330 }, { "epoch": 13.431484502446983, "grad_norm": 0.005250636488199234, "learning_rate": 0.0002941377310619011, "loss": 0.0692, "num_input_tokens_seen": 177619712, "step": 82335 }, { "epoch": 13.432300163132137, "grad_norm": 0.5357564091682434, "learning_rate": 0.00029407286638154597, "loss": 0.0661, "num_input_tokens_seen": 177628512, "step": 82340 }, { "epoch": 13.433115823817293, "grad_norm": 0.004990137182176113, "learning_rate": 0.00029400800587456544, "loss": 0.0064, "num_input_tokens_seen": 177640608, "step": 82345 }, { "epoch": 13.433931484502446, "grad_norm": 1.0227863788604736, "learning_rate": 0.00029394314954227387, "loss": 0.0964, "num_input_tokens_seen": 177650976, "step": 82350 }, { "epoch": 13.434747145187602, "grad_norm": 0.027064405381679535, "learning_rate": 0.000293878297385986, "loss": 0.009, "num_input_tokens_seen": 177662144, "step": 82355 }, { "epoch": 13.435562805872756, "grad_norm": 0.8433717489242554, "learning_rate": 0.0002938134494070157, "loss": 0.0206, "num_input_tokens_seen": 177673504, "step": 82360 }, { "epoch": 13.436378466557912, "grad_norm": 0.9822838306427002, "learning_rate": 0.00029374860560667747, "loss": 0.0365, "num_input_tokens_seen": 177683904, "step": 82365 }, { "epoch": 13.437194127243067, "grad_norm": 0.09729766100645065, "learning_rate": 0.00029368376598628545, "loss": 0.0082, "num_input_tokens_seen": 177695232, "step": 82370 }, { "epoch": 13.438009787928221, "grad_norm": 0.2985684275627136, "learning_rate": 0.00029361893054715365, "loss": 0.0197, "num_input_tokens_seen": 177705984, "step": 82375 }, { "epoch": 13.438825448613377, "grad_norm": 0.38350343704223633, "learning_rate": 0.000293554099290596, "loss": 0.0085, "num_input_tokens_seen": 177716000, "step": 82380 }, { "epoch": 13.439641109298531, "grad_norm": 0.004382896702736616, "learning_rate": 0.0002934892722179264, "loss": 0.0366, "num_input_tokens_seen": 177725792, "step": 82385 }, { "epoch": 13.440456769983687, "grad_norm": 0.03484003245830536, "learning_rate": 0.0002934244493304588, "loss": 0.0309, "num_input_tokens_seen": 177736576, "step": 82390 }, { "epoch": 13.441272430668842, "grad_norm": 0.0982975885272026, "learning_rate": 0.0002933596306295066, "loss": 0.0196, "num_input_tokens_seen": 177746624, "step": 82395 }, { "epoch": 13.442088091353996, "grad_norm": 0.01951252669095993, "learning_rate": 0.0002932948161163839, "loss": 0.0864, "num_input_tokens_seen": 177758528, "step": 82400 }, { "epoch": 13.442903752039152, "grad_norm": 0.09169973433017731, "learning_rate": 0.0002932300057924037, "loss": 0.0096, "num_input_tokens_seen": 177768416, "step": 82405 }, { "epoch": 13.443719412724306, "grad_norm": 0.025591794401407242, "learning_rate": 0.0002931651996588799, "loss": 0.0427, "num_input_tokens_seen": 177778784, "step": 82410 }, { "epoch": 13.444535073409462, "grad_norm": 0.8844431638717651, "learning_rate": 0.0002931003977171256, "loss": 0.0472, "num_input_tokens_seen": 177789888, "step": 82415 }, { "epoch": 13.445350734094617, "grad_norm": 0.02520960010588169, "learning_rate": 0.00029303559996845434, "loss": 0.017, "num_input_tokens_seen": 177800224, "step": 82420 }, { "epoch": 13.446166394779771, "grad_norm": 0.031458668410778046, "learning_rate": 0.00029297080641417907, "loss": 0.0039, "num_input_tokens_seen": 177811648, "step": 82425 }, { "epoch": 13.446982055464927, "grad_norm": 0.004464976489543915, "learning_rate": 0.0002929060170556132, "loss": 0.0085, "num_input_tokens_seen": 177821728, "step": 82430 }, { "epoch": 13.447797716150081, "grad_norm": 0.011420292779803276, "learning_rate": 0.00029284123189406944, "loss": 0.016, "num_input_tokens_seen": 177833312, "step": 82435 }, { "epoch": 13.448613376835237, "grad_norm": 0.06500400602817535, "learning_rate": 0.00029277645093086114, "loss": 0.0188, "num_input_tokens_seen": 177845504, "step": 82440 }, { "epoch": 13.449429037520392, "grad_norm": 0.3574179708957672, "learning_rate": 0.00029271167416730073, "loss": 0.1586, "num_input_tokens_seen": 177857312, "step": 82445 }, { "epoch": 13.450244698205546, "grad_norm": 0.05622898414731026, "learning_rate": 0.0002926469016047013, "loss": 0.0034, "num_input_tokens_seen": 177866624, "step": 82450 }, { "epoch": 13.451060358890702, "grad_norm": 0.18684279918670654, "learning_rate": 0.00029258213324437533, "loss": 0.0069, "num_input_tokens_seen": 177877536, "step": 82455 }, { "epoch": 13.451876019575856, "grad_norm": 0.015851669013500214, "learning_rate": 0.00029251736908763584, "loss": 0.0084, "num_input_tokens_seen": 177887968, "step": 82460 }, { "epoch": 13.452691680261012, "grad_norm": 0.11261133849620819, "learning_rate": 0.00029245260913579477, "loss": 0.0621, "num_input_tokens_seen": 177898176, "step": 82465 }, { "epoch": 13.453507340946166, "grad_norm": 1.0167851448059082, "learning_rate": 0.00029238785339016487, "loss": 0.021, "num_input_tokens_seen": 177908288, "step": 82470 }, { "epoch": 13.454323001631321, "grad_norm": 0.010129615664482117, "learning_rate": 0.0002923231018520588, "loss": 0.1371, "num_input_tokens_seen": 177918336, "step": 82475 }, { "epoch": 13.455138662316477, "grad_norm": 0.04256328195333481, "learning_rate": 0.0002922583545227882, "loss": 0.0455, "num_input_tokens_seen": 177929824, "step": 82480 }, { "epoch": 13.455954323001631, "grad_norm": 0.01310234609991312, "learning_rate": 0.00029219361140366587, "loss": 0.0079, "num_input_tokens_seen": 177939808, "step": 82485 }, { "epoch": 13.456769983686787, "grad_norm": 0.006620636209845543, "learning_rate": 0.0002921288724960034, "loss": 0.004, "num_input_tokens_seen": 177951520, "step": 82490 }, { "epoch": 13.45758564437194, "grad_norm": 0.002337012207135558, "learning_rate": 0.00029206413780111305, "loss": 0.0094, "num_input_tokens_seen": 177962112, "step": 82495 }, { "epoch": 13.458401305057096, "grad_norm": 0.024844547733664513, "learning_rate": 0.00029199940732030686, "loss": 0.0413, "num_input_tokens_seen": 177973056, "step": 82500 }, { "epoch": 13.459216965742252, "grad_norm": 0.006579355802386999, "learning_rate": 0.0002919346810548965, "loss": 0.0201, "num_input_tokens_seen": 177982272, "step": 82505 }, { "epoch": 13.460032626427406, "grad_norm": 0.004961821250617504, "learning_rate": 0.00029186995900619373, "loss": 0.0019, "num_input_tokens_seen": 177993280, "step": 82510 }, { "epoch": 13.460848287112562, "grad_norm": 0.012634257785975933, "learning_rate": 0.00029180524117551035, "loss": 0.0274, "num_input_tokens_seen": 178003904, "step": 82515 }, { "epoch": 13.461663947797716, "grad_norm": 0.006636204198002815, "learning_rate": 0.0002917405275641578, "loss": 0.016, "num_input_tokens_seen": 178014304, "step": 82520 }, { "epoch": 13.462479608482871, "grad_norm": 0.003253295086324215, "learning_rate": 0.00029167581817344775, "loss": 0.0279, "num_input_tokens_seen": 178024256, "step": 82525 }, { "epoch": 13.463295269168025, "grad_norm": 0.0020958068780601025, "learning_rate": 0.00029161111300469143, "loss": 0.1288, "num_input_tokens_seen": 178035456, "step": 82530 }, { "epoch": 13.464110929853181, "grad_norm": 0.006692560389637947, "learning_rate": 0.0002915464120592003, "loss": 0.1069, "num_input_tokens_seen": 178046048, "step": 82535 }, { "epoch": 13.464926590538337, "grad_norm": 0.005391769576817751, "learning_rate": 0.0002914817153382856, "loss": 0.0655, "num_input_tokens_seen": 178055968, "step": 82540 }, { "epoch": 13.46574225122349, "grad_norm": 0.040437232702970505, "learning_rate": 0.00029141702284325846, "loss": 0.0117, "num_input_tokens_seen": 178067104, "step": 82545 }, { "epoch": 13.466557911908646, "grad_norm": 0.016096187755465508, "learning_rate": 0.0002913523345754299, "loss": 0.0054, "num_input_tokens_seen": 178077088, "step": 82550 }, { "epoch": 13.4673735725938, "grad_norm": 0.009788149036467075, "learning_rate": 0.0002912876505361111, "loss": 0.0175, "num_input_tokens_seen": 178087872, "step": 82555 }, { "epoch": 13.468189233278956, "grad_norm": 0.00917357299476862, "learning_rate": 0.00029122297072661264, "loss": 0.0062, "num_input_tokens_seen": 178098208, "step": 82560 }, { "epoch": 13.469004893964112, "grad_norm": 0.068642757833004, "learning_rate": 0.00029115829514824565, "loss": 0.148, "num_input_tokens_seen": 178109024, "step": 82565 }, { "epoch": 13.469820554649266, "grad_norm": 0.8503569960594177, "learning_rate": 0.00029109362380232075, "loss": 0.4073, "num_input_tokens_seen": 178118752, "step": 82570 }, { "epoch": 13.470636215334421, "grad_norm": 0.30997228622436523, "learning_rate": 0.0002910289566901485, "loss": 0.1118, "num_input_tokens_seen": 178129152, "step": 82575 }, { "epoch": 13.471451876019575, "grad_norm": 0.014993988908827305, "learning_rate": 0.0002909642938130394, "loss": 0.0107, "num_input_tokens_seen": 178139872, "step": 82580 }, { "epoch": 13.47226753670473, "grad_norm": 0.24185355007648468, "learning_rate": 0.0002908996351723043, "loss": 0.0182, "num_input_tokens_seen": 178150240, "step": 82585 }, { "epoch": 13.473083197389887, "grad_norm": 0.3973219096660614, "learning_rate": 0.0002908349807692533, "loss": 0.0343, "num_input_tokens_seen": 178161088, "step": 82590 }, { "epoch": 13.47389885807504, "grad_norm": 0.09699933230876923, "learning_rate": 0.00029077033060519674, "loss": 0.007, "num_input_tokens_seen": 178171488, "step": 82595 }, { "epoch": 13.474714518760196, "grad_norm": 0.014222009107470512, "learning_rate": 0.0002907056846814449, "loss": 0.0104, "num_input_tokens_seen": 178182432, "step": 82600 }, { "epoch": 13.47553017944535, "grad_norm": 0.003987476695328951, "learning_rate": 0.00029064104299930785, "loss": 0.0134, "num_input_tokens_seen": 178194144, "step": 82605 }, { "epoch": 13.476345840130506, "grad_norm": 0.10013998299837112, "learning_rate": 0.00029057640556009567, "loss": 0.0527, "num_input_tokens_seen": 178204832, "step": 82610 }, { "epoch": 13.477161500815662, "grad_norm": 0.33000877499580383, "learning_rate": 0.0002905117723651183, "loss": 0.0728, "num_input_tokens_seen": 178216640, "step": 82615 }, { "epoch": 13.477977161500815, "grad_norm": 0.8035077452659607, "learning_rate": 0.0002904471434156856, "loss": 0.0482, "num_input_tokens_seen": 178226464, "step": 82620 }, { "epoch": 13.478792822185971, "grad_norm": 0.02503439038991928, "learning_rate": 0.0002903825187131074, "loss": 0.0772, "num_input_tokens_seen": 178238784, "step": 82625 }, { "epoch": 13.479608482871125, "grad_norm": 0.006890288088470697, "learning_rate": 0.00029031789825869334, "loss": 0.074, "num_input_tokens_seen": 178250688, "step": 82630 }, { "epoch": 13.48042414355628, "grad_norm": 0.008162684738636017, "learning_rate": 0.0002902532820537531, "loss": 0.0546, "num_input_tokens_seen": 178261536, "step": 82635 }, { "epoch": 13.481239804241435, "grad_norm": 0.0033391681499779224, "learning_rate": 0.00029018867009959623, "loss": 0.0274, "num_input_tokens_seen": 178273408, "step": 82640 }, { "epoch": 13.48205546492659, "grad_norm": 0.02445111982524395, "learning_rate": 0.0002901240623975321, "loss": 0.0074, "num_input_tokens_seen": 178284096, "step": 82645 }, { "epoch": 13.482871125611746, "grad_norm": 0.02918333187699318, "learning_rate": 0.00029005945894887, "loss": 0.004, "num_input_tokens_seen": 178295136, "step": 82650 }, { "epoch": 13.4836867862969, "grad_norm": 0.0018522132886573672, "learning_rate": 0.0002899948597549194, "loss": 0.0064, "num_input_tokens_seen": 178306912, "step": 82655 }, { "epoch": 13.484502446982056, "grad_norm": 0.2604631185531616, "learning_rate": 0.00028993026481698934, "loss": 0.0211, "num_input_tokens_seen": 178318080, "step": 82660 }, { "epoch": 13.48531810766721, "grad_norm": 0.1474015861749649, "learning_rate": 0.00028986567413638895, "loss": 0.0166, "num_input_tokens_seen": 178328896, "step": 82665 }, { "epoch": 13.486133768352365, "grad_norm": 0.1823238730430603, "learning_rate": 0.00028980108771442726, "loss": 0.0178, "num_input_tokens_seen": 178340256, "step": 82670 }, { "epoch": 13.486949429037521, "grad_norm": 0.09697208553552628, "learning_rate": 0.00028973650555241316, "loss": 0.0187, "num_input_tokens_seen": 178350912, "step": 82675 }, { "epoch": 13.487765089722675, "grad_norm": 0.40011072158813477, "learning_rate": 0.0002896719276516555, "loss": 0.0226, "num_input_tokens_seen": 178361824, "step": 82680 }, { "epoch": 13.48858075040783, "grad_norm": 0.0026245785411447287, "learning_rate": 0.0002896073540134631, "loss": 0.0031, "num_input_tokens_seen": 178373888, "step": 82685 }, { "epoch": 13.489396411092985, "grad_norm": 0.01241164468228817, "learning_rate": 0.00028954278463914435, "loss": 0.008, "num_input_tokens_seen": 178384064, "step": 82690 }, { "epoch": 13.49021207177814, "grad_norm": 0.016045669093728065, "learning_rate": 0.00028947821953000845, "loss": 0.0571, "num_input_tokens_seen": 178394976, "step": 82695 }, { "epoch": 13.491027732463296, "grad_norm": 0.00800728052854538, "learning_rate": 0.00028941365868736315, "loss": 0.0045, "num_input_tokens_seen": 178405120, "step": 82700 }, { "epoch": 13.49184339314845, "grad_norm": 0.007545186672359705, "learning_rate": 0.00028934910211251755, "loss": 0.003, "num_input_tokens_seen": 178416096, "step": 82705 }, { "epoch": 13.492659053833606, "grad_norm": 0.1742790937423706, "learning_rate": 0.0002892845498067792, "loss": 0.0384, "num_input_tokens_seen": 178428000, "step": 82710 }, { "epoch": 13.49347471451876, "grad_norm": 0.10489381104707718, "learning_rate": 0.0002892200017714572, "loss": 0.0866, "num_input_tokens_seen": 178438816, "step": 82715 }, { "epoch": 13.494290375203915, "grad_norm": 0.08963478356599808, "learning_rate": 0.00028915545800785883, "loss": 0.0499, "num_input_tokens_seen": 178450016, "step": 82720 }, { "epoch": 13.49510603588907, "grad_norm": 0.030324993655085564, "learning_rate": 0.0002890909185172928, "loss": 0.0058, "num_input_tokens_seen": 178460672, "step": 82725 }, { "epoch": 13.495921696574225, "grad_norm": 0.43110033869743347, "learning_rate": 0.00028902638330106684, "loss": 0.0147, "num_input_tokens_seen": 178471712, "step": 82730 }, { "epoch": 13.49673735725938, "grad_norm": 0.0035309982486069202, "learning_rate": 0.0002889618523604889, "loss": 0.0094, "num_input_tokens_seen": 178483360, "step": 82735 }, { "epoch": 13.497553017944535, "grad_norm": 0.0023967819288372993, "learning_rate": 0.0002888973256968667, "loss": 0.0139, "num_input_tokens_seen": 178494240, "step": 82740 }, { "epoch": 13.49836867862969, "grad_norm": 0.01526052039116621, "learning_rate": 0.000288832803311508, "loss": 0.0021, "num_input_tokens_seen": 178503744, "step": 82745 }, { "epoch": 13.499184339314844, "grad_norm": 0.007729786913841963, "learning_rate": 0.00028876828520572043, "loss": 0.0033, "num_input_tokens_seen": 178514976, "step": 82750 }, { "epoch": 13.5, "grad_norm": 0.004488068167120218, "learning_rate": 0.0002887037713808116, "loss": 0.0019, "num_input_tokens_seen": 178526304, "step": 82755 }, { "epoch": 13.500815660685156, "grad_norm": 0.08463281393051147, "learning_rate": 0.0002886392618380888, "loss": 0.1944, "num_input_tokens_seen": 178537152, "step": 82760 }, { "epoch": 13.50163132137031, "grad_norm": 0.06521822512149811, "learning_rate": 0.00028857475657885956, "loss": 0.0028, "num_input_tokens_seen": 178548544, "step": 82765 }, { "epoch": 13.502446982055465, "grad_norm": 0.036977801471948624, "learning_rate": 0.00028851025560443103, "loss": 0.0098, "num_input_tokens_seen": 178559520, "step": 82770 }, { "epoch": 13.50326264274062, "grad_norm": 0.10496782511472702, "learning_rate": 0.0002884457589161105, "loss": 0.0114, "num_input_tokens_seen": 178569440, "step": 82775 }, { "epoch": 13.504078303425775, "grad_norm": 0.035284098237752914, "learning_rate": 0.000288381266515205, "loss": 0.0067, "num_input_tokens_seen": 178580576, "step": 82780 }, { "epoch": 13.50489396411093, "grad_norm": 0.47754406929016113, "learning_rate": 0.0002883167784030216, "loss": 0.2455, "num_input_tokens_seen": 178591872, "step": 82785 }, { "epoch": 13.505709624796085, "grad_norm": 0.017402349039912224, "learning_rate": 0.00028825229458086726, "loss": 0.0063, "num_input_tokens_seen": 178602880, "step": 82790 }, { "epoch": 13.50652528548124, "grad_norm": 0.012130736373364925, "learning_rate": 0.0002881878150500486, "loss": 0.0108, "num_input_tokens_seen": 178614048, "step": 82795 }, { "epoch": 13.507340946166394, "grad_norm": 0.4529772102832794, "learning_rate": 0.00028812333981187297, "loss": 0.0451, "num_input_tokens_seen": 178624576, "step": 82800 }, { "epoch": 13.50815660685155, "grad_norm": 0.00502577843144536, "learning_rate": 0.00028805886886764623, "loss": 0.0018, "num_input_tokens_seen": 178634432, "step": 82805 }, { "epoch": 13.508972267536706, "grad_norm": 0.10548710823059082, "learning_rate": 0.00028799440221867576, "loss": 0.0198, "num_input_tokens_seen": 178644928, "step": 82810 }, { "epoch": 13.50978792822186, "grad_norm": 0.026495305821299553, "learning_rate": 0.00028792993986626725, "loss": 0.0039, "num_input_tokens_seen": 178657152, "step": 82815 }, { "epoch": 13.510603588907015, "grad_norm": 0.058403223752975464, "learning_rate": 0.000287865481811728, "loss": 0.0082, "num_input_tokens_seen": 178667840, "step": 82820 }, { "epoch": 13.51141924959217, "grad_norm": 0.030116651207208633, "learning_rate": 0.00028780102805636346, "loss": 0.0793, "num_input_tokens_seen": 178677056, "step": 82825 }, { "epoch": 13.512234910277325, "grad_norm": 0.00998600572347641, "learning_rate": 0.0002877365786014806, "loss": 0.0053, "num_input_tokens_seen": 178688320, "step": 82830 }, { "epoch": 13.513050570962479, "grad_norm": 0.0012228736886754632, "learning_rate": 0.00028767213344838493, "loss": 0.0079, "num_input_tokens_seen": 178697952, "step": 82835 }, { "epoch": 13.513866231647635, "grad_norm": 0.1664176881313324, "learning_rate": 0.00028760769259838327, "loss": 0.0051, "num_input_tokens_seen": 178708704, "step": 82840 }, { "epoch": 13.51468189233279, "grad_norm": 0.10617826133966446, "learning_rate": 0.00028754325605278067, "loss": 0.1331, "num_input_tokens_seen": 178719392, "step": 82845 }, { "epoch": 13.515497553017944, "grad_norm": 0.0093598123639822, "learning_rate": 0.00028747882381288393, "loss": 0.076, "num_input_tokens_seen": 178728608, "step": 82850 }, { "epoch": 13.5163132137031, "grad_norm": 0.022264758124947548, "learning_rate": 0.00028741439587999805, "loss": 0.0223, "num_input_tokens_seen": 178739680, "step": 82855 }, { "epoch": 13.517128874388254, "grad_norm": 0.007914872840046883, "learning_rate": 0.00028734997225542954, "loss": 0.061, "num_input_tokens_seen": 178750688, "step": 82860 }, { "epoch": 13.51794453507341, "grad_norm": 0.10490361601114273, "learning_rate": 0.0002872855529404832, "loss": 0.0053, "num_input_tokens_seen": 178760864, "step": 82865 }, { "epoch": 13.518760195758565, "grad_norm": 0.0022800927981734276, "learning_rate": 0.0002872211379364651, "loss": 0.101, "num_input_tokens_seen": 178771616, "step": 82870 }, { "epoch": 13.51957585644372, "grad_norm": 0.11414402723312378, "learning_rate": 0.00028715672724468065, "loss": 0.0428, "num_input_tokens_seen": 178783360, "step": 82875 }, { "epoch": 13.520391517128875, "grad_norm": 0.0024487741757184267, "learning_rate": 0.0002870923208664351, "loss": 0.4019, "num_input_tokens_seen": 178793184, "step": 82880 }, { "epoch": 13.521207177814029, "grad_norm": 0.007895761169493198, "learning_rate": 0.0002870279188030338, "loss": 0.0044, "num_input_tokens_seen": 178803136, "step": 82885 }, { "epoch": 13.522022838499185, "grad_norm": 0.21845567226409912, "learning_rate": 0.00028696352105578185, "loss": 0.074, "num_input_tokens_seen": 178814464, "step": 82890 }, { "epoch": 13.522838499184338, "grad_norm": 0.09672797471284866, "learning_rate": 0.0002868991276259844, "loss": 0.0326, "num_input_tokens_seen": 178826112, "step": 82895 }, { "epoch": 13.523654159869494, "grad_norm": 0.08652757853269577, "learning_rate": 0.0002868347385149465, "loss": 0.032, "num_input_tokens_seen": 178836608, "step": 82900 }, { "epoch": 13.52446982055465, "grad_norm": 0.03390341252088547, "learning_rate": 0.000286770353723973, "loss": 0.0068, "num_input_tokens_seen": 178846976, "step": 82905 }, { "epoch": 13.525285481239804, "grad_norm": 0.0046766395680606365, "learning_rate": 0.00028670597325436886, "loss": 0.115, "num_input_tokens_seen": 178858688, "step": 82910 }, { "epoch": 13.52610114192496, "grad_norm": 0.48411765694618225, "learning_rate": 0.0002866415971074387, "loss": 0.0973, "num_input_tokens_seen": 178870048, "step": 82915 }, { "epoch": 13.526916802610113, "grad_norm": 0.020559003576636314, "learning_rate": 0.000286577225284487, "loss": 0.0118, "num_input_tokens_seen": 178880768, "step": 82920 }, { "epoch": 13.52773246329527, "grad_norm": 0.17778734862804413, "learning_rate": 0.00028651285778681906, "loss": 0.0512, "num_input_tokens_seen": 178891424, "step": 82925 }, { "epoch": 13.528548123980425, "grad_norm": 0.08465657383203506, "learning_rate": 0.00028644849461573847, "loss": 0.0072, "num_input_tokens_seen": 178902400, "step": 82930 }, { "epoch": 13.529363784665579, "grad_norm": 0.18695054948329926, "learning_rate": 0.0002863841357725504, "loss": 0.015, "num_input_tokens_seen": 178913248, "step": 82935 }, { "epoch": 13.530179445350734, "grad_norm": 0.0033503505401313305, "learning_rate": 0.00028631978125855844, "loss": 0.0147, "num_input_tokens_seen": 178923936, "step": 82940 }, { "epoch": 13.530995106035888, "grad_norm": 0.020310306921601295, "learning_rate": 0.0002862554310750676, "loss": 0.121, "num_input_tokens_seen": 178934400, "step": 82945 }, { "epoch": 13.531810766721044, "grad_norm": 0.37271687388420105, "learning_rate": 0.0002861910852233812, "loss": 0.0395, "num_input_tokens_seen": 178945504, "step": 82950 }, { "epoch": 13.5326264274062, "grad_norm": 0.03765486553311348, "learning_rate": 0.00028612674370480406, "loss": 0.1145, "num_input_tokens_seen": 178956064, "step": 82955 }, { "epoch": 13.533442088091354, "grad_norm": 0.006023062858730555, "learning_rate": 0.0002860624065206394, "loss": 0.0155, "num_input_tokens_seen": 178966720, "step": 82960 }, { "epoch": 13.53425774877651, "grad_norm": 0.009742209687829018, "learning_rate": 0.0002859980736721918, "loss": 0.0051, "num_input_tokens_seen": 178978848, "step": 82965 }, { "epoch": 13.535073409461663, "grad_norm": 0.5695927143096924, "learning_rate": 0.0002859337451607644, "loss": 0.1308, "num_input_tokens_seen": 178989088, "step": 82970 }, { "epoch": 13.535889070146819, "grad_norm": 0.011081600561738014, "learning_rate": 0.0002858694209876616, "loss": 0.0232, "num_input_tokens_seen": 178998880, "step": 82975 }, { "epoch": 13.536704730831975, "grad_norm": 0.0042917728424072266, "learning_rate": 0.00028580510115418624, "loss": 0.0037, "num_input_tokens_seen": 179009568, "step": 82980 }, { "epoch": 13.537520391517129, "grad_norm": 0.005771744064986706, "learning_rate": 0.0002857407856616426, "loss": 0.0055, "num_input_tokens_seen": 179019872, "step": 82985 }, { "epoch": 13.538336052202284, "grad_norm": 0.14418165385723114, "learning_rate": 0.0002856764745113334, "loss": 0.0062, "num_input_tokens_seen": 179030336, "step": 82990 }, { "epoch": 13.539151712887438, "grad_norm": 0.02956293895840645, "learning_rate": 0.00028561216770456267, "loss": 0.0093, "num_input_tokens_seen": 179041280, "step": 82995 }, { "epoch": 13.539967373572594, "grad_norm": 0.004189638886600733, "learning_rate": 0.000285547865242633, "loss": 0.0048, "num_input_tokens_seen": 179052832, "step": 83000 }, { "epoch": 13.540783034257748, "grad_norm": 0.023899270221590996, "learning_rate": 0.000285483567126848, "loss": 0.0322, "num_input_tokens_seen": 179063264, "step": 83005 }, { "epoch": 13.541598694942904, "grad_norm": 0.5239161849021912, "learning_rate": 0.0002854192733585107, "loss": 0.0857, "num_input_tokens_seen": 179073760, "step": 83010 }, { "epoch": 13.54241435562806, "grad_norm": 0.6742875576019287, "learning_rate": 0.000285354983938924, "loss": 0.101, "num_input_tokens_seen": 179085536, "step": 83015 }, { "epoch": 13.543230016313213, "grad_norm": 0.0038922138046473265, "learning_rate": 0.0002852906988693909, "loss": 0.0534, "num_input_tokens_seen": 179095808, "step": 83020 }, { "epoch": 13.544045676998369, "grad_norm": 1.1974924802780151, "learning_rate": 0.0002852264181512142, "loss": 0.165, "num_input_tokens_seen": 179108224, "step": 83025 }, { "epoch": 13.544861337683523, "grad_norm": 0.006668461952358484, "learning_rate": 0.00028516214178569656, "loss": 0.1506, "num_input_tokens_seen": 179120128, "step": 83030 }, { "epoch": 13.545676998368679, "grad_norm": 0.00900296401232481, "learning_rate": 0.0002850978697741406, "loss": 0.0248, "num_input_tokens_seen": 179130464, "step": 83035 }, { "epoch": 13.546492659053834, "grad_norm": 0.017927195876836777, "learning_rate": 0.000285033602117849, "loss": 0.0083, "num_input_tokens_seen": 179141536, "step": 83040 }, { "epoch": 13.547308319738988, "grad_norm": 0.03764891251921654, "learning_rate": 0.0002849693388181241, "loss": 0.0063, "num_input_tokens_seen": 179151616, "step": 83045 }, { "epoch": 13.548123980424144, "grad_norm": 0.009917051531374454, "learning_rate": 0.00028490507987626837, "loss": 0.0288, "num_input_tokens_seen": 179162528, "step": 83050 }, { "epoch": 13.548939641109298, "grad_norm": 0.16683761775493622, "learning_rate": 0.00028484082529358403, "loss": 0.0169, "num_input_tokens_seen": 179172800, "step": 83055 }, { "epoch": 13.549755301794454, "grad_norm": 0.044178783893585205, "learning_rate": 0.0002847765750713733, "loss": 0.02, "num_input_tokens_seen": 179183808, "step": 83060 }, { "epoch": 13.550570962479608, "grad_norm": 0.3584696650505066, "learning_rate": 0.0002847123292109382, "loss": 0.0296, "num_input_tokens_seen": 179193952, "step": 83065 }, { "epoch": 13.551386623164763, "grad_norm": 0.023323912173509598, "learning_rate": 0.0002846480877135812, "loss": 0.0044, "num_input_tokens_seen": 179205280, "step": 83070 }, { "epoch": 13.552202283849919, "grad_norm": 0.08521874248981476, "learning_rate": 0.00028458385058060355, "loss": 0.0116, "num_input_tokens_seen": 179214496, "step": 83075 }, { "epoch": 13.553017944535073, "grad_norm": 0.004919032100588083, "learning_rate": 0.0002845196178133078, "loss": 0.0255, "num_input_tokens_seen": 179224768, "step": 83080 }, { "epoch": 13.553833605220229, "grad_norm": 0.010073749348521233, "learning_rate": 0.00028445538941299493, "loss": 0.0059, "num_input_tokens_seen": 179236032, "step": 83085 }, { "epoch": 13.554649265905383, "grad_norm": 0.027646737173199654, "learning_rate": 0.00028439116538096743, "loss": 0.0047, "num_input_tokens_seen": 179246560, "step": 83090 }, { "epoch": 13.555464926590538, "grad_norm": 0.002947772154584527, "learning_rate": 0.0002843269457185261, "loss": 0.0737, "num_input_tokens_seen": 179258624, "step": 83095 }, { "epoch": 13.556280587275694, "grad_norm": 0.11389929801225662, "learning_rate": 0.00028426273042697327, "loss": 0.024, "num_input_tokens_seen": 179270080, "step": 83100 }, { "epoch": 13.557096247960848, "grad_norm": 0.1261122077703476, "learning_rate": 0.0002841985195076094, "loss": 0.0369, "num_input_tokens_seen": 179281216, "step": 83105 }, { "epoch": 13.557911908646004, "grad_norm": 0.002439778298139572, "learning_rate": 0.0002841343129617365, "loss": 0.0924, "num_input_tokens_seen": 179293504, "step": 83110 }, { "epoch": 13.558727569331158, "grad_norm": 0.0028491292614489794, "learning_rate": 0.0002840701107906557, "loss": 0.1011, "num_input_tokens_seen": 179304544, "step": 83115 }, { "epoch": 13.559543230016313, "grad_norm": 0.07990715652704239, "learning_rate": 0.00028400591299566793, "loss": 0.1389, "num_input_tokens_seen": 179314784, "step": 83120 }, { "epoch": 13.560358890701469, "grad_norm": 0.017892207950353622, "learning_rate": 0.00028394171957807433, "loss": 0.045, "num_input_tokens_seen": 179326048, "step": 83125 }, { "epoch": 13.561174551386623, "grad_norm": 0.012023605406284332, "learning_rate": 0.000283877530539176, "loss": 0.0155, "num_input_tokens_seen": 179336640, "step": 83130 }, { "epoch": 13.561990212071779, "grad_norm": 0.031814273446798325, "learning_rate": 0.00028381334588027353, "loss": 0.0072, "num_input_tokens_seen": 179346912, "step": 83135 }, { "epoch": 13.562805872756933, "grad_norm": 0.025559991598129272, "learning_rate": 0.00028374916560266794, "loss": 0.0045, "num_input_tokens_seen": 179358112, "step": 83140 }, { "epoch": 13.563621533442088, "grad_norm": 0.013948386535048485, "learning_rate": 0.0002836849897076598, "loss": 0.0041, "num_input_tokens_seen": 179370048, "step": 83145 }, { "epoch": 13.564437194127244, "grad_norm": 0.14220775663852692, "learning_rate": 0.00028362081819654984, "loss": 0.0057, "num_input_tokens_seen": 179380704, "step": 83150 }, { "epoch": 13.565252854812398, "grad_norm": 0.07033754885196686, "learning_rate": 0.00028355665107063845, "loss": 0.009, "num_input_tokens_seen": 179390592, "step": 83155 }, { "epoch": 13.566068515497554, "grad_norm": 0.015530402772128582, "learning_rate": 0.00028349248833122603, "loss": 0.0126, "num_input_tokens_seen": 179401344, "step": 83160 }, { "epoch": 13.566884176182707, "grad_norm": 0.01808835193514824, "learning_rate": 0.0002834283299796131, "loss": 0.1046, "num_input_tokens_seen": 179411840, "step": 83165 }, { "epoch": 13.567699836867863, "grad_norm": 0.041642915457487106, "learning_rate": 0.00028336417601709975, "loss": 0.0062, "num_input_tokens_seen": 179423552, "step": 83170 }, { "epoch": 13.568515497553017, "grad_norm": 0.06273958086967468, "learning_rate": 0.0002833000264449862, "loss": 0.0144, "num_input_tokens_seen": 179435200, "step": 83175 }, { "epoch": 13.569331158238173, "grad_norm": 0.14963902533054352, "learning_rate": 0.00028323588126457255, "loss": 0.0128, "num_input_tokens_seen": 179446496, "step": 83180 }, { "epoch": 13.570146818923329, "grad_norm": 0.0034774986561387777, "learning_rate": 0.00028317174047715873, "loss": 0.0601, "num_input_tokens_seen": 179457376, "step": 83185 }, { "epoch": 13.570962479608482, "grad_norm": 0.011188593693077564, "learning_rate": 0.0002831076040840446, "loss": 0.0029, "num_input_tokens_seen": 179468128, "step": 83190 }, { "epoch": 13.571778140293638, "grad_norm": 0.2243504524230957, "learning_rate": 0.0002830434720865301, "loss": 0.0162, "num_input_tokens_seen": 179479552, "step": 83195 }, { "epoch": 13.572593800978792, "grad_norm": 0.0077235461212694645, "learning_rate": 0.0002829793444859148, "loss": 0.0667, "num_input_tokens_seen": 179489280, "step": 83200 }, { "epoch": 13.573409461663948, "grad_norm": 0.17330393195152283, "learning_rate": 0.0002829152212834984, "loss": 0.1056, "num_input_tokens_seen": 179500320, "step": 83205 }, { "epoch": 13.574225122349104, "grad_norm": 0.025287067517638206, "learning_rate": 0.0002828511024805803, "loss": 0.0747, "num_input_tokens_seen": 179510752, "step": 83210 }, { "epoch": 13.575040783034257, "grad_norm": 0.31207767128944397, "learning_rate": 0.0002827869880784605, "loss": 0.0232, "num_input_tokens_seen": 179522336, "step": 83215 }, { "epoch": 13.575856443719413, "grad_norm": 0.009158209897577763, "learning_rate": 0.00028272287807843744, "loss": 0.027, "num_input_tokens_seen": 179532608, "step": 83220 }, { "epoch": 13.576672104404567, "grad_norm": 0.08343900740146637, "learning_rate": 0.00028265877248181113, "loss": 0.1138, "num_input_tokens_seen": 179543328, "step": 83225 }, { "epoch": 13.577487765089723, "grad_norm": 0.012292930856347084, "learning_rate": 0.0002825946712898806, "loss": 0.0069, "num_input_tokens_seen": 179554880, "step": 83230 }, { "epoch": 13.578303425774878, "grad_norm": 0.026486042886972427, "learning_rate": 0.0002825305745039447, "loss": 0.0111, "num_input_tokens_seen": 179565984, "step": 83235 }, { "epoch": 13.579119086460032, "grad_norm": 0.0034503673668950796, "learning_rate": 0.00028246648212530267, "loss": 0.0032, "num_input_tokens_seen": 179576032, "step": 83240 }, { "epoch": 13.579934747145188, "grad_norm": 0.01909870095551014, "learning_rate": 0.00028240239415525337, "loss": 0.1827, "num_input_tokens_seen": 179586880, "step": 83245 }, { "epoch": 13.580750407830342, "grad_norm": 0.02691022679209709, "learning_rate": 0.0002823383105950955, "loss": 0.0825, "num_input_tokens_seen": 179598144, "step": 83250 }, { "epoch": 13.581566068515498, "grad_norm": 0.038661979138851166, "learning_rate": 0.00028227423144612794, "loss": 0.0232, "num_input_tokens_seen": 179609824, "step": 83255 }, { "epoch": 13.582381729200652, "grad_norm": 0.02451668307185173, "learning_rate": 0.00028221015670964935, "loss": 0.0038, "num_input_tokens_seen": 179620832, "step": 83260 }, { "epoch": 13.583197389885807, "grad_norm": 0.011792528443038464, "learning_rate": 0.0002821460863869582, "loss": 0.0121, "num_input_tokens_seen": 179630528, "step": 83265 }, { "epoch": 13.584013050570963, "grad_norm": 0.026443682610988617, "learning_rate": 0.0002820820204793529, "loss": 0.0226, "num_input_tokens_seen": 179641888, "step": 83270 }, { "epoch": 13.584828711256117, "grad_norm": 0.01346469484269619, "learning_rate": 0.0002820179589881319, "loss": 0.0183, "num_input_tokens_seen": 179650912, "step": 83275 }, { "epoch": 13.585644371941273, "grad_norm": 0.031120723113417625, "learning_rate": 0.00028195390191459356, "loss": 0.0482, "num_input_tokens_seen": 179661536, "step": 83280 }, { "epoch": 13.586460032626427, "grad_norm": 0.007317977491766214, "learning_rate": 0.000281889849260036, "loss": 0.0348, "num_input_tokens_seen": 179671968, "step": 83285 }, { "epoch": 13.587275693311582, "grad_norm": 1.8993468284606934, "learning_rate": 0.00028182580102575726, "loss": 0.1279, "num_input_tokens_seen": 179682496, "step": 83290 }, { "epoch": 13.588091353996738, "grad_norm": 0.008874778635799885, "learning_rate": 0.00028176175721305555, "loss": 0.0346, "num_input_tokens_seen": 179693920, "step": 83295 }, { "epoch": 13.588907014681892, "grad_norm": 0.1871170997619629, "learning_rate": 0.0002816977178232286, "loss": 0.0104, "num_input_tokens_seen": 179704096, "step": 83300 }, { "epoch": 13.589722675367048, "grad_norm": 0.004576592240482569, "learning_rate": 0.0002816336828575744, "loss": 0.0518, "num_input_tokens_seen": 179715264, "step": 83305 }, { "epoch": 13.590538336052202, "grad_norm": 0.038286712020635605, "learning_rate": 0.0002815696523173906, "loss": 0.0126, "num_input_tokens_seen": 179725920, "step": 83310 }, { "epoch": 13.591353996737357, "grad_norm": 0.005555232986807823, "learning_rate": 0.0002815056262039749, "loss": 0.0541, "num_input_tokens_seen": 179738112, "step": 83315 }, { "epoch": 13.592169657422513, "grad_norm": 0.4668309688568115, "learning_rate": 0.0002814416045186249, "loss": 0.0419, "num_input_tokens_seen": 179748768, "step": 83320 }, { "epoch": 13.592985318107667, "grad_norm": 0.03303199261426926, "learning_rate": 0.00028137758726263796, "loss": 0.0035, "num_input_tokens_seen": 179758080, "step": 83325 }, { "epoch": 13.593800978792823, "grad_norm": 0.0046354299411177635, "learning_rate": 0.0002813135744373114, "loss": 0.0594, "num_input_tokens_seen": 179768352, "step": 83330 }, { "epoch": 13.594616639477977, "grad_norm": 0.19233158230781555, "learning_rate": 0.000281249566043943, "loss": 0.0091, "num_input_tokens_seen": 179778848, "step": 83335 }, { "epoch": 13.595432300163132, "grad_norm": 0.0680939257144928, "learning_rate": 0.0002811855620838294, "loss": 0.0076, "num_input_tokens_seen": 179789600, "step": 83340 }, { "epoch": 13.596247960848288, "grad_norm": 0.018604962155222893, "learning_rate": 0.00028112156255826826, "loss": 0.0427, "num_input_tokens_seen": 179800640, "step": 83345 }, { "epoch": 13.597063621533442, "grad_norm": 0.04600822925567627, "learning_rate": 0.000281057567468556, "loss": 0.0493, "num_input_tokens_seen": 179809984, "step": 83350 }, { "epoch": 13.597879282218598, "grad_norm": 0.7084985375404358, "learning_rate": 0.00028099357681599004, "loss": 0.1178, "num_input_tokens_seen": 179820832, "step": 83355 }, { "epoch": 13.598694942903752, "grad_norm": 0.0076343403197824955, "learning_rate": 0.0002809295906018671, "loss": 0.0061, "num_input_tokens_seen": 179830592, "step": 83360 }, { "epoch": 13.599510603588907, "grad_norm": 0.0016869325190782547, "learning_rate": 0.00028086560882748386, "loss": 0.0065, "num_input_tokens_seen": 179841152, "step": 83365 }, { "epoch": 13.600326264274061, "grad_norm": 0.005359593778848648, "learning_rate": 0.00028080163149413705, "loss": 0.056, "num_input_tokens_seen": 179852704, "step": 83370 }, { "epoch": 13.601141924959217, "grad_norm": 0.01878763921558857, "learning_rate": 0.0002807376586031233, "loss": 0.0568, "num_input_tokens_seen": 179864320, "step": 83375 }, { "epoch": 13.601957585644373, "grad_norm": 0.21416902542114258, "learning_rate": 0.0002806736901557391, "loss": 0.033, "num_input_tokens_seen": 179874592, "step": 83380 }, { "epoch": 13.602773246329527, "grad_norm": 0.5574246644973755, "learning_rate": 0.00028060972615328065, "loss": 0.0435, "num_input_tokens_seen": 179885568, "step": 83385 }, { "epoch": 13.603588907014682, "grad_norm": 0.5374407172203064, "learning_rate": 0.00028054576659704457, "loss": 0.0789, "num_input_tokens_seen": 179896032, "step": 83390 }, { "epoch": 13.604404567699836, "grad_norm": 0.13124190270900726, "learning_rate": 0.00028048181148832685, "loss": 0.0104, "num_input_tokens_seen": 179907168, "step": 83395 }, { "epoch": 13.605220228384992, "grad_norm": 0.08736497908830643, "learning_rate": 0.00028041786082842366, "loss": 0.0097, "num_input_tokens_seen": 179917920, "step": 83400 }, { "epoch": 13.606035889070148, "grad_norm": 0.005984143819659948, "learning_rate": 0.0002803539146186311, "loss": 0.0127, "num_input_tokens_seen": 179928192, "step": 83405 }, { "epoch": 13.606851549755302, "grad_norm": 0.035791102796792984, "learning_rate": 0.0002802899728602452, "loss": 0.0936, "num_input_tokens_seen": 179939712, "step": 83410 }, { "epoch": 13.607667210440457, "grad_norm": 0.015619471669197083, "learning_rate": 0.00028022603555456164, "loss": 0.007, "num_input_tokens_seen": 179950336, "step": 83415 }, { "epoch": 13.608482871125611, "grad_norm": 0.15644066035747528, "learning_rate": 0.00028016210270287635, "loss": 0.0236, "num_input_tokens_seen": 179961504, "step": 83420 }, { "epoch": 13.609298531810767, "grad_norm": 0.0016988529823720455, "learning_rate": 0.00028009817430648483, "loss": 0.0786, "num_input_tokens_seen": 179972096, "step": 83425 }, { "epoch": 13.61011419249592, "grad_norm": 0.09656263887882233, "learning_rate": 0.00028003425036668287, "loss": 0.03, "num_input_tokens_seen": 179982496, "step": 83430 }, { "epoch": 13.610929853181077, "grad_norm": 0.0009607415413483977, "learning_rate": 0.00027997033088476554, "loss": 0.0088, "num_input_tokens_seen": 179992032, "step": 83435 }, { "epoch": 13.611745513866232, "grad_norm": 0.08385143429040909, "learning_rate": 0.000279906415862029, "loss": 0.0641, "num_input_tokens_seen": 180003232, "step": 83440 }, { "epoch": 13.612561174551386, "grad_norm": 0.002811253070831299, "learning_rate": 0.00027984250529976783, "loss": 0.0515, "num_input_tokens_seen": 180014080, "step": 83445 }, { "epoch": 13.613376835236542, "grad_norm": 0.02171432599425316, "learning_rate": 0.000279778599199278, "loss": 0.0664, "num_input_tokens_seen": 180024672, "step": 83450 }, { "epoch": 13.614192495921696, "grad_norm": 0.004560902249068022, "learning_rate": 0.0002797146975618538, "loss": 0.0038, "num_input_tokens_seen": 180036064, "step": 83455 }, { "epoch": 13.615008156606851, "grad_norm": 0.018032025545835495, "learning_rate": 0.0002796508003887911, "loss": 0.1436, "num_input_tokens_seen": 180047136, "step": 83460 }, { "epoch": 13.615823817292007, "grad_norm": 0.07336045056581497, "learning_rate": 0.00027958690768138406, "loss": 0.0424, "num_input_tokens_seen": 180057856, "step": 83465 }, { "epoch": 13.616639477977161, "grad_norm": 0.15308038890361786, "learning_rate": 0.0002795230194409283, "loss": 0.0073, "num_input_tokens_seen": 180068960, "step": 83470 }, { "epoch": 13.617455138662317, "grad_norm": 0.7937920689582825, "learning_rate": 0.00027945913566871793, "loss": 0.0745, "num_input_tokens_seen": 180080224, "step": 83475 }, { "epoch": 13.61827079934747, "grad_norm": 0.0131694246083498, "learning_rate": 0.0002793952563660483, "loss": 0.0281, "num_input_tokens_seen": 180091520, "step": 83480 }, { "epoch": 13.619086460032626, "grad_norm": 0.032326050102710724, "learning_rate": 0.0002793313815342133, "loss": 0.0109, "num_input_tokens_seen": 180102880, "step": 83485 }, { "epoch": 13.619902120717782, "grad_norm": 0.049615900963544846, "learning_rate": 0.0002792675111745081, "loss": 0.0545, "num_input_tokens_seen": 180113952, "step": 83490 }, { "epoch": 13.620717781402936, "grad_norm": 0.02333170548081398, "learning_rate": 0.0002792036452882265, "loss": 0.0341, "num_input_tokens_seen": 180123872, "step": 83495 }, { "epoch": 13.621533442088092, "grad_norm": 0.5732824802398682, "learning_rate": 0.00027913978387666326, "loss": 0.0298, "num_input_tokens_seen": 180134432, "step": 83500 }, { "epoch": 13.622349102773246, "grad_norm": 0.002878038678318262, "learning_rate": 0.0002790759269411125, "loss": 0.0492, "num_input_tokens_seen": 180144768, "step": 83505 }, { "epoch": 13.623164763458401, "grad_norm": 0.27284419536590576, "learning_rate": 0.00027901207448286836, "loss": 0.0202, "num_input_tokens_seen": 180156416, "step": 83510 }, { "epoch": 13.623980424143557, "grad_norm": 0.13872718811035156, "learning_rate": 0.0002789482265032249, "loss": 0.0291, "num_input_tokens_seen": 180167168, "step": 83515 }, { "epoch": 13.624796084828711, "grad_norm": 0.008211048319935799, "learning_rate": 0.00027888438300347607, "loss": 0.0062, "num_input_tokens_seen": 180177536, "step": 83520 }, { "epoch": 13.625611745513867, "grad_norm": 0.007941660471260548, "learning_rate": 0.00027882054398491564, "loss": 0.0179, "num_input_tokens_seen": 180189024, "step": 83525 }, { "epoch": 13.62642740619902, "grad_norm": 0.002546174917370081, "learning_rate": 0.0002787567094488375, "loss": 0.0253, "num_input_tokens_seen": 180200128, "step": 83530 }, { "epoch": 13.627243066884176, "grad_norm": 0.6857663989067078, "learning_rate": 0.00027869287939653534, "loss": 0.1676, "num_input_tokens_seen": 180209856, "step": 83535 }, { "epoch": 13.62805872756933, "grad_norm": 0.1814769208431244, "learning_rate": 0.0002786290538293027, "loss": 0.021, "num_input_tokens_seen": 180220032, "step": 83540 }, { "epoch": 13.628874388254486, "grad_norm": 0.0033023732248693705, "learning_rate": 0.00027856523274843314, "loss": 0.0057, "num_input_tokens_seen": 180231296, "step": 83545 }, { "epoch": 13.629690048939642, "grad_norm": 1.3666231632232666, "learning_rate": 0.00027850141615521983, "loss": 0.0272, "num_input_tokens_seen": 180242336, "step": 83550 }, { "epoch": 13.630505709624796, "grad_norm": 0.011464180424809456, "learning_rate": 0.0002784376040509567, "loss": 0.0288, "num_input_tokens_seen": 180251968, "step": 83555 }, { "epoch": 13.631321370309951, "grad_norm": 1.7342392206192017, "learning_rate": 0.00027837379643693615, "loss": 0.0625, "num_input_tokens_seen": 180263040, "step": 83560 }, { "epoch": 13.632137030995105, "grad_norm": 0.026407018303871155, "learning_rate": 0.0002783099933144523, "loss": 0.0257, "num_input_tokens_seen": 180274048, "step": 83565 }, { "epoch": 13.632952691680261, "grad_norm": 0.3312642276287079, "learning_rate": 0.00027824619468479715, "loss": 0.0294, "num_input_tokens_seen": 180285280, "step": 83570 }, { "epoch": 13.633768352365417, "grad_norm": 0.4619533121585846, "learning_rate": 0.00027818240054926463, "loss": 0.1333, "num_input_tokens_seen": 180294560, "step": 83575 }, { "epoch": 13.63458401305057, "grad_norm": 0.02172946184873581, "learning_rate": 0.0002781186109091467, "loss": 0.0628, "num_input_tokens_seen": 180306592, "step": 83580 }, { "epoch": 13.635399673735726, "grad_norm": 0.2084684669971466, "learning_rate": 0.0002780548257657371, "loss": 0.0051, "num_input_tokens_seen": 180317600, "step": 83585 }, { "epoch": 13.63621533442088, "grad_norm": 0.02319156564772129, "learning_rate": 0.00027799104512032756, "loss": 0.0033, "num_input_tokens_seen": 180327968, "step": 83590 }, { "epoch": 13.637030995106036, "grad_norm": 0.04900064691901207, "learning_rate": 0.0002779272689742115, "loss": 0.0083, "num_input_tokens_seen": 180339584, "step": 83595 }, { "epoch": 13.63784665579119, "grad_norm": 0.0034498756285756826, "learning_rate": 0.0002778634973286807, "loss": 0.0084, "num_input_tokens_seen": 180350816, "step": 83600 }, { "epoch": 13.638662316476346, "grad_norm": 0.017543772235512733, "learning_rate": 0.00027779973018502834, "loss": 0.0922, "num_input_tokens_seen": 180361792, "step": 83605 }, { "epoch": 13.639477977161501, "grad_norm": 0.0023924370761960745, "learning_rate": 0.0002777359675445459, "loss": 0.01, "num_input_tokens_seen": 180371616, "step": 83610 }, { "epoch": 13.640293637846655, "grad_norm": 0.11245270818471909, "learning_rate": 0.00027767220940852646, "loss": 0.0298, "num_input_tokens_seen": 180382752, "step": 83615 }, { "epoch": 13.641109298531811, "grad_norm": 0.024528374895453453, "learning_rate": 0.0002776084557782613, "loss": 0.0077, "num_input_tokens_seen": 180394080, "step": 83620 }, { "epoch": 13.641924959216965, "grad_norm": 0.003381237154826522, "learning_rate": 0.00027754470665504336, "loss": 0.061, "num_input_tokens_seen": 180403904, "step": 83625 }, { "epoch": 13.64274061990212, "grad_norm": 0.048734717071056366, "learning_rate": 0.0002774809620401637, "loss": 0.0314, "num_input_tokens_seen": 180415872, "step": 83630 }, { "epoch": 13.643556280587276, "grad_norm": 0.016908664256334305, "learning_rate": 0.000277417221934915, "loss": 0.1238, "num_input_tokens_seen": 180426592, "step": 83635 }, { "epoch": 13.64437194127243, "grad_norm": 0.1086050420999527, "learning_rate": 0.00027735348634058834, "loss": 0.0403, "num_input_tokens_seen": 180437632, "step": 83640 }, { "epoch": 13.645187601957586, "grad_norm": 0.005747334100306034, "learning_rate": 0.0002772897552584759, "loss": 0.0813, "num_input_tokens_seen": 180448000, "step": 83645 }, { "epoch": 13.64600326264274, "grad_norm": 0.016691338270902634, "learning_rate": 0.000277226028689869, "loss": 0.1143, "num_input_tokens_seen": 180458944, "step": 83650 }, { "epoch": 13.646818923327896, "grad_norm": 0.026544181630015373, "learning_rate": 0.00027716230663605933, "loss": 0.026, "num_input_tokens_seen": 180469312, "step": 83655 }, { "epoch": 13.647634584013051, "grad_norm": 0.44264596700668335, "learning_rate": 0.00027709858909833823, "loss": 0.0822, "num_input_tokens_seen": 180480160, "step": 83660 }, { "epoch": 13.648450244698205, "grad_norm": 0.013421054929494858, "learning_rate": 0.000277034876077997, "loss": 0.0591, "num_input_tokens_seen": 180492032, "step": 83665 }, { "epoch": 13.649265905383361, "grad_norm": 0.30819958448410034, "learning_rate": 0.00027697116757632677, "loss": 0.0385, "num_input_tokens_seen": 180504288, "step": 83670 }, { "epoch": 13.650081566068515, "grad_norm": 0.00883038155734539, "learning_rate": 0.0002769074635946188, "loss": 0.0248, "num_input_tokens_seen": 180514624, "step": 83675 }, { "epoch": 13.65089722675367, "grad_norm": 0.012056360952556133, "learning_rate": 0.0002768437641341641, "loss": 0.0038, "num_input_tokens_seen": 180525248, "step": 83680 }, { "epoch": 13.651712887438826, "grad_norm": 0.053986337035894394, "learning_rate": 0.00027678006919625367, "loss": 0.0219, "num_input_tokens_seen": 180535520, "step": 83685 }, { "epoch": 13.65252854812398, "grad_norm": 0.00553318252786994, "learning_rate": 0.00027671637878217824, "loss": 0.1271, "num_input_tokens_seen": 180546752, "step": 83690 }, { "epoch": 13.653344208809136, "grad_norm": 0.032228920608758926, "learning_rate": 0.0002766526928932285, "loss": 0.1114, "num_input_tokens_seen": 180556672, "step": 83695 }, { "epoch": 13.65415986949429, "grad_norm": 0.011478866450488567, "learning_rate": 0.0002765890115306956, "loss": 0.0033, "num_input_tokens_seen": 180568032, "step": 83700 }, { "epoch": 13.654975530179446, "grad_norm": 0.03752702474594116, "learning_rate": 0.0002765253346958695, "loss": 0.0993, "num_input_tokens_seen": 180579552, "step": 83705 }, { "epoch": 13.655791190864601, "grad_norm": 0.009617595933377743, "learning_rate": 0.00027646166239004134, "loss": 0.0118, "num_input_tokens_seen": 180589024, "step": 83710 }, { "epoch": 13.656606851549755, "grad_norm": 0.8038741946220398, "learning_rate": 0.0002763979946145008, "loss": 0.0438, "num_input_tokens_seen": 180599200, "step": 83715 }, { "epoch": 13.65742251223491, "grad_norm": 0.03252618387341499, "learning_rate": 0.00027633433137053885, "loss": 0.0963, "num_input_tokens_seen": 180610016, "step": 83720 }, { "epoch": 13.658238172920065, "grad_norm": 0.003162363078445196, "learning_rate": 0.00027627067265944514, "loss": 0.0024, "num_input_tokens_seen": 180620768, "step": 83725 }, { "epoch": 13.65905383360522, "grad_norm": 0.07152137905359268, "learning_rate": 0.0002762070184825104, "loss": 0.0092, "num_input_tokens_seen": 180632416, "step": 83730 }, { "epoch": 13.659869494290374, "grad_norm": 0.03066076897084713, "learning_rate": 0.00027614336884102393, "loss": 0.0051, "num_input_tokens_seen": 180642656, "step": 83735 }, { "epoch": 13.66068515497553, "grad_norm": 0.01499424409121275, "learning_rate": 0.0002760797237362765, "loss": 0.1341, "num_input_tokens_seen": 180653056, "step": 83740 }, { "epoch": 13.661500815660686, "grad_norm": 0.018885690718889236, "learning_rate": 0.00027601608316955715, "loss": 0.0313, "num_input_tokens_seen": 180662752, "step": 83745 }, { "epoch": 13.66231647634584, "grad_norm": 0.058334581553936005, "learning_rate": 0.0002759524471421562, "loss": 0.0134, "num_input_tokens_seen": 180672288, "step": 83750 }, { "epoch": 13.663132137030995, "grad_norm": 0.034454312175512314, "learning_rate": 0.00027588881565536303, "loss": 0.0875, "num_input_tokens_seen": 180685696, "step": 83755 }, { "epoch": 13.66394779771615, "grad_norm": 0.05753010883927345, "learning_rate": 0.00027582518871046744, "loss": 0.0979, "num_input_tokens_seen": 180695648, "step": 83760 }, { "epoch": 13.664763458401305, "grad_norm": 0.049889594316482544, "learning_rate": 0.00027576156630875875, "loss": 0.0094, "num_input_tokens_seen": 180706624, "step": 83765 }, { "epoch": 13.66557911908646, "grad_norm": 0.08508668094873428, "learning_rate": 0.0002756979484515264, "loss": 0.0057, "num_input_tokens_seen": 180717504, "step": 83770 }, { "epoch": 13.666394779771615, "grad_norm": 0.009294361807405949, "learning_rate": 0.00027563433514005966, "loss": 0.0053, "num_input_tokens_seen": 180727840, "step": 83775 }, { "epoch": 13.66721044045677, "grad_norm": 0.2951067090034485, "learning_rate": 0.0002755707263756477, "loss": 0.1371, "num_input_tokens_seen": 180738688, "step": 83780 }, { "epoch": 13.668026101141924, "grad_norm": 0.009293925948441029, "learning_rate": 0.0002755071221595798, "loss": 0.0098, "num_input_tokens_seen": 180749184, "step": 83785 }, { "epoch": 13.66884176182708, "grad_norm": 0.02765575610101223, "learning_rate": 0.0002754435224931447, "loss": 0.0452, "num_input_tokens_seen": 180760096, "step": 83790 }, { "epoch": 13.669657422512234, "grad_norm": 0.10952600836753845, "learning_rate": 0.00027537992737763163, "loss": 0.0159, "num_input_tokens_seen": 180770336, "step": 83795 }, { "epoch": 13.67047308319739, "grad_norm": 0.009626870043575764, "learning_rate": 0.00027531633681432925, "loss": 0.0119, "num_input_tokens_seen": 180781440, "step": 83800 }, { "epoch": 13.671288743882545, "grad_norm": 0.935558557510376, "learning_rate": 0.0002752527508045263, "loss": 0.0359, "num_input_tokens_seen": 180792032, "step": 83805 }, { "epoch": 13.6721044045677, "grad_norm": 0.11854733526706696, "learning_rate": 0.0002751891693495115, "loss": 0.0331, "num_input_tokens_seen": 180803392, "step": 83810 }, { "epoch": 13.672920065252855, "grad_norm": 0.023176709190011024, "learning_rate": 0.00027512559245057333, "loss": 0.0074, "num_input_tokens_seen": 180813536, "step": 83815 }, { "epoch": 13.673735725938009, "grad_norm": 0.015529298223555088, "learning_rate": 0.00027506202010900037, "loss": 0.0069, "num_input_tokens_seen": 180824480, "step": 83820 }, { "epoch": 13.674551386623165, "grad_norm": 0.005363152828067541, "learning_rate": 0.00027499845232608087, "loss": 0.0143, "num_input_tokens_seen": 180835520, "step": 83825 }, { "epoch": 13.67536704730832, "grad_norm": 0.029270047321915627, "learning_rate": 0.00027493488910310316, "loss": 0.1808, "num_input_tokens_seen": 180845536, "step": 83830 }, { "epoch": 13.676182707993474, "grad_norm": 0.018376098945736885, "learning_rate": 0.0002748713304413555, "loss": 0.003, "num_input_tokens_seen": 180856800, "step": 83835 }, { "epoch": 13.67699836867863, "grad_norm": 0.015154656022787094, "learning_rate": 0.0002748077763421257, "loss": 0.0087, "num_input_tokens_seen": 180868448, "step": 83840 }, { "epoch": 13.677814029363784, "grad_norm": 0.007552286144345999, "learning_rate": 0.0002747442268067024, "loss": 0.0231, "num_input_tokens_seen": 180879936, "step": 83845 }, { "epoch": 13.67862969004894, "grad_norm": 0.002675456926226616, "learning_rate": 0.00027468068183637265, "loss": 0.006, "num_input_tokens_seen": 180891488, "step": 83850 }, { "epoch": 13.679445350734095, "grad_norm": 0.31013423204421997, "learning_rate": 0.0002746171414324249, "loss": 0.0303, "num_input_tokens_seen": 180901984, "step": 83855 }, { "epoch": 13.68026101141925, "grad_norm": 0.8643865585327148, "learning_rate": 0.00027455360559614677, "loss": 0.0344, "num_input_tokens_seen": 180912352, "step": 83860 }, { "epoch": 13.681076672104405, "grad_norm": 0.01307713519781828, "learning_rate": 0.00027449007432882576, "loss": 0.003, "num_input_tokens_seen": 180923936, "step": 83865 }, { "epoch": 13.681892332789559, "grad_norm": 0.010098682716488838, "learning_rate": 0.00027442654763174955, "loss": 0.0059, "num_input_tokens_seen": 180935328, "step": 83870 }, { "epoch": 13.682707993474715, "grad_norm": 0.8182013630867004, "learning_rate": 0.00027436302550620545, "loss": 0.1286, "num_input_tokens_seen": 180945760, "step": 83875 }, { "epoch": 13.68352365415987, "grad_norm": 0.007595505099743605, "learning_rate": 0.0002742995079534809, "loss": 0.007, "num_input_tokens_seen": 180956416, "step": 83880 }, { "epoch": 13.684339314845024, "grad_norm": 2.020815372467041, "learning_rate": 0.0002742359949748632, "loss": 0.0431, "num_input_tokens_seen": 180966880, "step": 83885 }, { "epoch": 13.68515497553018, "grad_norm": 0.06905794143676758, "learning_rate": 0.0002741724865716394, "loss": 0.0161, "num_input_tokens_seen": 180978880, "step": 83890 }, { "epoch": 13.685970636215334, "grad_norm": 0.012662988156080246, "learning_rate": 0.0002741089827450966, "loss": 0.066, "num_input_tokens_seen": 180991264, "step": 83895 }, { "epoch": 13.68678629690049, "grad_norm": 0.29215264320373535, "learning_rate": 0.0002740454834965219, "loss": 0.0713, "num_input_tokens_seen": 181002784, "step": 83900 }, { "epoch": 13.687601957585644, "grad_norm": 0.006162087898701429, "learning_rate": 0.0002739819888272021, "loss": 0.0677, "num_input_tokens_seen": 181012576, "step": 83905 }, { "epoch": 13.6884176182708, "grad_norm": 0.003966738004237413, "learning_rate": 0.000273918498738424, "loss": 0.0817, "num_input_tokens_seen": 181022400, "step": 83910 }, { "epoch": 13.689233278955955, "grad_norm": 0.004630334209650755, "learning_rate": 0.00027385501323147433, "loss": 0.0157, "num_input_tokens_seen": 181032800, "step": 83915 }, { "epoch": 13.690048939641109, "grad_norm": 0.032001350075006485, "learning_rate": 0.00027379153230763976, "loss": 0.0844, "num_input_tokens_seen": 181044928, "step": 83920 }, { "epoch": 13.690864600326265, "grad_norm": 0.20921839773654938, "learning_rate": 0.00027372805596820673, "loss": 0.0099, "num_input_tokens_seen": 181055424, "step": 83925 }, { "epoch": 13.691680261011419, "grad_norm": 0.061357058584690094, "learning_rate": 0.0002736645842144616, "loss": 0.0171, "num_input_tokens_seen": 181065792, "step": 83930 }, { "epoch": 13.692495921696574, "grad_norm": 0.029230689629912376, "learning_rate": 0.00027360111704769093, "loss": 0.0134, "num_input_tokens_seen": 181077184, "step": 83935 }, { "epoch": 13.69331158238173, "grad_norm": 0.026656830683350563, "learning_rate": 0.00027353765446918075, "loss": 0.01, "num_input_tokens_seen": 181088768, "step": 83940 }, { "epoch": 13.694127243066884, "grad_norm": 0.0064884210005402565, "learning_rate": 0.0002734741964802173, "loss": 0.0965, "num_input_tokens_seen": 181099456, "step": 83945 }, { "epoch": 13.69494290375204, "grad_norm": 0.022289535030722618, "learning_rate": 0.00027341074308208667, "loss": 0.0406, "num_input_tokens_seen": 181111168, "step": 83950 }, { "epoch": 13.695758564437194, "grad_norm": 0.02582310326397419, "learning_rate": 0.00027334729427607476, "loss": 0.0077, "num_input_tokens_seen": 181121504, "step": 83955 }, { "epoch": 13.69657422512235, "grad_norm": 0.001693788799457252, "learning_rate": 0.00027328385006346746, "loss": 0.033, "num_input_tokens_seen": 181131680, "step": 83960 }, { "epoch": 13.697389885807503, "grad_norm": 0.00532862963154912, "learning_rate": 0.00027322041044555045, "loss": 0.0146, "num_input_tokens_seen": 181141120, "step": 83965 }, { "epoch": 13.698205546492659, "grad_norm": 0.004277058877050877, "learning_rate": 0.00027315697542360944, "loss": 0.0095, "num_input_tokens_seen": 181150976, "step": 83970 }, { "epoch": 13.699021207177815, "grad_norm": 0.048466846346855164, "learning_rate": 0.00027309354499893045, "loss": 0.112, "num_input_tokens_seen": 181161600, "step": 83975 }, { "epoch": 13.699836867862969, "grad_norm": 0.013852128759026527, "learning_rate": 0.00027303011917279826, "loss": 0.0968, "num_input_tokens_seen": 181171264, "step": 83980 }, { "epoch": 13.700652528548124, "grad_norm": 0.056428197771310806, "learning_rate": 0.00027296669794649875, "loss": 0.0146, "num_input_tokens_seen": 181182336, "step": 83985 }, { "epoch": 13.701468189233278, "grad_norm": 0.25973379611968994, "learning_rate": 0.0002729032813213172, "loss": 0.0328, "num_input_tokens_seen": 181192992, "step": 83990 }, { "epoch": 13.702283849918434, "grad_norm": 0.015085608698427677, "learning_rate": 0.00027283986929853873, "loss": 0.0079, "num_input_tokens_seen": 181203520, "step": 83995 }, { "epoch": 13.70309951060359, "grad_norm": 0.02610679529607296, "learning_rate": 0.0002727764618794485, "loss": 0.0042, "num_input_tokens_seen": 181214240, "step": 84000 }, { "epoch": 13.703915171288743, "grad_norm": 0.005492442287504673, "learning_rate": 0.00027271305906533146, "loss": 0.0428, "num_input_tokens_seen": 181225408, "step": 84005 }, { "epoch": 13.7047308319739, "grad_norm": 0.03574015200138092, "learning_rate": 0.00027264966085747267, "loss": 0.0048, "num_input_tokens_seen": 181236000, "step": 84010 }, { "epoch": 13.705546492659053, "grad_norm": 0.16450905799865723, "learning_rate": 0.00027258626725715684, "loss": 0.0129, "num_input_tokens_seen": 181246336, "step": 84015 }, { "epoch": 13.706362153344209, "grad_norm": 0.057066600769758224, "learning_rate": 0.0002725228782656689, "loss": 0.0098, "num_input_tokens_seen": 181257728, "step": 84020 }, { "epoch": 13.707177814029365, "grad_norm": 0.11827599257230759, "learning_rate": 0.00027245949388429334, "loss": 0.0086, "num_input_tokens_seen": 181268896, "step": 84025 }, { "epoch": 13.707993474714518, "grad_norm": 0.3682697117328644, "learning_rate": 0.0002723961141143148, "loss": 0.0248, "num_input_tokens_seen": 181280384, "step": 84030 }, { "epoch": 13.708809135399674, "grad_norm": 0.04010923579335213, "learning_rate": 0.0002723327389570177, "loss": 0.0184, "num_input_tokens_seen": 181291168, "step": 84035 }, { "epoch": 13.709624796084828, "grad_norm": 0.029280370101332664, "learning_rate": 0.00027226936841368655, "loss": 0.0066, "num_input_tokens_seen": 181302464, "step": 84040 }, { "epoch": 13.710440456769984, "grad_norm": 0.006572904996573925, "learning_rate": 0.00027220600248560557, "loss": 0.0993, "num_input_tokens_seen": 181312608, "step": 84045 }, { "epoch": 13.71125611745514, "grad_norm": 0.7348065972328186, "learning_rate": 0.00027214264117405884, "loss": 0.0178, "num_input_tokens_seen": 181322976, "step": 84050 }, { "epoch": 13.712071778140293, "grad_norm": 0.10669244080781937, "learning_rate": 0.0002720792844803306, "loss": 0.0399, "num_input_tokens_seen": 181333248, "step": 84055 }, { "epoch": 13.71288743882545, "grad_norm": 0.1848609298467636, "learning_rate": 0.00027201593240570475, "loss": 0.0541, "num_input_tokens_seen": 181343808, "step": 84060 }, { "epoch": 13.713703099510603, "grad_norm": 0.05457932502031326, "learning_rate": 0.00027195258495146525, "loss": 0.0342, "num_input_tokens_seen": 181355872, "step": 84065 }, { "epoch": 13.714518760195759, "grad_norm": 0.03962530940771103, "learning_rate": 0.00027188924211889593, "loss": 0.0082, "num_input_tokens_seen": 181367232, "step": 84070 }, { "epoch": 13.715334420880914, "grad_norm": 1.599404215812683, "learning_rate": 0.0002718259039092803, "loss": 0.0361, "num_input_tokens_seen": 181378720, "step": 84075 }, { "epoch": 13.716150081566068, "grad_norm": 0.49120208621025085, "learning_rate": 0.0002717625703239026, "loss": 0.1345, "num_input_tokens_seen": 181388864, "step": 84080 }, { "epoch": 13.716965742251224, "grad_norm": 0.09545818716287613, "learning_rate": 0.00027169924136404553, "loss": 0.0661, "num_input_tokens_seen": 181400000, "step": 84085 }, { "epoch": 13.717781402936378, "grad_norm": 0.005798594560474157, "learning_rate": 0.00027163591703099335, "loss": 0.0111, "num_input_tokens_seen": 181410688, "step": 84090 }, { "epoch": 13.718597063621534, "grad_norm": 0.0038630373310297728, "learning_rate": 0.0002715725973260286, "loss": 0.0111, "num_input_tokens_seen": 181421824, "step": 84095 }, { "epoch": 13.719412724306688, "grad_norm": 0.10054466128349304, "learning_rate": 0.00027150928225043545, "loss": 0.0076, "num_input_tokens_seen": 181432224, "step": 84100 }, { "epoch": 13.720228384991843, "grad_norm": 0.10065799951553345, "learning_rate": 0.00027144597180549603, "loss": 0.023, "num_input_tokens_seen": 181443072, "step": 84105 }, { "epoch": 13.721044045676999, "grad_norm": 1.4538462162017822, "learning_rate": 0.0002713826659924944, "loss": 0.0882, "num_input_tokens_seen": 181453248, "step": 84110 }, { "epoch": 13.721859706362153, "grad_norm": 0.008288124576210976, "learning_rate": 0.00027131936481271265, "loss": 0.0019, "num_input_tokens_seen": 181464704, "step": 84115 }, { "epoch": 13.722675367047309, "grad_norm": 0.6460946798324585, "learning_rate": 0.00027125606826743445, "loss": 0.0487, "num_input_tokens_seen": 181475520, "step": 84120 }, { "epoch": 13.723491027732463, "grad_norm": 0.04889329522848129, "learning_rate": 0.0002711927763579418, "loss": 0.0987, "num_input_tokens_seen": 181486944, "step": 84125 }, { "epoch": 13.724306688417618, "grad_norm": 0.09713765978813171, "learning_rate": 0.00027112948908551807, "loss": 0.0087, "num_input_tokens_seen": 181499328, "step": 84130 }, { "epoch": 13.725122349102774, "grad_norm": 0.016596613451838493, "learning_rate": 0.00027106620645144555, "loss": 0.0671, "num_input_tokens_seen": 181509024, "step": 84135 }, { "epoch": 13.725938009787928, "grad_norm": 0.0007114295149222016, "learning_rate": 0.00027100292845700676, "loss": 0.0265, "num_input_tokens_seen": 181519392, "step": 84140 }, { "epoch": 13.726753670473084, "grad_norm": 0.0638134703040123, "learning_rate": 0.0002709396551034842, "loss": 0.0139, "num_input_tokens_seen": 181530048, "step": 84145 }, { "epoch": 13.727569331158238, "grad_norm": 0.008448406122624874, "learning_rate": 0.00027087638639215994, "loss": 0.0227, "num_input_tokens_seen": 181540736, "step": 84150 }, { "epoch": 13.728384991843393, "grad_norm": 0.005745413713157177, "learning_rate": 0.00027081312232431654, "loss": 0.06, "num_input_tokens_seen": 181550528, "step": 84155 }, { "epoch": 13.729200652528547, "grad_norm": 1.2074400186538696, "learning_rate": 0.00027074986290123596, "loss": 0.0265, "num_input_tokens_seen": 181561664, "step": 84160 }, { "epoch": 13.730016313213703, "grad_norm": 0.018368346616625786, "learning_rate": 0.0002706866081242001, "loss": 0.1073, "num_input_tokens_seen": 181573280, "step": 84165 }, { "epoch": 13.730831973898859, "grad_norm": 0.7245858311653137, "learning_rate": 0.0002706233579944911, "loss": 0.0274, "num_input_tokens_seen": 181584096, "step": 84170 }, { "epoch": 13.731647634584013, "grad_norm": 0.05534718930721283, "learning_rate": 0.00027056011251339073, "loss": 0.0081, "num_input_tokens_seen": 181595872, "step": 84175 }, { "epoch": 13.732463295269168, "grad_norm": 0.03646060824394226, "learning_rate": 0.0002704968716821806, "loss": 0.0671, "num_input_tokens_seen": 181606720, "step": 84180 }, { "epoch": 13.733278955954322, "grad_norm": 0.05478598177433014, "learning_rate": 0.00027043363550214287, "loss": 0.044, "num_input_tokens_seen": 181617312, "step": 84185 }, { "epoch": 13.734094616639478, "grad_norm": 0.0962568074464798, "learning_rate": 0.00027037040397455837, "loss": 0.0082, "num_input_tokens_seen": 181628992, "step": 84190 }, { "epoch": 13.734910277324634, "grad_norm": 0.028388697654008865, "learning_rate": 0.0002703071771007093, "loss": 0.0925, "num_input_tokens_seen": 181641376, "step": 84195 }, { "epoch": 13.735725938009788, "grad_norm": 0.005647646263241768, "learning_rate": 0.0002702439548818763, "loss": 0.0039, "num_input_tokens_seen": 181651200, "step": 84200 }, { "epoch": 13.736541598694943, "grad_norm": 0.2623197138309479, "learning_rate": 0.0002701807373193414, "loss": 0.0144, "num_input_tokens_seen": 181662816, "step": 84205 }, { "epoch": 13.737357259380097, "grad_norm": 0.03629335016012192, "learning_rate": 0.000270117524414385, "loss": 0.0126, "num_input_tokens_seen": 181673344, "step": 84210 }, { "epoch": 13.738172920065253, "grad_norm": 0.8144631385803223, "learning_rate": 0.000270054316168289, "loss": 0.0814, "num_input_tokens_seen": 181683392, "step": 84215 }, { "epoch": 13.738988580750409, "grad_norm": 0.1959667056798935, "learning_rate": 0.0002699911125823336, "loss": 0.0118, "num_input_tokens_seen": 181694688, "step": 84220 }, { "epoch": 13.739804241435563, "grad_norm": 0.20079147815704346, "learning_rate": 0.0002699279136578005, "loss": 0.0806, "num_input_tokens_seen": 181706048, "step": 84225 }, { "epoch": 13.740619902120718, "grad_norm": 0.06680264323949814, "learning_rate": 0.0002698647193959697, "loss": 0.0092, "num_input_tokens_seen": 181715840, "step": 84230 }, { "epoch": 13.741435562805872, "grad_norm": 0.016095474362373352, "learning_rate": 0.00026980152979812265, "loss": 0.0092, "num_input_tokens_seen": 181726176, "step": 84235 }, { "epoch": 13.742251223491028, "grad_norm": 0.006968729663640261, "learning_rate": 0.0002697383448655393, "loss": 0.0597, "num_input_tokens_seen": 181736448, "step": 84240 }, { "epoch": 13.743066884176184, "grad_norm": 0.23762120306491852, "learning_rate": 0.00026967516459950084, "loss": 0.0145, "num_input_tokens_seen": 181748064, "step": 84245 }, { "epoch": 13.743882544861338, "grad_norm": 0.01533246785402298, "learning_rate": 0.000269611989001287, "loss": 0.0039, "num_input_tokens_seen": 181758432, "step": 84250 }, { "epoch": 13.744698205546493, "grad_norm": 0.027753276750445366, "learning_rate": 0.0002695488180721789, "loss": 0.0055, "num_input_tokens_seen": 181767744, "step": 84255 }, { "epoch": 13.745513866231647, "grad_norm": 0.008072631433606148, "learning_rate": 0.0002694856518134559, "loss": 0.0173, "num_input_tokens_seen": 181779328, "step": 84260 }, { "epoch": 13.746329526916803, "grad_norm": 0.007466837763786316, "learning_rate": 0.000269422490226399, "loss": 0.0024, "num_input_tokens_seen": 181789056, "step": 84265 }, { "epoch": 13.747145187601957, "grad_norm": 0.002708770101889968, "learning_rate": 0.00026935933331228743, "loss": 0.0486, "num_input_tokens_seen": 181799104, "step": 84270 }, { "epoch": 13.747960848287113, "grad_norm": 0.008326053619384766, "learning_rate": 0.00026929618107240173, "loss": 0.0029, "num_input_tokens_seen": 181810496, "step": 84275 }, { "epoch": 13.748776508972268, "grad_norm": 0.3461647927761078, "learning_rate": 0.0002692330335080216, "loss": 0.0156, "num_input_tokens_seen": 181820928, "step": 84280 }, { "epoch": 13.749592169657422, "grad_norm": 0.01878945901989937, "learning_rate": 0.00026916989062042684, "loss": 0.0058, "num_input_tokens_seen": 181832320, "step": 84285 }, { "epoch": 13.750407830342578, "grad_norm": 0.3827398419380188, "learning_rate": 0.0002691067524108971, "loss": 0.1361, "num_input_tokens_seen": 181844320, "step": 84290 }, { "epoch": 13.751223491027732, "grad_norm": 0.006369706708937883, "learning_rate": 0.00026904361888071193, "loss": 0.0193, "num_input_tokens_seen": 181855616, "step": 84295 }, { "epoch": 13.752039151712887, "grad_norm": 0.006208473350852728, "learning_rate": 0.0002689804900311508, "loss": 0.1095, "num_input_tokens_seen": 181865408, "step": 84300 }, { "epoch": 13.752854812398043, "grad_norm": 0.08243457973003387, "learning_rate": 0.000268917365863493, "loss": 0.0085, "num_input_tokens_seen": 181876928, "step": 84305 }, { "epoch": 13.753670473083197, "grad_norm": 0.016273565590381622, "learning_rate": 0.000268854246379018, "loss": 0.0198, "num_input_tokens_seen": 181887936, "step": 84310 }, { "epoch": 13.754486133768353, "grad_norm": 0.0061238049529492855, "learning_rate": 0.00026879113157900496, "loss": 0.0514, "num_input_tokens_seen": 181898560, "step": 84315 }, { "epoch": 13.755301794453507, "grad_norm": 0.6165060997009277, "learning_rate": 0.00026872802146473296, "loss": 0.1354, "num_input_tokens_seen": 181908576, "step": 84320 }, { "epoch": 13.756117455138662, "grad_norm": 0.003234599018469453, "learning_rate": 0.0002686649160374808, "loss": 0.0134, "num_input_tokens_seen": 181918720, "step": 84325 }, { "epoch": 13.756933115823816, "grad_norm": 0.021253099665045738, "learning_rate": 0.0002686018152985279, "loss": 0.1741, "num_input_tokens_seen": 181929792, "step": 84330 }, { "epoch": 13.757748776508972, "grad_norm": 0.5081422328948975, "learning_rate": 0.0002685387192491524, "loss": 0.1977, "num_input_tokens_seen": 181939488, "step": 84335 }, { "epoch": 13.758564437194128, "grad_norm": 0.16428063809871674, "learning_rate": 0.0002684756278906338, "loss": 0.1335, "num_input_tokens_seen": 181950368, "step": 84340 }, { "epoch": 13.759380097879282, "grad_norm": 0.14346139132976532, "learning_rate": 0.0002684125412242499, "loss": 0.0745, "num_input_tokens_seen": 181961408, "step": 84345 }, { "epoch": 13.760195758564437, "grad_norm": 0.43231579661369324, "learning_rate": 0.00026834945925128005, "loss": 0.0265, "num_input_tokens_seen": 181971200, "step": 84350 }, { "epoch": 13.761011419249591, "grad_norm": 0.021556392312049866, "learning_rate": 0.00026828638197300185, "loss": 0.0227, "num_input_tokens_seen": 181981280, "step": 84355 }, { "epoch": 13.761827079934747, "grad_norm": 0.009113860316574574, "learning_rate": 0.0002682233093906945, "loss": 0.1003, "num_input_tokens_seen": 181991744, "step": 84360 }, { "epoch": 13.762642740619903, "grad_norm": 0.14683818817138672, "learning_rate": 0.00026816024150563546, "loss": 0.0161, "num_input_tokens_seen": 182002752, "step": 84365 }, { "epoch": 13.763458401305057, "grad_norm": 0.08112190663814545, "learning_rate": 0.00026809717831910353, "loss": 0.1124, "num_input_tokens_seen": 182013536, "step": 84370 }, { "epoch": 13.764274061990212, "grad_norm": 0.8220617175102234, "learning_rate": 0.0002680341198323761, "loss": 0.0484, "num_input_tokens_seen": 182024640, "step": 84375 }, { "epoch": 13.765089722675366, "grad_norm": 0.016743751242756844, "learning_rate": 0.0002679710660467319, "loss": 0.0572, "num_input_tokens_seen": 182035232, "step": 84380 }, { "epoch": 13.765905383360522, "grad_norm": 0.0334903858602047, "learning_rate": 0.00026790801696344814, "loss": 0.0309, "num_input_tokens_seen": 182046496, "step": 84385 }, { "epoch": 13.766721044045678, "grad_norm": 0.007870622910559177, "learning_rate": 0.00026784497258380293, "loss": 0.0188, "num_input_tokens_seen": 182057856, "step": 84390 }, { "epoch": 13.767536704730832, "grad_norm": 0.08365266025066376, "learning_rate": 0.0002677819329090738, "loss": 0.0147, "num_input_tokens_seen": 182068672, "step": 84395 }, { "epoch": 13.768352365415987, "grad_norm": 0.10302074998617172, "learning_rate": 0.00026771889794053845, "loss": 0.1998, "num_input_tokens_seen": 182078656, "step": 84400 }, { "epoch": 13.769168026101141, "grad_norm": 0.9321720600128174, "learning_rate": 0.00026765586767947433, "loss": 0.0384, "num_input_tokens_seen": 182089696, "step": 84405 }, { "epoch": 13.769983686786297, "grad_norm": 0.054999202489852905, "learning_rate": 0.00026759284212715873, "loss": 0.0413, "num_input_tokens_seen": 182099360, "step": 84410 }, { "epoch": 13.770799347471453, "grad_norm": 0.38283318281173706, "learning_rate": 0.000267529821284869, "loss": 0.0314, "num_input_tokens_seen": 182110208, "step": 84415 }, { "epoch": 13.771615008156607, "grad_norm": 0.026212509721517563, "learning_rate": 0.0002674668051538824, "loss": 0.0213, "num_input_tokens_seen": 182120416, "step": 84420 }, { "epoch": 13.772430668841762, "grad_norm": 0.8862409591674805, "learning_rate": 0.0002674037937354761, "loss": 0.0236, "num_input_tokens_seen": 182131584, "step": 84425 }, { "epoch": 13.773246329526916, "grad_norm": 0.048360809683799744, "learning_rate": 0.00026734078703092684, "loss": 0.1486, "num_input_tokens_seen": 182141856, "step": 84430 }, { "epoch": 13.774061990212072, "grad_norm": 0.015967868268489838, "learning_rate": 0.0002672777850415117, "loss": 0.014, "num_input_tokens_seen": 182152896, "step": 84435 }, { "epoch": 13.774877650897226, "grad_norm": 0.013343348167836666, "learning_rate": 0.0002672147877685075, "loss": 0.0089, "num_input_tokens_seen": 182163552, "step": 84440 }, { "epoch": 13.775693311582382, "grad_norm": 0.08139657974243164, "learning_rate": 0.00026715179521319095, "loss": 0.0082, "num_input_tokens_seen": 182175648, "step": 84445 }, { "epoch": 13.776508972267537, "grad_norm": 0.0880366861820221, "learning_rate": 0.00026708880737683863, "loss": 0.0105, "num_input_tokens_seen": 182186496, "step": 84450 }, { "epoch": 13.777324632952691, "grad_norm": 0.009264902211725712, "learning_rate": 0.00026702582426072705, "loss": 0.0098, "num_input_tokens_seen": 182198144, "step": 84455 }, { "epoch": 13.778140293637847, "grad_norm": 0.01698261871933937, "learning_rate": 0.0002669628458661326, "loss": 0.0408, "num_input_tokens_seen": 182208928, "step": 84460 }, { "epoch": 13.778955954323001, "grad_norm": 0.03621743246912956, "learning_rate": 0.000266899872194332, "loss": 0.0128, "num_input_tokens_seen": 182219776, "step": 84465 }, { "epoch": 13.779771615008157, "grad_norm": 0.03539000451564789, "learning_rate": 0.0002668369032466009, "loss": 0.0043, "num_input_tokens_seen": 182229120, "step": 84470 }, { "epoch": 13.780587275693312, "grad_norm": 0.10664580017328262, "learning_rate": 0.0002667739390242161, "loss": 0.0851, "num_input_tokens_seen": 182238912, "step": 84475 }, { "epoch": 13.781402936378466, "grad_norm": 0.08549370616674423, "learning_rate": 0.00026671097952845284, "loss": 0.0828, "num_input_tokens_seen": 182249408, "step": 84480 }, { "epoch": 13.782218597063622, "grad_norm": 0.012559962458908558, "learning_rate": 0.00026664802476058803, "loss": 0.0358, "num_input_tokens_seen": 182260352, "step": 84485 }, { "epoch": 13.783034257748776, "grad_norm": 0.03886222094297409, "learning_rate": 0.00026658507472189654, "loss": 0.0787, "num_input_tokens_seen": 182270112, "step": 84490 }, { "epoch": 13.783849918433932, "grad_norm": 0.0020093030761927366, "learning_rate": 0.0002665221294136548, "loss": 0.0099, "num_input_tokens_seen": 182280736, "step": 84495 }, { "epoch": 13.784665579119086, "grad_norm": 0.012666499242186546, "learning_rate": 0.0002664591888371384, "loss": 0.0393, "num_input_tokens_seen": 182292064, "step": 84500 }, { "epoch": 13.785481239804241, "grad_norm": 1.1819003820419312, "learning_rate": 0.00026639625299362276, "loss": 0.0645, "num_input_tokens_seen": 182303328, "step": 84505 }, { "epoch": 13.786296900489397, "grad_norm": 0.07328283786773682, "learning_rate": 0.00026633332188438335, "loss": 0.009, "num_input_tokens_seen": 182313376, "step": 84510 }, { "epoch": 13.78711256117455, "grad_norm": 0.11880190670490265, "learning_rate": 0.00026627039551069563, "loss": 0.0147, "num_input_tokens_seen": 182324608, "step": 84515 }, { "epoch": 13.787928221859707, "grad_norm": 0.007523844949901104, "learning_rate": 0.00026620747387383494, "loss": 0.1704, "num_input_tokens_seen": 182334784, "step": 84520 }, { "epoch": 13.78874388254486, "grad_norm": 0.4598270058631897, "learning_rate": 0.0002661445569750762, "loss": 0.0978, "num_input_tokens_seen": 182345504, "step": 84525 }, { "epoch": 13.789559543230016, "grad_norm": 0.21005965769290924, "learning_rate": 0.00026608164481569486, "loss": 0.0502, "num_input_tokens_seen": 182356288, "step": 84530 }, { "epoch": 13.790375203915172, "grad_norm": 0.4009250998497009, "learning_rate": 0.0002660187373969656, "loss": 0.0462, "num_input_tokens_seen": 182368192, "step": 84535 }, { "epoch": 13.791190864600326, "grad_norm": 0.05589642748236656, "learning_rate": 0.00026595583472016355, "loss": 0.1012, "num_input_tokens_seen": 182380096, "step": 84540 }, { "epoch": 13.792006525285482, "grad_norm": 0.025353606790304184, "learning_rate": 0.00026589293678656336, "loss": 0.0068, "num_input_tokens_seen": 182391072, "step": 84545 }, { "epoch": 13.792822185970635, "grad_norm": 0.6768189072608948, "learning_rate": 0.0002658300435974398, "loss": 0.1179, "num_input_tokens_seen": 182400096, "step": 84550 }, { "epoch": 13.793637846655791, "grad_norm": 0.09683632105588913, "learning_rate": 0.00026576715515406747, "loss": 0.0147, "num_input_tokens_seen": 182411616, "step": 84555 }, { "epoch": 13.794453507340947, "grad_norm": 0.007288573309779167, "learning_rate": 0.0002657042714577209, "loss": 0.1293, "num_input_tokens_seen": 182424096, "step": 84560 }, { "epoch": 13.7952691680261, "grad_norm": 0.09776467829942703, "learning_rate": 0.0002656413925096745, "loss": 0.0121, "num_input_tokens_seen": 182434336, "step": 84565 }, { "epoch": 13.796084828711257, "grad_norm": 0.029282325878739357, "learning_rate": 0.00026557851831120254, "loss": 0.0115, "num_input_tokens_seen": 182445248, "step": 84570 }, { "epoch": 13.79690048939641, "grad_norm": 0.03231930732727051, "learning_rate": 0.00026551564886357937, "loss": 0.0147, "num_input_tokens_seen": 182455968, "step": 84575 }, { "epoch": 13.797716150081566, "grad_norm": 0.026240963488817215, "learning_rate": 0.00026545278416807895, "loss": 0.0345, "num_input_tokens_seen": 182467328, "step": 84580 }, { "epoch": 13.798531810766722, "grad_norm": 0.02004300430417061, "learning_rate": 0.00026538992422597547, "loss": 0.0747, "num_input_tokens_seen": 182476864, "step": 84585 }, { "epoch": 13.799347471451876, "grad_norm": 0.16587531566619873, "learning_rate": 0.0002653270690385428, "loss": 0.0326, "num_input_tokens_seen": 182488224, "step": 84590 }, { "epoch": 13.800163132137031, "grad_norm": 0.4353727400302887, "learning_rate": 0.00026526421860705474, "loss": 0.0323, "num_input_tokens_seen": 182499904, "step": 84595 }, { "epoch": 13.800978792822185, "grad_norm": 0.04856307804584503, "learning_rate": 0.0002652013729327849, "loss": 0.0099, "num_input_tokens_seen": 182511040, "step": 84600 }, { "epoch": 13.801794453507341, "grad_norm": 0.007672377862036228, "learning_rate": 0.00026513853201700727, "loss": 0.0166, "num_input_tokens_seen": 182521472, "step": 84605 }, { "epoch": 13.802610114192497, "grad_norm": 0.2658443748950958, "learning_rate": 0.00026507569586099527, "loss": 0.1093, "num_input_tokens_seen": 182531264, "step": 84610 }, { "epoch": 13.80342577487765, "grad_norm": 0.7913045883178711, "learning_rate": 0.0002650128644660223, "loss": 0.0555, "num_input_tokens_seen": 182542208, "step": 84615 }, { "epoch": 13.804241435562806, "grad_norm": 0.2998761534690857, "learning_rate": 0.0002649500378333617, "loss": 0.0124, "num_input_tokens_seen": 182552640, "step": 84620 }, { "epoch": 13.80505709624796, "grad_norm": 0.22462299466133118, "learning_rate": 0.0002648872159642868, "loss": 0.0256, "num_input_tokens_seen": 182564320, "step": 84625 }, { "epoch": 13.805872756933116, "grad_norm": 0.02655922994017601, "learning_rate": 0.00026482439886007077, "loss": 0.1905, "num_input_tokens_seen": 182575296, "step": 84630 }, { "epoch": 13.80668841761827, "grad_norm": 0.23379771411418915, "learning_rate": 0.00026476158652198655, "loss": 0.0178, "num_input_tokens_seen": 182586176, "step": 84635 }, { "epoch": 13.807504078303426, "grad_norm": 0.01736249215900898, "learning_rate": 0.00026469877895130727, "loss": 0.1519, "num_input_tokens_seen": 182596800, "step": 84640 }, { "epoch": 13.808319738988581, "grad_norm": 0.04855513200163841, "learning_rate": 0.00026463597614930575, "loss": 0.0094, "num_input_tokens_seen": 182607488, "step": 84645 }, { "epoch": 13.809135399673735, "grad_norm": 0.047876644879579544, "learning_rate": 0.00026457317811725466, "loss": 0.0048, "num_input_tokens_seen": 182619616, "step": 84650 }, { "epoch": 13.809951060358891, "grad_norm": 0.05628875270485878, "learning_rate": 0.00026451038485642687, "loss": 0.0544, "num_input_tokens_seen": 182630752, "step": 84655 }, { "epoch": 13.810766721044045, "grad_norm": 0.3679070770740509, "learning_rate": 0.0002644475963680948, "loss": 0.0191, "num_input_tokens_seen": 182641696, "step": 84660 }, { "epoch": 13.8115823817292, "grad_norm": 0.006516613531857729, "learning_rate": 0.0002643848126535311, "loss": 0.026, "num_input_tokens_seen": 182652032, "step": 84665 }, { "epoch": 13.812398042414356, "grad_norm": 0.04664992168545723, "learning_rate": 0.000264322033714008, "loss": 0.0183, "num_input_tokens_seen": 182663360, "step": 84670 }, { "epoch": 13.81321370309951, "grad_norm": 0.010171446949243546, "learning_rate": 0.0002642592595507979, "loss": 0.0122, "num_input_tokens_seen": 182673728, "step": 84675 }, { "epoch": 13.814029363784666, "grad_norm": 0.5111817717552185, "learning_rate": 0.0002641964901651729, "loss": 0.034, "num_input_tokens_seen": 182685376, "step": 84680 }, { "epoch": 13.81484502446982, "grad_norm": 0.0067549822852015495, "learning_rate": 0.0002641337255584052, "loss": 0.03, "num_input_tokens_seen": 182695520, "step": 84685 }, { "epoch": 13.815660685154976, "grad_norm": 0.059583164751529694, "learning_rate": 0.0002640709657317668, "loss": 0.0287, "num_input_tokens_seen": 182706304, "step": 84690 }, { "epoch": 13.81647634584013, "grad_norm": 0.8409555554389954, "learning_rate": 0.0002640082106865295, "loss": 0.058, "num_input_tokens_seen": 182718464, "step": 84695 }, { "epoch": 13.817292006525285, "grad_norm": 0.01179381925612688, "learning_rate": 0.00026394546042396525, "loss": 0.0292, "num_input_tokens_seen": 182729984, "step": 84700 }, { "epoch": 13.818107667210441, "grad_norm": 0.2571028172969818, "learning_rate": 0.0002638827149453457, "loss": 0.0165, "num_input_tokens_seen": 182740512, "step": 84705 }, { "epoch": 13.818923327895595, "grad_norm": 0.006162076257169247, "learning_rate": 0.0002638199742519425, "loss": 0.0204, "num_input_tokens_seen": 182751808, "step": 84710 }, { "epoch": 13.81973898858075, "grad_norm": 0.22314994037151337, "learning_rate": 0.00026375723834502686, "loss": 0.0339, "num_input_tokens_seen": 182761632, "step": 84715 }, { "epoch": 13.820554649265905, "grad_norm": 0.04462944716215134, "learning_rate": 0.0002636945072258709, "loss": 0.0501, "num_input_tokens_seen": 182772992, "step": 84720 }, { "epoch": 13.82137030995106, "grad_norm": 0.0057084821164608, "learning_rate": 0.00026363178089574516, "loss": 0.0039, "num_input_tokens_seen": 182782240, "step": 84725 }, { "epoch": 13.822185970636216, "grad_norm": 1.1697832345962524, "learning_rate": 0.0002635690593559216, "loss": 0.2213, "num_input_tokens_seen": 182792960, "step": 84730 }, { "epoch": 13.82300163132137, "grad_norm": 0.007867292501032352, "learning_rate": 0.0002635063426076706, "loss": 0.0418, "num_input_tokens_seen": 182802656, "step": 84735 }, { "epoch": 13.823817292006526, "grad_norm": 0.3440064489841461, "learning_rate": 0.000263443630652264, "loss": 0.0194, "num_input_tokens_seen": 182814336, "step": 84740 }, { "epoch": 13.82463295269168, "grad_norm": 0.08412549644708633, "learning_rate": 0.00026338092349097186, "loss": 0.025, "num_input_tokens_seen": 182825760, "step": 84745 }, { "epoch": 13.825448613376835, "grad_norm": 0.01634236052632332, "learning_rate": 0.00026331822112506576, "loss": 0.0562, "num_input_tokens_seen": 182836256, "step": 84750 }, { "epoch": 13.826264274061991, "grad_norm": 0.015150834806263447, "learning_rate": 0.0002632555235558161, "loss": 0.0122, "num_input_tokens_seen": 182846208, "step": 84755 }, { "epoch": 13.827079934747145, "grad_norm": 0.7708441019058228, "learning_rate": 0.00026319283078449365, "loss": 0.0751, "num_input_tokens_seen": 182857216, "step": 84760 }, { "epoch": 13.8278955954323, "grad_norm": 0.014530989341437817, "learning_rate": 0.0002631301428123688, "loss": 0.0443, "num_input_tokens_seen": 182866720, "step": 84765 }, { "epoch": 13.828711256117455, "grad_norm": 0.008454387076199055, "learning_rate": 0.00026306745964071223, "loss": 0.0108, "num_input_tokens_seen": 182876320, "step": 84770 }, { "epoch": 13.82952691680261, "grad_norm": 0.0020582762081176043, "learning_rate": 0.00026300478127079405, "loss": 0.0038, "num_input_tokens_seen": 182888512, "step": 84775 }, { "epoch": 13.830342577487766, "grad_norm": 0.020112022757530212, "learning_rate": 0.0002629421077038846, "loss": 0.0331, "num_input_tokens_seen": 182899520, "step": 84780 }, { "epoch": 13.83115823817292, "grad_norm": 0.09580262005329132, "learning_rate": 0.00026287943894125415, "loss": 0.019, "num_input_tokens_seen": 182911840, "step": 84785 }, { "epoch": 13.831973898858076, "grad_norm": 5.315633296966553, "learning_rate": 0.0002628167749841727, "loss": 0.0566, "num_input_tokens_seen": 182924192, "step": 84790 }, { "epoch": 13.83278955954323, "grad_norm": 0.6234015226364136, "learning_rate": 0.0002627541158339101, "loss": 0.031, "num_input_tokens_seen": 182936032, "step": 84795 }, { "epoch": 13.833605220228385, "grad_norm": 0.11040055006742477, "learning_rate": 0.0002626914614917364, "loss": 0.0968, "num_input_tokens_seen": 182945920, "step": 84800 }, { "epoch": 13.83442088091354, "grad_norm": 0.0038626338355243206, "learning_rate": 0.0002626288119589212, "loss": 0.0042, "num_input_tokens_seen": 182956416, "step": 84805 }, { "epoch": 13.835236541598695, "grad_norm": 0.04011989384889603, "learning_rate": 0.0002625661672367343, "loss": 0.0033, "num_input_tokens_seen": 182967744, "step": 84810 }, { "epoch": 13.83605220228385, "grad_norm": 0.5366232395172119, "learning_rate": 0.00026250352732644524, "loss": 0.024, "num_input_tokens_seen": 182977664, "step": 84815 }, { "epoch": 13.836867862969005, "grad_norm": 0.01423671655356884, "learning_rate": 0.0002624408922293232, "loss": 0.0102, "num_input_tokens_seen": 182987552, "step": 84820 }, { "epoch": 13.83768352365416, "grad_norm": 0.1561962217092514, "learning_rate": 0.0002623782619466383, "loss": 0.0184, "num_input_tokens_seen": 182998464, "step": 84825 }, { "epoch": 13.838499184339314, "grad_norm": 0.4483034610748291, "learning_rate": 0.00026231563647965896, "loss": 0.0186, "num_input_tokens_seen": 183009440, "step": 84830 }, { "epoch": 13.83931484502447, "grad_norm": 0.015007806941866875, "learning_rate": 0.00026225301582965524, "loss": 0.0365, "num_input_tokens_seen": 183021248, "step": 84835 }, { "epoch": 13.840130505709626, "grad_norm": 0.014108742587268353, "learning_rate": 0.0002621903999978953, "loss": 0.0772, "num_input_tokens_seen": 183032128, "step": 84840 }, { "epoch": 13.84094616639478, "grad_norm": 0.004829798825085163, "learning_rate": 0.0002621277889856489, "loss": 0.015, "num_input_tokens_seen": 183042528, "step": 84845 }, { "epoch": 13.841761827079935, "grad_norm": 0.12998448312282562, "learning_rate": 0.0002620651827941843, "loss": 0.007, "num_input_tokens_seen": 183052032, "step": 84850 }, { "epoch": 13.84257748776509, "grad_norm": 0.004538218956440687, "learning_rate": 0.00026200258142477107, "loss": 0.0428, "num_input_tokens_seen": 183063264, "step": 84855 }, { "epoch": 13.843393148450245, "grad_norm": 0.016471046954393387, "learning_rate": 0.00026193998487867697, "loss": 0.0983, "num_input_tokens_seen": 183072608, "step": 84860 }, { "epoch": 13.844208809135399, "grad_norm": 0.00758066400885582, "learning_rate": 0.0002618773931571715, "loss": 0.0037, "num_input_tokens_seen": 183082976, "step": 84865 }, { "epoch": 13.845024469820554, "grad_norm": 0.10973238945007324, "learning_rate": 0.00026181480626152236, "loss": 0.0072, "num_input_tokens_seen": 183094240, "step": 84870 }, { "epoch": 13.84584013050571, "grad_norm": 0.04885505139827728, "learning_rate": 0.0002617522241929987, "loss": 0.0036, "num_input_tokens_seen": 183104032, "step": 84875 }, { "epoch": 13.846655791190864, "grad_norm": 0.9182645678520203, "learning_rate": 0.0002616896469528681, "loss": 0.0523, "num_input_tokens_seen": 183113536, "step": 84880 }, { "epoch": 13.84747145187602, "grad_norm": 0.2846318483352661, "learning_rate": 0.00026162707454239944, "loss": 0.0099, "num_input_tokens_seen": 183123904, "step": 84885 }, { "epoch": 13.848287112561174, "grad_norm": 0.004203853663057089, "learning_rate": 0.00026156450696286014, "loss": 0.0156, "num_input_tokens_seen": 183135264, "step": 84890 }, { "epoch": 13.84910277324633, "grad_norm": 0.27823206782341003, "learning_rate": 0.0002615019442155189, "loss": 0.0198, "num_input_tokens_seen": 183145184, "step": 84895 }, { "epoch": 13.849918433931485, "grad_norm": 0.048070620745420456, "learning_rate": 0.00026143938630164316, "loss": 0.1797, "num_input_tokens_seen": 183156832, "step": 84900 }, { "epoch": 13.850734094616639, "grad_norm": 0.4290146231651306, "learning_rate": 0.00026137683322250094, "loss": 0.0181, "num_input_tokens_seen": 183167808, "step": 84905 }, { "epoch": 13.851549755301795, "grad_norm": 0.003850239561870694, "learning_rate": 0.00026131428497935995, "loss": 0.0069, "num_input_tokens_seen": 183177376, "step": 84910 }, { "epoch": 13.852365415986949, "grad_norm": 0.0018150180112570524, "learning_rate": 0.0002612517415734877, "loss": 0.0022, "num_input_tokens_seen": 183188640, "step": 84915 }, { "epoch": 13.853181076672104, "grad_norm": 0.014922475442290306, "learning_rate": 0.00026118920300615187, "loss": 0.0059, "num_input_tokens_seen": 183198400, "step": 84920 }, { "epoch": 13.85399673735726, "grad_norm": 0.002802840434014797, "learning_rate": 0.0002611266692786197, "loss": 0.0017, "num_input_tokens_seen": 183209088, "step": 84925 }, { "epoch": 13.854812398042414, "grad_norm": 0.029686229303479195, "learning_rate": 0.00026106414039215865, "loss": 0.0067, "num_input_tokens_seen": 183219616, "step": 84930 }, { "epoch": 13.85562805872757, "grad_norm": 0.019623778760433197, "learning_rate": 0.00026100161634803594, "loss": 0.0172, "num_input_tokens_seen": 183231136, "step": 84935 }, { "epoch": 13.856443719412724, "grad_norm": 0.04738049954175949, "learning_rate": 0.0002609390971475186, "loss": 0.0242, "num_input_tokens_seen": 183242624, "step": 84940 }, { "epoch": 13.85725938009788, "grad_norm": 0.017074188217520714, "learning_rate": 0.00026087658279187357, "loss": 0.0166, "num_input_tokens_seen": 183253920, "step": 84945 }, { "epoch": 13.858075040783035, "grad_norm": 0.004167445003986359, "learning_rate": 0.0002608140732823684, "loss": 0.0249, "num_input_tokens_seen": 183264960, "step": 84950 }, { "epoch": 13.858890701468189, "grad_norm": 0.0036031820345669985, "learning_rate": 0.00026075156862026896, "loss": 0.0146, "num_input_tokens_seen": 183275104, "step": 84955 }, { "epoch": 13.859706362153345, "grad_norm": 0.7869503498077393, "learning_rate": 0.00026068906880684297, "loss": 0.0405, "num_input_tokens_seen": 183286240, "step": 84960 }, { "epoch": 13.860522022838499, "grad_norm": 0.8136313557624817, "learning_rate": 0.0002606265738433561, "loss": 0.0253, "num_input_tokens_seen": 183296800, "step": 84965 }, { "epoch": 13.861337683523654, "grad_norm": 0.004673221614211798, "learning_rate": 0.0002605640837310758, "loss": 0.0067, "num_input_tokens_seen": 183307136, "step": 84970 }, { "epoch": 13.86215334420881, "grad_norm": 0.010181387886404991, "learning_rate": 0.0002605015984712678, "loss": 0.008, "num_input_tokens_seen": 183318048, "step": 84975 }, { "epoch": 13.862969004893964, "grad_norm": 0.007334980182349682, "learning_rate": 0.000260439118065199, "loss": 0.0212, "num_input_tokens_seen": 183328576, "step": 84980 }, { "epoch": 13.86378466557912, "grad_norm": 0.02514778822660446, "learning_rate": 0.000260376642514135, "loss": 0.027, "num_input_tokens_seen": 183339040, "step": 84985 }, { "epoch": 13.864600326264274, "grad_norm": 0.030100977048277855, "learning_rate": 0.00026031417181934276, "loss": 0.0161, "num_input_tokens_seen": 183349376, "step": 84990 }, { "epoch": 13.86541598694943, "grad_norm": 0.03766891360282898, "learning_rate": 0.0002602517059820875, "loss": 0.0134, "num_input_tokens_seen": 183360480, "step": 84995 }, { "epoch": 13.866231647634583, "grad_norm": 0.010510461404919624, "learning_rate": 0.0002601892450036359, "loss": 0.0019, "num_input_tokens_seen": 183370272, "step": 85000 }, { "epoch": 13.867047308319739, "grad_norm": 0.005300341174006462, "learning_rate": 0.0002601267888852531, "loss": 0.0024, "num_input_tokens_seen": 183381184, "step": 85005 }, { "epoch": 13.867862969004895, "grad_norm": 0.046516548842191696, "learning_rate": 0.0002600643376282056, "loss": 0.0084, "num_input_tokens_seen": 183391200, "step": 85010 }, { "epoch": 13.868678629690049, "grad_norm": 0.10458040237426758, "learning_rate": 0.0002600018912337584, "loss": 0.0275, "num_input_tokens_seen": 183403616, "step": 85015 }, { "epoch": 13.869494290375204, "grad_norm": 0.004375677090138197, "learning_rate": 0.00025993944970317763, "loss": 0.0094, "num_input_tokens_seen": 183415840, "step": 85020 }, { "epoch": 13.870309951060358, "grad_norm": 0.054568637162446976, "learning_rate": 0.00025987701303772806, "loss": 0.0089, "num_input_tokens_seen": 183426272, "step": 85025 }, { "epoch": 13.871125611745514, "grad_norm": 0.004253702703863382, "learning_rate": 0.00025981458123867566, "loss": 0.003, "num_input_tokens_seen": 183436512, "step": 85030 }, { "epoch": 13.87194127243067, "grad_norm": 0.07876302301883698, "learning_rate": 0.0002597521543072854, "loss": 0.012, "num_input_tokens_seen": 183446944, "step": 85035 }, { "epoch": 13.872756933115824, "grad_norm": 0.010667159222066402, "learning_rate": 0.00025968973224482257, "loss": 0.0042, "num_input_tokens_seen": 183458080, "step": 85040 }, { "epoch": 13.87357259380098, "grad_norm": 0.08021771162748337, "learning_rate": 0.00025962731505255215, "loss": 0.0038, "num_input_tokens_seen": 183469280, "step": 85045 }, { "epoch": 13.874388254486133, "grad_norm": 0.3150395452976227, "learning_rate": 0.0002595649027317392, "loss": 0.0334, "num_input_tokens_seen": 183480480, "step": 85050 }, { "epoch": 13.875203915171289, "grad_norm": 0.08894519507884979, "learning_rate": 0.0002595024952836484, "loss": 0.0042, "num_input_tokens_seen": 183491648, "step": 85055 }, { "epoch": 13.876019575856443, "grad_norm": 0.023155847564339638, "learning_rate": 0.00025944009270954463, "loss": 0.002, "num_input_tokens_seen": 183502272, "step": 85060 }, { "epoch": 13.876835236541599, "grad_norm": 0.026067351922392845, "learning_rate": 0.00025937769501069264, "loss": 0.0791, "num_input_tokens_seen": 183512352, "step": 85065 }, { "epoch": 13.877650897226754, "grad_norm": 0.0032250231597572565, "learning_rate": 0.00025931530218835684, "loss": 0.0132, "num_input_tokens_seen": 183522880, "step": 85070 }, { "epoch": 13.878466557911908, "grad_norm": 0.06656479090452194, "learning_rate": 0.00025925291424380183, "loss": 0.0086, "num_input_tokens_seen": 183533088, "step": 85075 }, { "epoch": 13.879282218597064, "grad_norm": 0.9557048678398132, "learning_rate": 0.00025919053117829185, "loss": 0.0602, "num_input_tokens_seen": 183542976, "step": 85080 }, { "epoch": 13.880097879282218, "grad_norm": 0.015338104218244553, "learning_rate": 0.0002591281529930913, "loss": 0.0022, "num_input_tokens_seen": 183552448, "step": 85085 }, { "epoch": 13.880913539967374, "grad_norm": 0.371650367975235, "learning_rate": 0.0002590657796894641, "loss": 0.0996, "num_input_tokens_seen": 183563200, "step": 85090 }, { "epoch": 13.88172920065253, "grad_norm": 0.009572053328156471, "learning_rate": 0.0002590034112686749, "loss": 0.0784, "num_input_tokens_seen": 183574560, "step": 85095 }, { "epoch": 13.882544861337683, "grad_norm": 0.004172234795987606, "learning_rate": 0.0002589410477319869, "loss": 0.1418, "num_input_tokens_seen": 183585728, "step": 85100 }, { "epoch": 13.883360522022839, "grad_norm": 0.009775524958968163, "learning_rate": 0.0002588786890806647, "loss": 0.0146, "num_input_tokens_seen": 183597536, "step": 85105 }, { "epoch": 13.884176182707993, "grad_norm": 0.1955619603395462, "learning_rate": 0.0002588163353159715, "loss": 0.011, "num_input_tokens_seen": 183608448, "step": 85110 }, { "epoch": 13.884991843393149, "grad_norm": 0.004827131051570177, "learning_rate": 0.00025875398643917147, "loss": 0.01, "num_input_tokens_seen": 183619392, "step": 85115 }, { "epoch": 13.885807504078304, "grad_norm": 0.010891993530094624, "learning_rate": 0.00025869164245152765, "loss": 0.0017, "num_input_tokens_seen": 183630624, "step": 85120 }, { "epoch": 13.886623164763458, "grad_norm": 0.011002853512763977, "learning_rate": 0.00025862930335430426, "loss": 0.0528, "num_input_tokens_seen": 183641088, "step": 85125 }, { "epoch": 13.887438825448614, "grad_norm": 0.15751543641090393, "learning_rate": 0.0002585669691487637, "loss": 0.1074, "num_input_tokens_seen": 183652160, "step": 85130 }, { "epoch": 13.888254486133768, "grad_norm": 0.0365523099899292, "learning_rate": 0.00025850463983617005, "loss": 0.0059, "num_input_tokens_seen": 183661920, "step": 85135 }, { "epoch": 13.889070146818923, "grad_norm": 0.023331336677074432, "learning_rate": 0.0002584423154177863, "loss": 0.0056, "num_input_tokens_seen": 183672736, "step": 85140 }, { "epoch": 13.88988580750408, "grad_norm": 0.022763198241591454, "learning_rate": 0.0002583799958948754, "loss": 0.0229, "num_input_tokens_seen": 183682400, "step": 85145 }, { "epoch": 13.890701468189233, "grad_norm": 0.5636934041976929, "learning_rate": 0.00025831768126870035, "loss": 0.0142, "num_input_tokens_seen": 183692512, "step": 85150 }, { "epoch": 13.891517128874389, "grad_norm": 0.03606187924742699, "learning_rate": 0.00025825537154052414, "loss": 0.0154, "num_input_tokens_seen": 183703328, "step": 85155 }, { "epoch": 13.892332789559543, "grad_norm": 0.0020342785865068436, "learning_rate": 0.00025819306671160953, "loss": 0.0828, "num_input_tokens_seen": 183712864, "step": 85160 }, { "epoch": 13.893148450244698, "grad_norm": 0.007612097542732954, "learning_rate": 0.00025813076678321914, "loss": 0.0066, "num_input_tokens_seen": 183723936, "step": 85165 }, { "epoch": 13.893964110929852, "grad_norm": 0.7555127143859863, "learning_rate": 0.0002580684717566156, "loss": 0.0286, "num_input_tokens_seen": 183734048, "step": 85170 }, { "epoch": 13.894779771615008, "grad_norm": 0.6424114108085632, "learning_rate": 0.0002580061816330614, "loss": 0.078, "num_input_tokens_seen": 183744320, "step": 85175 }, { "epoch": 13.895595432300164, "grad_norm": 0.004591791890561581, "learning_rate": 0.00025794389641381894, "loss": 0.0063, "num_input_tokens_seen": 183755232, "step": 85180 }, { "epoch": 13.896411092985318, "grad_norm": 0.01061215903609991, "learning_rate": 0.0002578816161001505, "loss": 0.0033, "num_input_tokens_seen": 183765760, "step": 85185 }, { "epoch": 13.897226753670473, "grad_norm": 0.05765000730752945, "learning_rate": 0.0002578193406933182, "loss": 0.0139, "num_input_tokens_seen": 183777312, "step": 85190 }, { "epoch": 13.898042414355627, "grad_norm": 0.11763758957386017, "learning_rate": 0.00025775707019458415, "loss": 0.0107, "num_input_tokens_seen": 183787232, "step": 85195 }, { "epoch": 13.898858075040783, "grad_norm": 0.01747831515967846, "learning_rate": 0.0002576948046052105, "loss": 0.0102, "num_input_tokens_seen": 183796896, "step": 85200 }, { "epoch": 13.899673735725939, "grad_norm": 1.5811023712158203, "learning_rate": 0.000257632543926459, "loss": 0.1022, "num_input_tokens_seen": 183808032, "step": 85205 }, { "epoch": 13.900489396411093, "grad_norm": 0.04049277678132057, "learning_rate": 0.0002575702881595914, "loss": 0.0047, "num_input_tokens_seen": 183818304, "step": 85210 }, { "epoch": 13.901305057096248, "grad_norm": 0.004191242624074221, "learning_rate": 0.0002575080373058695, "loss": 0.0024, "num_input_tokens_seen": 183827680, "step": 85215 }, { "epoch": 13.902120717781402, "grad_norm": 0.8770870566368103, "learning_rate": 0.0002574457913665548, "loss": 0.0974, "num_input_tokens_seen": 183837856, "step": 85220 }, { "epoch": 13.902936378466558, "grad_norm": 0.002213303931057453, "learning_rate": 0.00025738355034290886, "loss": 0.0036, "num_input_tokens_seen": 183848448, "step": 85225 }, { "epoch": 13.903752039151712, "grad_norm": 0.016641533002257347, "learning_rate": 0.00025732131423619303, "loss": 0.1173, "num_input_tokens_seen": 183859136, "step": 85230 }, { "epoch": 13.904567699836868, "grad_norm": 0.01013999804854393, "learning_rate": 0.0002572590830476685, "loss": 0.0098, "num_input_tokens_seen": 183870368, "step": 85235 }, { "epoch": 13.905383360522023, "grad_norm": 0.0021036083344370127, "learning_rate": 0.0002571968567785967, "loss": 0.0033, "num_input_tokens_seen": 183881120, "step": 85240 }, { "epoch": 13.906199021207177, "grad_norm": 0.03816024586558342, "learning_rate": 0.0002571346354302387, "loss": 0.0387, "num_input_tokens_seen": 183892832, "step": 85245 }, { "epoch": 13.907014681892333, "grad_norm": 0.6738782525062561, "learning_rate": 0.0002570724190038554, "loss": 0.3048, "num_input_tokens_seen": 183902528, "step": 85250 }, { "epoch": 13.907830342577487, "grad_norm": 0.10546285659074783, "learning_rate": 0.00025701020750070765, "loss": 0.0068, "num_input_tokens_seen": 183912736, "step": 85255 }, { "epoch": 13.908646003262643, "grad_norm": 0.20645979046821594, "learning_rate": 0.0002569480009220563, "loss": 0.1511, "num_input_tokens_seen": 183923840, "step": 85260 }, { "epoch": 13.909461663947798, "grad_norm": 0.07774461060762405, "learning_rate": 0.00025688579926916213, "loss": 0.0059, "num_input_tokens_seen": 183934592, "step": 85265 }, { "epoch": 13.910277324632952, "grad_norm": 1.5161799192428589, "learning_rate": 0.0002568236025432855, "loss": 0.039, "num_input_tokens_seen": 183944256, "step": 85270 }, { "epoch": 13.911092985318108, "grad_norm": 0.08533214777708054, "learning_rate": 0.00025676141074568713, "loss": 0.0668, "num_input_tokens_seen": 183954336, "step": 85275 }, { "epoch": 13.911908646003262, "grad_norm": 0.11331785470247269, "learning_rate": 0.00025669922387762747, "loss": 0.2955, "num_input_tokens_seen": 183964736, "step": 85280 }, { "epoch": 13.912724306688418, "grad_norm": 0.8477551937103271, "learning_rate": 0.00025663704194036653, "loss": 0.1478, "num_input_tokens_seen": 183974336, "step": 85285 }, { "epoch": 13.913539967373573, "grad_norm": 0.032228752970695496, "learning_rate": 0.0002565748649351647, "loss": 0.0552, "num_input_tokens_seen": 183985376, "step": 85290 }, { "epoch": 13.914355628058727, "grad_norm": 0.06345382332801819, "learning_rate": 0.0002565126928632821, "loss": 0.006, "num_input_tokens_seen": 183997344, "step": 85295 }, { "epoch": 13.915171288743883, "grad_norm": 0.4068472385406494, "learning_rate": 0.00025645052572597856, "loss": 0.0168, "num_input_tokens_seen": 184008320, "step": 85300 }, { "epoch": 13.915986949429037, "grad_norm": 0.5403673648834229, "learning_rate": 0.0002563883635245141, "loss": 0.0322, "num_input_tokens_seen": 184020608, "step": 85305 }, { "epoch": 13.916802610114193, "grad_norm": 0.09667199850082397, "learning_rate": 0.0002563262062601486, "loss": 0.1014, "num_input_tokens_seen": 184031712, "step": 85310 }, { "epoch": 13.917618270799348, "grad_norm": 0.5479460954666138, "learning_rate": 0.0002562640539341415, "loss": 0.1371, "num_input_tokens_seen": 184041952, "step": 85315 }, { "epoch": 13.918433931484502, "grad_norm": 0.07276202738285065, "learning_rate": 0.0002562019065477527, "loss": 0.018, "num_input_tokens_seen": 184052416, "step": 85320 }, { "epoch": 13.919249592169658, "grad_norm": 0.04549545422196388, "learning_rate": 0.00025613976410224145, "loss": 0.0375, "num_input_tokens_seen": 184063904, "step": 85325 }, { "epoch": 13.920065252854812, "grad_norm": 0.17140315473079681, "learning_rate": 0.00025607762659886726, "loss": 0.0107, "num_input_tokens_seen": 184075392, "step": 85330 }, { "epoch": 13.920880913539968, "grad_norm": 0.027514692395925522, "learning_rate": 0.00025601549403888934, "loss": 0.0099, "num_input_tokens_seen": 184086016, "step": 85335 }, { "epoch": 13.921696574225122, "grad_norm": 0.01145249791443348, "learning_rate": 0.00025595336642356706, "loss": 0.0473, "num_input_tokens_seen": 184095200, "step": 85340 }, { "epoch": 13.922512234910277, "grad_norm": 0.03342977166175842, "learning_rate": 0.0002558912437541594, "loss": 0.0084, "num_input_tokens_seen": 184106560, "step": 85345 }, { "epoch": 13.923327895595433, "grad_norm": 0.018622571602463722, "learning_rate": 0.0002558291260319253, "loss": 0.0453, "num_input_tokens_seen": 184117184, "step": 85350 }, { "epoch": 13.924143556280587, "grad_norm": 0.026922546327114105, "learning_rate": 0.0002557670132581235, "loss": 0.0134, "num_input_tokens_seen": 184128640, "step": 85355 }, { "epoch": 13.924959216965743, "grad_norm": 0.04572325199842453, "learning_rate": 0.00025570490543401345, "loss": 0.0322, "num_input_tokens_seen": 184139520, "step": 85360 }, { "epoch": 13.925774877650896, "grad_norm": 0.021734047681093216, "learning_rate": 0.00025564280256085305, "loss": 0.0143, "num_input_tokens_seen": 184149408, "step": 85365 }, { "epoch": 13.926590538336052, "grad_norm": 0.005139812361449003, "learning_rate": 0.0002555807046399016, "loss": 0.0047, "num_input_tokens_seen": 184160480, "step": 85370 }, { "epoch": 13.927406199021208, "grad_norm": 0.036524008959531784, "learning_rate": 0.00025551861167241675, "loss": 0.0248, "num_input_tokens_seen": 184171328, "step": 85375 }, { "epoch": 13.928221859706362, "grad_norm": 0.06596231460571289, "learning_rate": 0.00025545652365965767, "loss": 0.0059, "num_input_tokens_seen": 184182432, "step": 85380 }, { "epoch": 13.929037520391518, "grad_norm": 0.6907486915588379, "learning_rate": 0.00025539444060288235, "loss": 0.0954, "num_input_tokens_seen": 184193088, "step": 85385 }, { "epoch": 13.929853181076671, "grad_norm": 0.16271479427814484, "learning_rate": 0.000255332362503349, "loss": 0.0228, "num_input_tokens_seen": 184203616, "step": 85390 }, { "epoch": 13.930668841761827, "grad_norm": 0.014104736968874931, "learning_rate": 0.00025527028936231567, "loss": 0.0092, "num_input_tokens_seen": 184213600, "step": 85395 }, { "epoch": 13.931484502446983, "grad_norm": 0.6544168591499329, "learning_rate": 0.0002552082211810405, "loss": 0.0539, "num_input_tokens_seen": 184225696, "step": 85400 }, { "epoch": 13.932300163132137, "grad_norm": 0.040624476969242096, "learning_rate": 0.0002551461579607811, "loss": 0.0045, "num_input_tokens_seen": 184237920, "step": 85405 }, { "epoch": 13.933115823817293, "grad_norm": 0.6469240784645081, "learning_rate": 0.00025508409970279554, "loss": 0.1079, "num_input_tokens_seen": 184249248, "step": 85410 }, { "epoch": 13.933931484502446, "grad_norm": 0.0067710126750171185, "learning_rate": 0.00025502204640834135, "loss": 0.0091, "num_input_tokens_seen": 184260000, "step": 85415 }, { "epoch": 13.934747145187602, "grad_norm": 0.019657712429761887, "learning_rate": 0.0002549599980786762, "loss": 0.0824, "num_input_tokens_seen": 184271424, "step": 85420 }, { "epoch": 13.935562805872756, "grad_norm": 0.04173409938812256, "learning_rate": 0.0002548979547150576, "loss": 0.0261, "num_input_tokens_seen": 184282816, "step": 85425 }, { "epoch": 13.936378466557912, "grad_norm": 0.0030247271060943604, "learning_rate": 0.0002548359163187428, "loss": 0.0265, "num_input_tokens_seen": 184295200, "step": 85430 }, { "epoch": 13.937194127243067, "grad_norm": 0.008232303895056248, "learning_rate": 0.0002547738828909891, "loss": 0.1576, "num_input_tokens_seen": 184306688, "step": 85435 }, { "epoch": 13.938009787928221, "grad_norm": 0.20450206100940704, "learning_rate": 0.0002547118544330539, "loss": 0.0224, "num_input_tokens_seen": 184317344, "step": 85440 }, { "epoch": 13.938825448613377, "grad_norm": 0.1727188676595688, "learning_rate": 0.0002546498309461941, "loss": 0.0181, "num_input_tokens_seen": 184328320, "step": 85445 }, { "epoch": 13.939641109298531, "grad_norm": 0.011204349808394909, "learning_rate": 0.00025458781243166667, "loss": 0.0059, "num_input_tokens_seen": 184338240, "step": 85450 }, { "epoch": 13.940456769983687, "grad_norm": 0.003051189938560128, "learning_rate": 0.0002545257988907286, "loss": 0.0799, "num_input_tokens_seen": 184348640, "step": 85455 }, { "epoch": 13.941272430668842, "grad_norm": 0.005785677116364241, "learning_rate": 0.0002544637903246364, "loss": 0.2112, "num_input_tokens_seen": 184357792, "step": 85460 }, { "epoch": 13.942088091353996, "grad_norm": 0.8650006651878357, "learning_rate": 0.0002544017867346474, "loss": 0.2204, "num_input_tokens_seen": 184367712, "step": 85465 }, { "epoch": 13.942903752039152, "grad_norm": 0.5510603189468384, "learning_rate": 0.0002543397881220173, "loss": 0.1623, "num_input_tokens_seen": 184377664, "step": 85470 }, { "epoch": 13.943719412724306, "grad_norm": 0.06131192669272423, "learning_rate": 0.00025427779448800345, "loss": 0.0247, "num_input_tokens_seen": 184388800, "step": 85475 }, { "epoch": 13.944535073409462, "grad_norm": 0.5136590600013733, "learning_rate": 0.0002542158058338615, "loss": 0.037, "num_input_tokens_seen": 184399584, "step": 85480 }, { "epoch": 13.945350734094617, "grad_norm": 0.004187280312180519, "learning_rate": 0.00025415382216084837, "loss": 0.005, "num_input_tokens_seen": 184410816, "step": 85485 }, { "epoch": 13.946166394779771, "grad_norm": 0.015266863629221916, "learning_rate": 0.0002540918434702195, "loss": 0.0044, "num_input_tokens_seen": 184423104, "step": 85490 }, { "epoch": 13.946982055464927, "grad_norm": 0.16170641779899597, "learning_rate": 0.0002540298697632318, "loss": 0.0114, "num_input_tokens_seen": 184432224, "step": 85495 }, { "epoch": 13.947797716150081, "grad_norm": 0.012515636160969734, "learning_rate": 0.0002539679010411404, "loss": 0.0061, "num_input_tokens_seen": 184442016, "step": 85500 }, { "epoch": 13.948613376835237, "grad_norm": 0.026593128219246864, "learning_rate": 0.00025390593730520206, "loss": 0.0104, "num_input_tokens_seen": 184453152, "step": 85505 }, { "epoch": 13.949429037520392, "grad_norm": 0.007874571718275547, "learning_rate": 0.00025384397855667164, "loss": 0.0075, "num_input_tokens_seen": 184463648, "step": 85510 }, { "epoch": 13.950244698205546, "grad_norm": 0.13989458978176117, "learning_rate": 0.0002537820247968057, "loss": 0.0726, "num_input_tokens_seen": 184474432, "step": 85515 }, { "epoch": 13.951060358890702, "grad_norm": 0.008087403140962124, "learning_rate": 0.00025372007602685894, "loss": 0.0182, "num_input_tokens_seen": 184484864, "step": 85520 }, { "epoch": 13.951876019575856, "grad_norm": 0.004844617564231157, "learning_rate": 0.00025365813224808746, "loss": 0.0042, "num_input_tokens_seen": 184495840, "step": 85525 }, { "epoch": 13.952691680261012, "grad_norm": 0.016140727326273918, "learning_rate": 0.00025359619346174644, "loss": 0.1186, "num_input_tokens_seen": 184507200, "step": 85530 }, { "epoch": 13.953507340946166, "grad_norm": 0.017165429890155792, "learning_rate": 0.0002535342596690912, "loss": 0.0059, "num_input_tokens_seen": 184517952, "step": 85535 }, { "epoch": 13.954323001631321, "grad_norm": 0.690234899520874, "learning_rate": 0.0002534723308713768, "loss": 0.1118, "num_input_tokens_seen": 184529664, "step": 85540 }, { "epoch": 13.955138662316477, "grad_norm": 0.007761821616441011, "learning_rate": 0.0002534104070698584, "loss": 0.0116, "num_input_tokens_seen": 184540960, "step": 85545 }, { "epoch": 13.955954323001631, "grad_norm": 0.46400514245033264, "learning_rate": 0.00025334848826579095, "loss": 0.0806, "num_input_tokens_seen": 184551168, "step": 85550 }, { "epoch": 13.956769983686787, "grad_norm": 0.006097679026424885, "learning_rate": 0.0002532865744604292, "loss": 0.0057, "num_input_tokens_seen": 184562016, "step": 85555 }, { "epoch": 13.95758564437194, "grad_norm": 0.048015836626291275, "learning_rate": 0.000253224665655028, "loss": 0.0085, "num_input_tokens_seen": 184571776, "step": 85560 }, { "epoch": 13.958401305057096, "grad_norm": 0.021363230422139168, "learning_rate": 0.0002531627618508421, "loss": 0.0267, "num_input_tokens_seen": 184582592, "step": 85565 }, { "epoch": 13.959216965742252, "grad_norm": 0.41622504591941833, "learning_rate": 0.00025310086304912584, "loss": 0.027, "num_input_tokens_seen": 184592672, "step": 85570 }, { "epoch": 13.960032626427406, "grad_norm": 0.054480284452438354, "learning_rate": 0.0002530389692511337, "loss": 0.0197, "num_input_tokens_seen": 184604128, "step": 85575 }, { "epoch": 13.960848287112562, "grad_norm": 0.06501181423664093, "learning_rate": 0.0002529770804581205, "loss": 0.0577, "num_input_tokens_seen": 184614080, "step": 85580 }, { "epoch": 13.961663947797716, "grad_norm": 0.08970603346824646, "learning_rate": 0.0002529151966713398, "loss": 0.0612, "num_input_tokens_seen": 184625408, "step": 85585 }, { "epoch": 13.962479608482871, "grad_norm": 0.043964315205812454, "learning_rate": 0.00025285331789204633, "loss": 0.0517, "num_input_tokens_seen": 184635776, "step": 85590 }, { "epoch": 13.963295269168025, "grad_norm": 0.24346177279949188, "learning_rate": 0.0002527914441214937, "loss": 0.0286, "num_input_tokens_seen": 184646688, "step": 85595 }, { "epoch": 13.964110929853181, "grad_norm": 0.025076303631067276, "learning_rate": 0.00025272957536093634, "loss": 0.0301, "num_input_tokens_seen": 184657184, "step": 85600 }, { "epoch": 13.964926590538337, "grad_norm": 0.058392155915498734, "learning_rate": 0.00025266771161162736, "loss": 0.1067, "num_input_tokens_seen": 184668384, "step": 85605 }, { "epoch": 13.96574225122349, "grad_norm": 0.007838203571736813, "learning_rate": 0.00025260585287482153, "loss": 0.005, "num_input_tokens_seen": 184679456, "step": 85610 }, { "epoch": 13.966557911908646, "grad_norm": 0.028121719136834145, "learning_rate": 0.0002525439991517714, "loss": 0.0297, "num_input_tokens_seen": 184689920, "step": 85615 }, { "epoch": 13.9673735725938, "grad_norm": 0.0064971777610480785, "learning_rate": 0.0002524821504437316, "loss": 0.0047, "num_input_tokens_seen": 184701216, "step": 85620 }, { "epoch": 13.968189233278956, "grad_norm": 0.01920584961771965, "learning_rate": 0.0002524203067519545, "loss": 0.019, "num_input_tokens_seen": 184711616, "step": 85625 }, { "epoch": 13.969004893964112, "grad_norm": 1.250619888305664, "learning_rate": 0.00025235846807769433, "loss": 0.0284, "num_input_tokens_seen": 184723488, "step": 85630 }, { "epoch": 13.969820554649266, "grad_norm": 0.060606032609939575, "learning_rate": 0.0002522966344222036, "loss": 0.0424, "num_input_tokens_seen": 184734176, "step": 85635 }, { "epoch": 13.970636215334421, "grad_norm": 0.12301360815763474, "learning_rate": 0.00025223480578673627, "loss": 0.0127, "num_input_tokens_seen": 184745824, "step": 85640 }, { "epoch": 13.971451876019575, "grad_norm": 0.07885907590389252, "learning_rate": 0.00025217298217254446, "loss": 0.0104, "num_input_tokens_seen": 184756480, "step": 85645 }, { "epoch": 13.97226753670473, "grad_norm": 0.013525017537176609, "learning_rate": 0.0002521111635808819, "loss": 0.0033, "num_input_tokens_seen": 184767712, "step": 85650 }, { "epoch": 13.973083197389887, "grad_norm": 0.005781926680356264, "learning_rate": 0.0002520493500130008, "loss": 0.0069, "num_input_tokens_seen": 184778336, "step": 85655 }, { "epoch": 13.97389885807504, "grad_norm": 0.005094463936984539, "learning_rate": 0.0002519875414701545, "loss": 0.0022, "num_input_tokens_seen": 184789152, "step": 85660 }, { "epoch": 13.974714518760196, "grad_norm": 0.04813092574477196, "learning_rate": 0.0002519257379535949, "loss": 0.0526, "num_input_tokens_seen": 184799552, "step": 85665 }, { "epoch": 13.97553017944535, "grad_norm": 0.016769248992204666, "learning_rate": 0.00025186393946457516, "loss": 0.0376, "num_input_tokens_seen": 184810976, "step": 85670 }, { "epoch": 13.976345840130506, "grad_norm": 0.1758248656988144, "learning_rate": 0.0002518021460043474, "loss": 0.0612, "num_input_tokens_seen": 184821600, "step": 85675 }, { "epoch": 13.977161500815662, "grad_norm": 0.008801092393696308, "learning_rate": 0.0002517403575741641, "loss": 0.0128, "num_input_tokens_seen": 184832096, "step": 85680 }, { "epoch": 13.977977161500815, "grad_norm": 0.01983959972858429, "learning_rate": 0.0002516785741752773, "loss": 0.0035, "num_input_tokens_seen": 184841856, "step": 85685 }, { "epoch": 13.978792822185971, "grad_norm": 0.007306755520403385, "learning_rate": 0.0002516167958089393, "loss": 0.1363, "num_input_tokens_seen": 184852992, "step": 85690 }, { "epoch": 13.979608482871125, "grad_norm": 0.011397710070014, "learning_rate": 0.00025155502247640196, "loss": 0.0229, "num_input_tokens_seen": 184863008, "step": 85695 }, { "epoch": 13.98042414355628, "grad_norm": 0.18774862587451935, "learning_rate": 0.0002514932541789173, "loss": 0.0655, "num_input_tokens_seen": 184874272, "step": 85700 }, { "epoch": 13.981239804241435, "grad_norm": 0.011745911091566086, "learning_rate": 0.0002514314909177371, "loss": 0.0149, "num_input_tokens_seen": 184885696, "step": 85705 }, { "epoch": 13.98205546492659, "grad_norm": 0.0074547333642840385, "learning_rate": 0.00025136973269411305, "loss": 0.0048, "num_input_tokens_seen": 184896928, "step": 85710 }, { "epoch": 13.982871125611746, "grad_norm": 0.04841725155711174, "learning_rate": 0.0002513079795092968, "loss": 0.0422, "num_input_tokens_seen": 184908832, "step": 85715 }, { "epoch": 13.9836867862969, "grad_norm": 0.015218867920339108, "learning_rate": 0.0002512462313645396, "loss": 0.0045, "num_input_tokens_seen": 184919744, "step": 85720 }, { "epoch": 13.984502446982056, "grad_norm": 0.09455284476280212, "learning_rate": 0.0002511844882610935, "loss": 0.016, "num_input_tokens_seen": 184931264, "step": 85725 }, { "epoch": 13.98531810766721, "grad_norm": 0.07039551436901093, "learning_rate": 0.00025112275020020903, "loss": 0.0172, "num_input_tokens_seen": 184942592, "step": 85730 }, { "epoch": 13.986133768352365, "grad_norm": 0.030298545956611633, "learning_rate": 0.0002510610171831381, "loss": 0.0065, "num_input_tokens_seen": 184953664, "step": 85735 }, { "epoch": 13.986949429037521, "grad_norm": 0.6195943355560303, "learning_rate": 0.00025099928921113113, "loss": 0.1241, "num_input_tokens_seen": 184964288, "step": 85740 }, { "epoch": 13.987765089722675, "grad_norm": 0.006955256685614586, "learning_rate": 0.0002509375662854397, "loss": 0.0972, "num_input_tokens_seen": 184974368, "step": 85745 }, { "epoch": 13.98858075040783, "grad_norm": 0.014941412024199963, "learning_rate": 0.0002508758484073142, "loss": 0.0145, "num_input_tokens_seen": 184985760, "step": 85750 }, { "epoch": 13.989396411092985, "grad_norm": 0.17936839163303375, "learning_rate": 0.00025081413557800604, "loss": 0.0058, "num_input_tokens_seen": 184996576, "step": 85755 }, { "epoch": 13.99021207177814, "grad_norm": 0.0017939393874257803, "learning_rate": 0.0002507524277987651, "loss": 0.0072, "num_input_tokens_seen": 185007712, "step": 85760 }, { "epoch": 13.991027732463294, "grad_norm": 0.9059780240058899, "learning_rate": 0.0002506907250708428, "loss": 0.0678, "num_input_tokens_seen": 185018912, "step": 85765 }, { "epoch": 13.99184339314845, "grad_norm": 0.003605630248785019, "learning_rate": 0.0002506290273954888, "loss": 0.1311, "num_input_tokens_seen": 185028000, "step": 85770 }, { "epoch": 13.992659053833606, "grad_norm": 0.04478728771209717, "learning_rate": 0.00025056733477395415, "loss": 0.0651, "num_input_tokens_seen": 185039680, "step": 85775 }, { "epoch": 13.99347471451876, "grad_norm": 0.017248261719942093, "learning_rate": 0.0002505056472074889, "loss": 0.1072, "num_input_tokens_seen": 185051744, "step": 85780 }, { "epoch": 13.994290375203915, "grad_norm": 0.4848167300224304, "learning_rate": 0.0002504439646973432, "loss": 0.1379, "num_input_tokens_seen": 185062656, "step": 85785 }, { "epoch": 13.99510603588907, "grad_norm": 1.1964749097824097, "learning_rate": 0.00025038228724476715, "loss": 0.1047, "num_input_tokens_seen": 185074048, "step": 85790 }, { "epoch": 13.995921696574225, "grad_norm": 0.14196431636810303, "learning_rate": 0.00025032061485101066, "loss": 0.0197, "num_input_tokens_seen": 185085856, "step": 85795 }, { "epoch": 13.99673735725938, "grad_norm": 0.01614018715918064, "learning_rate": 0.0002502589475173237, "loss": 0.1363, "num_input_tokens_seen": 185096960, "step": 85800 }, { "epoch": 13.997553017944535, "grad_norm": 0.07366901636123657, "learning_rate": 0.000250197285244956, "loss": 0.0148, "num_input_tokens_seen": 185108448, "step": 85805 }, { "epoch": 13.99836867862969, "grad_norm": 0.025252725929021835, "learning_rate": 0.0002501356280351572, "loss": 0.0102, "num_input_tokens_seen": 185119648, "step": 85810 }, { "epoch": 13.999184339314844, "grad_norm": 0.04379880055785179, "learning_rate": 0.00025007397588917683, "loss": 0.0359, "num_input_tokens_seen": 185129984, "step": 85815 }, { "epoch": 14.0, "grad_norm": 0.01049947738647461, "learning_rate": 0.0002500123288082644, "loss": 0.005, "num_input_tokens_seen": 185139040, "step": 85820 }, { "epoch": 14.0, "eval_loss": 0.170640766620636, "eval_runtime": 104.6533, "eval_samples_per_second": 26.038, "eval_steps_per_second": 6.517, "num_input_tokens_seen": 185139040, "step": 85820 }, { "epoch": 14.000815660685156, "grad_norm": 0.010833518579602242, "learning_rate": 0.00024995068679366933, "loss": 0.0797, "num_input_tokens_seen": 185149696, "step": 85825 }, { "epoch": 14.00163132137031, "grad_norm": 0.22259855270385742, "learning_rate": 0.00024988904984664075, "loss": 0.0082, "num_input_tokens_seen": 185159968, "step": 85830 }, { "epoch": 14.002446982055465, "grad_norm": 0.010821081697940826, "learning_rate": 0.00024982741796842787, "loss": 0.0356, "num_input_tokens_seen": 185170528, "step": 85835 }, { "epoch": 14.00326264274062, "grad_norm": 0.013057991862297058, "learning_rate": 0.00024976579116027975, "loss": 0.0096, "num_input_tokens_seen": 185180960, "step": 85840 }, { "epoch": 14.004078303425775, "grad_norm": 0.087175153195858, "learning_rate": 0.00024970416942344533, "loss": 0.0071, "num_input_tokens_seen": 185191904, "step": 85845 }, { "epoch": 14.00489396411093, "grad_norm": 0.08661168813705444, "learning_rate": 0.00024964255275917335, "loss": 0.0212, "num_input_tokens_seen": 185201824, "step": 85850 }, { "epoch": 14.005709624796085, "grad_norm": 0.030978184193372726, "learning_rate": 0.00024958094116871274, "loss": 0.0096, "num_input_tokens_seen": 185212032, "step": 85855 }, { "epoch": 14.00652528548124, "grad_norm": 0.015762506052851677, "learning_rate": 0.000249519334653312, "loss": 0.0074, "num_input_tokens_seen": 185221856, "step": 85860 }, { "epoch": 14.007340946166394, "grad_norm": 0.030760297551751137, "learning_rate": 0.0002494577332142195, "loss": 0.0056, "num_input_tokens_seen": 185233120, "step": 85865 }, { "epoch": 14.00815660685155, "grad_norm": 0.03456499055027962, "learning_rate": 0.0002493961368526843, "loss": 0.0071, "num_input_tokens_seen": 185244480, "step": 85870 }, { "epoch": 14.008972267536704, "grad_norm": 0.0338679663836956, "learning_rate": 0.0002493345455699538, "loss": 0.0332, "num_input_tokens_seen": 185255616, "step": 85875 }, { "epoch": 14.00978792822186, "grad_norm": 0.007555676624178886, "learning_rate": 0.000249272959367277, "loss": 0.2343, "num_input_tokens_seen": 185266048, "step": 85880 }, { "epoch": 14.010603588907015, "grad_norm": 0.2889097332954407, "learning_rate": 0.0002492113782459017, "loss": 0.0252, "num_input_tokens_seen": 185276512, "step": 85885 }, { "epoch": 14.01141924959217, "grad_norm": 0.030179983004927635, "learning_rate": 0.00024914980220707605, "loss": 0.0098, "num_input_tokens_seen": 185287232, "step": 85890 }, { "epoch": 14.012234910277325, "grad_norm": 0.22543705999851227, "learning_rate": 0.00024908823125204785, "loss": 0.1026, "num_input_tokens_seen": 185298624, "step": 85895 }, { "epoch": 14.013050570962479, "grad_norm": 0.006818065419793129, "learning_rate": 0.00024902666538206494, "loss": 0.0571, "num_input_tokens_seen": 185309728, "step": 85900 }, { "epoch": 14.013866231647635, "grad_norm": 0.02426072210073471, "learning_rate": 0.000248965104598375, "loss": 0.0208, "num_input_tokens_seen": 185320256, "step": 85905 }, { "epoch": 14.01468189233279, "grad_norm": 0.03394691273570061, "learning_rate": 0.0002489035489022257, "loss": 0.0597, "num_input_tokens_seen": 185330624, "step": 85910 }, { "epoch": 14.015497553017944, "grad_norm": 0.01710326597094536, "learning_rate": 0.0002488419982948646, "loss": 0.0377, "num_input_tokens_seen": 185342240, "step": 85915 }, { "epoch": 14.0163132137031, "grad_norm": 0.187971830368042, "learning_rate": 0.0002487804527775389, "loss": 0.0118, "num_input_tokens_seen": 185353120, "step": 85920 }, { "epoch": 14.017128874388254, "grad_norm": 0.010443625971674919, "learning_rate": 0.0002487189123514961, "loss": 0.0096, "num_input_tokens_seen": 185362368, "step": 85925 }, { "epoch": 14.01794453507341, "grad_norm": 0.008745288476347923, "learning_rate": 0.0002486573770179833, "loss": 0.0056, "num_input_tokens_seen": 185373600, "step": 85930 }, { "epoch": 14.018760195758565, "grad_norm": 0.018413865938782692, "learning_rate": 0.00024859584677824757, "loss": 0.0088, "num_input_tokens_seen": 185385472, "step": 85935 }, { "epoch": 14.01957585644372, "grad_norm": 0.00951445009559393, "learning_rate": 0.00024853432163353596, "loss": 0.0656, "num_input_tokens_seen": 185396320, "step": 85940 }, { "epoch": 14.020391517128875, "grad_norm": 0.6481195688247681, "learning_rate": 0.00024847280158509535, "loss": 0.0228, "num_input_tokens_seen": 185406720, "step": 85945 }, { "epoch": 14.021207177814029, "grad_norm": 0.008577946573495865, "learning_rate": 0.00024841128663417243, "loss": 0.0187, "num_input_tokens_seen": 185417632, "step": 85950 }, { "epoch": 14.022022838499185, "grad_norm": 0.07869008928537369, "learning_rate": 0.000248349776782014, "loss": 0.0125, "num_input_tokens_seen": 185427616, "step": 85955 }, { "epoch": 14.022838499184338, "grad_norm": 0.6396604776382446, "learning_rate": 0.0002482882720298666, "loss": 0.0268, "num_input_tokens_seen": 185438400, "step": 85960 }, { "epoch": 14.023654159869494, "grad_norm": 2.008042812347412, "learning_rate": 0.0002482267723789767, "loss": 0.0858, "num_input_tokens_seen": 185448224, "step": 85965 }, { "epoch": 14.02446982055465, "grad_norm": 0.009493842720985413, "learning_rate": 0.0002481652778305906, "loss": 0.0384, "num_input_tokens_seen": 185458016, "step": 85970 }, { "epoch": 14.025285481239804, "grad_norm": 0.016928354278206825, "learning_rate": 0.00024810378838595467, "loss": 0.0188, "num_input_tokens_seen": 185468928, "step": 85975 }, { "epoch": 14.02610114192496, "grad_norm": 0.010706453584134579, "learning_rate": 0.00024804230404631495, "loss": 0.0125, "num_input_tokens_seen": 185479008, "step": 85980 }, { "epoch": 14.026916802610113, "grad_norm": 0.014638662338256836, "learning_rate": 0.0002479808248129174, "loss": 0.0044, "num_input_tokens_seen": 185490112, "step": 85985 }, { "epoch": 14.02773246329527, "grad_norm": 0.04495870694518089, "learning_rate": 0.00024791935068700855, "loss": 0.0028, "num_input_tokens_seen": 185500256, "step": 85990 }, { "epoch": 14.028548123980425, "grad_norm": 0.006704435218125582, "learning_rate": 0.0002478578816698335, "loss": 0.0088, "num_input_tokens_seen": 185511456, "step": 85995 }, { "epoch": 14.029363784665579, "grad_norm": 0.06776247918605804, "learning_rate": 0.00024779641776263866, "loss": 0.0243, "num_input_tokens_seen": 185522432, "step": 86000 }, { "epoch": 14.030179445350734, "grad_norm": 0.5403178334236145, "learning_rate": 0.00024773495896666904, "loss": 0.129, "num_input_tokens_seen": 185531936, "step": 86005 }, { "epoch": 14.030995106035888, "grad_norm": 0.035888344049453735, "learning_rate": 0.0002476735052831706, "loss": 0.0045, "num_input_tokens_seen": 185543840, "step": 86010 }, { "epoch": 14.031810766721044, "grad_norm": 0.06894753873348236, "learning_rate": 0.0002476120567133888, "loss": 0.0124, "num_input_tokens_seen": 185554432, "step": 86015 }, { "epoch": 14.0326264274062, "grad_norm": 0.009929022751748562, "learning_rate": 0.0002475506132585687, "loss": 0.008, "num_input_tokens_seen": 185566144, "step": 86020 }, { "epoch": 14.033442088091354, "grad_norm": 0.02868306264281273, "learning_rate": 0.0002474891749199558, "loss": 0.0531, "num_input_tokens_seen": 185576704, "step": 86025 }, { "epoch": 14.03425774877651, "grad_norm": 0.02860150672495365, "learning_rate": 0.000247427741698795, "loss": 0.0055, "num_input_tokens_seen": 185587840, "step": 86030 }, { "epoch": 14.035073409461663, "grad_norm": 0.0037193975877016783, "learning_rate": 0.00024736631359633147, "loss": 0.0172, "num_input_tokens_seen": 185599680, "step": 86035 }, { "epoch": 14.035889070146819, "grad_norm": 0.011950747109949589, "learning_rate": 0.00024730489061381013, "loss": 0.0175, "num_input_tokens_seen": 185611584, "step": 86040 }, { "epoch": 14.036704730831975, "grad_norm": 0.040513649582862854, "learning_rate": 0.00024724347275247564, "loss": 0.0843, "num_input_tokens_seen": 185622720, "step": 86045 }, { "epoch": 14.037520391517129, "grad_norm": 0.05754178762435913, "learning_rate": 0.0002471820600135729, "loss": 0.0154, "num_input_tokens_seen": 185632768, "step": 86050 }, { "epoch": 14.038336052202284, "grad_norm": 0.0015080805169418454, "learning_rate": 0.0002471206523983465, "loss": 0.0033, "num_input_tokens_seen": 185642848, "step": 86055 }, { "epoch": 14.039151712887438, "grad_norm": 0.10564816743135452, "learning_rate": 0.00024705924990804076, "loss": 0.008, "num_input_tokens_seen": 185653536, "step": 86060 }, { "epoch": 14.039967373572594, "grad_norm": 0.08948718011379242, "learning_rate": 0.0002469978525439002, "loss": 0.0808, "num_input_tokens_seen": 185665248, "step": 86065 }, { "epoch": 14.040783034257748, "grad_norm": 0.0520390085875988, "learning_rate": 0.00024693646030716923, "loss": 0.0086, "num_input_tokens_seen": 185677504, "step": 86070 }, { "epoch": 14.041598694942904, "grad_norm": 0.019964883103966713, "learning_rate": 0.0002468750731990918, "loss": 0.086, "num_input_tokens_seen": 185687264, "step": 86075 }, { "epoch": 14.04241435562806, "grad_norm": 0.13081121444702148, "learning_rate": 0.0002468136912209122, "loss": 0.0081, "num_input_tokens_seen": 185698304, "step": 86080 }, { "epoch": 14.043230016313213, "grad_norm": 0.002430058317258954, "learning_rate": 0.0002467523143738743, "loss": 0.0092, "num_input_tokens_seen": 185709952, "step": 86085 }, { "epoch": 14.044045676998369, "grad_norm": 0.5267149806022644, "learning_rate": 0.00024669094265922204, "loss": 0.0315, "num_input_tokens_seen": 185720224, "step": 86090 }, { "epoch": 14.044861337683523, "grad_norm": 0.0043517304584383965, "learning_rate": 0.00024662957607819914, "loss": 0.0054, "num_input_tokens_seen": 185731904, "step": 86095 }, { "epoch": 14.045676998368679, "grad_norm": 0.046680666506290436, "learning_rate": 0.00024656821463204913, "loss": 0.0379, "num_input_tokens_seen": 185743264, "step": 86100 }, { "epoch": 14.046492659053834, "grad_norm": 0.07875433564186096, "learning_rate": 0.0002465068583220161, "loss": 0.0084, "num_input_tokens_seen": 185755040, "step": 86105 }, { "epoch": 14.047308319738988, "grad_norm": 0.12408815324306488, "learning_rate": 0.0002464455071493429, "loss": 0.0113, "num_input_tokens_seen": 185765152, "step": 86110 }, { "epoch": 14.048123980424144, "grad_norm": 0.00635199248790741, "learning_rate": 0.00024638416111527346, "loss": 0.0031, "num_input_tokens_seen": 185775552, "step": 86115 }, { "epoch": 14.048939641109298, "grad_norm": 0.005625027231872082, "learning_rate": 0.0002463228202210503, "loss": 0.0039, "num_input_tokens_seen": 185786624, "step": 86120 }, { "epoch": 14.049755301794454, "grad_norm": 0.0029958358500152826, "learning_rate": 0.00024626148446791745, "loss": 0.0119, "num_input_tokens_seen": 185798272, "step": 86125 }, { "epoch": 14.05057096247961, "grad_norm": 0.06389451026916504, "learning_rate": 0.00024620015385711706, "loss": 0.0159, "num_input_tokens_seen": 185808896, "step": 86130 }, { "epoch": 14.051386623164763, "grad_norm": 0.05884673446416855, "learning_rate": 0.000246138828389893, "loss": 0.0156, "num_input_tokens_seen": 185819968, "step": 86135 }, { "epoch": 14.052202283849919, "grad_norm": 0.029548581689596176, "learning_rate": 0.0002460775080674872, "loss": 0.0096, "num_input_tokens_seen": 185830656, "step": 86140 }, { "epoch": 14.053017944535073, "grad_norm": 0.02405364252626896, "learning_rate": 0.0002460161928911432, "loss": 0.0102, "num_input_tokens_seen": 185841344, "step": 86145 }, { "epoch": 14.053833605220229, "grad_norm": 0.009440899826586246, "learning_rate": 0.0002459548828621028, "loss": 0.0035, "num_input_tokens_seen": 185852064, "step": 86150 }, { "epoch": 14.054649265905383, "grad_norm": 1.6954190731048584, "learning_rate": 0.00024589357798160925, "loss": 0.0332, "num_input_tokens_seen": 185861728, "step": 86155 }, { "epoch": 14.055464926590538, "grad_norm": 0.15296809375286102, "learning_rate": 0.0002458322782509047, "loss": 0.0366, "num_input_tokens_seen": 185872640, "step": 86160 }, { "epoch": 14.056280587275694, "grad_norm": 1.0038474798202515, "learning_rate": 0.00024577098367123146, "loss": 0.2093, "num_input_tokens_seen": 185884096, "step": 86165 }, { "epoch": 14.057096247960848, "grad_norm": 0.046378638595342636, "learning_rate": 0.00024570969424383174, "loss": 0.0114, "num_input_tokens_seen": 185895392, "step": 86170 }, { "epoch": 14.057911908646004, "grad_norm": 0.12572678923606873, "learning_rate": 0.00024564840996994764, "loss": 0.0121, "num_input_tokens_seen": 185905984, "step": 86175 }, { "epoch": 14.058727569331158, "grad_norm": 0.0037791149225085974, "learning_rate": 0.0002455871308508212, "loss": 0.1387, "num_input_tokens_seen": 185915520, "step": 86180 }, { "epoch": 14.059543230016313, "grad_norm": 0.03395291790366173, "learning_rate": 0.0002455258568876943, "loss": 0.008, "num_input_tokens_seen": 185926944, "step": 86185 }, { "epoch": 14.060358890701469, "grad_norm": 0.0037914887070655823, "learning_rate": 0.0002454645880818087, "loss": 0.0121, "num_input_tokens_seen": 185937088, "step": 86190 }, { "epoch": 14.061174551386623, "grad_norm": 0.015173101797699928, "learning_rate": 0.00024540332443440615, "loss": 0.0045, "num_input_tokens_seen": 185946688, "step": 86195 }, { "epoch": 14.061990212071779, "grad_norm": 0.0027595895808190107, "learning_rate": 0.0002453420659467282, "loss": 0.0114, "num_input_tokens_seen": 185957504, "step": 86200 }, { "epoch": 14.062805872756933, "grad_norm": 0.007358408998697996, "learning_rate": 0.00024528081262001615, "loss": 0.0037, "num_input_tokens_seen": 185968992, "step": 86205 }, { "epoch": 14.063621533442088, "grad_norm": 0.016589906066656113, "learning_rate": 0.000245219564455512, "loss": 0.0104, "num_input_tokens_seen": 185979744, "step": 86210 }, { "epoch": 14.064437194127244, "grad_norm": 0.21843653917312622, "learning_rate": 0.00024515832145445614, "loss": 0.0172, "num_input_tokens_seen": 185990592, "step": 86215 }, { "epoch": 14.065252854812398, "grad_norm": 0.004452393390238285, "learning_rate": 0.0002450970836180906, "loss": 0.007, "num_input_tokens_seen": 186002048, "step": 86220 }, { "epoch": 14.066068515497554, "grad_norm": 1.279645323753357, "learning_rate": 0.0002450358509476556, "loss": 0.0663, "num_input_tokens_seen": 186011904, "step": 86225 }, { "epoch": 14.066884176182707, "grad_norm": 0.7143497467041016, "learning_rate": 0.00024497462344439297, "loss": 0.1243, "num_input_tokens_seen": 186022368, "step": 86230 }, { "epoch": 14.067699836867863, "grad_norm": 0.10418631881475449, "learning_rate": 0.0002449134011095427, "loss": 0.0053, "num_input_tokens_seen": 186032960, "step": 86235 }, { "epoch": 14.068515497553017, "grad_norm": 0.00313842436298728, "learning_rate": 0.0002448521839443464, "loss": 0.0083, "num_input_tokens_seen": 186043552, "step": 86240 }, { "epoch": 14.069331158238173, "grad_norm": 0.08173049241304398, "learning_rate": 0.00024479097195004377, "loss": 0.0075, "num_input_tokens_seen": 186054176, "step": 86245 }, { "epoch": 14.070146818923329, "grad_norm": 0.016755685210227966, "learning_rate": 0.0002447297651278763, "loss": 0.0029, "num_input_tokens_seen": 186064576, "step": 86250 }, { "epoch": 14.070962479608482, "grad_norm": 0.008799823001027107, "learning_rate": 0.0002446685634790836, "loss": 0.0109, "num_input_tokens_seen": 186075872, "step": 86255 }, { "epoch": 14.071778140293638, "grad_norm": 0.0023832046426832676, "learning_rate": 0.00024460736700490676, "loss": 0.0502, "num_input_tokens_seen": 186086688, "step": 86260 }, { "epoch": 14.072593800978792, "grad_norm": 0.013204580172896385, "learning_rate": 0.00024454617570658524, "loss": 0.0709, "num_input_tokens_seen": 186098272, "step": 86265 }, { "epoch": 14.073409461663948, "grad_norm": 0.009968632832169533, "learning_rate": 0.00024448498958535984, "loss": 0.0078, "num_input_tokens_seen": 186109280, "step": 86270 }, { "epoch": 14.074225122349104, "grad_norm": 0.005886519327759743, "learning_rate": 0.00024442380864247, "loss": 0.0026, "num_input_tokens_seen": 186119040, "step": 86275 }, { "epoch": 14.075040783034257, "grad_norm": 0.08006228506565094, "learning_rate": 0.00024436263287915623, "loss": 0.069, "num_input_tokens_seen": 186128576, "step": 86280 }, { "epoch": 14.075856443719413, "grad_norm": 0.013897242955863476, "learning_rate": 0.00024430146229665754, "loss": 0.2169, "num_input_tokens_seen": 186136480, "step": 86285 }, { "epoch": 14.076672104404567, "grad_norm": 0.002791492035612464, "learning_rate": 0.0002442402968962146, "loss": 0.0943, "num_input_tokens_seen": 186146112, "step": 86290 }, { "epoch": 14.077487765089723, "grad_norm": 0.0033663243521004915, "learning_rate": 0.00024417913667906604, "loss": 0.0028, "num_input_tokens_seen": 186157376, "step": 86295 }, { "epoch": 14.078303425774878, "grad_norm": 0.04548008739948273, "learning_rate": 0.00024411798164645205, "loss": 0.0072, "num_input_tokens_seen": 186168704, "step": 86300 }, { "epoch": 14.079119086460032, "grad_norm": 0.006094332784414291, "learning_rate": 0.00024405683179961176, "loss": 0.0063, "num_input_tokens_seen": 186179904, "step": 86305 }, { "epoch": 14.079934747145188, "grad_norm": 0.013367907144129276, "learning_rate": 0.00024399568713978444, "loss": 0.0144, "num_input_tokens_seen": 186191488, "step": 86310 }, { "epoch": 14.080750407830342, "grad_norm": 0.0626281127333641, "learning_rate": 0.00024393454766820927, "loss": 0.0083, "num_input_tokens_seen": 186201344, "step": 86315 }, { "epoch": 14.081566068515498, "grad_norm": 0.03930257633328438, "learning_rate": 0.00024387341338612535, "loss": 0.0187, "num_input_tokens_seen": 186211296, "step": 86320 }, { "epoch": 14.082381729200652, "grad_norm": 0.04623904824256897, "learning_rate": 0.00024381228429477166, "loss": 0.0041, "num_input_tokens_seen": 186222464, "step": 86325 }, { "epoch": 14.083197389885807, "grad_norm": 0.006666381377726793, "learning_rate": 0.00024375116039538697, "loss": 0.0909, "num_input_tokens_seen": 186232096, "step": 86330 }, { "epoch": 14.084013050570963, "grad_norm": 0.019434846937656403, "learning_rate": 0.0002436900416892101, "loss": 0.0039, "num_input_tokens_seen": 186242400, "step": 86335 }, { "epoch": 14.084828711256117, "grad_norm": 0.009296801872551441, "learning_rate": 0.00024362892817747972, "loss": 0.005, "num_input_tokens_seen": 186252928, "step": 86340 }, { "epoch": 14.085644371941273, "grad_norm": 0.006293757818639278, "learning_rate": 0.00024356781986143434, "loss": 0.004, "num_input_tokens_seen": 186263904, "step": 86345 }, { "epoch": 14.086460032626427, "grad_norm": 0.034455180168151855, "learning_rate": 0.00024350671674231217, "loss": 0.0057, "num_input_tokens_seen": 186274592, "step": 86350 }, { "epoch": 14.087275693311582, "grad_norm": 0.0303350742906332, "learning_rate": 0.0002434456188213522, "loss": 0.0488, "num_input_tokens_seen": 186285920, "step": 86355 }, { "epoch": 14.088091353996738, "grad_norm": 0.5272178649902344, "learning_rate": 0.00024338452609979177, "loss": 0.0421, "num_input_tokens_seen": 186296288, "step": 86360 }, { "epoch": 14.088907014681892, "grad_norm": 0.030453898012638092, "learning_rate": 0.0002433234385788699, "loss": 0.006, "num_input_tokens_seen": 186306976, "step": 86365 }, { "epoch": 14.089722675367048, "grad_norm": 0.029235810041427612, "learning_rate": 0.00024326235625982378, "loss": 0.0743, "num_input_tokens_seen": 186318528, "step": 86370 }, { "epoch": 14.090538336052202, "grad_norm": 0.011259643360972404, "learning_rate": 0.00024320127914389213, "loss": 0.0055, "num_input_tokens_seen": 186329536, "step": 86375 }, { "epoch": 14.091353996737357, "grad_norm": 0.005089515820145607, "learning_rate": 0.00024314020723231183, "loss": 0.1781, "num_input_tokens_seen": 186340672, "step": 86380 }, { "epoch": 14.092169657422513, "grad_norm": 0.6565680503845215, "learning_rate": 0.00024307914052632159, "loss": 0.0435, "num_input_tokens_seen": 186350176, "step": 86385 }, { "epoch": 14.092985318107667, "grad_norm": 0.009993394836783409, "learning_rate": 0.000243018079027158, "loss": 0.0545, "num_input_tokens_seen": 186360512, "step": 86390 }, { "epoch": 14.093800978792823, "grad_norm": 0.005378041882067919, "learning_rate": 0.0002429570227360595, "loss": 0.0045, "num_input_tokens_seen": 186371520, "step": 86395 }, { "epoch": 14.094616639477977, "grad_norm": 0.018353892490267754, "learning_rate": 0.00024289597165426264, "loss": 0.0147, "num_input_tokens_seen": 186382176, "step": 86400 }, { "epoch": 14.095432300163132, "grad_norm": 0.01679510995745659, "learning_rate": 0.00024283492578300542, "loss": 0.0094, "num_input_tokens_seen": 186393376, "step": 86405 }, { "epoch": 14.096247960848286, "grad_norm": 0.103525310754776, "learning_rate": 0.00024277388512352428, "loss": 0.0088, "num_input_tokens_seen": 186403616, "step": 86410 }, { "epoch": 14.097063621533442, "grad_norm": 0.36318376660346985, "learning_rate": 0.00024271284967705687, "loss": 0.0107, "num_input_tokens_seen": 186414816, "step": 86415 }, { "epoch": 14.097879282218598, "grad_norm": 0.021010275930166245, "learning_rate": 0.00024265181944483995, "loss": 0.0995, "num_input_tokens_seen": 186426336, "step": 86420 }, { "epoch": 14.098694942903752, "grad_norm": 0.023445969447493553, "learning_rate": 0.0002425907944281104, "loss": 0.0259, "num_input_tokens_seen": 186435712, "step": 86425 }, { "epoch": 14.099510603588907, "grad_norm": 0.012286716140806675, "learning_rate": 0.00024252977462810494, "loss": 0.0082, "num_input_tokens_seen": 186446688, "step": 86430 }, { "epoch": 14.100326264274061, "grad_norm": 0.004114396870136261, "learning_rate": 0.0002424687600460602, "loss": 0.1404, "num_input_tokens_seen": 186456000, "step": 86435 }, { "epoch": 14.101141924959217, "grad_norm": 0.008693752810359001, "learning_rate": 0.00024240775068321273, "loss": 0.0242, "num_input_tokens_seen": 186465472, "step": 86440 }, { "epoch": 14.101957585644373, "grad_norm": 0.031855084002017975, "learning_rate": 0.00024234674654079901, "loss": 0.0106, "num_input_tokens_seen": 186475584, "step": 86445 }, { "epoch": 14.102773246329527, "grad_norm": 0.03180380165576935, "learning_rate": 0.00024228574762005534, "loss": 0.0123, "num_input_tokens_seen": 186486048, "step": 86450 }, { "epoch": 14.103588907014682, "grad_norm": 0.0523865707218647, "learning_rate": 0.00024222475392221787, "loss": 0.0047, "num_input_tokens_seen": 186497184, "step": 86455 }, { "epoch": 14.104404567699836, "grad_norm": 0.04459337145090103, "learning_rate": 0.0002421637654485228, "loss": 0.0056, "num_input_tokens_seen": 186507360, "step": 86460 }, { "epoch": 14.105220228384992, "grad_norm": 0.11291451007127762, "learning_rate": 0.00024210278220020614, "loss": 0.0996, "num_input_tokens_seen": 186518016, "step": 86465 }, { "epoch": 14.106035889070148, "grad_norm": 0.004548402037471533, "learning_rate": 0.00024204180417850373, "loss": 0.0044, "num_input_tokens_seen": 186529344, "step": 86470 }, { "epoch": 14.106851549755302, "grad_norm": 0.003333561820909381, "learning_rate": 0.00024198083138465143, "loss": 0.1078, "num_input_tokens_seen": 186540416, "step": 86475 }, { "epoch": 14.107667210440457, "grad_norm": 0.1505666971206665, "learning_rate": 0.0002419198638198849, "loss": 0.0735, "num_input_tokens_seen": 186551232, "step": 86480 }, { "epoch": 14.108482871125611, "grad_norm": 0.029973287135362625, "learning_rate": 0.0002418589014854397, "loss": 0.0136, "num_input_tokens_seen": 186562208, "step": 86485 }, { "epoch": 14.109298531810767, "grad_norm": 0.014518886804580688, "learning_rate": 0.00024179794438255133, "loss": 0.0044, "num_input_tokens_seen": 186572992, "step": 86490 }, { "epoch": 14.11011419249592, "grad_norm": 0.005182154010981321, "learning_rate": 0.000241736992512455, "loss": 0.0274, "num_input_tokens_seen": 186583872, "step": 86495 }, { "epoch": 14.110929853181077, "grad_norm": 0.04141387715935707, "learning_rate": 0.00024167604587638653, "loss": 0.0274, "num_input_tokens_seen": 186594560, "step": 86500 }, { "epoch": 14.111745513866232, "grad_norm": 0.07028771936893463, "learning_rate": 0.00024161510447558032, "loss": 0.0108, "num_input_tokens_seen": 186605312, "step": 86505 }, { "epoch": 14.112561174551386, "grad_norm": 0.011877317912876606, "learning_rate": 0.0002415541683112722, "loss": 0.0041, "num_input_tokens_seen": 186616512, "step": 86510 }, { "epoch": 14.113376835236542, "grad_norm": 0.010844813659787178, "learning_rate": 0.0002414932373846963, "loss": 0.0031, "num_input_tokens_seen": 186627488, "step": 86515 }, { "epoch": 14.114192495921696, "grad_norm": 0.025529352948069572, "learning_rate": 0.00024143231169708806, "loss": 0.0364, "num_input_tokens_seen": 186638208, "step": 86520 }, { "epoch": 14.115008156606851, "grad_norm": 0.005789188202470541, "learning_rate": 0.0002413713912496821, "loss": 0.0118, "num_input_tokens_seen": 186648000, "step": 86525 }, { "epoch": 14.115823817292007, "grad_norm": 0.014010276645421982, "learning_rate": 0.00024131047604371292, "loss": 0.0562, "num_input_tokens_seen": 186658720, "step": 86530 }, { "epoch": 14.116639477977161, "grad_norm": 0.02247089520096779, "learning_rate": 0.0002412495660804152, "loss": 0.0145, "num_input_tokens_seen": 186669120, "step": 86535 }, { "epoch": 14.117455138662317, "grad_norm": 0.6375548839569092, "learning_rate": 0.0002411886613610232, "loss": 0.0606, "num_input_tokens_seen": 186679424, "step": 86540 }, { "epoch": 14.11827079934747, "grad_norm": 0.005438175518065691, "learning_rate": 0.00024112776188677133, "loss": 0.0493, "num_input_tokens_seen": 186691424, "step": 86545 }, { "epoch": 14.119086460032626, "grad_norm": 0.007905769161880016, "learning_rate": 0.0002410668676588938, "loss": 0.0158, "num_input_tokens_seen": 186702048, "step": 86550 }, { "epoch": 14.119902120717782, "grad_norm": 0.003525680396705866, "learning_rate": 0.0002410059786786246, "loss": 0.0454, "num_input_tokens_seen": 186713568, "step": 86555 }, { "epoch": 14.120717781402936, "grad_norm": 0.7027206420898438, "learning_rate": 0.00024094509494719784, "loss": 0.0163, "num_input_tokens_seen": 186723808, "step": 86560 }, { "epoch": 14.121533442088092, "grad_norm": 0.0053064110688865185, "learning_rate": 0.0002408842164658474, "loss": 0.0093, "num_input_tokens_seen": 186735136, "step": 86565 }, { "epoch": 14.122349102773246, "grad_norm": 0.036840397864580154, "learning_rate": 0.00024082334323580695, "loss": 0.0253, "num_input_tokens_seen": 186744448, "step": 86570 }, { "epoch": 14.123164763458401, "grad_norm": 1.0721595287322998, "learning_rate": 0.0002407624752583103, "loss": 0.1059, "num_input_tokens_seen": 186755328, "step": 86575 }, { "epoch": 14.123980424143557, "grad_norm": 0.008695744909346104, "learning_rate": 0.00024070161253459093, "loss": 0.0095, "num_input_tokens_seen": 186766784, "step": 86580 }, { "epoch": 14.124796084828711, "grad_norm": 0.11123359203338623, "learning_rate": 0.00024064075506588235, "loss": 0.0352, "num_input_tokens_seen": 186777536, "step": 86585 }, { "epoch": 14.125611745513867, "grad_norm": 0.6579277515411377, "learning_rate": 0.00024057990285341786, "loss": 0.1038, "num_input_tokens_seen": 186789120, "step": 86590 }, { "epoch": 14.12642740619902, "grad_norm": 0.15325427055358887, "learning_rate": 0.00024051905589843076, "loss": 0.0047, "num_input_tokens_seen": 186799840, "step": 86595 }, { "epoch": 14.127243066884176, "grad_norm": 0.015294155105948448, "learning_rate": 0.00024045821420215412, "loss": 0.0048, "num_input_tokens_seen": 186810560, "step": 86600 }, { "epoch": 14.12805872756933, "grad_norm": 0.009034212678670883, "learning_rate": 0.0002403973777658211, "loss": 0.0077, "num_input_tokens_seen": 186821952, "step": 86605 }, { "epoch": 14.128874388254486, "grad_norm": 0.0704655647277832, "learning_rate": 0.0002403365465906645, "loss": 0.0084, "num_input_tokens_seen": 186833376, "step": 86610 }, { "epoch": 14.129690048939642, "grad_norm": 0.029609955847263336, "learning_rate": 0.0002402757206779172, "loss": 0.0085, "num_input_tokens_seen": 186843552, "step": 86615 }, { "epoch": 14.130505709624796, "grad_norm": 0.019672822207212448, "learning_rate": 0.00024021490002881186, "loss": 0.0407, "num_input_tokens_seen": 186855328, "step": 86620 }, { "epoch": 14.131321370309951, "grad_norm": 0.009617344476282597, "learning_rate": 0.000240154084644581, "loss": 0.0194, "num_input_tokens_seen": 186867936, "step": 86625 }, { "epoch": 14.132137030995105, "grad_norm": 0.0019196603680029511, "learning_rate": 0.0002400932745264574, "loss": 0.0069, "num_input_tokens_seen": 186878944, "step": 86630 }, { "epoch": 14.132952691680261, "grad_norm": 0.05498132109642029, "learning_rate": 0.00024003246967567332, "loss": 0.0202, "num_input_tokens_seen": 186890432, "step": 86635 }, { "epoch": 14.133768352365417, "grad_norm": 0.14677155017852783, "learning_rate": 0.00023997167009346104, "loss": 0.0061, "num_input_tokens_seen": 186901600, "step": 86640 }, { "epoch": 14.13458401305057, "grad_norm": 0.43038856983184814, "learning_rate": 0.00023991087578105274, "loss": 0.0455, "num_input_tokens_seen": 186912096, "step": 86645 }, { "epoch": 14.135399673735726, "grad_norm": 0.015542859211564064, "learning_rate": 0.00023985008673968052, "loss": 0.0134, "num_input_tokens_seen": 186924320, "step": 86650 }, { "epoch": 14.13621533442088, "grad_norm": 0.3148937225341797, "learning_rate": 0.00023978930297057627, "loss": 0.0395, "num_input_tokens_seen": 186935392, "step": 86655 }, { "epoch": 14.137030995106036, "grad_norm": 0.007977279834449291, "learning_rate": 0.0002397285244749719, "loss": 0.0225, "num_input_tokens_seen": 186946752, "step": 86660 }, { "epoch": 14.137846655791192, "grad_norm": 0.007104580756276846, "learning_rate": 0.00023966775125409918, "loss": 0.0031, "num_input_tokens_seen": 186957216, "step": 86665 }, { "epoch": 14.138662316476346, "grad_norm": 0.017083177343010902, "learning_rate": 0.00023960698330918972, "loss": 0.0271, "num_input_tokens_seen": 186966976, "step": 86670 }, { "epoch": 14.139477977161501, "grad_norm": 0.12861613929271698, "learning_rate": 0.00023954622064147507, "loss": 0.0044, "num_input_tokens_seen": 186977600, "step": 86675 }, { "epoch": 14.140293637846655, "grad_norm": 0.04353078082203865, "learning_rate": 0.00023948546325218667, "loss": 0.0505, "num_input_tokens_seen": 186989056, "step": 86680 }, { "epoch": 14.141109298531811, "grad_norm": 0.42091450095176697, "learning_rate": 0.00023942471114255588, "loss": 0.0185, "num_input_tokens_seen": 186999584, "step": 86685 }, { "epoch": 14.141924959216965, "grad_norm": 0.009104925207793713, "learning_rate": 0.00023936396431381386, "loss": 0.0076, "num_input_tokens_seen": 187009568, "step": 86690 }, { "epoch": 14.14274061990212, "grad_norm": 0.17677007615566254, "learning_rate": 0.00023930322276719175, "loss": 0.0324, "num_input_tokens_seen": 187020256, "step": 86695 }, { "epoch": 14.143556280587276, "grad_norm": 0.07592117786407471, "learning_rate": 0.0002392424865039205, "loss": 0.172, "num_input_tokens_seen": 187031712, "step": 86700 }, { "epoch": 14.14437194127243, "grad_norm": 0.3174726963043213, "learning_rate": 0.0002391817555252311, "loss": 0.0157, "num_input_tokens_seen": 187041856, "step": 86705 }, { "epoch": 14.145187601957586, "grad_norm": 0.0033125600311905146, "learning_rate": 0.0002391210298323543, "loss": 0.0066, "num_input_tokens_seen": 187052800, "step": 86710 }, { "epoch": 14.14600326264274, "grad_norm": 0.03655920922756195, "learning_rate": 0.00023906030942652073, "loss": 0.0047, "num_input_tokens_seen": 187063968, "step": 86715 }, { "epoch": 14.146818923327896, "grad_norm": 0.08464816957712173, "learning_rate": 0.00023899959430896106, "loss": 0.124, "num_input_tokens_seen": 187074048, "step": 86720 }, { "epoch": 14.147634584013051, "grad_norm": 0.08451467007398605, "learning_rate": 0.00023893888448090573, "loss": 0.004, "num_input_tokens_seen": 187083392, "step": 86725 }, { "epoch": 14.148450244698205, "grad_norm": 0.237305149435997, "learning_rate": 0.00023887817994358484, "loss": 0.017, "num_input_tokens_seen": 187093920, "step": 86730 }, { "epoch": 14.149265905383361, "grad_norm": 0.004454759415239096, "learning_rate": 0.0002388174806982293, "loss": 0.0023, "num_input_tokens_seen": 187104448, "step": 86735 }, { "epoch": 14.150081566068515, "grad_norm": 0.008993631228804588, "learning_rate": 0.00023875678674606848, "loss": 0.0039, "num_input_tokens_seen": 187114528, "step": 86740 }, { "epoch": 14.15089722675367, "grad_norm": 0.009441027417778969, "learning_rate": 0.00023869609808833316, "loss": 0.0051, "num_input_tokens_seen": 187125888, "step": 86745 }, { "epoch": 14.151712887438826, "grad_norm": 0.018197013065218925, "learning_rate": 0.0002386354147262525, "loss": 0.0023, "num_input_tokens_seen": 187137312, "step": 86750 }, { "epoch": 14.15252854812398, "grad_norm": 0.016189690679311752, "learning_rate": 0.0002385747366610571, "loss": 0.0149, "num_input_tokens_seen": 187147456, "step": 86755 }, { "epoch": 14.153344208809136, "grad_norm": 0.01883782260119915, "learning_rate": 0.00023851406389397594, "loss": 0.0205, "num_input_tokens_seen": 187158208, "step": 86760 }, { "epoch": 14.15415986949429, "grad_norm": 0.0036642923951148987, "learning_rate": 0.00023845339642623937, "loss": 0.077, "num_input_tokens_seen": 187169056, "step": 86765 }, { "epoch": 14.154975530179446, "grad_norm": 0.0007198764360509813, "learning_rate": 0.00023839273425907615, "loss": 0.0052, "num_input_tokens_seen": 187179584, "step": 86770 }, { "epoch": 14.1557911908646, "grad_norm": 0.011281903833150864, "learning_rate": 0.0002383320773937162, "loss": 0.0104, "num_input_tokens_seen": 187188192, "step": 86775 }, { "epoch": 14.156606851549755, "grad_norm": 1.2086055278778076, "learning_rate": 0.00023827142583138873, "loss": 0.1574, "num_input_tokens_seen": 187197984, "step": 86780 }, { "epoch": 14.15742251223491, "grad_norm": 0.04229852557182312, "learning_rate": 0.00023821077957332276, "loss": 0.0088, "num_input_tokens_seen": 187208224, "step": 86785 }, { "epoch": 14.158238172920065, "grad_norm": 0.11260727047920227, "learning_rate": 0.00023815013862074746, "loss": 0.0246, "num_input_tokens_seen": 187218624, "step": 86790 }, { "epoch": 14.15905383360522, "grad_norm": 0.010786265134811401, "learning_rate": 0.0002380895029748918, "loss": 0.0105, "num_input_tokens_seen": 187229216, "step": 86795 }, { "epoch": 14.159869494290374, "grad_norm": 0.08443213999271393, "learning_rate": 0.00023802887263698464, "loss": 0.0148, "num_input_tokens_seen": 187239904, "step": 86800 }, { "epoch": 14.16068515497553, "grad_norm": 0.023160500451922417, "learning_rate": 0.00023796824760825464, "loss": 0.2116, "num_input_tokens_seen": 187250432, "step": 86805 }, { "epoch": 14.161500815660686, "grad_norm": 0.011217671446502209, "learning_rate": 0.0002379076278899306, "loss": 0.0043, "num_input_tokens_seen": 187261088, "step": 86810 }, { "epoch": 14.16231647634584, "grad_norm": 1.3667972087860107, "learning_rate": 0.0002378470134832409, "loss": 0.0714, "num_input_tokens_seen": 187272704, "step": 86815 }, { "epoch": 14.163132137030995, "grad_norm": 0.7404364347457886, "learning_rate": 0.00023778640438941408, "loss": 0.0732, "num_input_tokens_seen": 187281632, "step": 86820 }, { "epoch": 14.16394779771615, "grad_norm": 0.004353814758360386, "learning_rate": 0.00023772580060967834, "loss": 0.0049, "num_input_tokens_seen": 187291616, "step": 86825 }, { "epoch": 14.164763458401305, "grad_norm": 0.003456072648987174, "learning_rate": 0.00023766520214526206, "loss": 0.0588, "num_input_tokens_seen": 187302720, "step": 86830 }, { "epoch": 14.16557911908646, "grad_norm": 0.02902786247432232, "learning_rate": 0.00023760460899739322, "loss": 0.0069, "num_input_tokens_seen": 187313440, "step": 86835 }, { "epoch": 14.166394779771615, "grad_norm": 0.007264486979693174, "learning_rate": 0.00023754402116729983, "loss": 0.0035, "num_input_tokens_seen": 187323552, "step": 86840 }, { "epoch": 14.16721044045677, "grad_norm": 0.020356399938464165, "learning_rate": 0.00023748343865620964, "loss": 0.0033, "num_input_tokens_seen": 187335136, "step": 86845 }, { "epoch": 14.168026101141924, "grad_norm": 0.02346370741724968, "learning_rate": 0.00023742286146535098, "loss": 0.011, "num_input_tokens_seen": 187346144, "step": 86850 }, { "epoch": 14.16884176182708, "grad_norm": 0.006810345686972141, "learning_rate": 0.00023736228959595073, "loss": 0.0031, "num_input_tokens_seen": 187356128, "step": 86855 }, { "epoch": 14.169657422512234, "grad_norm": 0.17587848007678986, "learning_rate": 0.00023730172304923725, "loss": 0.0498, "num_input_tokens_seen": 187367232, "step": 86860 }, { "epoch": 14.17047308319739, "grad_norm": 0.008433666080236435, "learning_rate": 0.00023724116182643725, "loss": 0.0032, "num_input_tokens_seen": 187378112, "step": 86865 }, { "epoch": 14.171288743882545, "grad_norm": 0.16312739253044128, "learning_rate": 0.00023718060592877878, "loss": 0.1115, "num_input_tokens_seen": 187388192, "step": 86870 }, { "epoch": 14.1721044045677, "grad_norm": 0.021693430840969086, "learning_rate": 0.00023712005535748838, "loss": 0.0053, "num_input_tokens_seen": 187398304, "step": 86875 }, { "epoch": 14.172920065252855, "grad_norm": 0.011725623160600662, "learning_rate": 0.0002370595101137939, "loss": 0.08, "num_input_tokens_seen": 187410048, "step": 86880 }, { "epoch": 14.173735725938009, "grad_norm": 0.048552460968494415, "learning_rate": 0.00023699897019892165, "loss": 0.0206, "num_input_tokens_seen": 187421152, "step": 86885 }, { "epoch": 14.174551386623165, "grad_norm": 0.05786004662513733, "learning_rate": 0.00023693843561409928, "loss": 0.0058, "num_input_tokens_seen": 187431424, "step": 86890 }, { "epoch": 14.17536704730832, "grad_norm": 0.01027756929397583, "learning_rate": 0.0002368779063605529, "loss": 0.0302, "num_input_tokens_seen": 187441504, "step": 86895 }, { "epoch": 14.176182707993474, "grad_norm": 0.28189727663993835, "learning_rate": 0.00023681738243950984, "loss": 0.0408, "num_input_tokens_seen": 187452672, "step": 86900 }, { "epoch": 14.17699836867863, "grad_norm": 0.0038666038308292627, "learning_rate": 0.00023675686385219607, "loss": 0.0046, "num_input_tokens_seen": 187463264, "step": 86905 }, { "epoch": 14.177814029363784, "grad_norm": 1.0911550521850586, "learning_rate": 0.0002366963505998388, "loss": 0.0357, "num_input_tokens_seen": 187474336, "step": 86910 }, { "epoch": 14.17862969004894, "grad_norm": 0.021700987592339516, "learning_rate": 0.00023663584268366356, "loss": 0.0161, "num_input_tokens_seen": 187485632, "step": 86915 }, { "epoch": 14.179445350734095, "grad_norm": 0.0582113116979599, "learning_rate": 0.00023657534010489733, "loss": 0.0074, "num_input_tokens_seen": 187497440, "step": 86920 }, { "epoch": 14.18026101141925, "grad_norm": 0.005163951311260462, "learning_rate": 0.000236514842864766, "loss": 0.1948, "num_input_tokens_seen": 187508320, "step": 86925 }, { "epoch": 14.181076672104405, "grad_norm": 0.013334648683667183, "learning_rate": 0.00023645435096449557, "loss": 0.0041, "num_input_tokens_seen": 187518976, "step": 86930 }, { "epoch": 14.181892332789559, "grad_norm": 0.006819948088377714, "learning_rate": 0.00023639386440531208, "loss": 0.0043, "num_input_tokens_seen": 187529984, "step": 86935 }, { "epoch": 14.182707993474715, "grad_norm": 0.6749143600463867, "learning_rate": 0.00023633338318844137, "loss": 0.094, "num_input_tokens_seen": 187540672, "step": 86940 }, { "epoch": 14.18352365415987, "grad_norm": 0.035739269107580185, "learning_rate": 0.00023627290731510908, "loss": 0.0043, "num_input_tokens_seen": 187551072, "step": 86945 }, { "epoch": 14.184339314845024, "grad_norm": 0.20690716803073883, "learning_rate": 0.00023621243678654099, "loss": 0.0135, "num_input_tokens_seen": 187561664, "step": 86950 }, { "epoch": 14.18515497553018, "grad_norm": 0.0024190593976527452, "learning_rate": 0.0002361519716039624, "loss": 0.0144, "num_input_tokens_seen": 187572768, "step": 86955 }, { "epoch": 14.185970636215334, "grad_norm": 0.008097248151898384, "learning_rate": 0.00023609151176859884, "loss": 0.1044, "num_input_tokens_seen": 187582144, "step": 86960 }, { "epoch": 14.18678629690049, "grad_norm": 0.019733905792236328, "learning_rate": 0.00023603105728167562, "loss": 0.0814, "num_input_tokens_seen": 187593760, "step": 86965 }, { "epoch": 14.187601957585644, "grad_norm": 0.015739591792225838, "learning_rate": 0.00023597060814441767, "loss": 0.0096, "num_input_tokens_seen": 187604256, "step": 86970 }, { "epoch": 14.1884176182708, "grad_norm": 0.07301629334688187, "learning_rate": 0.00023591016435805067, "loss": 0.0072, "num_input_tokens_seen": 187615456, "step": 86975 }, { "epoch": 14.189233278955955, "grad_norm": 0.22086675465106964, "learning_rate": 0.00023584972592379888, "loss": 0.0074, "num_input_tokens_seen": 187626016, "step": 86980 }, { "epoch": 14.190048939641109, "grad_norm": 0.7895005941390991, "learning_rate": 0.0002357892928428878, "loss": 0.083, "num_input_tokens_seen": 187638176, "step": 86985 }, { "epoch": 14.190864600326265, "grad_norm": 0.005785096436738968, "learning_rate": 0.00023572886511654157, "loss": 0.0033, "num_input_tokens_seen": 187649280, "step": 86990 }, { "epoch": 14.191680261011419, "grad_norm": 0.04214262589812279, "learning_rate": 0.00023566844274598548, "loss": 0.008, "num_input_tokens_seen": 187659488, "step": 86995 }, { "epoch": 14.192495921696574, "grad_norm": 0.6143233180046082, "learning_rate": 0.00023560802573244333, "loss": 0.1121, "num_input_tokens_seen": 187670976, "step": 87000 }, { "epoch": 14.19331158238173, "grad_norm": 0.01676148921251297, "learning_rate": 0.00023554761407714036, "loss": 0.0144, "num_input_tokens_seen": 187682944, "step": 87005 }, { "epoch": 14.194127243066884, "grad_norm": 0.15882396697998047, "learning_rate": 0.00023548720778130005, "loss": 0.0259, "num_input_tokens_seen": 187694496, "step": 87010 }, { "epoch": 14.19494290375204, "grad_norm": 0.005398150067776442, "learning_rate": 0.0002354268068461475, "loss": 0.0083, "num_input_tokens_seen": 187705824, "step": 87015 }, { "epoch": 14.195758564437194, "grad_norm": 2.5134775638580322, "learning_rate": 0.00023536641127290588, "loss": 0.0434, "num_input_tokens_seen": 187715424, "step": 87020 }, { "epoch": 14.19657422512235, "grad_norm": 0.00813831202685833, "learning_rate": 0.00023530602106280004, "loss": 0.01, "num_input_tokens_seen": 187726432, "step": 87025 }, { "epoch": 14.197389885807505, "grad_norm": 0.44391173124313354, "learning_rate": 0.00023524563621705308, "loss": 0.0849, "num_input_tokens_seen": 187737152, "step": 87030 }, { "epoch": 14.198205546492659, "grad_norm": 0.006323656998574734, "learning_rate": 0.00023518525673688957, "loss": 0.0055, "num_input_tokens_seen": 187747936, "step": 87035 }, { "epoch": 14.199021207177815, "grad_norm": 0.04297136515378952, "learning_rate": 0.0002351248826235324, "loss": 0.0104, "num_input_tokens_seen": 187758336, "step": 87040 }, { "epoch": 14.199836867862969, "grad_norm": 0.00740022910758853, "learning_rate": 0.00023506451387820588, "loss": 0.0078, "num_input_tokens_seen": 187769184, "step": 87045 }, { "epoch": 14.200652528548124, "grad_norm": 0.5588157176971436, "learning_rate": 0.0002350041505021327, "loss": 0.0461, "num_input_tokens_seen": 187779680, "step": 87050 }, { "epoch": 14.201468189233278, "grad_norm": 0.011404044926166534, "learning_rate": 0.00023494379249653675, "loss": 0.006, "num_input_tokens_seen": 187789888, "step": 87055 }, { "epoch": 14.202283849918434, "grad_norm": 0.0025135502219200134, "learning_rate": 0.0002348834398626411, "loss": 0.0479, "num_input_tokens_seen": 187801088, "step": 87060 }, { "epoch": 14.20309951060359, "grad_norm": 0.042476020753383636, "learning_rate": 0.0002348230926016689, "loss": 0.0389, "num_input_tokens_seen": 187812128, "step": 87065 }, { "epoch": 14.203915171288743, "grad_norm": 0.08662734925746918, "learning_rate": 0.00023476275071484309, "loss": 0.0077, "num_input_tokens_seen": 187823072, "step": 87070 }, { "epoch": 14.2047308319739, "grad_norm": 0.027986537665128708, "learning_rate": 0.0002347024142033866, "loss": 0.004, "num_input_tokens_seen": 187832960, "step": 87075 }, { "epoch": 14.205546492659053, "grad_norm": 0.021598218008875847, "learning_rate": 0.0002346420830685223, "loss": 0.0037, "num_input_tokens_seen": 187844288, "step": 87080 }, { "epoch": 14.206362153344209, "grad_norm": 0.93536776304245, "learning_rate": 0.0002345817573114728, "loss": 0.0364, "num_input_tokens_seen": 187854336, "step": 87085 }, { "epoch": 14.207177814029365, "grad_norm": 0.01969945803284645, "learning_rate": 0.00023452143693346067, "loss": 0.0064, "num_input_tokens_seen": 187864480, "step": 87090 }, { "epoch": 14.207993474714518, "grad_norm": 0.005997438915073872, "learning_rate": 0.0002344611219357084, "loss": 0.0031, "num_input_tokens_seen": 187876512, "step": 87095 }, { "epoch": 14.208809135399674, "grad_norm": 0.0023233455140143633, "learning_rate": 0.0002344008123194384, "loss": 0.0241, "num_input_tokens_seen": 187887840, "step": 87100 }, { "epoch": 14.209624796084828, "grad_norm": 0.00787628535181284, "learning_rate": 0.0002343405080858728, "loss": 0.0019, "num_input_tokens_seen": 187898592, "step": 87105 }, { "epoch": 14.210440456769984, "grad_norm": 0.003205854445695877, "learning_rate": 0.00023428020923623382, "loss": 0.0027, "num_input_tokens_seen": 187909376, "step": 87110 }, { "epoch": 14.21125611745514, "grad_norm": 0.046218641102313995, "learning_rate": 0.0002342199157717434, "loss": 0.0075, "num_input_tokens_seen": 187919680, "step": 87115 }, { "epoch": 14.212071778140293, "grad_norm": 0.15535475313663483, "learning_rate": 0.00023415962769362386, "loss": 0.0196, "num_input_tokens_seen": 187930400, "step": 87120 }, { "epoch": 14.21288743882545, "grad_norm": 1.3046773672103882, "learning_rate": 0.00023409934500309633, "loss": 0.1593, "num_input_tokens_seen": 187940768, "step": 87125 }, { "epoch": 14.213703099510603, "grad_norm": 0.4731602370738983, "learning_rate": 0.00023403906770138328, "loss": 0.1258, "num_input_tokens_seen": 187951712, "step": 87130 }, { "epoch": 14.214518760195759, "grad_norm": 0.03601643815636635, "learning_rate": 0.00023397879578970554, "loss": 0.0144, "num_input_tokens_seen": 187962784, "step": 87135 }, { "epoch": 14.215334420880913, "grad_norm": 0.013373619876801968, "learning_rate": 0.00023391852926928536, "loss": 0.0045, "num_input_tokens_seen": 187973152, "step": 87140 }, { "epoch": 14.216150081566068, "grad_norm": 0.35456058382987976, "learning_rate": 0.0002338582681413433, "loss": 0.0244, "num_input_tokens_seen": 187983360, "step": 87145 }, { "epoch": 14.216965742251224, "grad_norm": 0.010401858016848564, "learning_rate": 0.0002337980124071015, "loss": 0.0051, "num_input_tokens_seen": 187994016, "step": 87150 }, { "epoch": 14.217781402936378, "grad_norm": 0.05298225209116936, "learning_rate": 0.0002337377620677803, "loss": 0.0077, "num_input_tokens_seen": 188005440, "step": 87155 }, { "epoch": 14.218597063621534, "grad_norm": 0.02809702232480049, "learning_rate": 0.00023367751712460134, "loss": 0.0128, "num_input_tokens_seen": 188016096, "step": 87160 }, { "epoch": 14.219412724306688, "grad_norm": 0.028549756854772568, "learning_rate": 0.00023361727757878527, "loss": 0.0066, "num_input_tokens_seen": 188025888, "step": 87165 }, { "epoch": 14.220228384991843, "grad_norm": 0.005085950717329979, "learning_rate": 0.00023355704343155305, "loss": 0.0087, "num_input_tokens_seen": 188035616, "step": 87170 }, { "epoch": 14.221044045676999, "grad_norm": 0.6963438391685486, "learning_rate": 0.00023349681468412537, "loss": 0.052, "num_input_tokens_seen": 188047072, "step": 87175 }, { "epoch": 14.221859706362153, "grad_norm": 0.005043588113039732, "learning_rate": 0.00023343659133772277, "loss": 0.0082, "num_input_tokens_seen": 188059040, "step": 87180 }, { "epoch": 14.222675367047309, "grad_norm": 0.007069089449942112, "learning_rate": 0.0002333763733935659, "loss": 0.0273, "num_input_tokens_seen": 188069344, "step": 87185 }, { "epoch": 14.223491027732463, "grad_norm": 0.0014804665697738528, "learning_rate": 0.00023331616085287492, "loss": 0.0201, "num_input_tokens_seen": 188080384, "step": 87190 }, { "epoch": 14.224306688417618, "grad_norm": 0.11146103590726852, "learning_rate": 0.00023325595371687037, "loss": 0.0275, "num_input_tokens_seen": 188091424, "step": 87195 }, { "epoch": 14.225122349102774, "grad_norm": 0.5196319222450256, "learning_rate": 0.00023319575198677223, "loss": 0.0147, "num_input_tokens_seen": 188102880, "step": 87200 }, { "epoch": 14.225938009787928, "grad_norm": 0.04607345908880234, "learning_rate": 0.00023313555566380068, "loss": 0.0097, "num_input_tokens_seen": 188112544, "step": 87205 }, { "epoch": 14.226753670473084, "grad_norm": 0.9006611704826355, "learning_rate": 0.00023307536474917567, "loss": 0.1052, "num_input_tokens_seen": 188124160, "step": 87210 }, { "epoch": 14.227569331158238, "grad_norm": 0.00656993268057704, "learning_rate": 0.00023301517924411696, "loss": 0.0315, "num_input_tokens_seen": 188134272, "step": 87215 }, { "epoch": 14.228384991843393, "grad_norm": 0.0023424543906003237, "learning_rate": 0.00023295499914984436, "loss": 0.0099, "num_input_tokens_seen": 188145440, "step": 87220 }, { "epoch": 14.229200652528547, "grad_norm": 0.002212837338447571, "learning_rate": 0.00023289482446757747, "loss": 0.1086, "num_input_tokens_seen": 188156352, "step": 87225 }, { "epoch": 14.230016313213703, "grad_norm": 0.009030476212501526, "learning_rate": 0.0002328346551985358, "loss": 0.0025, "num_input_tokens_seen": 188167136, "step": 87230 }, { "epoch": 14.230831973898859, "grad_norm": 0.0033580998424440622, "learning_rate": 0.00023277449134393875, "loss": 0.0087, "num_input_tokens_seen": 188178464, "step": 87235 }, { "epoch": 14.231647634584013, "grad_norm": 0.030830368399620056, "learning_rate": 0.00023271433290500567, "loss": 0.0084, "num_input_tokens_seen": 188190112, "step": 87240 }, { "epoch": 14.232463295269168, "grad_norm": 0.009953748434782028, "learning_rate": 0.00023265417988295567, "loss": 0.0127, "num_input_tokens_seen": 188201472, "step": 87245 }, { "epoch": 14.233278955954322, "grad_norm": 0.04978550225496292, "learning_rate": 0.0002325940322790079, "loss": 0.1105, "num_input_tokens_seen": 188212576, "step": 87250 }, { "epoch": 14.234094616639478, "grad_norm": 0.16406074166297913, "learning_rate": 0.0002325338900943813, "loss": 0.0072, "num_input_tokens_seen": 188223648, "step": 87255 }, { "epoch": 14.234910277324634, "grad_norm": 0.03299479931592941, "learning_rate": 0.00023247375333029452, "loss": 0.0037, "num_input_tokens_seen": 188233152, "step": 87260 }, { "epoch": 14.235725938009788, "grad_norm": 0.005836287513375282, "learning_rate": 0.00023241362198796666, "loss": 0.0029, "num_input_tokens_seen": 188242272, "step": 87265 }, { "epoch": 14.236541598694943, "grad_norm": 0.002533625578507781, "learning_rate": 0.00023235349606861628, "loss": 0.0128, "num_input_tokens_seen": 188253824, "step": 87270 }, { "epoch": 14.237357259380097, "grad_norm": 0.007901863195002079, "learning_rate": 0.00023229337557346174, "loss": 0.0085, "num_input_tokens_seen": 188264736, "step": 87275 }, { "epoch": 14.238172920065253, "grad_norm": 0.0013080198550596833, "learning_rate": 0.00023223326050372163, "loss": 0.0623, "num_input_tokens_seen": 188276480, "step": 87280 }, { "epoch": 14.238988580750409, "grad_norm": 0.14830201864242554, "learning_rate": 0.0002321731508606142, "loss": 0.0367, "num_input_tokens_seen": 188287104, "step": 87285 }, { "epoch": 14.239804241435563, "grad_norm": 0.014220505021512508, "learning_rate": 0.0002321130466453576, "loss": 0.0021, "num_input_tokens_seen": 188296896, "step": 87290 }, { "epoch": 14.240619902120718, "grad_norm": 0.1186322346329689, "learning_rate": 0.0002320529478591699, "loss": 0.0062, "num_input_tokens_seen": 188307776, "step": 87295 }, { "epoch": 14.241435562805872, "grad_norm": 0.277007132768631, "learning_rate": 0.00023199285450326918, "loss": 0.0365, "num_input_tokens_seen": 188318912, "step": 87300 }, { "epoch": 14.242251223491028, "grad_norm": 0.010361609980463982, "learning_rate": 0.00023193276657887326, "loss": 0.0034, "num_input_tokens_seen": 188329984, "step": 87305 }, { "epoch": 14.243066884176184, "grad_norm": 0.005532090552151203, "learning_rate": 0.00023187268408719986, "loss": 0.0045, "num_input_tokens_seen": 188341888, "step": 87310 }, { "epoch": 14.243882544861338, "grad_norm": 0.031214915215969086, "learning_rate": 0.00023181260702946673, "loss": 0.0088, "num_input_tokens_seen": 188353248, "step": 87315 }, { "epoch": 14.244698205546493, "grad_norm": 0.00498800165951252, "learning_rate": 0.00023175253540689124, "loss": 0.1248, "num_input_tokens_seen": 188362944, "step": 87320 }, { "epoch": 14.245513866231647, "grad_norm": 0.0038739151787012815, "learning_rate": 0.00023169246922069098, "loss": 0.0742, "num_input_tokens_seen": 188374848, "step": 87325 }, { "epoch": 14.246329526916803, "grad_norm": 0.12290064245462418, "learning_rate": 0.00023163240847208318, "loss": 0.0093, "num_input_tokens_seen": 188384704, "step": 87330 }, { "epoch": 14.247145187601957, "grad_norm": 0.0044774278067052364, "learning_rate": 0.0002315723531622851, "loss": 0.1259, "num_input_tokens_seen": 188396000, "step": 87335 }, { "epoch": 14.247960848287113, "grad_norm": 0.007690066006034613, "learning_rate": 0.00023151230329251376, "loss": 0.0033, "num_input_tokens_seen": 188406976, "step": 87340 }, { "epoch": 14.248776508972268, "grad_norm": 0.01967843808233738, "learning_rate": 0.00023145225886398617, "loss": 0.0199, "num_input_tokens_seen": 188417856, "step": 87345 }, { "epoch": 14.249592169657422, "grad_norm": 0.4073706865310669, "learning_rate": 0.0002313922198779193, "loss": 0.1535, "num_input_tokens_seen": 188428544, "step": 87350 }, { "epoch": 14.250407830342578, "grad_norm": 0.6488693356513977, "learning_rate": 0.00023133218633552982, "loss": 0.2037, "num_input_tokens_seen": 188438592, "step": 87355 }, { "epoch": 14.251223491027732, "grad_norm": 0.2419464886188507, "learning_rate": 0.00023127215823803444, "loss": 0.024, "num_input_tokens_seen": 188450048, "step": 87360 }, { "epoch": 14.252039151712887, "grad_norm": 0.0429023876786232, "learning_rate": 0.00023121213558664966, "loss": 0.0078, "num_input_tokens_seen": 188461056, "step": 87365 }, { "epoch": 14.252854812398043, "grad_norm": 0.5524848699569702, "learning_rate": 0.00023115211838259175, "loss": 0.038, "num_input_tokens_seen": 188472672, "step": 87370 }, { "epoch": 14.253670473083197, "grad_norm": 0.026142222806811333, "learning_rate": 0.00023109210662707757, "loss": 0.0047, "num_input_tokens_seen": 188484096, "step": 87375 }, { "epoch": 14.254486133768353, "grad_norm": 0.025465162470936775, "learning_rate": 0.00023103210032132267, "loss": 0.0448, "num_input_tokens_seen": 188494464, "step": 87380 }, { "epoch": 14.255301794453507, "grad_norm": 0.00884486734867096, "learning_rate": 0.0002309720994665438, "loss": 0.0106, "num_input_tokens_seen": 188505536, "step": 87385 }, { "epoch": 14.256117455138662, "grad_norm": 0.7291843891143799, "learning_rate": 0.00023091210406395624, "loss": 0.0482, "num_input_tokens_seen": 188516672, "step": 87390 }, { "epoch": 14.256933115823816, "grad_norm": 0.003291802015155554, "learning_rate": 0.00023085211411477663, "loss": 0.1169, "num_input_tokens_seen": 188526240, "step": 87395 }, { "epoch": 14.257748776508972, "grad_norm": 0.16772744059562683, "learning_rate": 0.00023079212962022, "loss": 0.0106, "num_input_tokens_seen": 188537216, "step": 87400 }, { "epoch": 14.258564437194128, "grad_norm": 0.7395635843276978, "learning_rate": 0.00023073215058150255, "loss": 0.1054, "num_input_tokens_seen": 188547776, "step": 87405 }, { "epoch": 14.259380097879282, "grad_norm": 0.0040894923731684685, "learning_rate": 0.00023067217699983966, "loss": 0.0119, "num_input_tokens_seen": 188559104, "step": 87410 }, { "epoch": 14.260195758564437, "grad_norm": 0.05267702043056488, "learning_rate": 0.00023061220887644679, "loss": 0.0338, "num_input_tokens_seen": 188569920, "step": 87415 }, { "epoch": 14.261011419249591, "grad_norm": 0.00407877191901207, "learning_rate": 0.00023055224621253923, "loss": 0.0095, "num_input_tokens_seen": 188579744, "step": 87420 }, { "epoch": 14.261827079934747, "grad_norm": 0.008140397258102894, "learning_rate": 0.00023049228900933223, "loss": 0.0085, "num_input_tokens_seen": 188590432, "step": 87425 }, { "epoch": 14.262642740619903, "grad_norm": 0.012962247245013714, "learning_rate": 0.00023043233726804087, "loss": 0.0079, "num_input_tokens_seen": 188600352, "step": 87430 }, { "epoch": 14.263458401305057, "grad_norm": 0.007674058433622122, "learning_rate": 0.00023037239098988016, "loss": 0.0089, "num_input_tokens_seen": 188610976, "step": 87435 }, { "epoch": 14.264274061990212, "grad_norm": 0.16679972410202026, "learning_rate": 0.00023031245017606506, "loss": 0.011, "num_input_tokens_seen": 188621248, "step": 87440 }, { "epoch": 14.265089722675366, "grad_norm": 0.03357921913266182, "learning_rate": 0.00023025251482781023, "loss": 0.0042, "num_input_tokens_seen": 188632032, "step": 87445 }, { "epoch": 14.265905383360522, "grad_norm": 0.00487865274772048, "learning_rate": 0.00023019258494633038, "loss": 0.0209, "num_input_tokens_seen": 188642464, "step": 87450 }, { "epoch": 14.266721044045678, "grad_norm": 0.016534309834241867, "learning_rate": 0.0002301326605328401, "loss": 0.0041, "num_input_tokens_seen": 188652384, "step": 87455 }, { "epoch": 14.267536704730832, "grad_norm": 0.01768423430621624, "learning_rate": 0.00023007274158855378, "loss": 0.0192, "num_input_tokens_seen": 188662944, "step": 87460 }, { "epoch": 14.268352365415987, "grad_norm": 0.002413802780210972, "learning_rate": 0.00023001282811468577, "loss": 0.0028, "num_input_tokens_seen": 188674592, "step": 87465 }, { "epoch": 14.269168026101141, "grad_norm": 0.018620040267705917, "learning_rate": 0.00022995292011245033, "loss": 0.0082, "num_input_tokens_seen": 188684288, "step": 87470 }, { "epoch": 14.269983686786297, "grad_norm": 0.13078853487968445, "learning_rate": 0.00022989301758306153, "loss": 0.0087, "num_input_tokens_seen": 188694528, "step": 87475 }, { "epoch": 14.270799347471453, "grad_norm": 0.26004543900489807, "learning_rate": 0.00022983312052773336, "loss": 0.0121, "num_input_tokens_seen": 188705472, "step": 87480 }, { "epoch": 14.271615008156607, "grad_norm": 0.02304054982960224, "learning_rate": 0.0002297732289476796, "loss": 0.0041, "num_input_tokens_seen": 188714336, "step": 87485 }, { "epoch": 14.272430668841762, "grad_norm": 0.05910288542509079, "learning_rate": 0.0002297133428441145, "loss": 0.0551, "num_input_tokens_seen": 188726400, "step": 87490 }, { "epoch": 14.273246329526916, "grad_norm": 0.006778143346309662, "learning_rate": 0.000229653462218251, "loss": 0.0255, "num_input_tokens_seen": 188737216, "step": 87495 }, { "epoch": 14.274061990212072, "grad_norm": 0.027862556278705597, "learning_rate": 0.00022959358707130346, "loss": 0.0036, "num_input_tokens_seen": 188747648, "step": 87500 }, { "epoch": 14.274877650897226, "grad_norm": 0.04585995897650719, "learning_rate": 0.00022953371740448453, "loss": 0.0397, "num_input_tokens_seen": 188759456, "step": 87505 }, { "epoch": 14.275693311582382, "grad_norm": 0.007352481596171856, "learning_rate": 0.00022947385321900825, "loss": 0.062, "num_input_tokens_seen": 188770432, "step": 87510 }, { "epoch": 14.276508972267537, "grad_norm": 0.002369675552472472, "learning_rate": 0.00022941399451608725, "loss": 0.0026, "num_input_tokens_seen": 188781216, "step": 87515 }, { "epoch": 14.277324632952691, "grad_norm": 0.030936891213059425, "learning_rate": 0.00022935414129693523, "loss": 0.0039, "num_input_tokens_seen": 188792640, "step": 87520 }, { "epoch": 14.278140293637847, "grad_norm": 0.08848312497138977, "learning_rate": 0.0002292942935627645, "loss": 0.0081, "num_input_tokens_seen": 188803040, "step": 87525 }, { "epoch": 14.278955954323001, "grad_norm": 0.0011001932434737682, "learning_rate": 0.00022923445131478866, "loss": 0.0229, "num_input_tokens_seen": 188814112, "step": 87530 }, { "epoch": 14.279771615008157, "grad_norm": 0.03232671692967415, "learning_rate": 0.00022917461455421984, "loss": 0.0073, "num_input_tokens_seen": 188825696, "step": 87535 }, { "epoch": 14.280587275693312, "grad_norm": 0.002933687996119261, "learning_rate": 0.00022911478328227136, "loss": 0.0338, "num_input_tokens_seen": 188836288, "step": 87540 }, { "epoch": 14.281402936378466, "grad_norm": 0.002049962757155299, "learning_rate": 0.00022905495750015508, "loss": 0.0023, "num_input_tokens_seen": 188847392, "step": 87545 }, { "epoch": 14.282218597063622, "grad_norm": 0.000977461924776435, "learning_rate": 0.000228995137209084, "loss": 0.059, "num_input_tokens_seen": 188858752, "step": 87550 }, { "epoch": 14.283034257748776, "grad_norm": 0.027393346652388573, "learning_rate": 0.00022893532241027026, "loss": 0.0033, "num_input_tokens_seen": 188868544, "step": 87555 }, { "epoch": 14.283849918433932, "grad_norm": 0.11348753422498703, "learning_rate": 0.00022887551310492605, "loss": 0.0085, "num_input_tokens_seen": 188880416, "step": 87560 }, { "epoch": 14.284665579119087, "grad_norm": 0.04598163813352585, "learning_rate": 0.00022881570929426354, "loss": 0.1911, "num_input_tokens_seen": 188891552, "step": 87565 }, { "epoch": 14.285481239804241, "grad_norm": 0.03280395269393921, "learning_rate": 0.00022875591097949472, "loss": 0.1241, "num_input_tokens_seen": 188902560, "step": 87570 }, { "epoch": 14.286296900489397, "grad_norm": 0.26995164155960083, "learning_rate": 0.00022869611816183144, "loss": 0.0162, "num_input_tokens_seen": 188914272, "step": 87575 }, { "epoch": 14.28711256117455, "grad_norm": 0.021022794768214226, "learning_rate": 0.00022863633084248549, "loss": 0.0069, "num_input_tokens_seen": 188924928, "step": 87580 }, { "epoch": 14.287928221859707, "grad_norm": 0.0739586353302002, "learning_rate": 0.00022857654902266856, "loss": 0.0053, "num_input_tokens_seen": 188935776, "step": 87585 }, { "epoch": 14.28874388254486, "grad_norm": 0.005429745651781559, "learning_rate": 0.00022851677270359217, "loss": 0.0019, "num_input_tokens_seen": 188947136, "step": 87590 }, { "epoch": 14.289559543230016, "grad_norm": 0.005947369150817394, "learning_rate": 0.0002284570018864678, "loss": 0.0705, "num_input_tokens_seen": 188957472, "step": 87595 }, { "epoch": 14.290375203915172, "grad_norm": 0.0028275910299271345, "learning_rate": 0.0002283972365725066, "loss": 0.024, "num_input_tokens_seen": 188967968, "step": 87600 }, { "epoch": 14.291190864600326, "grad_norm": 0.036363694816827774, "learning_rate": 0.00022833747676292027, "loss": 0.0059, "num_input_tokens_seen": 188979616, "step": 87605 }, { "epoch": 14.292006525285482, "grad_norm": 0.01819031685590744, "learning_rate": 0.00022827772245891925, "loss": 0.0069, "num_input_tokens_seen": 188989408, "step": 87610 }, { "epoch": 14.292822185970635, "grad_norm": 0.4716435670852661, "learning_rate": 0.00022821797366171531, "loss": 0.1078, "num_input_tokens_seen": 189000768, "step": 87615 }, { "epoch": 14.293637846655791, "grad_norm": 0.015804624184966087, "learning_rate": 0.00022815823037251849, "loss": 0.006, "num_input_tokens_seen": 189012608, "step": 87620 }, { "epoch": 14.294453507340947, "grad_norm": 0.14859530329704285, "learning_rate": 0.00022809849259254034, "loss": 0.0134, "num_input_tokens_seen": 189023616, "step": 87625 }, { "epoch": 14.2952691680261, "grad_norm": 0.005781870801001787, "learning_rate": 0.00022803876032299086, "loss": 0.0118, "num_input_tokens_seen": 189034752, "step": 87630 }, { "epoch": 14.296084828711257, "grad_norm": 0.0805203840136528, "learning_rate": 0.00022797903356508125, "loss": 0.0164, "num_input_tokens_seen": 189045888, "step": 87635 }, { "epoch": 14.29690048939641, "grad_norm": 0.10604111850261688, "learning_rate": 0.00022791931232002123, "loss": 0.0628, "num_input_tokens_seen": 189056736, "step": 87640 }, { "epoch": 14.297716150081566, "grad_norm": 0.0031233446206897497, "learning_rate": 0.00022785959658902188, "loss": 0.0067, "num_input_tokens_seen": 189067488, "step": 87645 }, { "epoch": 14.298531810766722, "grad_norm": 0.11174801737070084, "learning_rate": 0.00022779988637329263, "loss": 0.0103, "num_input_tokens_seen": 189077760, "step": 87650 }, { "epoch": 14.299347471451876, "grad_norm": 0.00879643764346838, "learning_rate": 0.00022774018167404442, "loss": 0.003, "num_input_tokens_seen": 189089568, "step": 87655 }, { "epoch": 14.300163132137031, "grad_norm": 0.01211919728666544, "learning_rate": 0.00022768048249248646, "loss": 0.0943, "num_input_tokens_seen": 189100480, "step": 87660 }, { "epoch": 14.300978792822185, "grad_norm": 0.23539622128009796, "learning_rate": 0.00022762078882982928, "loss": 0.011, "num_input_tokens_seen": 189111008, "step": 87665 }, { "epoch": 14.301794453507341, "grad_norm": 0.08340545743703842, "learning_rate": 0.00022756110068728204, "loss": 0.0303, "num_input_tokens_seen": 189123104, "step": 87670 }, { "epoch": 14.302610114192497, "grad_norm": 0.008662608452141285, "learning_rate": 0.00022750141806605507, "loss": 0.1066, "num_input_tokens_seen": 189133984, "step": 87675 }, { "epoch": 14.30342577487765, "grad_norm": 0.04563036561012268, "learning_rate": 0.00022744174096735715, "loss": 0.076, "num_input_tokens_seen": 189144512, "step": 87680 }, { "epoch": 14.304241435562806, "grad_norm": 0.038313381373882294, "learning_rate": 0.00022738206939239852, "loss": 0.0044, "num_input_tokens_seen": 189154592, "step": 87685 }, { "epoch": 14.30505709624796, "grad_norm": 0.0650675892829895, "learning_rate": 0.0002273224033423877, "loss": 0.0051, "num_input_tokens_seen": 189164000, "step": 87690 }, { "epoch": 14.305872756933116, "grad_norm": 0.019803591072559357, "learning_rate": 0.0002272627428185345, "loss": 0.0263, "num_input_tokens_seen": 189174688, "step": 87695 }, { "epoch": 14.30668841761827, "grad_norm": 0.1171201542019844, "learning_rate": 0.0002272030878220478, "loss": 0.0124, "num_input_tokens_seen": 189186496, "step": 87700 }, { "epoch": 14.307504078303426, "grad_norm": 0.0032381326891481876, "learning_rate": 0.0002271434383541366, "loss": 0.0046, "num_input_tokens_seen": 189196768, "step": 87705 }, { "epoch": 14.308319738988581, "grad_norm": 0.025308286771178246, "learning_rate": 0.00022708379441600975, "loss": 0.0657, "num_input_tokens_seen": 189207264, "step": 87710 }, { "epoch": 14.309135399673735, "grad_norm": 0.011834482662379742, "learning_rate": 0.000227024156008876, "loss": 0.0066, "num_input_tokens_seen": 189218272, "step": 87715 }, { "epoch": 14.309951060358891, "grad_norm": 0.012617947533726692, "learning_rate": 0.00022696452313394406, "loss": 0.0028, "num_input_tokens_seen": 189230144, "step": 87720 }, { "epoch": 14.310766721044045, "grad_norm": 0.03325992077589035, "learning_rate": 0.0002269048957924224, "loss": 0.0144, "num_input_tokens_seen": 189240544, "step": 87725 }, { "epoch": 14.3115823817292, "grad_norm": 0.0030641648918390274, "learning_rate": 0.0002268452739855195, "loss": 0.1455, "num_input_tokens_seen": 189251808, "step": 87730 }, { "epoch": 14.312398042414356, "grad_norm": 0.055653803050518036, "learning_rate": 0.00022678565771444364, "loss": 0.0515, "num_input_tokens_seen": 189262560, "step": 87735 }, { "epoch": 14.31321370309951, "grad_norm": 0.008053705096244812, "learning_rate": 0.00022672604698040306, "loss": 0.0033, "num_input_tokens_seen": 189273312, "step": 87740 }, { "epoch": 14.314029363784666, "grad_norm": 0.001545738778077066, "learning_rate": 0.00022666644178460555, "loss": 0.0107, "num_input_tokens_seen": 189283456, "step": 87745 }, { "epoch": 14.31484502446982, "grad_norm": 0.06261210888624191, "learning_rate": 0.00022660684212825978, "loss": 0.0088, "num_input_tokens_seen": 189293248, "step": 87750 }, { "epoch": 14.315660685154976, "grad_norm": 0.0051405723206698895, "learning_rate": 0.00022654724801257276, "loss": 0.015, "num_input_tokens_seen": 189302240, "step": 87755 }, { "epoch": 14.31647634584013, "grad_norm": 0.0117839640006423, "learning_rate": 0.00022648765943875305, "loss": 0.101, "num_input_tokens_seen": 189313984, "step": 87760 }, { "epoch": 14.317292006525285, "grad_norm": 0.04220317304134369, "learning_rate": 0.00022642807640800756, "loss": 0.017, "num_input_tokens_seen": 189325376, "step": 87765 }, { "epoch": 14.318107667210441, "grad_norm": 0.5303179621696472, "learning_rate": 0.0002263684989215445, "loss": 0.1985, "num_input_tokens_seen": 189335552, "step": 87770 }, { "epoch": 14.318923327895595, "grad_norm": 0.3348124623298645, "learning_rate": 0.00022630892698057055, "loss": 0.0251, "num_input_tokens_seen": 189346496, "step": 87775 }, { "epoch": 14.31973898858075, "grad_norm": 0.08374251425266266, "learning_rate": 0.00022624936058629374, "loss": 0.1194, "num_input_tokens_seen": 189354016, "step": 87780 }, { "epoch": 14.320554649265905, "grad_norm": 0.17399942874908447, "learning_rate": 0.00022618979973992054, "loss": 0.0405, "num_input_tokens_seen": 189365280, "step": 87785 }, { "epoch": 14.32137030995106, "grad_norm": 0.47285571694374084, "learning_rate": 0.00022613024444265883, "loss": 0.0161, "num_input_tokens_seen": 189375168, "step": 87790 }, { "epoch": 14.322185970636216, "grad_norm": 0.09027660638093948, "learning_rate": 0.00022607069469571473, "loss": 0.0036, "num_input_tokens_seen": 189385344, "step": 87795 }, { "epoch": 14.32300163132137, "grad_norm": 0.005185849033296108, "learning_rate": 0.00022601115050029574, "loss": 0.0063, "num_input_tokens_seen": 189396288, "step": 87800 }, { "epoch": 14.323817292006526, "grad_norm": 0.057442646473646164, "learning_rate": 0.0002259516118576083, "loss": 0.0035, "num_input_tokens_seen": 189407680, "step": 87805 }, { "epoch": 14.32463295269168, "grad_norm": 0.0046707079745829105, "learning_rate": 0.00022589207876885914, "loss": 0.0993, "num_input_tokens_seen": 189418400, "step": 87810 }, { "epoch": 14.325448613376835, "grad_norm": 0.02282445877790451, "learning_rate": 0.00022583255123525476, "loss": 0.0045, "num_input_tokens_seen": 189429728, "step": 87815 }, { "epoch": 14.326264274061991, "grad_norm": 0.03610313683748245, "learning_rate": 0.00022577302925800153, "loss": 0.006, "num_input_tokens_seen": 189441440, "step": 87820 }, { "epoch": 14.327079934747145, "grad_norm": 0.0075338794849812984, "learning_rate": 0.0002257135128383057, "loss": 0.0074, "num_input_tokens_seen": 189452864, "step": 87825 }, { "epoch": 14.3278955954323, "grad_norm": 0.12656524777412415, "learning_rate": 0.00022565400197737352, "loss": 0.0376, "num_input_tokens_seen": 189463968, "step": 87830 }, { "epoch": 14.328711256117455, "grad_norm": 0.04222884774208069, "learning_rate": 0.000225594496676411, "loss": 0.0079, "num_input_tokens_seen": 189474528, "step": 87835 }, { "epoch": 14.32952691680261, "grad_norm": 0.04598267376422882, "learning_rate": 0.0002255349969366241, "loss": 0.0144, "num_input_tokens_seen": 189484128, "step": 87840 }, { "epoch": 14.330342577487766, "grad_norm": 0.11190125346183777, "learning_rate": 0.0002254755027592187, "loss": 0.0057, "num_input_tokens_seen": 189494816, "step": 87845 }, { "epoch": 14.33115823817292, "grad_norm": 0.011955286376178265, "learning_rate": 0.00022541601414540052, "loss": 0.0044, "num_input_tokens_seen": 189505248, "step": 87850 }, { "epoch": 14.331973898858076, "grad_norm": 0.033998504281044006, "learning_rate": 0.00022535653109637512, "loss": 0.0034, "num_input_tokens_seen": 189517440, "step": 87855 }, { "epoch": 14.33278955954323, "grad_norm": 0.002024466870352626, "learning_rate": 0.000225297053613348, "loss": 0.0072, "num_input_tokens_seen": 189527136, "step": 87860 }, { "epoch": 14.333605220228385, "grad_norm": 0.07695024460554123, "learning_rate": 0.0002252375816975246, "loss": 0.0071, "num_input_tokens_seen": 189537408, "step": 87865 }, { "epoch": 14.33442088091354, "grad_norm": 0.012441478669643402, "learning_rate": 0.0002251781153501102, "loss": 0.0035, "num_input_tokens_seen": 189547424, "step": 87870 }, { "epoch": 14.335236541598695, "grad_norm": 0.03001299314200878, "learning_rate": 0.0002251186545723099, "loss": 0.006, "num_input_tokens_seen": 189557248, "step": 87875 }, { "epoch": 14.33605220228385, "grad_norm": 0.06510217487812042, "learning_rate": 0.00022505919936532877, "loss": 0.0044, "num_input_tokens_seen": 189567840, "step": 87880 }, { "epoch": 14.336867862969005, "grad_norm": 0.007860948331654072, "learning_rate": 0.00022499974973037173, "loss": 0.0389, "num_input_tokens_seen": 189577568, "step": 87885 }, { "epoch": 14.33768352365416, "grad_norm": 0.008998947218060493, "learning_rate": 0.0002249403056686435, "loss": 0.0016, "num_input_tokens_seen": 189588224, "step": 87890 }, { "epoch": 14.338499184339314, "grad_norm": 0.3982495665550232, "learning_rate": 0.0002248808671813492, "loss": 0.0196, "num_input_tokens_seen": 189599936, "step": 87895 }, { "epoch": 14.33931484502447, "grad_norm": 0.04828609898686409, "learning_rate": 0.00022482143426969282, "loss": 0.0024, "num_input_tokens_seen": 189611328, "step": 87900 }, { "epoch": 14.340130505709626, "grad_norm": 0.4207671582698822, "learning_rate": 0.00022476200693487936, "loss": 0.0041, "num_input_tokens_seen": 189621888, "step": 87905 }, { "epoch": 14.34094616639478, "grad_norm": 0.6866723895072937, "learning_rate": 0.000224702585178113, "loss": 0.0543, "num_input_tokens_seen": 189632960, "step": 87910 }, { "epoch": 14.341761827079935, "grad_norm": 0.0014469147427007556, "learning_rate": 0.00022464316900059795, "loss": 0.0118, "num_input_tokens_seen": 189643648, "step": 87915 }, { "epoch": 14.34257748776509, "grad_norm": 0.005772044882178307, "learning_rate": 0.0002245837584035384, "loss": 0.0357, "num_input_tokens_seen": 189654720, "step": 87920 }, { "epoch": 14.343393148450245, "grad_norm": 0.01860293745994568, "learning_rate": 0.00022452435338813842, "loss": 0.0818, "num_input_tokens_seen": 189665952, "step": 87925 }, { "epoch": 14.3442088091354, "grad_norm": 0.06278412789106369, "learning_rate": 0.00022446495395560186, "loss": 0.0736, "num_input_tokens_seen": 189675808, "step": 87930 }, { "epoch": 14.345024469820554, "grad_norm": 0.0401422344148159, "learning_rate": 0.00022440556010713253, "loss": 0.0084, "num_input_tokens_seen": 189686624, "step": 87935 }, { "epoch": 14.34584013050571, "grad_norm": 0.09492646902799606, "learning_rate": 0.00022434617184393418, "loss": 0.0302, "num_input_tokens_seen": 189698368, "step": 87940 }, { "epoch": 14.346655791190864, "grad_norm": 0.0031234300695359707, "learning_rate": 0.00022428678916721029, "loss": 0.005, "num_input_tokens_seen": 189708192, "step": 87945 }, { "epoch": 14.34747145187602, "grad_norm": 0.03207390382885933, "learning_rate": 0.00022422741207816444, "loss": 0.0061, "num_input_tokens_seen": 189718976, "step": 87950 }, { "epoch": 14.348287112561174, "grad_norm": 0.005674757529050112, "learning_rate": 0.00022416804057799988, "loss": 0.0057, "num_input_tokens_seen": 189728992, "step": 87955 }, { "epoch": 14.34910277324633, "grad_norm": 0.00694040535017848, "learning_rate": 0.00022410867466791996, "loss": 0.0113, "num_input_tokens_seen": 189738752, "step": 87960 }, { "epoch": 14.349918433931485, "grad_norm": 0.2576298713684082, "learning_rate": 0.00022404931434912768, "loss": 0.0106, "num_input_tokens_seen": 189748256, "step": 87965 }, { "epoch": 14.350734094616639, "grad_norm": 0.014442906714975834, "learning_rate": 0.00022398995962282615, "loss": 0.0028, "num_input_tokens_seen": 189759872, "step": 87970 }, { "epoch": 14.351549755301795, "grad_norm": 0.06339304894208908, "learning_rate": 0.00022393061049021823, "loss": 0.0149, "num_input_tokens_seen": 189770528, "step": 87975 }, { "epoch": 14.352365415986949, "grad_norm": 0.09748394787311554, "learning_rate": 0.0002238712669525067, "loss": 0.0096, "num_input_tokens_seen": 189782144, "step": 87980 }, { "epoch": 14.353181076672104, "grad_norm": 0.024698838591575623, "learning_rate": 0.0002238119290108942, "loss": 0.0048, "num_input_tokens_seen": 189793920, "step": 87985 }, { "epoch": 14.35399673735726, "grad_norm": 0.1941937357187271, "learning_rate": 0.00022375259666658338, "loss": 0.0397, "num_input_tokens_seen": 189805152, "step": 87990 }, { "epoch": 14.354812398042414, "grad_norm": 0.20996151864528656, "learning_rate": 0.0002236932699207766, "loss": 0.0759, "num_input_tokens_seen": 189816800, "step": 87995 }, { "epoch": 14.35562805872757, "grad_norm": 0.3935123085975647, "learning_rate": 0.00022363394877467625, "loss": 0.1887, "num_input_tokens_seen": 189827488, "step": 88000 }, { "epoch": 14.356443719412724, "grad_norm": 0.11779608577489853, "learning_rate": 0.0002235746332294845, "loss": 0.0115, "num_input_tokens_seen": 189838880, "step": 88005 }, { "epoch": 14.35725938009788, "grad_norm": 0.02472005784511566, "learning_rate": 0.00022351532328640335, "loss": 0.0139, "num_input_tokens_seen": 189849888, "step": 88010 }, { "epoch": 14.358075040783035, "grad_norm": 0.01833752728998661, "learning_rate": 0.0002234560189466352, "loss": 0.0143, "num_input_tokens_seen": 189862176, "step": 88015 }, { "epoch": 14.358890701468189, "grad_norm": 0.004354285541921854, "learning_rate": 0.00022339672021138136, "loss": 0.0016, "num_input_tokens_seen": 189872288, "step": 88020 }, { "epoch": 14.359706362153345, "grad_norm": 0.016737548634409904, "learning_rate": 0.00022333742708184417, "loss": 0.0504, "num_input_tokens_seen": 189882720, "step": 88025 }, { "epoch": 14.360522022838499, "grad_norm": 0.0314948707818985, "learning_rate": 0.0002232781395592247, "loss": 0.0031, "num_input_tokens_seen": 189893088, "step": 88030 }, { "epoch": 14.361337683523654, "grad_norm": 0.3482327163219452, "learning_rate": 0.00022321885764472495, "loss": 0.0135, "num_input_tokens_seen": 189903584, "step": 88035 }, { "epoch": 14.362153344208808, "grad_norm": 0.005314143840223551, "learning_rate": 0.00022315958133954612, "loss": 0.002, "num_input_tokens_seen": 189914656, "step": 88040 }, { "epoch": 14.362969004893964, "grad_norm": 0.03933553025126457, "learning_rate": 0.00022310031064488962, "loss": 0.009, "num_input_tokens_seen": 189924992, "step": 88045 }, { "epoch": 14.36378466557912, "grad_norm": 0.0027325551491230726, "learning_rate": 0.0002230410455619566, "loss": 0.0055, "num_input_tokens_seen": 189934560, "step": 88050 }, { "epoch": 14.364600326264274, "grad_norm": 0.3617590069770813, "learning_rate": 0.00022298178609194807, "loss": 0.0121, "num_input_tokens_seen": 189944672, "step": 88055 }, { "epoch": 14.36541598694943, "grad_norm": 0.16980212926864624, "learning_rate": 0.00022292253223606513, "loss": 0.0084, "num_input_tokens_seen": 189954720, "step": 88060 }, { "epoch": 14.366231647634583, "grad_norm": 0.002989273052662611, "learning_rate": 0.0002228632839955086, "loss": 0.0045, "num_input_tokens_seen": 189965120, "step": 88065 }, { "epoch": 14.367047308319739, "grad_norm": 0.010535376146435738, "learning_rate": 0.00022280404137147914, "loss": 0.0159, "num_input_tokens_seen": 189975776, "step": 88070 }, { "epoch": 14.367862969004895, "grad_norm": 0.36319345235824585, "learning_rate": 0.00022274480436517742, "loss": 0.0927, "num_input_tokens_seen": 189986016, "step": 88075 }, { "epoch": 14.368678629690049, "grad_norm": 0.31522372364997864, "learning_rate": 0.00022268557297780396, "loss": 0.0164, "num_input_tokens_seen": 189996992, "step": 88080 }, { "epoch": 14.369494290375204, "grad_norm": 0.0035693729296326637, "learning_rate": 0.00022262634721055918, "loss": 0.0018, "num_input_tokens_seen": 190007168, "step": 88085 }, { "epoch": 14.370309951060358, "grad_norm": 0.007225734181702137, "learning_rate": 0.00022256712706464338, "loss": 0.0067, "num_input_tokens_seen": 190018528, "step": 88090 }, { "epoch": 14.371125611745514, "grad_norm": 0.006687076762318611, "learning_rate": 0.0002225079125412567, "loss": 0.0015, "num_input_tokens_seen": 190029568, "step": 88095 }, { "epoch": 14.37194127243067, "grad_norm": 0.019450535997748375, "learning_rate": 0.00022244870364159912, "loss": 0.0035, "num_input_tokens_seen": 190039392, "step": 88100 }, { "epoch": 14.372756933115824, "grad_norm": 0.2843969762325287, "learning_rate": 0.00022238950036687071, "loss": 0.0038, "num_input_tokens_seen": 190048928, "step": 88105 }, { "epoch": 14.37357259380098, "grad_norm": 0.003084060037508607, "learning_rate": 0.00022233030271827126, "loss": 0.0287, "num_input_tokens_seen": 190059104, "step": 88110 }, { "epoch": 14.374388254486133, "grad_norm": 0.3434167802333832, "learning_rate": 0.0002222711106970003, "loss": 0.0626, "num_input_tokens_seen": 190070496, "step": 88115 }, { "epoch": 14.375203915171289, "grad_norm": 0.005949328187853098, "learning_rate": 0.0002222119243042579, "loss": 0.0518, "num_input_tokens_seen": 190082080, "step": 88120 }, { "epoch": 14.376019575856443, "grad_norm": 0.04750858619809151, "learning_rate": 0.00022215274354124294, "loss": 0.0355, "num_input_tokens_seen": 190093632, "step": 88125 }, { "epoch": 14.376835236541599, "grad_norm": 0.032109133899211884, "learning_rate": 0.00022209356840915552, "loss": 0.0044, "num_input_tokens_seen": 190104960, "step": 88130 }, { "epoch": 14.377650897226754, "grad_norm": 0.0008577115368098021, "learning_rate": 0.00022203439890919403, "loss": 0.1311, "num_input_tokens_seen": 190115168, "step": 88135 }, { "epoch": 14.378466557911908, "grad_norm": 0.005376284942030907, "learning_rate": 0.00022197523504255846, "loss": 0.0025, "num_input_tokens_seen": 190126784, "step": 88140 }, { "epoch": 14.379282218597064, "grad_norm": 0.15383519232273102, "learning_rate": 0.00022191607681044712, "loss": 0.0114, "num_input_tokens_seen": 190137760, "step": 88145 }, { "epoch": 14.380097879282218, "grad_norm": 0.0346897691488266, "learning_rate": 0.00022185692421405962, "loss": 0.0078, "num_input_tokens_seen": 190148032, "step": 88150 }, { "epoch": 14.380913539967374, "grad_norm": 1.4299472570419312, "learning_rate": 0.000221797777254594, "loss": 0.0317, "num_input_tokens_seen": 190157696, "step": 88155 }, { "epoch": 14.38172920065253, "grad_norm": 0.11807762831449509, "learning_rate": 0.00022173863593324971, "loss": 0.0083, "num_input_tokens_seen": 190168800, "step": 88160 }, { "epoch": 14.382544861337683, "grad_norm": 0.005379651673138142, "learning_rate": 0.00022167950025122463, "loss": 0.0043, "num_input_tokens_seen": 190178304, "step": 88165 }, { "epoch": 14.383360522022839, "grad_norm": 0.0023677758872509003, "learning_rate": 0.00022162037020971793, "loss": 0.0052, "num_input_tokens_seen": 190189856, "step": 88170 }, { "epoch": 14.384176182707993, "grad_norm": 0.6793798208236694, "learning_rate": 0.00022156124580992716, "loss": 0.1759, "num_input_tokens_seen": 190200960, "step": 88175 }, { "epoch": 14.384991843393149, "grad_norm": 0.005639829207211733, "learning_rate": 0.00022150212705305118, "loss": 0.0012, "num_input_tokens_seen": 190212832, "step": 88180 }, { "epoch": 14.385807504078304, "grad_norm": 0.012083529494702816, "learning_rate": 0.00022144301394028793, "loss": 0.0103, "num_input_tokens_seen": 190224608, "step": 88185 }, { "epoch": 14.386623164763458, "grad_norm": 0.13688413798809052, "learning_rate": 0.0002213839064728353, "loss": 0.0069, "num_input_tokens_seen": 190236704, "step": 88190 }, { "epoch": 14.387438825448614, "grad_norm": 0.019034383818507195, "learning_rate": 0.0002213248046518913, "loss": 0.0119, "num_input_tokens_seen": 190248096, "step": 88195 }, { "epoch": 14.388254486133768, "grad_norm": 0.011196671985089779, "learning_rate": 0.00022126570847865368, "loss": 0.0023, "num_input_tokens_seen": 190258816, "step": 88200 }, { "epoch": 14.389070146818923, "grad_norm": 0.01668175496160984, "learning_rate": 0.00022120661795432, "loss": 0.0123, "num_input_tokens_seen": 190269728, "step": 88205 }, { "epoch": 14.38988580750408, "grad_norm": 0.004712158814072609, "learning_rate": 0.00022114753308008795, "loss": 0.1023, "num_input_tokens_seen": 190280608, "step": 88210 }, { "epoch": 14.390701468189233, "grad_norm": 0.06231231987476349, "learning_rate": 0.00022108845385715488, "loss": 0.0084, "num_input_tokens_seen": 190291616, "step": 88215 }, { "epoch": 14.391517128874389, "grad_norm": 0.010815629735589027, "learning_rate": 0.00022102938028671816, "loss": 0.0055, "num_input_tokens_seen": 190303136, "step": 88220 }, { "epoch": 14.392332789559543, "grad_norm": 0.0070774611085653305, "learning_rate": 0.00022097031236997488, "loss": 0.0041, "num_input_tokens_seen": 190313216, "step": 88225 }, { "epoch": 14.393148450244698, "grad_norm": 0.011268883012235165, "learning_rate": 0.00022091125010812202, "loss": 0.0043, "num_input_tokens_seen": 190324480, "step": 88230 }, { "epoch": 14.393964110929852, "grad_norm": 0.007204147987067699, "learning_rate": 0.00022085219350235707, "loss": 0.0032, "num_input_tokens_seen": 190334976, "step": 88235 }, { "epoch": 14.394779771615008, "grad_norm": 0.020214706659317017, "learning_rate": 0.00022079314255387623, "loss": 0.0179, "num_input_tokens_seen": 190346560, "step": 88240 }, { "epoch": 14.395595432300164, "grad_norm": 0.0032529595773667097, "learning_rate": 0.00022073409726387688, "loss": 0.0241, "num_input_tokens_seen": 190357376, "step": 88245 }, { "epoch": 14.396411092985318, "grad_norm": 0.01209596823900938, "learning_rate": 0.000220675057633555, "loss": 0.0044, "num_input_tokens_seen": 190367584, "step": 88250 }, { "epoch": 14.397226753670473, "grad_norm": 1.6050293445587158, "learning_rate": 0.00022061602366410776, "loss": 0.0534, "num_input_tokens_seen": 190378784, "step": 88255 }, { "epoch": 14.398042414355627, "grad_norm": 0.0229205209761858, "learning_rate": 0.0002205569953567309, "loss": 0.0012, "num_input_tokens_seen": 190389792, "step": 88260 }, { "epoch": 14.398858075040783, "grad_norm": 0.10779175907373428, "learning_rate": 0.00022049797271262133, "loss": 0.0506, "num_input_tokens_seen": 190400224, "step": 88265 }, { "epoch": 14.399673735725939, "grad_norm": 0.005253085866570473, "learning_rate": 0.00022043895573297463, "loss": 0.0107, "num_input_tokens_seen": 190411520, "step": 88270 }, { "epoch": 14.400489396411093, "grad_norm": 0.04664282500743866, "learning_rate": 0.0002203799444189874, "loss": 0.0119, "num_input_tokens_seen": 190422720, "step": 88275 }, { "epoch": 14.401305057096248, "grad_norm": 0.0009344830759800971, "learning_rate": 0.00022032093877185504, "loss": 0.1833, "num_input_tokens_seen": 190432000, "step": 88280 }, { "epoch": 14.402120717781402, "grad_norm": 0.011029716581106186, "learning_rate": 0.000220261938792774, "loss": 0.1031, "num_input_tokens_seen": 190443328, "step": 88285 }, { "epoch": 14.402936378466558, "grad_norm": 0.006572442594915628, "learning_rate": 0.00022020294448293925, "loss": 0.002, "num_input_tokens_seen": 190454208, "step": 88290 }, { "epoch": 14.403752039151712, "grad_norm": 0.015860658138990402, "learning_rate": 0.00022014395584354717, "loss": 0.0137, "num_input_tokens_seen": 190465152, "step": 88295 }, { "epoch": 14.404567699836868, "grad_norm": 0.32941076159477234, "learning_rate": 0.0002200849728757925, "loss": 0.1152, "num_input_tokens_seen": 190476576, "step": 88300 }, { "epoch": 14.405383360522023, "grad_norm": 0.02048441953957081, "learning_rate": 0.00022002599558087126, "loss": 0.0474, "num_input_tokens_seen": 190487072, "step": 88305 }, { "epoch": 14.406199021207177, "grad_norm": 0.007728917524218559, "learning_rate": 0.00021996702395997807, "loss": 0.0083, "num_input_tokens_seen": 190498016, "step": 88310 }, { "epoch": 14.407014681892333, "grad_norm": 0.042253587394952774, "learning_rate": 0.00021990805801430874, "loss": 0.2349, "num_input_tokens_seen": 190509184, "step": 88315 }, { "epoch": 14.407830342577487, "grad_norm": 0.008459951728582382, "learning_rate": 0.00021984909774505756, "loss": 0.0032, "num_input_tokens_seen": 190520064, "step": 88320 }, { "epoch": 14.408646003262643, "grad_norm": 0.07834939658641815, "learning_rate": 0.00021979014315342, "loss": 0.1043, "num_input_tokens_seen": 190530944, "step": 88325 }, { "epoch": 14.409461663947798, "grad_norm": 0.024549826979637146, "learning_rate": 0.00021973119424059068, "loss": 0.0035, "num_input_tokens_seen": 190542016, "step": 88330 }, { "epoch": 14.410277324632952, "grad_norm": 0.034020375460386276, "learning_rate": 0.00021967225100776424, "loss": 0.0061, "num_input_tokens_seen": 190552064, "step": 88335 }, { "epoch": 14.411092985318108, "grad_norm": 0.12867970764636993, "learning_rate": 0.00021961331345613522, "loss": 0.0038, "num_input_tokens_seen": 190563872, "step": 88340 }, { "epoch": 14.411908646003262, "grad_norm": 0.102449931204319, "learning_rate": 0.00021955438158689818, "loss": 0.0048, "num_input_tokens_seen": 190575008, "step": 88345 }, { "epoch": 14.412724306688418, "grad_norm": 0.054215386509895325, "learning_rate": 0.00021949545540124734, "loss": 0.0051, "num_input_tokens_seen": 190584896, "step": 88350 }, { "epoch": 14.413539967373573, "grad_norm": 1.7004011869430542, "learning_rate": 0.0002194365349003769, "loss": 0.0627, "num_input_tokens_seen": 190594912, "step": 88355 }, { "epoch": 14.414355628058727, "grad_norm": 0.0039036108646541834, "learning_rate": 0.00021937762008548102, "loss": 0.002, "num_input_tokens_seen": 190604672, "step": 88360 }, { "epoch": 14.415171288743883, "grad_norm": 0.08841753751039505, "learning_rate": 0.00021931871095775364, "loss": 0.007, "num_input_tokens_seen": 190616224, "step": 88365 }, { "epoch": 14.415986949429037, "grad_norm": 0.030235229060053825, "learning_rate": 0.0002192598075183887, "loss": 0.0099, "num_input_tokens_seen": 190628000, "step": 88370 }, { "epoch": 14.416802610114193, "grad_norm": 0.8298838138580322, "learning_rate": 0.00021920090976857971, "loss": 0.0451, "num_input_tokens_seen": 190638656, "step": 88375 }, { "epoch": 14.417618270799348, "grad_norm": 0.15697695314884186, "learning_rate": 0.00021914201770952086, "loss": 0.1131, "num_input_tokens_seen": 190647232, "step": 88380 }, { "epoch": 14.418433931484502, "grad_norm": 0.9490054249763489, "learning_rate": 0.00021908313134240493, "loss": 0.0468, "num_input_tokens_seen": 190657792, "step": 88385 }, { "epoch": 14.419249592169658, "grad_norm": 0.11163915693759918, "learning_rate": 0.00021902425066842608, "loss": 0.0403, "num_input_tokens_seen": 190668992, "step": 88390 }, { "epoch": 14.420065252854812, "grad_norm": 0.0015579510945826769, "learning_rate": 0.00021896537568877688, "loss": 0.0164, "num_input_tokens_seen": 190680960, "step": 88395 }, { "epoch": 14.420880913539968, "grad_norm": 0.0018666499527171254, "learning_rate": 0.00021890650640465125, "loss": 0.0287, "num_input_tokens_seen": 190693472, "step": 88400 }, { "epoch": 14.421696574225122, "grad_norm": 0.013390128500759602, "learning_rate": 0.00021884764281724145, "loss": 0.0065, "num_input_tokens_seen": 190705248, "step": 88405 }, { "epoch": 14.422512234910277, "grad_norm": 0.30930933356285095, "learning_rate": 0.00021878878492774125, "loss": 0.0124, "num_input_tokens_seen": 190716096, "step": 88410 }, { "epoch": 14.423327895595433, "grad_norm": 0.35998010635375977, "learning_rate": 0.00021872993273734266, "loss": 0.0206, "num_input_tokens_seen": 190726752, "step": 88415 }, { "epoch": 14.424143556280587, "grad_norm": 0.014954096637666225, "learning_rate": 0.0002186710862472392, "loss": 0.0034, "num_input_tokens_seen": 190737472, "step": 88420 }, { "epoch": 14.424959216965743, "grad_norm": 0.01035772543400526, "learning_rate": 0.00021861224545862264, "loss": 0.0044, "num_input_tokens_seen": 190748256, "step": 88425 }, { "epoch": 14.425774877650896, "grad_norm": 0.10234939306974411, "learning_rate": 0.0002185534103726863, "loss": 0.0329, "num_input_tokens_seen": 190758944, "step": 88430 }, { "epoch": 14.426590538336052, "grad_norm": 0.004187151789665222, "learning_rate": 0.00021849458099062175, "loss": 0.0017, "num_input_tokens_seen": 190770016, "step": 88435 }, { "epoch": 14.427406199021208, "grad_norm": 0.008185651153326035, "learning_rate": 0.00021843575731362187, "loss": 0.0048, "num_input_tokens_seen": 190781152, "step": 88440 }, { "epoch": 14.428221859706362, "grad_norm": 0.04953635483980179, "learning_rate": 0.0002183769393428785, "loss": 0.0038, "num_input_tokens_seen": 190791776, "step": 88445 }, { "epoch": 14.429037520391518, "grad_norm": 0.007335507310926914, "learning_rate": 0.00021831812707958376, "loss": 0.036, "num_input_tokens_seen": 190802176, "step": 88450 }, { "epoch": 14.429853181076671, "grad_norm": 0.009930635802447796, "learning_rate": 0.00021825932052492946, "loss": 0.128, "num_input_tokens_seen": 190814784, "step": 88455 }, { "epoch": 14.430668841761827, "grad_norm": 0.01763264276087284, "learning_rate": 0.0002182005196801075, "loss": 0.0014, "num_input_tokens_seen": 190825728, "step": 88460 }, { "epoch": 14.431484502446983, "grad_norm": 0.01920587196946144, "learning_rate": 0.0002181417245463095, "loss": 0.1023, "num_input_tokens_seen": 190836160, "step": 88465 }, { "epoch": 14.432300163132137, "grad_norm": 0.30706244707107544, "learning_rate": 0.00021808293512472698, "loss": 0.009, "num_input_tokens_seen": 190846880, "step": 88470 }, { "epoch": 14.433115823817293, "grad_norm": 0.013469132594764233, "learning_rate": 0.0002180241514165514, "loss": 0.008, "num_input_tokens_seen": 190857888, "step": 88475 }, { "epoch": 14.433931484502446, "grad_norm": 0.0024323442485183477, "learning_rate": 0.00021796537342297413, "loss": 0.002, "num_input_tokens_seen": 190870272, "step": 88480 }, { "epoch": 14.434747145187602, "grad_norm": 0.005847222171723843, "learning_rate": 0.00021790660114518633, "loss": 0.0034, "num_input_tokens_seen": 190881024, "step": 88485 }, { "epoch": 14.435562805872756, "grad_norm": 0.00454943859949708, "learning_rate": 0.0002178478345843792, "loss": 0.0096, "num_input_tokens_seen": 190888992, "step": 88490 }, { "epoch": 14.436378466557912, "grad_norm": 0.0023710941895842552, "learning_rate": 0.00021778907374174356, "loss": 0.0073, "num_input_tokens_seen": 190899648, "step": 88495 }, { "epoch": 14.437194127243067, "grad_norm": 0.011621500365436077, "learning_rate": 0.00021773031861847036, "loss": 0.0021, "num_input_tokens_seen": 190909856, "step": 88500 }, { "epoch": 14.438009787928221, "grad_norm": 0.04551706463098526, "learning_rate": 0.0002176715692157503, "loss": 0.0108, "num_input_tokens_seen": 190921696, "step": 88505 }, { "epoch": 14.438825448613377, "grad_norm": 0.01904352381825447, "learning_rate": 0.00021761282553477412, "loss": 0.0027, "num_input_tokens_seen": 190931616, "step": 88510 }, { "epoch": 14.439641109298531, "grad_norm": 0.027790287509560585, "learning_rate": 0.00021755408757673228, "loss": 0.0019, "num_input_tokens_seen": 190942688, "step": 88515 }, { "epoch": 14.440456769983687, "grad_norm": 1.0678861141204834, "learning_rate": 0.00021749535534281488, "loss": 0.0336, "num_input_tokens_seen": 190954592, "step": 88520 }, { "epoch": 14.441272430668842, "grad_norm": 0.0015039030695334077, "learning_rate": 0.00021743662883421294, "loss": 0.0044, "num_input_tokens_seen": 190965280, "step": 88525 }, { "epoch": 14.442088091353996, "grad_norm": 0.004158501513302326, "learning_rate": 0.00021737790805211578, "loss": 0.0237, "num_input_tokens_seen": 190975648, "step": 88530 }, { "epoch": 14.442903752039152, "grad_norm": 0.013511579483747482, "learning_rate": 0.00021731919299771424, "loss": 0.0046, "num_input_tokens_seen": 190987744, "step": 88535 }, { "epoch": 14.443719412724306, "grad_norm": 0.0024539681617170572, "learning_rate": 0.00021726048367219747, "loss": 0.0038, "num_input_tokens_seen": 190999392, "step": 88540 }, { "epoch": 14.444535073409462, "grad_norm": 0.001360918628051877, "learning_rate": 0.00021720178007675583, "loss": 0.0014, "num_input_tokens_seen": 191009888, "step": 88545 }, { "epoch": 14.445350734094617, "grad_norm": 0.014948241412639618, "learning_rate": 0.00021714308221257889, "loss": 0.0055, "num_input_tokens_seen": 191021824, "step": 88550 }, { "epoch": 14.446166394779771, "grad_norm": 0.008832264691591263, "learning_rate": 0.00021708439008085624, "loss": 0.0026, "num_input_tokens_seen": 191033504, "step": 88555 }, { "epoch": 14.446982055464927, "grad_norm": 0.006496297661215067, "learning_rate": 0.0002170257036827773, "loss": 0.0084, "num_input_tokens_seen": 191043520, "step": 88560 }, { "epoch": 14.447797716150081, "grad_norm": 1.3829180002212524, "learning_rate": 0.00021696702301953147, "loss": 0.061, "num_input_tokens_seen": 191054112, "step": 88565 }, { "epoch": 14.448613376835237, "grad_norm": 0.03617605194449425, "learning_rate": 0.00021690834809230797, "loss": 0.0053, "num_input_tokens_seen": 191064256, "step": 88570 }, { "epoch": 14.449429037520392, "grad_norm": 0.009446823038160801, "learning_rate": 0.00021684967890229595, "loss": 0.0113, "num_input_tokens_seen": 191074528, "step": 88575 }, { "epoch": 14.450244698205546, "grad_norm": 0.07070329040288925, "learning_rate": 0.00021679101545068436, "loss": 0.0781, "num_input_tokens_seen": 191083904, "step": 88580 }, { "epoch": 14.451060358890702, "grad_norm": 0.1569625437259674, "learning_rate": 0.00021673235773866212, "loss": 0.0076, "num_input_tokens_seen": 191093760, "step": 88585 }, { "epoch": 14.451876019575856, "grad_norm": 0.004268985241651535, "learning_rate": 0.00021667370576741802, "loss": 0.0038, "num_input_tokens_seen": 191104320, "step": 88590 }, { "epoch": 14.452691680261012, "grad_norm": 0.02708180993795395, "learning_rate": 0.00021661505953814064, "loss": 0.0062, "num_input_tokens_seen": 191114336, "step": 88595 }, { "epoch": 14.453507340946166, "grad_norm": 0.5045461058616638, "learning_rate": 0.0002165564190520186, "loss": 0.0246, "num_input_tokens_seen": 191124768, "step": 88600 }, { "epoch": 14.454323001631321, "grad_norm": 0.018335120752453804, "learning_rate": 0.00021649778431024035, "loss": 0.0314, "num_input_tokens_seen": 191135584, "step": 88605 }, { "epoch": 14.455138662316477, "grad_norm": 0.003731632838025689, "learning_rate": 0.0002164391553139941, "loss": 0.0101, "num_input_tokens_seen": 191145568, "step": 88610 }, { "epoch": 14.455954323001631, "grad_norm": 0.007366247475147247, "learning_rate": 0.00021638053206446813, "loss": 0.0545, "num_input_tokens_seen": 191156128, "step": 88615 }, { "epoch": 14.456769983686787, "grad_norm": 0.025735680013895035, "learning_rate": 0.00021632191456285045, "loss": 0.0039, "num_input_tokens_seen": 191166496, "step": 88620 }, { "epoch": 14.45758564437194, "grad_norm": 0.17444458603858948, "learning_rate": 0.00021626330281032902, "loss": 0.038, "num_input_tokens_seen": 191177056, "step": 88625 }, { "epoch": 14.458401305057096, "grad_norm": 0.12297383695840836, "learning_rate": 0.00021620469680809173, "loss": 0.0057, "num_input_tokens_seen": 191186528, "step": 88630 }, { "epoch": 14.459216965742252, "grad_norm": 0.03435764089226723, "learning_rate": 0.0002161460965573263, "loss": 0.0147, "num_input_tokens_seen": 191196800, "step": 88635 }, { "epoch": 14.460032626427406, "grad_norm": 0.010668028146028519, "learning_rate": 0.0002160875020592203, "loss": 0.0071, "num_input_tokens_seen": 191207776, "step": 88640 }, { "epoch": 14.460848287112562, "grad_norm": 0.003553638467565179, "learning_rate": 0.00021602891331496123, "loss": 0.003, "num_input_tokens_seen": 191219360, "step": 88645 }, { "epoch": 14.461663947797716, "grad_norm": 0.0175008662045002, "learning_rate": 0.0002159703303257363, "loss": 0.01, "num_input_tokens_seen": 191230720, "step": 88650 }, { "epoch": 14.462479608482871, "grad_norm": 0.2688143849372864, "learning_rate": 0.00021591175309273314, "loss": 0.0127, "num_input_tokens_seen": 191241184, "step": 88655 }, { "epoch": 14.463295269168025, "grad_norm": 0.023036817088723183, "learning_rate": 0.00021585318161713868, "loss": 0.0072, "num_input_tokens_seen": 191252320, "step": 88660 }, { "epoch": 14.464110929853181, "grad_norm": 0.014059700071811676, "learning_rate": 0.00021579461590013994, "loss": 0.0905, "num_input_tokens_seen": 191262144, "step": 88665 }, { "epoch": 14.464926590538337, "grad_norm": 0.5681215524673462, "learning_rate": 0.0002157360559429239, "loss": 0.0334, "num_input_tokens_seen": 191273376, "step": 88670 }, { "epoch": 14.46574225122349, "grad_norm": 0.005725328344851732, "learning_rate": 0.00021567750174667722, "loss": 0.0025, "num_input_tokens_seen": 191284640, "step": 88675 }, { "epoch": 14.466557911908646, "grad_norm": 0.9656330347061157, "learning_rate": 0.00021561895331258674, "loss": 0.2398, "num_input_tokens_seen": 191294560, "step": 88680 }, { "epoch": 14.4673735725938, "grad_norm": 0.03955146297812462, "learning_rate": 0.0002155604106418389, "loss": 0.0041, "num_input_tokens_seen": 191305472, "step": 88685 }, { "epoch": 14.468189233278956, "grad_norm": 0.006298612803220749, "learning_rate": 0.00021550187373562015, "loss": 0.0033, "num_input_tokens_seen": 191316480, "step": 88690 }, { "epoch": 14.469004893964112, "grad_norm": 0.006513848900794983, "learning_rate": 0.00021544334259511688, "loss": 0.0023, "num_input_tokens_seen": 191326752, "step": 88695 }, { "epoch": 14.469820554649266, "grad_norm": 0.06492870301008224, "learning_rate": 0.0002153848172215152, "loss": 0.0031, "num_input_tokens_seen": 191338368, "step": 88700 }, { "epoch": 14.470636215334421, "grad_norm": 0.0024135466665029526, "learning_rate": 0.00021532629761600132, "loss": 0.1371, "num_input_tokens_seen": 191348832, "step": 88705 }, { "epoch": 14.471451876019575, "grad_norm": 0.01875806227326393, "learning_rate": 0.00021526778377976114, "loss": 0.0666, "num_input_tokens_seen": 191359200, "step": 88710 }, { "epoch": 14.47226753670473, "grad_norm": 0.03733399137854576, "learning_rate": 0.00021520927571398052, "loss": 0.0218, "num_input_tokens_seen": 191371072, "step": 88715 }, { "epoch": 14.473083197389887, "grad_norm": 0.0523112453520298, "learning_rate": 0.00021515077341984523, "loss": 0.1315, "num_input_tokens_seen": 191381248, "step": 88720 }, { "epoch": 14.47389885807504, "grad_norm": 0.006696126889437437, "learning_rate": 0.00021509227689854083, "loss": 0.0371, "num_input_tokens_seen": 191392064, "step": 88725 }, { "epoch": 14.474714518760196, "grad_norm": 0.005018499214202166, "learning_rate": 0.0002150337861512529, "loss": 0.0072, "num_input_tokens_seen": 191403072, "step": 88730 }, { "epoch": 14.47553017944535, "grad_norm": 0.014231212437152863, "learning_rate": 0.0002149753011791668, "loss": 0.0052, "num_input_tokens_seen": 191415168, "step": 88735 }, { "epoch": 14.476345840130506, "grad_norm": 0.040964819490909576, "learning_rate": 0.00021491682198346778, "loss": 0.2212, "num_input_tokens_seen": 191427456, "step": 88740 }, { "epoch": 14.477161500815662, "grad_norm": 0.006567889824509621, "learning_rate": 0.00021485834856534104, "loss": 0.0092, "num_input_tokens_seen": 191438528, "step": 88745 }, { "epoch": 14.477977161500815, "grad_norm": 0.01012183167040348, "learning_rate": 0.00021479988092597157, "loss": 0.0097, "num_input_tokens_seen": 191449568, "step": 88750 }, { "epoch": 14.478792822185971, "grad_norm": 0.016617121174931526, "learning_rate": 0.00021474141906654414, "loss": 0.005, "num_input_tokens_seen": 191461024, "step": 88755 }, { "epoch": 14.479608482871125, "grad_norm": 0.12802140414714813, "learning_rate": 0.00021468296298824413, "loss": 0.028, "num_input_tokens_seen": 191471360, "step": 88760 }, { "epoch": 14.48042414355628, "grad_norm": 0.1862056404352188, "learning_rate": 0.00021462451269225547, "loss": 0.1709, "num_input_tokens_seen": 191482752, "step": 88765 }, { "epoch": 14.481239804241435, "grad_norm": 0.009122297167778015, "learning_rate": 0.00021456606817976337, "loss": 0.0118, "num_input_tokens_seen": 191492320, "step": 88770 }, { "epoch": 14.48205546492659, "grad_norm": 0.005948106292635202, "learning_rate": 0.00021450762945195167, "loss": 0.0293, "num_input_tokens_seen": 191503744, "step": 88775 }, { "epoch": 14.482871125611746, "grad_norm": 0.11526896804571152, "learning_rate": 0.00021444919651000544, "loss": 0.0329, "num_input_tokens_seen": 191514688, "step": 88780 }, { "epoch": 14.4836867862969, "grad_norm": 0.0012611837591975927, "learning_rate": 0.0002143907693551081, "loss": 0.071, "num_input_tokens_seen": 191525952, "step": 88785 }, { "epoch": 14.484502446982056, "grad_norm": 0.09046098589897156, "learning_rate": 0.00021433234798844448, "loss": 0.0055, "num_input_tokens_seen": 191536704, "step": 88790 }, { "epoch": 14.48531810766721, "grad_norm": 0.007428077049553394, "learning_rate": 0.00021427393241119785, "loss": 0.0155, "num_input_tokens_seen": 191547872, "step": 88795 }, { "epoch": 14.486133768352365, "grad_norm": 0.013973820023238659, "learning_rate": 0.00021421552262455268, "loss": 0.1227, "num_input_tokens_seen": 191557888, "step": 88800 }, { "epoch": 14.486949429037521, "grad_norm": 0.0017807598924264312, "learning_rate": 0.00021415711862969244, "loss": 0.0617, "num_input_tokens_seen": 191568704, "step": 88805 }, { "epoch": 14.487765089722675, "grad_norm": 0.003798752324655652, "learning_rate": 0.00021409872042780083, "loss": 0.1248, "num_input_tokens_seen": 191578816, "step": 88810 }, { "epoch": 14.48858075040783, "grad_norm": 0.01079730037599802, "learning_rate": 0.00021404032802006134, "loss": 0.006, "num_input_tokens_seen": 191590112, "step": 88815 }, { "epoch": 14.489396411092985, "grad_norm": 0.010289848782122135, "learning_rate": 0.00021398194140765736, "loss": 0.0335, "num_input_tokens_seen": 191601184, "step": 88820 }, { "epoch": 14.49021207177814, "grad_norm": 0.007110270205885172, "learning_rate": 0.0002139235605917722, "loss": 0.0049, "num_input_tokens_seen": 191611776, "step": 88825 }, { "epoch": 14.491027732463296, "grad_norm": 0.5567957162857056, "learning_rate": 0.00021386518557358898, "loss": 0.0247, "num_input_tokens_seen": 191622272, "step": 88830 }, { "epoch": 14.49184339314845, "grad_norm": 0.020504044368863106, "learning_rate": 0.00021380681635429079, "loss": 0.02, "num_input_tokens_seen": 191630848, "step": 88835 }, { "epoch": 14.492659053833606, "grad_norm": 0.003213174408301711, "learning_rate": 0.00021374845293506046, "loss": 0.0279, "num_input_tokens_seen": 191642272, "step": 88840 }, { "epoch": 14.49347471451876, "grad_norm": 0.0054749976843595505, "learning_rate": 0.00021369009531708094, "loss": 0.0106, "num_input_tokens_seen": 191654144, "step": 88845 }, { "epoch": 14.494290375203915, "grad_norm": 0.1738140881061554, "learning_rate": 0.0002136317435015348, "loss": 0.0114, "num_input_tokens_seen": 191665312, "step": 88850 }, { "epoch": 14.49510603588907, "grad_norm": 0.01573525182902813, "learning_rate": 0.0002135733974896047, "loss": 0.1297, "num_input_tokens_seen": 191677248, "step": 88855 }, { "epoch": 14.495921696574225, "grad_norm": 0.012372500263154507, "learning_rate": 0.00021351505728247282, "loss": 0.0625, "num_input_tokens_seen": 191687936, "step": 88860 }, { "epoch": 14.49673735725938, "grad_norm": 0.001263459213078022, "learning_rate": 0.00021345672288132218, "loss": 0.0024, "num_input_tokens_seen": 191697920, "step": 88865 }, { "epoch": 14.497553017944535, "grad_norm": 0.01859821379184723, "learning_rate": 0.00021339839428733415, "loss": 0.0815, "num_input_tokens_seen": 191709344, "step": 88870 }, { "epoch": 14.49836867862969, "grad_norm": 0.038047756999731064, "learning_rate": 0.0002133400715016916, "loss": 0.0222, "num_input_tokens_seen": 191720192, "step": 88875 }, { "epoch": 14.499184339314844, "grad_norm": 0.2235417366027832, "learning_rate": 0.0002132817545255758, "loss": 0.0624, "num_input_tokens_seen": 191730240, "step": 88880 }, { "epoch": 14.5, "grad_norm": 0.026106612756848335, "learning_rate": 0.0002132234433601693, "loss": 0.0035, "num_input_tokens_seen": 191742688, "step": 88885 }, { "epoch": 14.500815660685156, "grad_norm": 0.004831301514059305, "learning_rate": 0.00021316513800665322, "loss": 0.0824, "num_input_tokens_seen": 191752896, "step": 88890 }, { "epoch": 14.50163132137031, "grad_norm": 0.05355004966259003, "learning_rate": 0.0002131068384662098, "loss": 0.0272, "num_input_tokens_seen": 191764160, "step": 88895 }, { "epoch": 14.502446982055465, "grad_norm": 0.006749794818460941, "learning_rate": 0.00021304854474001993, "loss": 0.0083, "num_input_tokens_seen": 191775616, "step": 88900 }, { "epoch": 14.50326264274062, "grad_norm": 0.07044735550880432, "learning_rate": 0.00021299025682926565, "loss": 0.0126, "num_input_tokens_seen": 191786336, "step": 88905 }, { "epoch": 14.504078303425775, "grad_norm": 0.01472396682947874, "learning_rate": 0.0002129319747351276, "loss": 0.0058, "num_input_tokens_seen": 191797312, "step": 88910 }, { "epoch": 14.50489396411093, "grad_norm": 0.004959443584084511, "learning_rate": 0.00021287369845878756, "loss": 0.0035, "num_input_tokens_seen": 191808800, "step": 88915 }, { "epoch": 14.505709624796085, "grad_norm": 0.06946083158254623, "learning_rate": 0.00021281542800142595, "loss": 0.0073, "num_input_tokens_seen": 191819936, "step": 88920 }, { "epoch": 14.50652528548124, "grad_norm": 0.004429814871400595, "learning_rate": 0.00021275716336422435, "loss": 0.0047, "num_input_tokens_seen": 191831008, "step": 88925 }, { "epoch": 14.507340946166394, "grad_norm": 0.016773466020822525, "learning_rate": 0.00021269890454836288, "loss": 0.0036, "num_input_tokens_seen": 191842048, "step": 88930 }, { "epoch": 14.50815660685155, "grad_norm": 0.33335718512535095, "learning_rate": 0.00021264065155502293, "loss": 0.0077, "num_input_tokens_seen": 191854144, "step": 88935 }, { "epoch": 14.508972267536706, "grad_norm": 0.014766393229365349, "learning_rate": 0.00021258240438538434, "loss": 0.011, "num_input_tokens_seen": 191863744, "step": 88940 }, { "epoch": 14.50978792822186, "grad_norm": 0.2571893632411957, "learning_rate": 0.0002125241630406281, "loss": 0.0851, "num_input_tokens_seen": 191874944, "step": 88945 }, { "epoch": 14.510603588907015, "grad_norm": 0.013651500456035137, "learning_rate": 0.00021246592752193445, "loss": 0.0334, "num_input_tokens_seen": 191886144, "step": 88950 }, { "epoch": 14.51141924959217, "grad_norm": 0.19616489112377167, "learning_rate": 0.00021240769783048352, "loss": 0.0728, "num_input_tokens_seen": 191896160, "step": 88955 }, { "epoch": 14.512234910277325, "grad_norm": 1.0779352188110352, "learning_rate": 0.00021234947396745542, "loss": 0.2369, "num_input_tokens_seen": 191907008, "step": 88960 }, { "epoch": 14.513050570962479, "grad_norm": 1.0987437963485718, "learning_rate": 0.00021229125593403016, "loss": 0.057, "num_input_tokens_seen": 191917088, "step": 88965 }, { "epoch": 14.513866231647635, "grad_norm": 0.028295524418354034, "learning_rate": 0.00021223304373138753, "loss": 0.0045, "num_input_tokens_seen": 191928128, "step": 88970 }, { "epoch": 14.51468189233279, "grad_norm": 0.09227123856544495, "learning_rate": 0.00021217483736070736, "loss": 0.008, "num_input_tokens_seen": 191938336, "step": 88975 }, { "epoch": 14.515497553017944, "grad_norm": 0.029156019911170006, "learning_rate": 0.00021211663682316922, "loss": 0.0328, "num_input_tokens_seen": 191948064, "step": 88980 }, { "epoch": 14.5163132137031, "grad_norm": 0.031882692128419876, "learning_rate": 0.00021205844211995268, "loss": 0.0503, "num_input_tokens_seen": 191957888, "step": 88985 }, { "epoch": 14.517128874388254, "grad_norm": 0.08354411274194717, "learning_rate": 0.0002120002532522371, "loss": 0.005, "num_input_tokens_seen": 191968608, "step": 88990 }, { "epoch": 14.51794453507341, "grad_norm": 0.2072557955980301, "learning_rate": 0.00021194207022120153, "loss": 0.0185, "num_input_tokens_seen": 191979904, "step": 88995 }, { "epoch": 14.518760195758565, "grad_norm": 0.02147253043949604, "learning_rate": 0.0002118838930280257, "loss": 0.0066, "num_input_tokens_seen": 191990624, "step": 89000 }, { "epoch": 14.51957585644372, "grad_norm": 0.012323420494794846, "learning_rate": 0.00021182572167388792, "loss": 0.0045, "num_input_tokens_seen": 192001824, "step": 89005 }, { "epoch": 14.520391517128875, "grad_norm": 0.010625148192048073, "learning_rate": 0.00021176755615996785, "loss": 0.0083, "num_input_tokens_seen": 192012928, "step": 89010 }, { "epoch": 14.521207177814029, "grad_norm": 0.007529766298830509, "learning_rate": 0.00021170939648744346, "loss": 0.0172, "num_input_tokens_seen": 192022720, "step": 89015 }, { "epoch": 14.522022838499185, "grad_norm": 0.0022195475175976753, "learning_rate": 0.00021165124265749431, "loss": 0.0107, "num_input_tokens_seen": 192033824, "step": 89020 }, { "epoch": 14.522838499184338, "grad_norm": 0.00680540781468153, "learning_rate": 0.00021159309467129816, "loss": 0.0605, "num_input_tokens_seen": 192045312, "step": 89025 }, { "epoch": 14.523654159869494, "grad_norm": 0.013945330865681171, "learning_rate": 0.0002115349525300342, "loss": 0.0943, "num_input_tokens_seen": 192055648, "step": 89030 }, { "epoch": 14.52446982055465, "grad_norm": 0.00402448046952486, "learning_rate": 0.00021147681623487997, "loss": 0.0484, "num_input_tokens_seen": 192066208, "step": 89035 }, { "epoch": 14.525285481239804, "grad_norm": 0.049788281321525574, "learning_rate": 0.0002114186857870144, "loss": 0.0123, "num_input_tokens_seen": 192077344, "step": 89040 }, { "epoch": 14.52610114192496, "grad_norm": 0.0428604930639267, "learning_rate": 0.00021136056118761494, "loss": 0.0026, "num_input_tokens_seen": 192088224, "step": 89045 }, { "epoch": 14.526916802610113, "grad_norm": 0.0035907719284296036, "learning_rate": 0.00021130244243786024, "loss": 0.1119, "num_input_tokens_seen": 192098304, "step": 89050 }, { "epoch": 14.52773246329527, "grad_norm": 0.311904639005661, "learning_rate": 0.00021124432953892742, "loss": 0.1228, "num_input_tokens_seen": 192109024, "step": 89055 }, { "epoch": 14.528548123980425, "grad_norm": 0.016046972945332527, "learning_rate": 0.00021118622249199494, "loss": 0.0072, "num_input_tokens_seen": 192120064, "step": 89060 }, { "epoch": 14.529363784665579, "grad_norm": 0.010494503192603588, "learning_rate": 0.00021112812129823967, "loss": 0.0101, "num_input_tokens_seen": 192130272, "step": 89065 }, { "epoch": 14.530179445350734, "grad_norm": 0.008433211594820023, "learning_rate": 0.00021107002595883978, "loss": 0.0537, "num_input_tokens_seen": 192140128, "step": 89070 }, { "epoch": 14.530995106035888, "grad_norm": 0.021653709933161736, "learning_rate": 0.00021101193647497208, "loss": 0.0136, "num_input_tokens_seen": 192149024, "step": 89075 }, { "epoch": 14.531810766721044, "grad_norm": 0.2722325921058655, "learning_rate": 0.00021095385284781426, "loss": 0.0191, "num_input_tokens_seen": 192160224, "step": 89080 }, { "epoch": 14.5326264274062, "grad_norm": 0.04513232409954071, "learning_rate": 0.00021089577507854324, "loss": 0.0047, "num_input_tokens_seen": 192171744, "step": 89085 }, { "epoch": 14.533442088091354, "grad_norm": 0.06678885221481323, "learning_rate": 0.00021083770316833618, "loss": 0.0054, "num_input_tokens_seen": 192182112, "step": 89090 }, { "epoch": 14.53425774877651, "grad_norm": 0.009022576734423637, "learning_rate": 0.00021077963711836983, "loss": 0.0043, "num_input_tokens_seen": 192191904, "step": 89095 }, { "epoch": 14.535073409461663, "grad_norm": 0.4102114140987396, "learning_rate": 0.00021072157692982103, "loss": 0.0929, "num_input_tokens_seen": 192202720, "step": 89100 }, { "epoch": 14.535889070146819, "grad_norm": 0.06501226127147675, "learning_rate": 0.00021066352260386644, "loss": 0.113, "num_input_tokens_seen": 192214496, "step": 89105 }, { "epoch": 14.536704730831975, "grad_norm": 0.01701521687209606, "learning_rate": 0.0002106054741416827, "loss": 0.0106, "num_input_tokens_seen": 192225088, "step": 89110 }, { "epoch": 14.537520391517129, "grad_norm": 0.03839682787656784, "learning_rate": 0.00021054743154444607, "loss": 0.0089, "num_input_tokens_seen": 192236736, "step": 89115 }, { "epoch": 14.538336052202284, "grad_norm": 2.46354603767395, "learning_rate": 0.00021048939481333297, "loss": 0.085, "num_input_tokens_seen": 192247808, "step": 89120 }, { "epoch": 14.539151712887438, "grad_norm": 0.008587910793721676, "learning_rate": 0.00021043136394951955, "loss": 0.0017, "num_input_tokens_seen": 192258240, "step": 89125 }, { "epoch": 14.539967373572594, "grad_norm": 0.06168248504400253, "learning_rate": 0.00021037333895418186, "loss": 0.0073, "num_input_tokens_seen": 192267648, "step": 89130 }, { "epoch": 14.540783034257748, "grad_norm": 0.16524586081504822, "learning_rate": 0.0002103153198284959, "loss": 0.0112, "num_input_tokens_seen": 192278176, "step": 89135 }, { "epoch": 14.541598694942904, "grad_norm": 0.007886257022619247, "learning_rate": 0.0002102573065736373, "loss": 0.0145, "num_input_tokens_seen": 192289152, "step": 89140 }, { "epoch": 14.54241435562806, "grad_norm": 0.039290644228458405, "learning_rate": 0.00021019929919078228, "loss": 0.0083, "num_input_tokens_seen": 192300096, "step": 89145 }, { "epoch": 14.543230016313213, "grad_norm": 0.00883400347083807, "learning_rate": 0.00021014129768110574, "loss": 0.0046, "num_input_tokens_seen": 192309952, "step": 89150 }, { "epoch": 14.544045676998369, "grad_norm": 0.006890626158565283, "learning_rate": 0.0002100833020457839, "loss": 0.0507, "num_input_tokens_seen": 192321792, "step": 89155 }, { "epoch": 14.544861337683523, "grad_norm": 0.034679658710956573, "learning_rate": 0.00021002531228599136, "loss": 0.0749, "num_input_tokens_seen": 192332320, "step": 89160 }, { "epoch": 14.545676998368679, "grad_norm": 0.5407046675682068, "learning_rate": 0.00020996732840290405, "loss": 0.0798, "num_input_tokens_seen": 192343136, "step": 89165 }, { "epoch": 14.546492659053834, "grad_norm": 0.02785268798470497, "learning_rate": 0.0002099093503976965, "loss": 0.0125, "num_input_tokens_seen": 192351328, "step": 89170 }, { "epoch": 14.547308319738988, "grad_norm": 0.05339758098125458, "learning_rate": 0.0002098513782715442, "loss": 0.01, "num_input_tokens_seen": 192361120, "step": 89175 }, { "epoch": 14.548123980424144, "grad_norm": 0.019465211778879166, "learning_rate": 0.00020979341202562152, "loss": 0.0075, "num_input_tokens_seen": 192370912, "step": 89180 }, { "epoch": 14.548939641109298, "grad_norm": 0.16231609880924225, "learning_rate": 0.00020973545166110368, "loss": 0.06, "num_input_tokens_seen": 192380928, "step": 89185 }, { "epoch": 14.549755301794454, "grad_norm": 0.010452073067426682, "learning_rate": 0.00020967749717916513, "loss": 0.0096, "num_input_tokens_seen": 192391904, "step": 89190 }, { "epoch": 14.550570962479608, "grad_norm": 0.03393692523241043, "learning_rate": 0.00020961954858098037, "loss": 0.018, "num_input_tokens_seen": 192403072, "step": 89195 }, { "epoch": 14.551386623164763, "grad_norm": 0.013704908080399036, "learning_rate": 0.0002095616058677239, "loss": 0.058, "num_input_tokens_seen": 192414368, "step": 89200 }, { "epoch": 14.552202283849919, "grad_norm": 0.006801190786063671, "learning_rate": 0.00020950366904056984, "loss": 0.0045, "num_input_tokens_seen": 192423616, "step": 89205 }, { "epoch": 14.553017944535073, "grad_norm": 0.10268911719322205, "learning_rate": 0.00020944573810069252, "loss": 0.0074, "num_input_tokens_seen": 192433600, "step": 89210 }, { "epoch": 14.553833605220229, "grad_norm": 0.019625717774033546, "learning_rate": 0.00020938781304926586, "loss": 0.1356, "num_input_tokens_seen": 192443968, "step": 89215 }, { "epoch": 14.554649265905383, "grad_norm": 0.05655986815690994, "learning_rate": 0.00020932989388746387, "loss": 0.0046, "num_input_tokens_seen": 192455552, "step": 89220 }, { "epoch": 14.555464926590538, "grad_norm": 0.014394225552678108, "learning_rate": 0.0002092719806164603, "loss": 0.0049, "num_input_tokens_seen": 192466624, "step": 89225 }, { "epoch": 14.556280587275694, "grad_norm": 0.012823114171624184, "learning_rate": 0.00020921407323742892, "loss": 0.0123, "num_input_tokens_seen": 192475936, "step": 89230 }, { "epoch": 14.557096247960848, "grad_norm": 0.026526452973484993, "learning_rate": 0.00020915617175154316, "loss": 0.009, "num_input_tokens_seen": 192486304, "step": 89235 }, { "epoch": 14.557911908646004, "grad_norm": 0.04471902921795845, "learning_rate": 0.00020909827615997657, "loss": 0.0099, "num_input_tokens_seen": 192496576, "step": 89240 }, { "epoch": 14.558727569331158, "grad_norm": 0.007646934129297733, "learning_rate": 0.00020904038646390246, "loss": 0.0037, "num_input_tokens_seen": 192507936, "step": 89245 }, { "epoch": 14.559543230016313, "grad_norm": 0.0048499442636966705, "learning_rate": 0.00020898250266449399, "loss": 0.0106, "num_input_tokens_seen": 192518176, "step": 89250 }, { "epoch": 14.560358890701469, "grad_norm": 0.012430887669324875, "learning_rate": 0.0002089246247629243, "loss": 0.002, "num_input_tokens_seen": 192528320, "step": 89255 }, { "epoch": 14.561174551386623, "grad_norm": 0.004969580098986626, "learning_rate": 0.00020886675276036637, "loss": 0.0057, "num_input_tokens_seen": 192539040, "step": 89260 }, { "epoch": 14.561990212071779, "grad_norm": 0.2916017472743988, "learning_rate": 0.00020880888665799304, "loss": 0.0188, "num_input_tokens_seen": 192548864, "step": 89265 }, { "epoch": 14.562805872756933, "grad_norm": 0.3823688328266144, "learning_rate": 0.00020875102645697696, "loss": 0.0815, "num_input_tokens_seen": 192560256, "step": 89270 }, { "epoch": 14.563621533442088, "grad_norm": 0.002937816083431244, "learning_rate": 0.0002086931721584908, "loss": 0.021, "num_input_tokens_seen": 192571264, "step": 89275 }, { "epoch": 14.564437194127244, "grad_norm": 0.02331402711570263, "learning_rate": 0.00020863532376370715, "loss": 0.0064, "num_input_tokens_seen": 192582368, "step": 89280 }, { "epoch": 14.565252854812398, "grad_norm": 0.06402833014726639, "learning_rate": 0.000208577481273798, "loss": 0.0068, "num_input_tokens_seen": 192592416, "step": 89285 }, { "epoch": 14.566068515497554, "grad_norm": 0.07657475769519806, "learning_rate": 0.00020851964468993612, "loss": 0.0071, "num_input_tokens_seen": 192601664, "step": 89290 }, { "epoch": 14.566884176182707, "grad_norm": 0.016126157715916634, "learning_rate": 0.00020846181401329338, "loss": 0.0032, "num_input_tokens_seen": 192613120, "step": 89295 }, { "epoch": 14.567699836867863, "grad_norm": 0.08806990832090378, "learning_rate": 0.00020840398924504188, "loss": 0.0155, "num_input_tokens_seen": 192624000, "step": 89300 }, { "epoch": 14.568515497553017, "grad_norm": 0.026961958035826683, "learning_rate": 0.0002083461703863534, "loss": 0.0058, "num_input_tokens_seen": 192634368, "step": 89305 }, { "epoch": 14.569331158238173, "grad_norm": 0.33260151743888855, "learning_rate": 0.0002082883574383998, "loss": 0.0069, "num_input_tokens_seen": 192644736, "step": 89310 }, { "epoch": 14.570146818923329, "grad_norm": 0.09995939582586288, "learning_rate": 0.00020823055040235266, "loss": 0.1132, "num_input_tokens_seen": 192655168, "step": 89315 }, { "epoch": 14.570962479608482, "grad_norm": 0.00798753835260868, "learning_rate": 0.0002081727492793836, "loss": 0.0543, "num_input_tokens_seen": 192665792, "step": 89320 }, { "epoch": 14.571778140293638, "grad_norm": 0.12000565975904465, "learning_rate": 0.00020811495407066394, "loss": 0.0077, "num_input_tokens_seen": 192677152, "step": 89325 }, { "epoch": 14.572593800978792, "grad_norm": 0.5418410897254944, "learning_rate": 0.00020805716477736508, "loss": 0.0717, "num_input_tokens_seen": 192687776, "step": 89330 }, { "epoch": 14.573409461663948, "grad_norm": 0.013238731771707535, "learning_rate": 0.00020799938140065804, "loss": 0.0073, "num_input_tokens_seen": 192698432, "step": 89335 }, { "epoch": 14.574225122349104, "grad_norm": 0.018302015960216522, "learning_rate": 0.00020794160394171403, "loss": 0.1657, "num_input_tokens_seen": 192708960, "step": 89340 }, { "epoch": 14.575040783034257, "grad_norm": 0.16383560001850128, "learning_rate": 0.00020788383240170395, "loss": 0.0101, "num_input_tokens_seen": 192720192, "step": 89345 }, { "epoch": 14.575856443719413, "grad_norm": 0.02785312570631504, "learning_rate": 0.0002078260667817985, "loss": 0.0956, "num_input_tokens_seen": 192729984, "step": 89350 }, { "epoch": 14.576672104404567, "grad_norm": 0.03999635949730873, "learning_rate": 0.0002077683070831685, "loss": 0.0133, "num_input_tokens_seen": 192740832, "step": 89355 }, { "epoch": 14.577487765089723, "grad_norm": 0.034628041088581085, "learning_rate": 0.00020771055330698446, "loss": 0.0055, "num_input_tokens_seen": 192752576, "step": 89360 }, { "epoch": 14.578303425774878, "grad_norm": 0.003350580809637904, "learning_rate": 0.0002076528054544169, "loss": 0.0143, "num_input_tokens_seen": 192764480, "step": 89365 }, { "epoch": 14.579119086460032, "grad_norm": 0.24242228269577026, "learning_rate": 0.00020759506352663605, "loss": 0.0132, "num_input_tokens_seen": 192775616, "step": 89370 }, { "epoch": 14.579934747145188, "grad_norm": 0.005835976451635361, "learning_rate": 0.0002075373275248122, "loss": 0.1307, "num_input_tokens_seen": 192787520, "step": 89375 }, { "epoch": 14.580750407830342, "grad_norm": 0.00737958587706089, "learning_rate": 0.00020747959745011542, "loss": 0.0498, "num_input_tokens_seen": 192798400, "step": 89380 }, { "epoch": 14.581566068515498, "grad_norm": 0.07595057040452957, "learning_rate": 0.0002074218733037157, "loss": 0.0053, "num_input_tokens_seen": 192811136, "step": 89385 }, { "epoch": 14.582381729200652, "grad_norm": 0.014548624865710735, "learning_rate": 0.00020736415508678285, "loss": 0.0825, "num_input_tokens_seen": 192822752, "step": 89390 }, { "epoch": 14.583197389885807, "grad_norm": 0.01153327152132988, "learning_rate": 0.0002073064428004865, "loss": 0.0082, "num_input_tokens_seen": 192833408, "step": 89395 }, { "epoch": 14.584013050570963, "grad_norm": 0.0007731911027804017, "learning_rate": 0.00020724873644599668, "loss": 0.0152, "num_input_tokens_seen": 192843296, "step": 89400 }, { "epoch": 14.584828711256117, "grad_norm": 0.05567508935928345, "learning_rate": 0.0002071910360244823, "loss": 0.0247, "num_input_tokens_seen": 192854048, "step": 89405 }, { "epoch": 14.585644371941273, "grad_norm": 0.013739659450948238, "learning_rate": 0.0002071333415371134, "loss": 0.0097, "num_input_tokens_seen": 192865024, "step": 89410 }, { "epoch": 14.586460032626427, "grad_norm": 0.003326072823256254, "learning_rate": 0.00020707565298505842, "loss": 0.0021, "num_input_tokens_seen": 192874784, "step": 89415 }, { "epoch": 14.587275693311582, "grad_norm": 0.021161677315831184, "learning_rate": 0.00020701797036948739, "loss": 0.1221, "num_input_tokens_seen": 192884800, "step": 89420 }, { "epoch": 14.588091353996738, "grad_norm": 0.35926300287246704, "learning_rate": 0.00020696029369156844, "loss": 0.0225, "num_input_tokens_seen": 192895360, "step": 89425 }, { "epoch": 14.588907014681892, "grad_norm": 0.0013130512088537216, "learning_rate": 0.0002069026229524711, "loss": 0.0031, "num_input_tokens_seen": 192906720, "step": 89430 }, { "epoch": 14.589722675367048, "grad_norm": 1.1978635787963867, "learning_rate": 0.00020684495815336392, "loss": 0.0832, "num_input_tokens_seen": 192917792, "step": 89435 }, { "epoch": 14.590538336052202, "grad_norm": 0.1822976917028427, "learning_rate": 0.00020678729929541552, "loss": 0.0097, "num_input_tokens_seen": 192928800, "step": 89440 }, { "epoch": 14.591353996737357, "grad_norm": 0.05576722323894501, "learning_rate": 0.00020672964637979453, "loss": 0.023, "num_input_tokens_seen": 192940320, "step": 89445 }, { "epoch": 14.592169657422513, "grad_norm": 0.2535172700881958, "learning_rate": 0.00020667199940766924, "loss": 0.2847, "num_input_tokens_seen": 192951680, "step": 89450 }, { "epoch": 14.592985318107667, "grad_norm": 0.03587258607149124, "learning_rate": 0.00020661435838020798, "loss": 0.0048, "num_input_tokens_seen": 192962720, "step": 89455 }, { "epoch": 14.593800978792823, "grad_norm": 0.01838524639606476, "learning_rate": 0.000206556723298579, "loss": 0.0079, "num_input_tokens_seen": 192973472, "step": 89460 }, { "epoch": 14.594616639477977, "grad_norm": 0.0028433636762201786, "learning_rate": 0.00020649909416395025, "loss": 0.1551, "num_input_tokens_seen": 192984704, "step": 89465 }, { "epoch": 14.595432300163132, "grad_norm": 0.014353753998875618, "learning_rate": 0.00020644147097748967, "loss": 0.0065, "num_input_tokens_seen": 192995456, "step": 89470 }, { "epoch": 14.596247960848288, "grad_norm": 0.002957229735329747, "learning_rate": 0.0002063838537403651, "loss": 0.0588, "num_input_tokens_seen": 193006560, "step": 89475 }, { "epoch": 14.597063621533442, "grad_norm": 0.04525954648852348, "learning_rate": 0.00020632624245374426, "loss": 0.0084, "num_input_tokens_seen": 193017952, "step": 89480 }, { "epoch": 14.597879282218598, "grad_norm": 0.0075845601968467236, "learning_rate": 0.0002062686371187946, "loss": 0.0146, "num_input_tokens_seen": 193028640, "step": 89485 }, { "epoch": 14.598694942903752, "grad_norm": 0.6519078016281128, "learning_rate": 0.00020621103773668366, "loss": 0.0327, "num_input_tokens_seen": 193040224, "step": 89490 }, { "epoch": 14.599510603588907, "grad_norm": 0.015654049813747406, "learning_rate": 0.00020615344430857874, "loss": 0.0088, "num_input_tokens_seen": 193050816, "step": 89495 }, { "epoch": 14.600326264274061, "grad_norm": 0.00894885417073965, "learning_rate": 0.00020609585683564687, "loss": 0.0199, "num_input_tokens_seen": 193060992, "step": 89500 }, { "epoch": 14.601141924959217, "grad_norm": 0.4366072416305542, "learning_rate": 0.00020603827531905566, "loss": 0.0616, "num_input_tokens_seen": 193071552, "step": 89505 }, { "epoch": 14.601957585644373, "grad_norm": 0.012084768153727055, "learning_rate": 0.00020598069975997135, "loss": 0.0099, "num_input_tokens_seen": 193081888, "step": 89510 }, { "epoch": 14.602773246329527, "grad_norm": 0.04872514307498932, "learning_rate": 0.0002059231301595615, "loss": 0.0093, "num_input_tokens_seen": 193092896, "step": 89515 }, { "epoch": 14.603588907014682, "grad_norm": 0.6540920734405518, "learning_rate": 0.00020586556651899213, "loss": 0.0223, "num_input_tokens_seen": 193104512, "step": 89520 }, { "epoch": 14.604404567699836, "grad_norm": 0.05387471616268158, "learning_rate": 0.00020580800883943058, "loss": 0.0129, "num_input_tokens_seen": 193116224, "step": 89525 }, { "epoch": 14.605220228384992, "grad_norm": 0.017093930393457413, "learning_rate": 0.00020575045712204254, "loss": 0.0039, "num_input_tokens_seen": 193127008, "step": 89530 }, { "epoch": 14.606035889070148, "grad_norm": 0.7520192265510559, "learning_rate": 0.00020569291136799512, "loss": 0.1341, "num_input_tokens_seen": 193138144, "step": 89535 }, { "epoch": 14.606851549755302, "grad_norm": 0.009976383298635483, "learning_rate": 0.00020563537157845392, "loss": 0.0061, "num_input_tokens_seen": 193149472, "step": 89540 }, { "epoch": 14.607667210440457, "grad_norm": 0.03189476206898689, "learning_rate": 0.0002055778377545856, "loss": 0.0931, "num_input_tokens_seen": 193160160, "step": 89545 }, { "epoch": 14.608482871125611, "grad_norm": 0.01205186266452074, "learning_rate": 0.0002055203098975556, "loss": 0.0506, "num_input_tokens_seen": 193172160, "step": 89550 }, { "epoch": 14.609298531810767, "grad_norm": 0.008286071009933949, "learning_rate": 0.00020546278800853048, "loss": 0.0563, "num_input_tokens_seen": 193182976, "step": 89555 }, { "epoch": 14.61011419249592, "grad_norm": 0.004727502353489399, "learning_rate": 0.00020540527208867522, "loss": 0.0808, "num_input_tokens_seen": 193194176, "step": 89560 }, { "epoch": 14.610929853181077, "grad_norm": 0.0062154787592589855, "learning_rate": 0.00020534776213915619, "loss": 0.002, "num_input_tokens_seen": 193204128, "step": 89565 }, { "epoch": 14.611745513866232, "grad_norm": 0.02934611588716507, "learning_rate": 0.00020529025816113817, "loss": 0.0029, "num_input_tokens_seen": 193214560, "step": 89570 }, { "epoch": 14.612561174551386, "grad_norm": 0.01282170694321394, "learning_rate": 0.00020523276015578713, "loss": 0.1567, "num_input_tokens_seen": 193224992, "step": 89575 }, { "epoch": 14.613376835236542, "grad_norm": 0.16255901753902435, "learning_rate": 0.0002051752681242682, "loss": 0.0174, "num_input_tokens_seen": 193235808, "step": 89580 }, { "epoch": 14.614192495921696, "grad_norm": 0.545876681804657, "learning_rate": 0.0002051177820677464, "loss": 0.1469, "num_input_tokens_seen": 193246752, "step": 89585 }, { "epoch": 14.615008156606851, "grad_norm": 0.01524296123534441, "learning_rate": 0.00020506030198738683, "loss": 0.0187, "num_input_tokens_seen": 193257920, "step": 89590 }, { "epoch": 14.615823817292007, "grad_norm": 0.01242340449243784, "learning_rate": 0.00020500282788435441, "loss": 0.0701, "num_input_tokens_seen": 193268960, "step": 89595 }, { "epoch": 14.616639477977161, "grad_norm": 0.051841650158166885, "learning_rate": 0.00020494535975981398, "loss": 0.0259, "num_input_tokens_seen": 193280512, "step": 89600 }, { "epoch": 14.617455138662317, "grad_norm": 0.022078340873122215, "learning_rate": 0.0002048878976149301, "loss": 0.0269, "num_input_tokens_seen": 193292000, "step": 89605 }, { "epoch": 14.61827079934747, "grad_norm": 0.1450110226869583, "learning_rate": 0.00020483044145086732, "loss": 0.0162, "num_input_tokens_seen": 193302720, "step": 89610 }, { "epoch": 14.619086460032626, "grad_norm": 0.0369730219244957, "learning_rate": 0.00020477299126879013, "loss": 0.0204, "num_input_tokens_seen": 193313632, "step": 89615 }, { "epoch": 14.619902120717782, "grad_norm": 0.401039183139801, "learning_rate": 0.00020471554706986273, "loss": 0.0214, "num_input_tokens_seen": 193325376, "step": 89620 }, { "epoch": 14.620717781402936, "grad_norm": 0.27654221653938293, "learning_rate": 0.00020465810885524928, "loss": 0.023, "num_input_tokens_seen": 193336896, "step": 89625 }, { "epoch": 14.621533442088092, "grad_norm": 0.04207804426550865, "learning_rate": 0.0002046006766261142, "loss": 0.013, "num_input_tokens_seen": 193347328, "step": 89630 }, { "epoch": 14.622349102773246, "grad_norm": 0.0907987654209137, "learning_rate": 0.00020454325038362083, "loss": 0.1125, "num_input_tokens_seen": 193358400, "step": 89635 }, { "epoch": 14.623164763458401, "grad_norm": 0.07568073272705078, "learning_rate": 0.00020448583012893363, "loss": 0.0077, "num_input_tokens_seen": 193369312, "step": 89640 }, { "epoch": 14.623980424143557, "grad_norm": 0.05796712636947632, "learning_rate": 0.00020442841586321565, "loss": 0.0103, "num_input_tokens_seen": 193380672, "step": 89645 }, { "epoch": 14.624796084828711, "grad_norm": 0.005107163451611996, "learning_rate": 0.0002043710075876311, "loss": 0.0089, "num_input_tokens_seen": 193390848, "step": 89650 }, { "epoch": 14.625611745513867, "grad_norm": 0.37148788571357727, "learning_rate": 0.00020431360530334282, "loss": 0.0246, "num_input_tokens_seen": 193403040, "step": 89655 }, { "epoch": 14.62642740619902, "grad_norm": 0.008168878965079784, "learning_rate": 0.0002042562090115147, "loss": 0.0026, "num_input_tokens_seen": 193414528, "step": 89660 }, { "epoch": 14.627243066884176, "grad_norm": 0.0031913979910314083, "learning_rate": 0.0002041988187133094, "loss": 0.0314, "num_input_tokens_seen": 193424416, "step": 89665 }, { "epoch": 14.62805872756933, "grad_norm": 0.05353798717260361, "learning_rate": 0.00020414143440989062, "loss": 0.012, "num_input_tokens_seen": 193433536, "step": 89670 }, { "epoch": 14.628874388254486, "grad_norm": 0.4699936509132385, "learning_rate": 0.00020408405610242063, "loss": 0.0175, "num_input_tokens_seen": 193444576, "step": 89675 }, { "epoch": 14.629690048939642, "grad_norm": 0.025739429518580437, "learning_rate": 0.000204026683792063, "loss": 0.1013, "num_input_tokens_seen": 193456096, "step": 89680 }, { "epoch": 14.630505709624796, "grad_norm": 0.007411503698676825, "learning_rate": 0.00020396931747997978, "loss": 0.1197, "num_input_tokens_seen": 193467488, "step": 89685 }, { "epoch": 14.631321370309951, "grad_norm": 0.02122335135936737, "learning_rate": 0.0002039119571673342, "loss": 0.01, "num_input_tokens_seen": 193477568, "step": 89690 }, { "epoch": 14.632137030995105, "grad_norm": 0.018387366086244583, "learning_rate": 0.00020385460285528807, "loss": 0.0354, "num_input_tokens_seen": 193488128, "step": 89695 }, { "epoch": 14.632952691680261, "grad_norm": 0.22013387084007263, "learning_rate": 0.0002037972545450044, "loss": 0.0177, "num_input_tokens_seen": 193499296, "step": 89700 }, { "epoch": 14.633768352365417, "grad_norm": 0.049581192433834076, "learning_rate": 0.0002037399122376449, "loss": 0.0095, "num_input_tokens_seen": 193510016, "step": 89705 }, { "epoch": 14.63458401305057, "grad_norm": 0.012911495752632618, "learning_rate": 0.0002036825759343721, "loss": 0.0078, "num_input_tokens_seen": 193520224, "step": 89710 }, { "epoch": 14.635399673735726, "grad_norm": 0.004513100255280733, "learning_rate": 0.0002036252456363476, "loss": 0.0152, "num_input_tokens_seen": 193531232, "step": 89715 }, { "epoch": 14.63621533442088, "grad_norm": 0.10328993946313858, "learning_rate": 0.00020356792134473356, "loss": 0.029, "num_input_tokens_seen": 193540576, "step": 89720 }, { "epoch": 14.637030995106036, "grad_norm": 0.005075286142528057, "learning_rate": 0.0002035106030606917, "loss": 0.0058, "num_input_tokens_seen": 193549920, "step": 89725 }, { "epoch": 14.63784665579119, "grad_norm": 0.014417627826333046, "learning_rate": 0.00020345329078538354, "loss": 0.0033, "num_input_tokens_seen": 193560896, "step": 89730 }, { "epoch": 14.638662316476346, "grad_norm": 0.20875027775764465, "learning_rate": 0.00020339598451997066, "loss": 0.0172, "num_input_tokens_seen": 193571648, "step": 89735 }, { "epoch": 14.639477977161501, "grad_norm": 0.012781262397766113, "learning_rate": 0.00020333868426561448, "loss": 0.021, "num_input_tokens_seen": 193582336, "step": 89740 }, { "epoch": 14.640293637846655, "grad_norm": 0.004548768978565931, "learning_rate": 0.00020328139002347612, "loss": 0.0114, "num_input_tokens_seen": 193591744, "step": 89745 }, { "epoch": 14.641109298531811, "grad_norm": 0.01563483104109764, "learning_rate": 0.00020322410179471684, "loss": 0.0288, "num_input_tokens_seen": 193603456, "step": 89750 }, { "epoch": 14.641924959216965, "grad_norm": 0.046667538583278656, "learning_rate": 0.00020316681958049758, "loss": 0.008, "num_input_tokens_seen": 193614656, "step": 89755 }, { "epoch": 14.64274061990212, "grad_norm": 0.17878325283527374, "learning_rate": 0.00020310954338197934, "loss": 0.0145, "num_input_tokens_seen": 193624256, "step": 89760 }, { "epoch": 14.643556280587276, "grad_norm": 0.004769281484186649, "learning_rate": 0.00020305227320032283, "loss": 0.0277, "num_input_tokens_seen": 193634176, "step": 89765 }, { "epoch": 14.64437194127243, "grad_norm": 0.5355260372161865, "learning_rate": 0.00020299500903668856, "loss": 0.0227, "num_input_tokens_seen": 193643744, "step": 89770 }, { "epoch": 14.645187601957586, "grad_norm": 0.0008842019597068429, "learning_rate": 0.00020293775089223748, "loss": 0.0024, "num_input_tokens_seen": 193653248, "step": 89775 }, { "epoch": 14.64600326264274, "grad_norm": 0.008595534600317478, "learning_rate": 0.00020288049876812943, "loss": 0.0098, "num_input_tokens_seen": 193662944, "step": 89780 }, { "epoch": 14.646818923327896, "grad_norm": 0.018159886822104454, "learning_rate": 0.00020282325266552536, "loss": 0.0072, "num_input_tokens_seen": 193674048, "step": 89785 }, { "epoch": 14.647634584013051, "grad_norm": 0.0032309305388480425, "learning_rate": 0.0002027660125855847, "loss": 0.0111, "num_input_tokens_seen": 193685248, "step": 89790 }, { "epoch": 14.648450244698205, "grad_norm": 0.007573968730866909, "learning_rate": 0.00020270877852946817, "loss": 0.02, "num_input_tokens_seen": 193695328, "step": 89795 }, { "epoch": 14.649265905383361, "grad_norm": 0.06463432312011719, "learning_rate": 0.0002026515504983351, "loss": 0.0516, "num_input_tokens_seen": 193705856, "step": 89800 }, { "epoch": 14.650081566068515, "grad_norm": 0.003920464310795069, "learning_rate": 0.00020259432849334592, "loss": 0.1161, "num_input_tokens_seen": 193717184, "step": 89805 }, { "epoch": 14.65089722675367, "grad_norm": 0.03884154185652733, "learning_rate": 0.00020253711251565953, "loss": 0.0036, "num_input_tokens_seen": 193727200, "step": 89810 }, { "epoch": 14.651712887438826, "grad_norm": 0.005288069136440754, "learning_rate": 0.00020247990256643634, "loss": 0.0023, "num_input_tokens_seen": 193737664, "step": 89815 }, { "epoch": 14.65252854812398, "grad_norm": 0.06579665094614029, "learning_rate": 0.000202422698646835, "loss": 0.0065, "num_input_tokens_seen": 193747968, "step": 89820 }, { "epoch": 14.653344208809136, "grad_norm": 0.009600764140486717, "learning_rate": 0.00020236550075801535, "loss": 0.005, "num_input_tokens_seen": 193757728, "step": 89825 }, { "epoch": 14.65415986949429, "grad_norm": 0.0024883823934942484, "learning_rate": 0.0002023083089011364, "loss": 0.0036, "num_input_tokens_seen": 193767968, "step": 89830 }, { "epoch": 14.654975530179446, "grad_norm": 0.020401649177074432, "learning_rate": 0.00020225112307735717, "loss": 0.0551, "num_input_tokens_seen": 193779040, "step": 89835 }, { "epoch": 14.655791190864601, "grad_norm": 0.014404309913516045, "learning_rate": 0.00020219394328783668, "loss": 0.0028, "num_input_tokens_seen": 193790144, "step": 89840 }, { "epoch": 14.656606851549755, "grad_norm": 1.1801332235336304, "learning_rate": 0.00020213676953373372, "loss": 0.0342, "num_input_tokens_seen": 193801760, "step": 89845 }, { "epoch": 14.65742251223491, "grad_norm": 0.007144493516534567, "learning_rate": 0.00020207960181620706, "loss": 0.0048, "num_input_tokens_seen": 193812864, "step": 89850 }, { "epoch": 14.658238172920065, "grad_norm": 0.024956440553069115, "learning_rate": 0.00020202244013641513, "loss": 0.1376, "num_input_tokens_seen": 193822528, "step": 89855 }, { "epoch": 14.65905383360522, "grad_norm": 1.6371476650238037, "learning_rate": 0.0002019652844955165, "loss": 0.0458, "num_input_tokens_seen": 193833056, "step": 89860 }, { "epoch": 14.659869494290374, "grad_norm": 0.03169335797429085, "learning_rate": 0.00020190813489466943, "loss": 0.0026, "num_input_tokens_seen": 193843296, "step": 89865 }, { "epoch": 14.66068515497553, "grad_norm": 0.36911025643348694, "learning_rate": 0.00020185099133503216, "loss": 0.0126, "num_input_tokens_seen": 193854432, "step": 89870 }, { "epoch": 14.661500815660686, "grad_norm": 0.12633159756660461, "learning_rate": 0.00020179385381776283, "loss": 0.0131, "num_input_tokens_seen": 193865696, "step": 89875 }, { "epoch": 14.66231647634584, "grad_norm": 0.021699754521250725, "learning_rate": 0.00020173672234401928, "loss": 0.0059, "num_input_tokens_seen": 193876800, "step": 89880 }, { "epoch": 14.663132137030995, "grad_norm": 0.023981599137187004, "learning_rate": 0.00020167959691495946, "loss": 0.004, "num_input_tokens_seen": 193887072, "step": 89885 }, { "epoch": 14.66394779771615, "grad_norm": 0.2328680008649826, "learning_rate": 0.00020162247753174105, "loss": 0.0068, "num_input_tokens_seen": 193897440, "step": 89890 }, { "epoch": 14.664763458401305, "grad_norm": 0.2832028269767761, "learning_rate": 0.00020156536419552168, "loss": 0.0153, "num_input_tokens_seen": 193908512, "step": 89895 }, { "epoch": 14.66557911908646, "grad_norm": 0.004384899977594614, "learning_rate": 0.00020150825690745883, "loss": 0.101, "num_input_tokens_seen": 193920000, "step": 89900 }, { "epoch": 14.666394779771615, "grad_norm": 0.022658390924334526, "learning_rate": 0.00020145115566870975, "loss": 0.0072, "num_input_tokens_seen": 193930144, "step": 89905 }, { "epoch": 14.66721044045677, "grad_norm": 0.2330552339553833, "learning_rate": 0.00020139406048043173, "loss": 0.0115, "num_input_tokens_seen": 193941632, "step": 89910 }, { "epoch": 14.668026101141924, "grad_norm": 0.023431379348039627, "learning_rate": 0.00020133697134378176, "loss": 0.0245, "num_input_tokens_seen": 193952896, "step": 89915 }, { "epoch": 14.66884176182708, "grad_norm": 0.009687644429504871, "learning_rate": 0.0002012798882599173, "loss": 0.0599, "num_input_tokens_seen": 193963552, "step": 89920 }, { "epoch": 14.669657422512234, "grad_norm": 0.003845750819891691, "learning_rate": 0.00020122281122999443, "loss": 0.005, "num_input_tokens_seen": 193974496, "step": 89925 }, { "epoch": 14.67047308319739, "grad_norm": 0.12190516293048859, "learning_rate": 0.00020116574025517053, "loss": 0.0764, "num_input_tokens_seen": 193986016, "step": 89930 }, { "epoch": 14.671288743882545, "grad_norm": 0.0056618074886500835, "learning_rate": 0.00020110867533660204, "loss": 0.0028, "num_input_tokens_seen": 193997216, "step": 89935 }, { "epoch": 14.6721044045677, "grad_norm": 0.0072369142435491085, "learning_rate": 0.00020105161647544534, "loss": 0.0023, "num_input_tokens_seen": 194009280, "step": 89940 }, { "epoch": 14.672920065252855, "grad_norm": 0.018302498385310173, "learning_rate": 0.00020099456367285695, "loss": 0.0276, "num_input_tokens_seen": 194020032, "step": 89945 }, { "epoch": 14.673735725938009, "grad_norm": 0.010762719437479973, "learning_rate": 0.00020093751692999302, "loss": 0.0118, "num_input_tokens_seen": 194031872, "step": 89950 }, { "epoch": 14.674551386623165, "grad_norm": 0.0026657781563699245, "learning_rate": 0.00020088047624800966, "loss": 0.0044, "num_input_tokens_seen": 194044256, "step": 89955 }, { "epoch": 14.67536704730832, "grad_norm": 0.32003533840179443, "learning_rate": 0.00020082344162806293, "loss": 0.1412, "num_input_tokens_seen": 194055392, "step": 89960 }, { "epoch": 14.676182707993474, "grad_norm": 0.008590412326157093, "learning_rate": 0.00020076641307130872, "loss": 0.0338, "num_input_tokens_seen": 194066112, "step": 89965 }, { "epoch": 14.67699836867863, "grad_norm": 0.02044883742928505, "learning_rate": 0.00020070939057890275, "loss": 0.0015, "num_input_tokens_seen": 194076704, "step": 89970 }, { "epoch": 14.677814029363784, "grad_norm": 0.022925008088350296, "learning_rate": 0.00020065237415200062, "loss": 0.0021, "num_input_tokens_seen": 194088096, "step": 89975 }, { "epoch": 14.67862969004894, "grad_norm": 0.01657666452229023, "learning_rate": 0.00020059536379175792, "loss": 0.0036, "num_input_tokens_seen": 194098208, "step": 89980 }, { "epoch": 14.679445350734095, "grad_norm": 0.0017174652311950922, "learning_rate": 0.0002005383594993299, "loss": 0.0031, "num_input_tokens_seen": 194108832, "step": 89985 }, { "epoch": 14.68026101141925, "grad_norm": 0.2198350876569748, "learning_rate": 0.00020048136127587203, "loss": 0.0093, "num_input_tokens_seen": 194119104, "step": 89990 }, { "epoch": 14.681076672104405, "grad_norm": 0.12339432537555695, "learning_rate": 0.0002004243691225393, "loss": 0.0084, "num_input_tokens_seen": 194128480, "step": 89995 }, { "epoch": 14.681892332789559, "grad_norm": 0.00945698656141758, "learning_rate": 0.00020036738304048674, "loss": 0.0132, "num_input_tokens_seen": 194138208, "step": 90000 }, { "epoch": 14.682707993474715, "grad_norm": 0.06640013307332993, "learning_rate": 0.00020031040303086932, "loss": 0.0029, "num_input_tokens_seen": 194147360, "step": 90005 }, { "epoch": 14.68352365415987, "grad_norm": 0.019629506394267082, "learning_rate": 0.00020025342909484173, "loss": 0.0675, "num_input_tokens_seen": 194156832, "step": 90010 }, { "epoch": 14.684339314845024, "grad_norm": 0.0018462417647242546, "learning_rate": 0.00020019646123355868, "loss": 0.0033, "num_input_tokens_seen": 194167680, "step": 90015 }, { "epoch": 14.68515497553018, "grad_norm": 0.00232519768178463, "learning_rate": 0.00020013949944817466, "loss": 0.0053, "num_input_tokens_seen": 194178304, "step": 90020 }, { "epoch": 14.685970636215334, "grad_norm": 0.052973367273807526, "learning_rate": 0.00020008254373984408, "loss": 0.0033, "num_input_tokens_seen": 194189280, "step": 90025 }, { "epoch": 14.68678629690049, "grad_norm": 0.0064922827295959, "learning_rate": 0.00020002559410972121, "loss": 0.0051, "num_input_tokens_seen": 194199488, "step": 90030 }, { "epoch": 14.687601957585644, "grad_norm": 0.017520800232887268, "learning_rate": 0.00019996865055896008, "loss": 0.0029, "num_input_tokens_seen": 194209856, "step": 90035 }, { "epoch": 14.6884176182708, "grad_norm": 0.0010415025753900409, "learning_rate": 0.0001999117130887152, "loss": 0.0025, "num_input_tokens_seen": 194220192, "step": 90040 }, { "epoch": 14.689233278955955, "grad_norm": 1.1958917379379272, "learning_rate": 0.00019985478170013977, "loss": 0.0539, "num_input_tokens_seen": 194230368, "step": 90045 }, { "epoch": 14.690048939641109, "grad_norm": 0.045183826237916946, "learning_rate": 0.00019979785639438836, "loss": 0.007, "num_input_tokens_seen": 194241632, "step": 90050 }, { "epoch": 14.690864600326265, "grad_norm": 0.004305718932300806, "learning_rate": 0.00019974093717261383, "loss": 0.0058, "num_input_tokens_seen": 194254080, "step": 90055 }, { "epoch": 14.691680261011419, "grad_norm": 0.01703573577105999, "learning_rate": 0.0001996840240359703, "loss": 0.0075, "num_input_tokens_seen": 194265792, "step": 90060 }, { "epoch": 14.692495921696574, "grad_norm": 0.005960729904472828, "learning_rate": 0.00019962711698561097, "loss": 0.0076, "num_input_tokens_seen": 194276352, "step": 90065 }, { "epoch": 14.69331158238173, "grad_norm": 0.004706806968897581, "learning_rate": 0.0001995702160226892, "loss": 0.0681, "num_input_tokens_seen": 194287104, "step": 90070 }, { "epoch": 14.694127243066884, "grad_norm": 0.07768066972494125, "learning_rate": 0.00019951332114835808, "loss": 0.005, "num_input_tokens_seen": 194299296, "step": 90075 }, { "epoch": 14.69494290375204, "grad_norm": 1.6062085628509521, "learning_rate": 0.00019945643236377074, "loss": 0.0477, "num_input_tokens_seen": 194308640, "step": 90080 }, { "epoch": 14.695758564437194, "grad_norm": 0.02524995617568493, "learning_rate": 0.00019939954967008005, "loss": 0.013, "num_input_tokens_seen": 194319808, "step": 90085 }, { "epoch": 14.69657422512235, "grad_norm": 0.007160089910030365, "learning_rate": 0.00019934267306843885, "loss": 0.0026, "num_input_tokens_seen": 194330336, "step": 90090 }, { "epoch": 14.697389885807503, "grad_norm": 0.004365231841802597, "learning_rate": 0.0001992858025599998, "loss": 0.0168, "num_input_tokens_seen": 194340736, "step": 90095 }, { "epoch": 14.698205546492659, "grad_norm": 0.004341513384133577, "learning_rate": 0.00019922893814591541, "loss": 0.0009, "num_input_tokens_seen": 194352096, "step": 90100 }, { "epoch": 14.699021207177815, "grad_norm": 0.011261179111897945, "learning_rate": 0.00019917207982733814, "loss": 0.0107, "num_input_tokens_seen": 194361312, "step": 90105 }, { "epoch": 14.699836867862969, "grad_norm": 0.005915646441280842, "learning_rate": 0.00019911522760542028, "loss": 0.0165, "num_input_tokens_seen": 194372736, "step": 90110 }, { "epoch": 14.700652528548124, "grad_norm": 0.675635039806366, "learning_rate": 0.0001990583814813141, "loss": 0.1088, "num_input_tokens_seen": 194383584, "step": 90115 }, { "epoch": 14.701468189233278, "grad_norm": 0.20635943114757538, "learning_rate": 0.00019900154145617157, "loss": 0.1289, "num_input_tokens_seen": 194396256, "step": 90120 }, { "epoch": 14.702283849918434, "grad_norm": 0.005825618281960487, "learning_rate": 0.00019894470753114456, "loss": 0.0991, "num_input_tokens_seen": 194407136, "step": 90125 }, { "epoch": 14.70309951060359, "grad_norm": 0.012722766026854515, "learning_rate": 0.00019888787970738508, "loss": 0.0037, "num_input_tokens_seen": 194419136, "step": 90130 }, { "epoch": 14.703915171288743, "grad_norm": 0.20933017134666443, "learning_rate": 0.00019883105798604468, "loss": 0.0102, "num_input_tokens_seen": 194430464, "step": 90135 }, { "epoch": 14.7047308319739, "grad_norm": 0.001629327773116529, "learning_rate": 0.00019877424236827473, "loss": 0.0167, "num_input_tokens_seen": 194440224, "step": 90140 }, { "epoch": 14.705546492659053, "grad_norm": 0.020982401445508003, "learning_rate": 0.00019871743285522725, "loss": 0.1085, "num_input_tokens_seen": 194451648, "step": 90145 }, { "epoch": 14.706362153344209, "grad_norm": 0.024896975606679916, "learning_rate": 0.0001986606294480529, "loss": 0.0286, "num_input_tokens_seen": 194461760, "step": 90150 }, { "epoch": 14.707177814029365, "grad_norm": 0.002071694703772664, "learning_rate": 0.00019860383214790345, "loss": 0.0495, "num_input_tokens_seen": 194472384, "step": 90155 }, { "epoch": 14.707993474714518, "grad_norm": 0.017583593726158142, "learning_rate": 0.0001985470409559294, "loss": 0.0035, "num_input_tokens_seen": 194483104, "step": 90160 }, { "epoch": 14.708809135399674, "grad_norm": 0.001124270842410624, "learning_rate": 0.00019849025587328228, "loss": 0.0222, "num_input_tokens_seen": 194494144, "step": 90165 }, { "epoch": 14.709624796084828, "grad_norm": 0.06921017915010452, "learning_rate": 0.00019843347690111235, "loss": 0.0148, "num_input_tokens_seen": 194505440, "step": 90170 }, { "epoch": 14.710440456769984, "grad_norm": 0.0057412865571677685, "learning_rate": 0.00019837670404057085, "loss": 0.0055, "num_input_tokens_seen": 194517248, "step": 90175 }, { "epoch": 14.71125611745514, "grad_norm": 0.0026943173725157976, "learning_rate": 0.00019831993729280774, "loss": 0.014, "num_input_tokens_seen": 194527456, "step": 90180 }, { "epoch": 14.712071778140293, "grad_norm": 0.01468754094094038, "learning_rate": 0.0001982631766589742, "loss": 0.002, "num_input_tokens_seen": 194538560, "step": 90185 }, { "epoch": 14.71288743882545, "grad_norm": 0.038453713059425354, "learning_rate": 0.00019820642214021979, "loss": 0.0112, "num_input_tokens_seen": 194549728, "step": 90190 }, { "epoch": 14.713703099510603, "grad_norm": 0.013780445791780949, "learning_rate": 0.00019814967373769544, "loss": 0.005, "num_input_tokens_seen": 194560288, "step": 90195 }, { "epoch": 14.714518760195759, "grad_norm": 0.08198558539152145, "learning_rate": 0.00019809293145255048, "loss": 0.0724, "num_input_tokens_seen": 194571360, "step": 90200 }, { "epoch": 14.715334420880914, "grad_norm": 0.02255728282034397, "learning_rate": 0.00019803619528593547, "loss": 0.0031, "num_input_tokens_seen": 194582304, "step": 90205 }, { "epoch": 14.716150081566068, "grad_norm": 0.0227470975369215, "learning_rate": 0.00019797946523900006, "loss": 0.017, "num_input_tokens_seen": 194593600, "step": 90210 }, { "epoch": 14.716965742251224, "grad_norm": 0.021564185619354248, "learning_rate": 0.0001979227413128939, "loss": 0.01, "num_input_tokens_seen": 194605024, "step": 90215 }, { "epoch": 14.717781402936378, "grad_norm": 0.3137671947479248, "learning_rate": 0.0001978660235087666, "loss": 0.0089, "num_input_tokens_seen": 194616064, "step": 90220 }, { "epoch": 14.718597063621534, "grad_norm": 0.01597781851887703, "learning_rate": 0.00019780931182776762, "loss": 0.0025, "num_input_tokens_seen": 194626304, "step": 90225 }, { "epoch": 14.719412724306688, "grad_norm": 1.0236071348190308, "learning_rate": 0.0001977526062710463, "loss": 0.2242, "num_input_tokens_seen": 194637888, "step": 90230 }, { "epoch": 14.720228384991843, "grad_norm": 0.7767883539199829, "learning_rate": 0.0001976959068397518, "loss": 0.1171, "num_input_tokens_seen": 194649536, "step": 90235 }, { "epoch": 14.721044045676999, "grad_norm": 0.3753204345703125, "learning_rate": 0.00019763921353503335, "loss": 0.0086, "num_input_tokens_seen": 194659232, "step": 90240 }, { "epoch": 14.721859706362153, "grad_norm": 0.46734243631362915, "learning_rate": 0.0001975825263580397, "loss": 0.1772, "num_input_tokens_seen": 194670016, "step": 90245 }, { "epoch": 14.722675367047309, "grad_norm": 0.006752827204763889, "learning_rate": 0.00019752584530991984, "loss": 0.0567, "num_input_tokens_seen": 194681408, "step": 90250 }, { "epoch": 14.723491027732463, "grad_norm": 0.04736413061618805, "learning_rate": 0.00019746917039182226, "loss": 0.0074, "num_input_tokens_seen": 194692224, "step": 90255 }, { "epoch": 14.724306688417618, "grad_norm": 0.010970334522426128, "learning_rate": 0.0001974125016048961, "loss": 0.1276, "num_input_tokens_seen": 194703104, "step": 90260 }, { "epoch": 14.725122349102774, "grad_norm": 0.006916071753948927, "learning_rate": 0.0001973558389502891, "loss": 0.0344, "num_input_tokens_seen": 194712384, "step": 90265 }, { "epoch": 14.725938009787928, "grad_norm": 0.016726814210414886, "learning_rate": 0.0001972991824291503, "loss": 0.062, "num_input_tokens_seen": 194722624, "step": 90270 }, { "epoch": 14.726753670473084, "grad_norm": 0.019452862441539764, "learning_rate": 0.00019724253204262717, "loss": 0.1552, "num_input_tokens_seen": 194733952, "step": 90275 }, { "epoch": 14.727569331158238, "grad_norm": 0.1739393025636673, "learning_rate": 0.00019718588779186864, "loss": 0.0508, "num_input_tokens_seen": 194745440, "step": 90280 }, { "epoch": 14.728384991843393, "grad_norm": 0.0021948807407170534, "learning_rate": 0.00019712924967802182, "loss": 0.019, "num_input_tokens_seen": 194755296, "step": 90285 }, { "epoch": 14.729200652528547, "grad_norm": 0.1954794079065323, "learning_rate": 0.00019707261770223532, "loss": 0.0191, "num_input_tokens_seen": 194766304, "step": 90290 }, { "epoch": 14.730016313213703, "grad_norm": 0.17645587027072906, "learning_rate": 0.00019701599186565621, "loss": 0.0655, "num_input_tokens_seen": 194777536, "step": 90295 }, { "epoch": 14.730831973898859, "grad_norm": 0.0036506582982838154, "learning_rate": 0.00019695937216943272, "loss": 0.0058, "num_input_tokens_seen": 194789248, "step": 90300 }, { "epoch": 14.731647634584013, "grad_norm": 0.10842780768871307, "learning_rate": 0.00019690275861471168, "loss": 0.0294, "num_input_tokens_seen": 194800480, "step": 90305 }, { "epoch": 14.732463295269168, "grad_norm": 0.3651350438594818, "learning_rate": 0.00019684615120264104, "loss": 0.0307, "num_input_tokens_seen": 194809664, "step": 90310 }, { "epoch": 14.733278955954322, "grad_norm": 0.9159109592437744, "learning_rate": 0.00019678954993436736, "loss": 0.0419, "num_input_tokens_seen": 194821024, "step": 90315 }, { "epoch": 14.734094616639478, "grad_norm": 0.00662341620773077, "learning_rate": 0.00019673295481103847, "loss": 0.0856, "num_input_tokens_seen": 194831936, "step": 90320 }, { "epoch": 14.734910277324634, "grad_norm": 0.016845468431711197, "learning_rate": 0.00019667636583380066, "loss": 0.0068, "num_input_tokens_seen": 194843584, "step": 90325 }, { "epoch": 14.735725938009788, "grad_norm": 0.014324668794870377, "learning_rate": 0.0001966197830038014, "loss": 0.0129, "num_input_tokens_seen": 194852256, "step": 90330 }, { "epoch": 14.736541598694943, "grad_norm": 0.010953659191727638, "learning_rate": 0.00019656320632218676, "loss": 0.0021, "num_input_tokens_seen": 194862848, "step": 90335 }, { "epoch": 14.737357259380097, "grad_norm": 0.09414136409759521, "learning_rate": 0.00019650663579010401, "loss": 0.0119, "num_input_tokens_seen": 194873184, "step": 90340 }, { "epoch": 14.738172920065253, "grad_norm": 0.07112300395965576, "learning_rate": 0.00019645007140869897, "loss": 0.0185, "num_input_tokens_seen": 194884800, "step": 90345 }, { "epoch": 14.738988580750409, "grad_norm": 0.029517194256186485, "learning_rate": 0.00019639351317911853, "loss": 0.0576, "num_input_tokens_seen": 194895616, "step": 90350 }, { "epoch": 14.739804241435563, "grad_norm": 0.025510961189866066, "learning_rate": 0.00019633696110250864, "loss": 0.0051, "num_input_tokens_seen": 194906016, "step": 90355 }, { "epoch": 14.740619902120718, "grad_norm": 0.09747328609228134, "learning_rate": 0.0001962804151800155, "loss": 0.0716, "num_input_tokens_seen": 194917184, "step": 90360 }, { "epoch": 14.741435562805872, "grad_norm": 0.0042806812562048435, "learning_rate": 0.00019622387541278497, "loss": 0.0033, "num_input_tokens_seen": 194927136, "step": 90365 }, { "epoch": 14.742251223491028, "grad_norm": 0.2515532970428467, "learning_rate": 0.000196167341801963, "loss": 0.0151, "num_input_tokens_seen": 194938144, "step": 90370 }, { "epoch": 14.743066884176184, "grad_norm": 0.0039220829494297504, "learning_rate": 0.00019611081434869532, "loss": 0.0133, "num_input_tokens_seen": 194948608, "step": 90375 }, { "epoch": 14.743882544861338, "grad_norm": 0.4433172643184662, "learning_rate": 0.00019605429305412746, "loss": 0.0264, "num_input_tokens_seen": 194960000, "step": 90380 }, { "epoch": 14.744698205546493, "grad_norm": 0.005239179823547602, "learning_rate": 0.00019599777791940497, "loss": 0.0087, "num_input_tokens_seen": 194971616, "step": 90385 }, { "epoch": 14.745513866231647, "grad_norm": 0.6410978436470032, "learning_rate": 0.00019594126894567315, "loss": 0.0295, "num_input_tokens_seen": 194981344, "step": 90390 }, { "epoch": 14.746329526916803, "grad_norm": 0.19592076539993286, "learning_rate": 0.00019588476613407725, "loss": 0.0094, "num_input_tokens_seen": 194991520, "step": 90395 }, { "epoch": 14.747145187601957, "grad_norm": 0.3171235918998718, "learning_rate": 0.00019582826948576215, "loss": 0.0169, "num_input_tokens_seen": 195002272, "step": 90400 }, { "epoch": 14.747960848287113, "grad_norm": 0.046789273619651794, "learning_rate": 0.00019577177900187342, "loss": 0.0322, "num_input_tokens_seen": 195012896, "step": 90405 }, { "epoch": 14.748776508972268, "grad_norm": 0.16493386030197144, "learning_rate": 0.0001957152946835552, "loss": 0.0672, "num_input_tokens_seen": 195024352, "step": 90410 }, { "epoch": 14.749592169657422, "grad_norm": 0.021328620612621307, "learning_rate": 0.00019565881653195284, "loss": 0.0089, "num_input_tokens_seen": 195034816, "step": 90415 }, { "epoch": 14.750407830342578, "grad_norm": 0.017898570746183395, "learning_rate": 0.00019560234454821034, "loss": 0.0092, "num_input_tokens_seen": 195044704, "step": 90420 }, { "epoch": 14.751223491027732, "grad_norm": 0.018897678703069687, "learning_rate": 0.0001955458787334728, "loss": 0.0126, "num_input_tokens_seen": 195055328, "step": 90425 }, { "epoch": 14.752039151712887, "grad_norm": 0.05925485119223595, "learning_rate": 0.00019548941908888396, "loss": 0.0822, "num_input_tokens_seen": 195065760, "step": 90430 }, { "epoch": 14.752854812398043, "grad_norm": 0.007908497005701065, "learning_rate": 0.00019543296561558865, "loss": 0.0095, "num_input_tokens_seen": 195076000, "step": 90435 }, { "epoch": 14.753670473083197, "grad_norm": 0.010396001860499382, "learning_rate": 0.0001953765183147303, "loss": 0.0036, "num_input_tokens_seen": 195086368, "step": 90440 }, { "epoch": 14.754486133768353, "grad_norm": 0.06449390202760696, "learning_rate": 0.00019532007718745366, "loss": 0.0104, "num_input_tokens_seen": 195096480, "step": 90445 }, { "epoch": 14.755301794453507, "grad_norm": 0.013377663679420948, "learning_rate": 0.00019526364223490172, "loss": 0.0419, "num_input_tokens_seen": 195106624, "step": 90450 }, { "epoch": 14.756117455138662, "grad_norm": 0.011755137704312801, "learning_rate": 0.00019520721345821907, "loss": 0.0035, "num_input_tokens_seen": 195116768, "step": 90455 }, { "epoch": 14.756933115823816, "grad_norm": 0.011851364746689796, "learning_rate": 0.00019515079085854854, "loss": 0.0061, "num_input_tokens_seen": 195126944, "step": 90460 }, { "epoch": 14.757748776508972, "grad_norm": 0.13556043803691864, "learning_rate": 0.00019509437443703415, "loss": 0.0102, "num_input_tokens_seen": 195137568, "step": 90465 }, { "epoch": 14.758564437194128, "grad_norm": 0.00919298455119133, "learning_rate": 0.00019503796419481908, "loss": 0.0031, "num_input_tokens_seen": 195148480, "step": 90470 }, { "epoch": 14.759380097879282, "grad_norm": 1.42605459690094, "learning_rate": 0.00019498156013304647, "loss": 0.0171, "num_input_tokens_seen": 195159840, "step": 90475 }, { "epoch": 14.760195758564437, "grad_norm": 0.1350853145122528, "learning_rate": 0.0001949251622528595, "loss": 0.0116, "num_input_tokens_seen": 195171456, "step": 90480 }, { "epoch": 14.761011419249591, "grad_norm": 0.012902768328785896, "learning_rate": 0.0001948687705554012, "loss": 0.0249, "num_input_tokens_seen": 195182240, "step": 90485 }, { "epoch": 14.761827079934747, "grad_norm": 0.04117109999060631, "learning_rate": 0.00019481238504181431, "loss": 0.1262, "num_input_tokens_seen": 195193632, "step": 90490 }, { "epoch": 14.762642740619903, "grad_norm": 0.00466868607327342, "learning_rate": 0.0001947560057132416, "loss": 0.0017, "num_input_tokens_seen": 195204704, "step": 90495 }, { "epoch": 14.763458401305057, "grad_norm": 0.014614344574511051, "learning_rate": 0.00019469963257082564, "loss": 0.0129, "num_input_tokens_seen": 195215264, "step": 90500 }, { "epoch": 14.764274061990212, "grad_norm": 0.4438250958919525, "learning_rate": 0.00019464326561570894, "loss": 0.0293, "num_input_tokens_seen": 195226368, "step": 90505 }, { "epoch": 14.765089722675366, "grad_norm": 0.009511164389550686, "learning_rate": 0.0001945869048490338, "loss": 0.0098, "num_input_tokens_seen": 195236064, "step": 90510 }, { "epoch": 14.765905383360522, "grad_norm": 0.00905949529260397, "learning_rate": 0.00019453055027194256, "loss": 0.0085, "num_input_tokens_seen": 195247616, "step": 90515 }, { "epoch": 14.766721044045678, "grad_norm": 0.0312429741024971, "learning_rate": 0.00019447420188557714, "loss": 0.0038, "num_input_tokens_seen": 195257632, "step": 90520 }, { "epoch": 14.767536704730832, "grad_norm": 0.14220425486564636, "learning_rate": 0.00019441785969107967, "loss": 0.0145, "num_input_tokens_seen": 195267776, "step": 90525 }, { "epoch": 14.768352365415987, "grad_norm": 0.22437895834445953, "learning_rate": 0.00019436152368959193, "loss": 0.0093, "num_input_tokens_seen": 195278176, "step": 90530 }, { "epoch": 14.769168026101141, "grad_norm": 0.006699579767882824, "learning_rate": 0.0001943051938822556, "loss": 0.0159, "num_input_tokens_seen": 195286976, "step": 90535 }, { "epoch": 14.769983686786297, "grad_norm": 0.12213800102472305, "learning_rate": 0.00019424887027021237, "loss": 0.1627, "num_input_tokens_seen": 195298048, "step": 90540 }, { "epoch": 14.770799347471453, "grad_norm": 0.011539925821125507, "learning_rate": 0.00019419255285460347, "loss": 0.0368, "num_input_tokens_seen": 195308480, "step": 90545 }, { "epoch": 14.771615008156607, "grad_norm": 0.01276391465216875, "learning_rate": 0.00019413624163657072, "loss": 0.0457, "num_input_tokens_seen": 195317792, "step": 90550 }, { "epoch": 14.772430668841762, "grad_norm": 0.051559608429670334, "learning_rate": 0.00019407993661725475, "loss": 0.087, "num_input_tokens_seen": 195329344, "step": 90555 }, { "epoch": 14.773246329526916, "grad_norm": 0.07180595397949219, "learning_rate": 0.0001940236377977973, "loss": 0.004, "num_input_tokens_seen": 195339904, "step": 90560 }, { "epoch": 14.774061990212072, "grad_norm": 0.005203750915825367, "learning_rate": 0.00019396734517933867, "loss": 0.0024, "num_input_tokens_seen": 195349728, "step": 90565 }, { "epoch": 14.774877650897226, "grad_norm": 0.005227117799222469, "learning_rate": 0.00019391105876302012, "loss": 0.0017, "num_input_tokens_seen": 195360032, "step": 90570 }, { "epoch": 14.775693311582382, "grad_norm": 0.03818028047680855, "learning_rate": 0.00019385477854998235, "loss": 0.0202, "num_input_tokens_seen": 195370560, "step": 90575 }, { "epoch": 14.776508972267537, "grad_norm": 0.02716906927525997, "learning_rate": 0.00019379850454136582, "loss": 0.0027, "num_input_tokens_seen": 195381888, "step": 90580 }, { "epoch": 14.777324632952691, "grad_norm": 0.013195335865020752, "learning_rate": 0.00019374223673831103, "loss": 0.0145, "num_input_tokens_seen": 195393728, "step": 90585 }, { "epoch": 14.778140293637847, "grad_norm": 0.01204199343919754, "learning_rate": 0.00019368597514195834, "loss": 0.0221, "num_input_tokens_seen": 195403808, "step": 90590 }, { "epoch": 14.778955954323001, "grad_norm": 0.01755082979798317, "learning_rate": 0.00019362971975344796, "loss": 0.0149, "num_input_tokens_seen": 195413312, "step": 90595 }, { "epoch": 14.779771615008157, "grad_norm": 0.5995972752571106, "learning_rate": 0.00019357347057391994, "loss": 0.1309, "num_input_tokens_seen": 195425056, "step": 90600 }, { "epoch": 14.780587275693312, "grad_norm": 0.004275030922144651, "learning_rate": 0.0001935172276045143, "loss": 0.0249, "num_input_tokens_seen": 195435904, "step": 90605 }, { "epoch": 14.781402936378466, "grad_norm": 0.8118448853492737, "learning_rate": 0.0001934609908463708, "loss": 0.105, "num_input_tokens_seen": 195446560, "step": 90610 }, { "epoch": 14.782218597063622, "grad_norm": 0.016602858901023865, "learning_rate": 0.00019340476030062925, "loss": 0.0361, "num_input_tokens_seen": 195458208, "step": 90615 }, { "epoch": 14.783034257748776, "grad_norm": 0.050970349460840225, "learning_rate": 0.00019334853596842915, "loss": 0.0042, "num_input_tokens_seen": 195469376, "step": 90620 }, { "epoch": 14.783849918433932, "grad_norm": 0.016957690939307213, "learning_rate": 0.00019329231785090994, "loss": 0.0168, "num_input_tokens_seen": 195478720, "step": 90625 }, { "epoch": 14.784665579119086, "grad_norm": 0.009569638408720493, "learning_rate": 0.0001932361059492111, "loss": 0.0028, "num_input_tokens_seen": 195489504, "step": 90630 }, { "epoch": 14.785481239804241, "grad_norm": 0.017265763133764267, "learning_rate": 0.00019317990026447164, "loss": 0.0174, "num_input_tokens_seen": 195501056, "step": 90635 }, { "epoch": 14.786296900489397, "grad_norm": 0.12623582780361176, "learning_rate": 0.00019312370079783075, "loss": 0.0075, "num_input_tokens_seen": 195512256, "step": 90640 }, { "epoch": 14.78711256117455, "grad_norm": 0.05146452412009239, "learning_rate": 0.0001930675075504274, "loss": 0.0032, "num_input_tokens_seen": 195521888, "step": 90645 }, { "epoch": 14.787928221859707, "grad_norm": 0.017342252656817436, "learning_rate": 0.00019301132052340031, "loss": 0.0029, "num_input_tokens_seen": 195532064, "step": 90650 }, { "epoch": 14.78874388254486, "grad_norm": 0.2746075689792633, "learning_rate": 0.0001929551397178883, "loss": 0.0049, "num_input_tokens_seen": 195543040, "step": 90655 }, { "epoch": 14.789559543230016, "grad_norm": 0.008066631853580475, "learning_rate": 0.00019289896513502991, "loss": 0.0021, "num_input_tokens_seen": 195552672, "step": 90660 }, { "epoch": 14.790375203915172, "grad_norm": 0.6646782159805298, "learning_rate": 0.00019284279677596355, "loss": 0.0903, "num_input_tokens_seen": 195563872, "step": 90665 }, { "epoch": 14.791190864600326, "grad_norm": 0.01542715821415186, "learning_rate": 0.0001927866346418276, "loss": 0.0067, "num_input_tokens_seen": 195574752, "step": 90670 }, { "epoch": 14.792006525285482, "grad_norm": 0.0020434511825442314, "learning_rate": 0.00019273047873376005, "loss": 0.0012, "num_input_tokens_seen": 195586272, "step": 90675 }, { "epoch": 14.792822185970635, "grad_norm": 1.0931661128997803, "learning_rate": 0.00019267432905289945, "loss": 0.0396, "num_input_tokens_seen": 195596672, "step": 90680 }, { "epoch": 14.793637846655791, "grad_norm": 0.04143368825316429, "learning_rate": 0.00019261818560038313, "loss": 0.0104, "num_input_tokens_seen": 195608096, "step": 90685 }, { "epoch": 14.794453507340947, "grad_norm": 0.07076908648014069, "learning_rate": 0.00019256204837734937, "loss": 0.0042, "num_input_tokens_seen": 195619776, "step": 90690 }, { "epoch": 14.7952691680261, "grad_norm": 0.020702052861452103, "learning_rate": 0.00019250591738493572, "loss": 0.0957, "num_input_tokens_seen": 195629184, "step": 90695 }, { "epoch": 14.796084828711257, "grad_norm": 0.4451166093349457, "learning_rate": 0.00019244979262427974, "loss": 0.0103, "num_input_tokens_seen": 195640352, "step": 90700 }, { "epoch": 14.79690048939641, "grad_norm": 0.0030880055855959654, "learning_rate": 0.00019239367409651893, "loss": 0.1318, "num_input_tokens_seen": 195652736, "step": 90705 }, { "epoch": 14.797716150081566, "grad_norm": 0.10264142602682114, "learning_rate": 0.00019233756180279043, "loss": 0.0118, "num_input_tokens_seen": 195663680, "step": 90710 }, { "epoch": 14.798531810766722, "grad_norm": 0.01740247756242752, "learning_rate": 0.00019228145574423162, "loss": 0.0061, "num_input_tokens_seen": 195674752, "step": 90715 }, { "epoch": 14.799347471451876, "grad_norm": 0.006299274042248726, "learning_rate": 0.00019222535592197944, "loss": 0.0031, "num_input_tokens_seen": 195686624, "step": 90720 }, { "epoch": 14.800163132137031, "grad_norm": 0.15909415483474731, "learning_rate": 0.00019216926233717085, "loss": 0.0047, "num_input_tokens_seen": 195697312, "step": 90725 }, { "epoch": 14.800978792822185, "grad_norm": 0.0020198356360197067, "learning_rate": 0.0001921131749909427, "loss": 0.0157, "num_input_tokens_seen": 195708160, "step": 90730 }, { "epoch": 14.801794453507341, "grad_norm": 0.05101926252245903, "learning_rate": 0.00019205709388443165, "loss": 0.0032, "num_input_tokens_seen": 195718784, "step": 90735 }, { "epoch": 14.802610114192497, "grad_norm": 1.147568941116333, "learning_rate": 0.00019200101901877426, "loss": 0.029, "num_input_tokens_seen": 195729504, "step": 90740 }, { "epoch": 14.80342577487765, "grad_norm": 0.03670395910739899, "learning_rate": 0.0001919449503951069, "loss": 0.0077, "num_input_tokens_seen": 195740416, "step": 90745 }, { "epoch": 14.804241435562806, "grad_norm": 0.08632595092058182, "learning_rate": 0.00019188888801456594, "loss": 0.0349, "num_input_tokens_seen": 195750144, "step": 90750 }, { "epoch": 14.80505709624796, "grad_norm": 0.020740436390042305, "learning_rate": 0.0001918328318782875, "loss": 0.0034, "num_input_tokens_seen": 195761696, "step": 90755 }, { "epoch": 14.805872756933116, "grad_norm": 0.006489619147032499, "learning_rate": 0.00019177678198740766, "loss": 0.0017, "num_input_tokens_seen": 195772768, "step": 90760 }, { "epoch": 14.80668841761827, "grad_norm": 0.012188761495053768, "learning_rate": 0.00019172073834306235, "loss": 0.0038, "num_input_tokens_seen": 195783040, "step": 90765 }, { "epoch": 14.807504078303426, "grad_norm": 0.7854589223861694, "learning_rate": 0.00019166470094638739, "loss": 0.0222, "num_input_tokens_seen": 195793440, "step": 90770 }, { "epoch": 14.808319738988581, "grad_norm": 0.022344106808304787, "learning_rate": 0.00019160866979851842, "loss": 0.0584, "num_input_tokens_seen": 195804352, "step": 90775 }, { "epoch": 14.809135399673735, "grad_norm": 0.022037727758288383, "learning_rate": 0.00019155264490059077, "loss": 0.0031, "num_input_tokens_seen": 195814976, "step": 90780 }, { "epoch": 14.809951060358891, "grad_norm": 0.01594049483537674, "learning_rate": 0.00019149662625374042, "loss": 0.0713, "num_input_tokens_seen": 195824864, "step": 90785 }, { "epoch": 14.810766721044045, "grad_norm": 0.0019355514086782932, "learning_rate": 0.00019144061385910195, "loss": 0.0081, "num_input_tokens_seen": 195835200, "step": 90790 }, { "epoch": 14.8115823817292, "grad_norm": 0.02822726033627987, "learning_rate": 0.00019138460771781125, "loss": 0.0024, "num_input_tokens_seen": 195845920, "step": 90795 }, { "epoch": 14.812398042414356, "grad_norm": 0.0456225760281086, "learning_rate": 0.0001913286078310026, "loss": 0.0048, "num_input_tokens_seen": 195856288, "step": 90800 }, { "epoch": 14.81321370309951, "grad_norm": 0.051594603806734085, "learning_rate": 0.00019127261419981168, "loss": 0.1961, "num_input_tokens_seen": 195865600, "step": 90805 }, { "epoch": 14.814029363784666, "grad_norm": 0.029476845636963844, "learning_rate": 0.0001912166268253725, "loss": 0.0128, "num_input_tokens_seen": 195877344, "step": 90810 }, { "epoch": 14.81484502446982, "grad_norm": 0.2860308289527893, "learning_rate": 0.0001911606457088204, "loss": 0.0377, "num_input_tokens_seen": 195888416, "step": 90815 }, { "epoch": 14.815660685154976, "grad_norm": 0.07743611186742783, "learning_rate": 0.00019110467085128936, "loss": 0.0129, "num_input_tokens_seen": 195898848, "step": 90820 }, { "epoch": 14.81647634584013, "grad_norm": 0.7124349474906921, "learning_rate": 0.00019104870225391412, "loss": 0.1721, "num_input_tokens_seen": 195909088, "step": 90825 }, { "epoch": 14.817292006525285, "grad_norm": 0.31195032596588135, "learning_rate": 0.0001909927399178289, "loss": 0.0579, "num_input_tokens_seen": 195920480, "step": 90830 }, { "epoch": 14.818107667210441, "grad_norm": 0.9214631915092468, "learning_rate": 0.0001909367838441678, "loss": 0.0586, "num_input_tokens_seen": 195930400, "step": 90835 }, { "epoch": 14.818923327895595, "grad_norm": 0.0028242513071745634, "learning_rate": 0.00019088083403406486, "loss": 0.0238, "num_input_tokens_seen": 195942208, "step": 90840 }, { "epoch": 14.81973898858075, "grad_norm": 0.020539460703730583, "learning_rate": 0.00019082489048865393, "loss": 0.0493, "num_input_tokens_seen": 195952448, "step": 90845 }, { "epoch": 14.820554649265905, "grad_norm": 0.021850887686014175, "learning_rate": 0.00019076895320906885, "loss": 0.0083, "num_input_tokens_seen": 195963360, "step": 90850 }, { "epoch": 14.82137030995106, "grad_norm": 0.050785455852746964, "learning_rate": 0.0001907130221964432, "loss": 0.0133, "num_input_tokens_seen": 195972832, "step": 90855 }, { "epoch": 14.822185970636216, "grad_norm": 0.10055693984031677, "learning_rate": 0.0001906570974519105, "loss": 0.0191, "num_input_tokens_seen": 195983136, "step": 90860 }, { "epoch": 14.82300163132137, "grad_norm": 0.28820449113845825, "learning_rate": 0.00019060117897660417, "loss": 0.0121, "num_input_tokens_seen": 195994144, "step": 90865 }, { "epoch": 14.823817292006526, "grad_norm": 0.03513197973370552, "learning_rate": 0.00019054526677165744, "loss": 0.0104, "num_input_tokens_seen": 196005408, "step": 90870 }, { "epoch": 14.82463295269168, "grad_norm": 0.023409759625792503, "learning_rate": 0.00019048936083820346, "loss": 0.0129, "num_input_tokens_seen": 196015776, "step": 90875 }, { "epoch": 14.825448613376835, "grad_norm": 0.0027011113706976175, "learning_rate": 0.00019043346117737526, "loss": 0.0164, "num_input_tokens_seen": 196026304, "step": 90880 }, { "epoch": 14.826264274061991, "grad_norm": 0.02388186752796173, "learning_rate": 0.00019037756779030545, "loss": 0.0347, "num_input_tokens_seen": 196037248, "step": 90885 }, { "epoch": 14.827079934747145, "grad_norm": 0.010670988820493221, "learning_rate": 0.00019032168067812738, "loss": 0.0065, "num_input_tokens_seen": 196047776, "step": 90890 }, { "epoch": 14.8278955954323, "grad_norm": 0.010675220750272274, "learning_rate": 0.00019026579984197296, "loss": 0.0082, "num_input_tokens_seen": 196058720, "step": 90895 }, { "epoch": 14.828711256117455, "grad_norm": 0.003877029288560152, "learning_rate": 0.00019020992528297537, "loss": 0.077, "num_input_tokens_seen": 196068640, "step": 90900 }, { "epoch": 14.82952691680261, "grad_norm": 0.008060252293944359, "learning_rate": 0.0001901540570022663, "loss": 0.0043, "num_input_tokens_seen": 196079008, "step": 90905 }, { "epoch": 14.830342577487766, "grad_norm": 0.3280404508113861, "learning_rate": 0.0001900981950009787, "loss": 0.1266, "num_input_tokens_seen": 196089856, "step": 90910 }, { "epoch": 14.83115823817292, "grad_norm": 0.024854404851794243, "learning_rate": 0.00019004233928024395, "loss": 0.1988, "num_input_tokens_seen": 196100768, "step": 90915 }, { "epoch": 14.831973898858076, "grad_norm": 0.06449846923351288, "learning_rate": 0.0001899864898411947, "loss": 0.0273, "num_input_tokens_seen": 196112320, "step": 90920 }, { "epoch": 14.83278955954323, "grad_norm": 0.5909405946731567, "learning_rate": 0.00018993064668496225, "loss": 0.1046, "num_input_tokens_seen": 196123264, "step": 90925 }, { "epoch": 14.833605220228385, "grad_norm": 0.03720393404364586, "learning_rate": 0.00018987480981267892, "loss": 0.005, "num_input_tokens_seen": 196134848, "step": 90930 }, { "epoch": 14.83442088091354, "grad_norm": 0.003399422625079751, "learning_rate": 0.00018981897922547565, "loss": 0.0071, "num_input_tokens_seen": 196144640, "step": 90935 }, { "epoch": 14.835236541598695, "grad_norm": 0.002211781684309244, "learning_rate": 0.00018976315492448453, "loss": 0.0031, "num_input_tokens_seen": 196156960, "step": 90940 }, { "epoch": 14.83605220228385, "grad_norm": 0.004931868985295296, "learning_rate": 0.00018970733691083637, "loss": 0.002, "num_input_tokens_seen": 196168288, "step": 90945 }, { "epoch": 14.836867862969005, "grad_norm": 0.051295481622219086, "learning_rate": 0.000189651525185663, "loss": 0.0074, "num_input_tokens_seen": 196178816, "step": 90950 }, { "epoch": 14.83768352365416, "grad_norm": 0.07646121084690094, "learning_rate": 0.00018959571975009481, "loss": 0.0088, "num_input_tokens_seen": 196189344, "step": 90955 }, { "epoch": 14.838499184339314, "grad_norm": 0.011518619954586029, "learning_rate": 0.00018953992060526348, "loss": 0.0015, "num_input_tokens_seen": 196200160, "step": 90960 }, { "epoch": 14.83931484502447, "grad_norm": 0.19999709725379944, "learning_rate": 0.00018948412775229918, "loss": 0.013, "num_input_tokens_seen": 196211392, "step": 90965 }, { "epoch": 14.840130505709626, "grad_norm": 0.5819754004478455, "learning_rate": 0.0001894283411923331, "loss": 0.1039, "num_input_tokens_seen": 196221952, "step": 90970 }, { "epoch": 14.84094616639478, "grad_norm": 0.00794198177754879, "learning_rate": 0.0001893725609264957, "loss": 0.0203, "num_input_tokens_seen": 196233440, "step": 90975 }, { "epoch": 14.841761827079935, "grad_norm": 0.46201565861701965, "learning_rate": 0.00018931678695591742, "loss": 0.0356, "num_input_tokens_seen": 196244160, "step": 90980 }, { "epoch": 14.84257748776509, "grad_norm": 0.01400209590792656, "learning_rate": 0.00018926101928172856, "loss": 0.0203, "num_input_tokens_seen": 196254720, "step": 90985 }, { "epoch": 14.843393148450245, "grad_norm": 0.12333527952432632, "learning_rate": 0.00018920525790505933, "loss": 0.0088, "num_input_tokens_seen": 196265312, "step": 90990 }, { "epoch": 14.844208809135399, "grad_norm": 0.0024427652824670076, "learning_rate": 0.00018914950282703985, "loss": 0.255, "num_input_tokens_seen": 196276576, "step": 90995 }, { "epoch": 14.845024469820554, "grad_norm": 0.09467651695013046, "learning_rate": 0.00018909375404879998, "loss": 0.012, "num_input_tokens_seen": 196286528, "step": 91000 }, { "epoch": 14.84584013050571, "grad_norm": 0.002683422528207302, "learning_rate": 0.00018903801157146965, "loss": 0.057, "num_input_tokens_seen": 196297568, "step": 91005 }, { "epoch": 14.846655791190864, "grad_norm": 0.02391847036778927, "learning_rate": 0.00018898227539617852, "loss": 0.0067, "num_input_tokens_seen": 196309760, "step": 91010 }, { "epoch": 14.84747145187602, "grad_norm": 0.006485488265752792, "learning_rate": 0.0001889265455240561, "loss": 0.009, "num_input_tokens_seen": 196319648, "step": 91015 }, { "epoch": 14.848287112561174, "grad_norm": 0.019908104091882706, "learning_rate": 0.00018887082195623167, "loss": 0.02, "num_input_tokens_seen": 196330080, "step": 91020 }, { "epoch": 14.84910277324633, "grad_norm": 0.26793110370635986, "learning_rate": 0.00018881510469383506, "loss": 0.2065, "num_input_tokens_seen": 196342208, "step": 91025 }, { "epoch": 14.849918433931485, "grad_norm": 0.34800171852111816, "learning_rate": 0.00018875939373799483, "loss": 0.0591, "num_input_tokens_seen": 196353120, "step": 91030 }, { "epoch": 14.850734094616639, "grad_norm": 0.010555317625403404, "learning_rate": 0.00018870368908984063, "loss": 0.0221, "num_input_tokens_seen": 196365088, "step": 91035 }, { "epoch": 14.851549755301795, "grad_norm": 0.04263678938150406, "learning_rate": 0.00018864799075050078, "loss": 0.0518, "num_input_tokens_seen": 196375744, "step": 91040 }, { "epoch": 14.852365415986949, "grad_norm": 0.015177798457443714, "learning_rate": 0.00018859229872110467, "loss": 0.0081, "num_input_tokens_seen": 196385824, "step": 91045 }, { "epoch": 14.853181076672104, "grad_norm": 0.02815868705511093, "learning_rate": 0.00018853661300278034, "loss": 0.0066, "num_input_tokens_seen": 196396000, "step": 91050 }, { "epoch": 14.85399673735726, "grad_norm": 0.0015993593260645866, "learning_rate": 0.00018848093359665703, "loss": 0.0096, "num_input_tokens_seen": 196407072, "step": 91055 }, { "epoch": 14.854812398042414, "grad_norm": 0.001749714370816946, "learning_rate": 0.0001884252605038624, "loss": 0.0265, "num_input_tokens_seen": 196417696, "step": 91060 }, { "epoch": 14.85562805872757, "grad_norm": 0.010427766479551792, "learning_rate": 0.00018836959372552553, "loss": 0.0031, "num_input_tokens_seen": 196428960, "step": 91065 }, { "epoch": 14.856443719412724, "grad_norm": 0.012313637882471085, "learning_rate": 0.0001883139332627738, "loss": 0.0065, "num_input_tokens_seen": 196438752, "step": 91070 }, { "epoch": 14.85725938009788, "grad_norm": 0.2619791030883789, "learning_rate": 0.00018825827911673592, "loss": 0.0195, "num_input_tokens_seen": 196449760, "step": 91075 }, { "epoch": 14.858075040783035, "grad_norm": 0.02902335673570633, "learning_rate": 0.0001882026312885392, "loss": 0.0031, "num_input_tokens_seen": 196460480, "step": 91080 }, { "epoch": 14.858890701468189, "grad_norm": 0.03994120657444, "learning_rate": 0.00018814698977931204, "loss": 0.0299, "num_input_tokens_seen": 196471872, "step": 91085 }, { "epoch": 14.859706362153345, "grad_norm": 0.11147171258926392, "learning_rate": 0.0001880913545901814, "loss": 0.0067, "num_input_tokens_seen": 196482272, "step": 91090 }, { "epoch": 14.860522022838499, "grad_norm": 0.005833734758198261, "learning_rate": 0.00018803572572227546, "loss": 0.0054, "num_input_tokens_seen": 196493184, "step": 91095 }, { "epoch": 14.861337683523654, "grad_norm": 0.004026546608656645, "learning_rate": 0.000187980103176721, "loss": 0.0132, "num_input_tokens_seen": 196504288, "step": 91100 }, { "epoch": 14.86215334420881, "grad_norm": 0.9018500447273254, "learning_rate": 0.0001879244869546457, "loss": 0.0298, "num_input_tokens_seen": 196515296, "step": 91105 }, { "epoch": 14.862969004893964, "grad_norm": 0.17936429381370544, "learning_rate": 0.00018786887705717658, "loss": 0.0337, "num_input_tokens_seen": 196526528, "step": 91110 }, { "epoch": 14.86378466557912, "grad_norm": 0.003296932438388467, "learning_rate": 0.00018781327348544065, "loss": 0.0039, "num_input_tokens_seen": 196537024, "step": 91115 }, { "epoch": 14.864600326264274, "grad_norm": 0.02158510684967041, "learning_rate": 0.00018775767624056472, "loss": 0.0102, "num_input_tokens_seen": 196548224, "step": 91120 }, { "epoch": 14.86541598694943, "grad_norm": 0.10101553797721863, "learning_rate": 0.0001877020853236756, "loss": 0.1011, "num_input_tokens_seen": 196559328, "step": 91125 }, { "epoch": 14.866231647634583, "grad_norm": 0.008149228058755398, "learning_rate": 0.00018764650073589995, "loss": 0.0027, "num_input_tokens_seen": 196570624, "step": 91130 }, { "epoch": 14.867047308319739, "grad_norm": 0.06370249390602112, "learning_rate": 0.0001875909224783642, "loss": 0.0103, "num_input_tokens_seen": 196582176, "step": 91135 }, { "epoch": 14.867862969004895, "grad_norm": 0.017262227833271027, "learning_rate": 0.00018753535055219468, "loss": 0.015, "num_input_tokens_seen": 196592000, "step": 91140 }, { "epoch": 14.868678629690049, "grad_norm": 0.008435085415840149, "learning_rate": 0.0001874797849585177, "loss": 0.042, "num_input_tokens_seen": 196603424, "step": 91145 }, { "epoch": 14.869494290375204, "grad_norm": 0.01878134347498417, "learning_rate": 0.00018742422569845935, "loss": 0.2679, "num_input_tokens_seen": 196614752, "step": 91150 }, { "epoch": 14.870309951060358, "grad_norm": 0.024911075830459595, "learning_rate": 0.00018736867277314556, "loss": 0.0035, "num_input_tokens_seen": 196624448, "step": 91155 }, { "epoch": 14.871125611745514, "grad_norm": 2.440078020095825, "learning_rate": 0.00018731312618370228, "loss": 0.0794, "num_input_tokens_seen": 196635552, "step": 91160 }, { "epoch": 14.87194127243067, "grad_norm": 0.01800604723393917, "learning_rate": 0.0001872575859312549, "loss": 0.1153, "num_input_tokens_seen": 196647168, "step": 91165 }, { "epoch": 14.872756933115824, "grad_norm": 0.999069333076477, "learning_rate": 0.00018720205201692975, "loss": 0.0586, "num_input_tokens_seen": 196658816, "step": 91170 }, { "epoch": 14.87357259380098, "grad_norm": 0.06230998411774635, "learning_rate": 0.00018714652444185137, "loss": 0.1056, "num_input_tokens_seen": 196669728, "step": 91175 }, { "epoch": 14.874388254486133, "grad_norm": 0.012311974540352821, "learning_rate": 0.00018709100320714594, "loss": 0.0092, "num_input_tokens_seen": 196680640, "step": 91180 }, { "epoch": 14.875203915171289, "grad_norm": 0.09641791880130768, "learning_rate": 0.00018703548831393795, "loss": 0.019, "num_input_tokens_seen": 196692064, "step": 91185 }, { "epoch": 14.876019575856443, "grad_norm": 0.016533339396119118, "learning_rate": 0.00018697997976335317, "loss": 0.1371, "num_input_tokens_seen": 196702528, "step": 91190 }, { "epoch": 14.876835236541599, "grad_norm": 0.33797144889831543, "learning_rate": 0.0001869244775565158, "loss": 0.0378, "num_input_tokens_seen": 196713664, "step": 91195 }, { "epoch": 14.877650897226754, "grad_norm": 0.8016533255577087, "learning_rate": 0.00018686898169455147, "loss": 0.1275, "num_input_tokens_seen": 196724832, "step": 91200 }, { "epoch": 14.878466557911908, "grad_norm": 0.010366318747401237, "learning_rate": 0.00018681349217858408, "loss": 0.0073, "num_input_tokens_seen": 196734880, "step": 91205 }, { "epoch": 14.879282218597064, "grad_norm": 0.00780695304274559, "learning_rate": 0.00018675800900973876, "loss": 0.1104, "num_input_tokens_seen": 196745888, "step": 91210 }, { "epoch": 14.880097879282218, "grad_norm": 0.023848596960306168, "learning_rate": 0.00018670253218913975, "loss": 0.0238, "num_input_tokens_seen": 196757888, "step": 91215 }, { "epoch": 14.880913539967374, "grad_norm": 0.0013198460219427943, "learning_rate": 0.00018664706171791134, "loss": 0.0059, "num_input_tokens_seen": 196768416, "step": 91220 }, { "epoch": 14.88172920065253, "grad_norm": 0.00504318019375205, "learning_rate": 0.0001865915975971778, "loss": 0.0169, "num_input_tokens_seen": 196779168, "step": 91225 }, { "epoch": 14.882544861337683, "grad_norm": 0.0457756407558918, "learning_rate": 0.00018653613982806311, "loss": 0.019, "num_input_tokens_seen": 196789952, "step": 91230 }, { "epoch": 14.883360522022839, "grad_norm": 0.009656946174800396, "learning_rate": 0.0001864806884116912, "loss": 0.1536, "num_input_tokens_seen": 196800320, "step": 91235 }, { "epoch": 14.884176182707993, "grad_norm": 0.013422165997326374, "learning_rate": 0.00018642524334918582, "loss": 0.0022, "num_input_tokens_seen": 196810464, "step": 91240 }, { "epoch": 14.884991843393149, "grad_norm": 0.28181612491607666, "learning_rate": 0.00018636980464167076, "loss": 0.0105, "num_input_tokens_seen": 196821824, "step": 91245 }, { "epoch": 14.885807504078304, "grad_norm": 0.8956671357154846, "learning_rate": 0.00018631437229026942, "loss": 0.0282, "num_input_tokens_seen": 196833856, "step": 91250 }, { "epoch": 14.886623164763458, "grad_norm": 0.00457068532705307, "learning_rate": 0.0001862589462961053, "loss": 0.0159, "num_input_tokens_seen": 196844672, "step": 91255 }, { "epoch": 14.887438825448614, "grad_norm": 0.09010881930589676, "learning_rate": 0.0001862035266603016, "loss": 0.0077, "num_input_tokens_seen": 196855520, "step": 91260 }, { "epoch": 14.888254486133768, "grad_norm": 0.003874789923429489, "learning_rate": 0.00018614811338398153, "loss": 0.0053, "num_input_tokens_seen": 196867008, "step": 91265 }, { "epoch": 14.889070146818923, "grad_norm": 0.7236346006393433, "learning_rate": 0.0001860927064682681, "loss": 0.0291, "num_input_tokens_seen": 196878112, "step": 91270 }, { "epoch": 14.88988580750408, "grad_norm": 0.10781071335077286, "learning_rate": 0.0001860373059142842, "loss": 0.1281, "num_input_tokens_seen": 196888896, "step": 91275 }, { "epoch": 14.890701468189233, "grad_norm": 0.028043517842888832, "learning_rate": 0.00018598191172315253, "loss": 0.0068, "num_input_tokens_seen": 196899936, "step": 91280 }, { "epoch": 14.891517128874389, "grad_norm": 0.013533554039895535, "learning_rate": 0.00018592652389599583, "loss": 0.0051, "num_input_tokens_seen": 196910816, "step": 91285 }, { "epoch": 14.892332789559543, "grad_norm": 0.008559079840779305, "learning_rate": 0.00018587114243393655, "loss": 0.1113, "num_input_tokens_seen": 196921792, "step": 91290 }, { "epoch": 14.893148450244698, "grad_norm": 0.5116294622421265, "learning_rate": 0.00018581576733809707, "loss": 0.1205, "num_input_tokens_seen": 196933088, "step": 91295 }, { "epoch": 14.893964110929852, "grad_norm": 0.05815356224775314, "learning_rate": 0.00018576039860959966, "loss": 0.0039, "num_input_tokens_seen": 196943296, "step": 91300 }, { "epoch": 14.894779771615008, "grad_norm": 0.04112934693694115, "learning_rate": 0.00018570503624956635, "loss": 0.0163, "num_input_tokens_seen": 196952960, "step": 91305 }, { "epoch": 14.895595432300164, "grad_norm": 0.3584984242916107, "learning_rate": 0.00018564968025911905, "loss": 0.0296, "num_input_tokens_seen": 196963904, "step": 91310 }, { "epoch": 14.896411092985318, "grad_norm": 0.04093239828944206, "learning_rate": 0.00018559433063937997, "loss": 0.0057, "num_input_tokens_seen": 196974720, "step": 91315 }, { "epoch": 14.897226753670473, "grad_norm": 0.42152467370033264, "learning_rate": 0.00018553898739147057, "loss": 0.0081, "num_input_tokens_seen": 196986656, "step": 91320 }, { "epoch": 14.898042414355627, "grad_norm": 0.00591781921684742, "learning_rate": 0.00018548365051651255, "loss": 0.1069, "num_input_tokens_seen": 196998464, "step": 91325 }, { "epoch": 14.898858075040783, "grad_norm": 0.06077577918767929, "learning_rate": 0.00018542832001562732, "loss": 0.0095, "num_input_tokens_seen": 197009408, "step": 91330 }, { "epoch": 14.899673735725939, "grad_norm": 0.012135002762079239, "learning_rate": 0.00018537299588993627, "loss": 0.0037, "num_input_tokens_seen": 197019328, "step": 91335 }, { "epoch": 14.900489396411093, "grad_norm": 0.04891710728406906, "learning_rate": 0.0001853176781405606, "loss": 0.0762, "num_input_tokens_seen": 197031200, "step": 91340 }, { "epoch": 14.901305057096248, "grad_norm": 0.2068236619234085, "learning_rate": 0.00018526236676862134, "loss": 0.0282, "num_input_tokens_seen": 197041248, "step": 91345 }, { "epoch": 14.902120717781402, "grad_norm": 0.006584752816706896, "learning_rate": 0.00018520706177523955, "loss": 0.0196, "num_input_tokens_seen": 197051680, "step": 91350 }, { "epoch": 14.902936378466558, "grad_norm": 0.02240839973092079, "learning_rate": 0.000185151763161536, "loss": 0.05, "num_input_tokens_seen": 197061920, "step": 91355 }, { "epoch": 14.903752039151712, "grad_norm": 0.012760811485350132, "learning_rate": 0.0001850964709286313, "loss": 0.0075, "num_input_tokens_seen": 197072608, "step": 91360 }, { "epoch": 14.904567699836868, "grad_norm": 0.026944618672132492, "learning_rate": 0.00018504118507764618, "loss": 0.0085, "num_input_tokens_seen": 197083072, "step": 91365 }, { "epoch": 14.905383360522023, "grad_norm": 0.03860986977815628, "learning_rate": 0.00018498590560970098, "loss": 0.0258, "num_input_tokens_seen": 197093856, "step": 91370 }, { "epoch": 14.906199021207177, "grad_norm": 0.03619516268372536, "learning_rate": 0.00018493063252591596, "loss": 0.0093, "num_input_tokens_seen": 197104000, "step": 91375 }, { "epoch": 14.907014681892333, "grad_norm": 0.007498772349208593, "learning_rate": 0.00018487536582741142, "loss": 0.0021, "num_input_tokens_seen": 197113888, "step": 91380 }, { "epoch": 14.907830342577487, "grad_norm": 0.017642341554164886, "learning_rate": 0.00018482010551530736, "loss": 0.0061, "num_input_tokens_seen": 197124000, "step": 91385 }, { "epoch": 14.908646003262643, "grad_norm": 0.11727790534496307, "learning_rate": 0.00018476485159072371, "loss": 0.0068, "num_input_tokens_seen": 197135424, "step": 91390 }, { "epoch": 14.909461663947798, "grad_norm": 0.03655335679650307, "learning_rate": 0.0001847096040547802, "loss": 0.0436, "num_input_tokens_seen": 197144960, "step": 91395 }, { "epoch": 14.910277324632952, "grad_norm": 0.008979732170701027, "learning_rate": 0.00018465436290859662, "loss": 0.0041, "num_input_tokens_seen": 197154752, "step": 91400 }, { "epoch": 14.911092985318108, "grad_norm": 0.00692667905241251, "learning_rate": 0.00018459912815329234, "loss": 0.01, "num_input_tokens_seen": 197164576, "step": 91405 }, { "epoch": 14.911908646003262, "grad_norm": 0.012186594307422638, "learning_rate": 0.00018454389978998686, "loss": 0.0186, "num_input_tokens_seen": 197176416, "step": 91410 }, { "epoch": 14.912724306688418, "grad_norm": 0.8051772117614746, "learning_rate": 0.00018448867781979943, "loss": 0.0359, "num_input_tokens_seen": 197187904, "step": 91415 }, { "epoch": 14.913539967373573, "grad_norm": 0.012307604774832726, "learning_rate": 0.00018443346224384906, "loss": 0.0042, "num_input_tokens_seen": 197197408, "step": 91420 }, { "epoch": 14.914355628058727, "grad_norm": 0.08530403673648834, "learning_rate": 0.00018437825306325524, "loss": 0.0147, "num_input_tokens_seen": 197207840, "step": 91425 }, { "epoch": 14.915171288743883, "grad_norm": 0.1317361295223236, "learning_rate": 0.00018432305027913615, "loss": 0.1281, "num_input_tokens_seen": 197219968, "step": 91430 }, { "epoch": 14.915986949429037, "grad_norm": 0.026397982612252235, "learning_rate": 0.00018426785389261124, "loss": 0.0038, "num_input_tokens_seen": 197230912, "step": 91435 }, { "epoch": 14.916802610114193, "grad_norm": 0.02205594815313816, "learning_rate": 0.00018421266390479846, "loss": 0.0092, "num_input_tokens_seen": 197242272, "step": 91440 }, { "epoch": 14.917618270799348, "grad_norm": 0.018856551498174667, "learning_rate": 0.00018415748031681706, "loss": 0.0018, "num_input_tokens_seen": 197253248, "step": 91445 }, { "epoch": 14.918433931484502, "grad_norm": 0.040025174617767334, "learning_rate": 0.0001841023031297846, "loss": 0.0066, "num_input_tokens_seen": 197264640, "step": 91450 }, { "epoch": 14.919249592169658, "grad_norm": 0.004428384825587273, "learning_rate": 0.0001840471323448199, "loss": 0.0293, "num_input_tokens_seen": 197276064, "step": 91455 }, { "epoch": 14.920065252854812, "grad_norm": 0.03011285699903965, "learning_rate": 0.00018399196796304085, "loss": 0.0058, "num_input_tokens_seen": 197284480, "step": 91460 }, { "epoch": 14.920880913539968, "grad_norm": 0.026194892823696136, "learning_rate": 0.0001839368099855655, "loss": 0.0044, "num_input_tokens_seen": 197295712, "step": 91465 }, { "epoch": 14.921696574225122, "grad_norm": 0.051315221935510635, "learning_rate": 0.00018388165841351162, "loss": 0.0035, "num_input_tokens_seen": 197307104, "step": 91470 }, { "epoch": 14.922512234910277, "grad_norm": 0.003189179813489318, "learning_rate": 0.000183826513247997, "loss": 0.0117, "num_input_tokens_seen": 197317312, "step": 91475 }, { "epoch": 14.923327895595433, "grad_norm": 0.22428081929683685, "learning_rate": 0.0001837713744901391, "loss": 0.0226, "num_input_tokens_seen": 197329408, "step": 91480 }, { "epoch": 14.924143556280587, "grad_norm": 0.003765654284507036, "learning_rate": 0.00018371624214105553, "loss": 0.0754, "num_input_tokens_seen": 197340832, "step": 91485 }, { "epoch": 14.924959216965743, "grad_norm": 0.06391692161560059, "learning_rate": 0.00018366111620186348, "loss": 0.007, "num_input_tokens_seen": 197350464, "step": 91490 }, { "epoch": 14.925774877650896, "grad_norm": 1.2825957536697388, "learning_rate": 0.0001836059966736803, "loss": 0.0594, "num_input_tokens_seen": 197361728, "step": 91495 }, { "epoch": 14.926590538336052, "grad_norm": 0.01534697599709034, "learning_rate": 0.0001835508835576229, "loss": 0.0574, "num_input_tokens_seen": 197372256, "step": 91500 }, { "epoch": 14.927406199021208, "grad_norm": 0.07393841445446014, "learning_rate": 0.00018349577685480834, "loss": 0.0066, "num_input_tokens_seen": 197383008, "step": 91505 }, { "epoch": 14.928221859706362, "grad_norm": 0.013042877428233624, "learning_rate": 0.0001834406765663534, "loss": 0.0051, "num_input_tokens_seen": 197393696, "step": 91510 }, { "epoch": 14.929037520391518, "grad_norm": 0.016469378024339676, "learning_rate": 0.00018338558269337464, "loss": 0.1106, "num_input_tokens_seen": 197403872, "step": 91515 }, { "epoch": 14.929853181076671, "grad_norm": 0.05654904991388321, "learning_rate": 0.00018333049523698876, "loss": 0.0054, "num_input_tokens_seen": 197416096, "step": 91520 }, { "epoch": 14.930668841761827, "grad_norm": 0.1108851283788681, "learning_rate": 0.00018327541419831196, "loss": 0.0383, "num_input_tokens_seen": 197427648, "step": 91525 }, { "epoch": 14.931484502446983, "grad_norm": 0.29693999886512756, "learning_rate": 0.00018322033957846097, "loss": 0.0134, "num_input_tokens_seen": 197438240, "step": 91530 }, { "epoch": 14.932300163132137, "grad_norm": 0.009830850176513195, "learning_rate": 0.00018316527137855138, "loss": 0.0029, "num_input_tokens_seen": 197449792, "step": 91535 }, { "epoch": 14.933115823817293, "grad_norm": 0.02880791202187538, "learning_rate": 0.00018311020959969982, "loss": 0.0175, "num_input_tokens_seen": 197461600, "step": 91540 }, { "epoch": 14.933931484502446, "grad_norm": 0.004078914411365986, "learning_rate": 0.0001830551542430215, "loss": 0.0529, "num_input_tokens_seen": 197471168, "step": 91545 }, { "epoch": 14.934747145187602, "grad_norm": 0.004536197520792484, "learning_rate": 0.0001830001053096329, "loss": 0.007, "num_input_tokens_seen": 197482368, "step": 91550 }, { "epoch": 14.935562805872756, "grad_norm": 0.015831997618079185, "learning_rate": 0.000182945062800649, "loss": 0.0461, "num_input_tokens_seen": 197492800, "step": 91555 }, { "epoch": 14.936378466557912, "grad_norm": 0.026228871196508408, "learning_rate": 0.0001828900267171859, "loss": 0.0084, "num_input_tokens_seen": 197504096, "step": 91560 }, { "epoch": 14.937194127243067, "grad_norm": 0.03762160986661911, "learning_rate": 0.0001828349970603584, "loss": 0.0052, "num_input_tokens_seen": 197514368, "step": 91565 }, { "epoch": 14.938009787928221, "grad_norm": 0.013585063628852367, "learning_rate": 0.00018277997383128237, "loss": 0.0026, "num_input_tokens_seen": 197525472, "step": 91570 }, { "epoch": 14.938825448613377, "grad_norm": 0.03490360826253891, "learning_rate": 0.00018272495703107222, "loss": 0.0035, "num_input_tokens_seen": 197537120, "step": 91575 }, { "epoch": 14.939641109298531, "grad_norm": 0.008376812562346458, "learning_rate": 0.00018266994666084368, "loss": 0.0018, "num_input_tokens_seen": 197548288, "step": 91580 }, { "epoch": 14.940456769983687, "grad_norm": 0.2774714231491089, "learning_rate": 0.0001826149427217109, "loss": 0.0315, "num_input_tokens_seen": 197558208, "step": 91585 }, { "epoch": 14.941272430668842, "grad_norm": 0.007843281142413616, "learning_rate": 0.00018255994521478925, "loss": 0.0161, "num_input_tokens_seen": 197569344, "step": 91590 }, { "epoch": 14.942088091353996, "grad_norm": 0.004047416616231203, "learning_rate": 0.00018250495414119273, "loss": 0.003, "num_input_tokens_seen": 197580256, "step": 91595 }, { "epoch": 14.942903752039152, "grad_norm": 0.011550561524927616, "learning_rate": 0.0001824499695020362, "loss": 0.2994, "num_input_tokens_seen": 197590208, "step": 91600 }, { "epoch": 14.943719412724306, "grad_norm": 0.013135246932506561, "learning_rate": 0.0001823949912984339, "loss": 0.0077, "num_input_tokens_seen": 197601760, "step": 91605 }, { "epoch": 14.944535073409462, "grad_norm": 0.05306397005915642, "learning_rate": 0.00018234001953149997, "loss": 0.1124, "num_input_tokens_seen": 197611680, "step": 91610 }, { "epoch": 14.945350734094617, "grad_norm": 0.029959172010421753, "learning_rate": 0.00018228505420234858, "loss": 0.0413, "num_input_tokens_seen": 197622464, "step": 91615 }, { "epoch": 14.946166394779771, "grad_norm": 0.014676331542432308, "learning_rate": 0.00018223009531209355, "loss": 0.0069, "num_input_tokens_seen": 197631648, "step": 91620 }, { "epoch": 14.946982055464927, "grad_norm": 0.0037393090315163136, "learning_rate": 0.00018217514286184884, "loss": 0.0108, "num_input_tokens_seen": 197643232, "step": 91625 }, { "epoch": 14.947797716150081, "grad_norm": 0.8185652494430542, "learning_rate": 0.00018212019685272802, "loss": 0.1317, "num_input_tokens_seen": 197654400, "step": 91630 }, { "epoch": 14.948613376835237, "grad_norm": 0.005269949324429035, "learning_rate": 0.00018206525728584462, "loss": 0.007, "num_input_tokens_seen": 197665504, "step": 91635 }, { "epoch": 14.949429037520392, "grad_norm": 0.0034501368645578623, "learning_rate": 0.00018201032416231217, "loss": 0.005, "num_input_tokens_seen": 197677280, "step": 91640 }, { "epoch": 14.950244698205546, "grad_norm": 0.005568252876400948, "learning_rate": 0.00018195539748324386, "loss": 0.0024, "num_input_tokens_seen": 197688128, "step": 91645 }, { "epoch": 14.951060358890702, "grad_norm": 0.027983618900179863, "learning_rate": 0.00018190047724975271, "loss": 0.0186, "num_input_tokens_seen": 197697888, "step": 91650 }, { "epoch": 14.951876019575856, "grad_norm": 0.014069877564907074, "learning_rate": 0.00018184556346295233, "loss": 0.0539, "num_input_tokens_seen": 197708512, "step": 91655 }, { "epoch": 14.952691680261012, "grad_norm": 0.1250534951686859, "learning_rate": 0.00018179065612395484, "loss": 0.0076, "num_input_tokens_seen": 197718560, "step": 91660 }, { "epoch": 14.953507340946166, "grad_norm": 0.0015535735292360187, "learning_rate": 0.0001817357552338737, "loss": 0.0018, "num_input_tokens_seen": 197730176, "step": 91665 }, { "epoch": 14.954323001631321, "grad_norm": 0.4162045121192932, "learning_rate": 0.0001816808607938209, "loss": 0.0578, "num_input_tokens_seen": 197741568, "step": 91670 }, { "epoch": 14.955138662316477, "grad_norm": 0.2321537882089615, "learning_rate": 0.00018162597280490966, "loss": 0.0108, "num_input_tokens_seen": 197751744, "step": 91675 }, { "epoch": 14.955954323001631, "grad_norm": 0.6387122273445129, "learning_rate": 0.00018157109126825156, "loss": 0.0289, "num_input_tokens_seen": 197762688, "step": 91680 }, { "epoch": 14.956769983686787, "grad_norm": 0.1408645510673523, "learning_rate": 0.0001815162161849596, "loss": 0.0293, "num_input_tokens_seen": 197773152, "step": 91685 }, { "epoch": 14.95758564437194, "grad_norm": 0.0076782358810305595, "learning_rate": 0.00018146134755614524, "loss": 0.0118, "num_input_tokens_seen": 197785312, "step": 91690 }, { "epoch": 14.958401305057096, "grad_norm": 0.002452652668580413, "learning_rate": 0.0001814064853829211, "loss": 0.0177, "num_input_tokens_seen": 197796704, "step": 91695 }, { "epoch": 14.959216965742252, "grad_norm": 0.02224743738770485, "learning_rate": 0.00018135162966639835, "loss": 0.094, "num_input_tokens_seen": 197807200, "step": 91700 }, { "epoch": 14.960032626427406, "grad_norm": 0.11881683021783829, "learning_rate": 0.00018129678040768938, "loss": 0.013, "num_input_tokens_seen": 197818112, "step": 91705 }, { "epoch": 14.960848287112562, "grad_norm": 0.594805121421814, "learning_rate": 0.00018124193760790514, "loss": 0.0199, "num_input_tokens_seen": 197829024, "step": 91710 }, { "epoch": 14.961663947797716, "grad_norm": 0.35641995072364807, "learning_rate": 0.00018118710126815773, "loss": 0.0115, "num_input_tokens_seen": 197839616, "step": 91715 }, { "epoch": 14.962479608482871, "grad_norm": 0.004903493449091911, "learning_rate": 0.00018113227138955785, "loss": 0.0104, "num_input_tokens_seen": 197850848, "step": 91720 }, { "epoch": 14.963295269168025, "grad_norm": 0.17776449024677277, "learning_rate": 0.00018107744797321728, "loss": 0.0114, "num_input_tokens_seen": 197862912, "step": 91725 }, { "epoch": 14.964110929853181, "grad_norm": 0.0034439547453075647, "learning_rate": 0.00018102263102024653, "loss": 0.0065, "num_input_tokens_seen": 197873952, "step": 91730 }, { "epoch": 14.964926590538337, "grad_norm": 0.010122783482074738, "learning_rate": 0.00018096782053175715, "loss": 0.002, "num_input_tokens_seen": 197883488, "step": 91735 }, { "epoch": 14.96574225122349, "grad_norm": 0.012874247506260872, "learning_rate": 0.00018091301650885922, "loss": 0.003, "num_input_tokens_seen": 197893536, "step": 91740 }, { "epoch": 14.966557911908646, "grad_norm": 0.02317088097333908, "learning_rate": 0.00018085821895266402, "loss": 0.006, "num_input_tokens_seen": 197904960, "step": 91745 }, { "epoch": 14.9673735725938, "grad_norm": 0.7945225238800049, "learning_rate": 0.00018080342786428184, "loss": 0.0261, "num_input_tokens_seen": 197914944, "step": 91750 }, { "epoch": 14.968189233278956, "grad_norm": 0.024404676631093025, "learning_rate": 0.00018074864324482315, "loss": 0.0041, "num_input_tokens_seen": 197924448, "step": 91755 }, { "epoch": 14.969004893964112, "grad_norm": 0.04939607158303261, "learning_rate": 0.0001806938650953982, "loss": 0.0061, "num_input_tokens_seen": 197935136, "step": 91760 }, { "epoch": 14.969820554649266, "grad_norm": 0.263517290353775, "learning_rate": 0.00018063909341711716, "loss": 0.0157, "num_input_tokens_seen": 197945600, "step": 91765 }, { "epoch": 14.970636215334421, "grad_norm": 0.017584795132279396, "learning_rate": 0.00018058432821109, "loss": 0.0146, "num_input_tokens_seen": 197955712, "step": 91770 }, { "epoch": 14.971451876019575, "grad_norm": 0.717647135257721, "learning_rate": 0.00018052956947842665, "loss": 0.0378, "num_input_tokens_seen": 197966784, "step": 91775 }, { "epoch": 14.97226753670473, "grad_norm": 0.003657313296571374, "learning_rate": 0.0001804748172202368, "loss": 0.0132, "num_input_tokens_seen": 197978240, "step": 91780 }, { "epoch": 14.973083197389887, "grad_norm": 0.24121206998825073, "learning_rate": 0.00018042007143763018, "loss": 0.0064, "num_input_tokens_seen": 197987872, "step": 91785 }, { "epoch": 14.97389885807504, "grad_norm": 0.011296981945633888, "learning_rate": 0.00018036533213171618, "loss": 0.0042, "num_input_tokens_seen": 197998400, "step": 91790 }, { "epoch": 14.974714518760196, "grad_norm": 0.06533413380384445, "learning_rate": 0.0001803105993036041, "loss": 0.0186, "num_input_tokens_seen": 198009280, "step": 91795 }, { "epoch": 14.97553017944535, "grad_norm": 0.02223447524011135, "learning_rate": 0.0001802558729544036, "loss": 0.0083, "num_input_tokens_seen": 198018688, "step": 91800 }, { "epoch": 14.976345840130506, "grad_norm": 0.005110082682222128, "learning_rate": 0.0001802011530852231, "loss": 0.0039, "num_input_tokens_seen": 198030144, "step": 91805 }, { "epoch": 14.977161500815662, "grad_norm": 0.005362540949136019, "learning_rate": 0.00018014643969717231, "loss": 0.0025, "num_input_tokens_seen": 198040480, "step": 91810 }, { "epoch": 14.977977161500815, "grad_norm": 0.014981192536652088, "learning_rate": 0.0001800917327913593, "loss": 0.1187, "num_input_tokens_seen": 198051328, "step": 91815 }, { "epoch": 14.978792822185971, "grad_norm": 0.008824207819998264, "learning_rate": 0.0001800370323688935, "loss": 0.0039, "num_input_tokens_seen": 198062880, "step": 91820 }, { "epoch": 14.979608482871125, "grad_norm": 0.009656177833676338, "learning_rate": 0.00017998233843088284, "loss": 0.036, "num_input_tokens_seen": 198073888, "step": 91825 }, { "epoch": 14.98042414355628, "grad_norm": 0.01120589766651392, "learning_rate": 0.00017992765097843639, "loss": 0.0056, "num_input_tokens_seen": 198084288, "step": 91830 }, { "epoch": 14.981239804241435, "grad_norm": 0.2712480425834656, "learning_rate": 0.00017987297001266172, "loss": 0.0235, "num_input_tokens_seen": 198095648, "step": 91835 }, { "epoch": 14.98205546492659, "grad_norm": 1.98981511592865, "learning_rate": 0.00017981829553466783, "loss": 0.0211, "num_input_tokens_seen": 198107648, "step": 91840 }, { "epoch": 14.982871125611746, "grad_norm": 0.01084563136100769, "learning_rate": 0.00017976362754556203, "loss": 0.0014, "num_input_tokens_seen": 198119104, "step": 91845 }, { "epoch": 14.9836867862969, "grad_norm": 0.010065988637506962, "learning_rate": 0.0001797089660464527, "loss": 0.0216, "num_input_tokens_seen": 198130336, "step": 91850 }, { "epoch": 14.984502446982056, "grad_norm": 0.09801369160413742, "learning_rate": 0.00017965431103844753, "loss": 0.0023, "num_input_tokens_seen": 198140704, "step": 91855 }, { "epoch": 14.98531810766721, "grad_norm": 0.01563100703060627, "learning_rate": 0.00017959966252265407, "loss": 0.0047, "num_input_tokens_seen": 198153728, "step": 91860 }, { "epoch": 14.986133768352365, "grad_norm": 0.006586786359548569, "learning_rate": 0.00017954502050018, "loss": 0.0228, "num_input_tokens_seen": 198164608, "step": 91865 }, { "epoch": 14.986949429037521, "grad_norm": 0.09805265814065933, "learning_rate": 0.00017949038497213255, "loss": 0.0056, "num_input_tokens_seen": 198175872, "step": 91870 }, { "epoch": 14.987765089722675, "grad_norm": 0.9272639751434326, "learning_rate": 0.0001794357559396191, "loss": 0.1657, "num_input_tokens_seen": 198186528, "step": 91875 }, { "epoch": 14.98858075040783, "grad_norm": 0.008798099122941494, "learning_rate": 0.00017938113340374662, "loss": 0.002, "num_input_tokens_seen": 198197280, "step": 91880 }, { "epoch": 14.989396411092985, "grad_norm": 0.010249063372612, "learning_rate": 0.00017932651736562226, "loss": 0.0018, "num_input_tokens_seen": 198207712, "step": 91885 }, { "epoch": 14.99021207177814, "grad_norm": 1.1745091676712036, "learning_rate": 0.00017927190782635283, "loss": 0.0221, "num_input_tokens_seen": 198218400, "step": 91890 }, { "epoch": 14.991027732463294, "grad_norm": 0.018142219632864, "learning_rate": 0.00017921730478704506, "loss": 0.0076, "num_input_tokens_seen": 198228224, "step": 91895 }, { "epoch": 14.99184339314845, "grad_norm": 0.01334477961063385, "learning_rate": 0.0001791627082488056, "loss": 0.0052, "num_input_tokens_seen": 198239488, "step": 91900 }, { "epoch": 14.992659053833606, "grad_norm": 0.045343659818172455, "learning_rate": 0.00017910811821274082, "loss": 0.0053, "num_input_tokens_seen": 198250336, "step": 91905 }, { "epoch": 14.99347471451876, "grad_norm": 0.2202807515859604, "learning_rate": 0.0001790535346799571, "loss": 0.0186, "num_input_tokens_seen": 198261856, "step": 91910 }, { "epoch": 14.994290375203915, "grad_norm": 0.04990023747086525, "learning_rate": 0.00017899895765156065, "loss": 0.0087, "num_input_tokens_seen": 198272032, "step": 91915 }, { "epoch": 14.99510603588907, "grad_norm": 0.0034048608504235744, "learning_rate": 0.00017894438712865753, "loss": 0.0098, "num_input_tokens_seen": 198283168, "step": 91920 }, { "epoch": 14.995921696574225, "grad_norm": 0.24714434146881104, "learning_rate": 0.00017888982311235375, "loss": 0.0162, "num_input_tokens_seen": 198293856, "step": 91925 }, { "epoch": 14.99673735725938, "grad_norm": 0.030603189021348953, "learning_rate": 0.00017883526560375502, "loss": 0.0405, "num_input_tokens_seen": 198306016, "step": 91930 }, { "epoch": 14.997553017944535, "grad_norm": 0.6182783842086792, "learning_rate": 0.00017878071460396706, "loss": 0.0748, "num_input_tokens_seen": 198317760, "step": 91935 }, { "epoch": 14.99836867862969, "grad_norm": 0.9237636923789978, "learning_rate": 0.0001787261701140952, "loss": 0.0605, "num_input_tokens_seen": 198327200, "step": 91940 }, { "epoch": 14.999184339314844, "grad_norm": 0.003113700309768319, "learning_rate": 0.00017867163213524545, "loss": 0.0033, "num_input_tokens_seen": 198335744, "step": 91945 }, { "epoch": 15.0, "grad_norm": 0.005289952736347914, "learning_rate": 0.00017861710066852237, "loss": 0.0011, "num_input_tokens_seen": 198344384, "step": 91950 }, { "epoch": 15.0, "eval_loss": 0.2126746028661728, "eval_runtime": 104.7532, "eval_samples_per_second": 26.014, "eval_steps_per_second": 6.511, "num_input_tokens_seen": 198344384, "step": 91950 }, { "epoch": 15.000815660685156, "grad_norm": 0.0025843745097517967, "learning_rate": 0.00017856257571503164, "loss": 0.0017, "num_input_tokens_seen": 198355456, "step": 91955 }, { "epoch": 15.00163132137031, "grad_norm": 0.004058094695210457, "learning_rate": 0.00017850805727587804, "loss": 0.0741, "num_input_tokens_seen": 198365504, "step": 91960 }, { "epoch": 15.002446982055465, "grad_norm": 0.006636620499193668, "learning_rate": 0.00017845354535216658, "loss": 0.0155, "num_input_tokens_seen": 198375584, "step": 91965 }, { "epoch": 15.00326264274062, "grad_norm": 0.007555291522294283, "learning_rate": 0.00017839903994500185, "loss": 0.003, "num_input_tokens_seen": 198387072, "step": 91970 }, { "epoch": 15.004078303425775, "grad_norm": 0.0014727737288922071, "learning_rate": 0.0001783445410554886, "loss": 0.0039, "num_input_tokens_seen": 198397760, "step": 91975 }, { "epoch": 15.00489396411093, "grad_norm": 0.02516160160303116, "learning_rate": 0.00017829004868473124, "loss": 0.0104, "num_input_tokens_seen": 198408928, "step": 91980 }, { "epoch": 15.005709624796085, "grad_norm": 0.030205126851797104, "learning_rate": 0.00017823556283383418, "loss": 0.011, "num_input_tokens_seen": 198419520, "step": 91985 }, { "epoch": 15.00652528548124, "grad_norm": 0.030878309160470963, "learning_rate": 0.0001781810835039016, "loss": 0.0098, "num_input_tokens_seen": 198431008, "step": 91990 }, { "epoch": 15.007340946166394, "grad_norm": 0.20752781629562378, "learning_rate": 0.0001781266106960377, "loss": 0.0054, "num_input_tokens_seen": 198441344, "step": 91995 }, { "epoch": 15.00815660685155, "grad_norm": 0.04594150185585022, "learning_rate": 0.00017807214441134628, "loss": 0.0681, "num_input_tokens_seen": 198451680, "step": 92000 }, { "epoch": 15.008972267536704, "grad_norm": 0.0066791074350476265, "learning_rate": 0.00017801768465093126, "loss": 0.1062, "num_input_tokens_seen": 198461376, "step": 92005 }, { "epoch": 15.00978792822186, "grad_norm": 0.005318595562130213, "learning_rate": 0.00017796323141589638, "loss": 0.0292, "num_input_tokens_seen": 198472448, "step": 92010 }, { "epoch": 15.010603588907015, "grad_norm": 0.027211179956793785, "learning_rate": 0.00017790878470734506, "loss": 0.0047, "num_input_tokens_seen": 198485120, "step": 92015 }, { "epoch": 15.01141924959217, "grad_norm": 0.0021591533441096544, "learning_rate": 0.0001778543445263809, "loss": 0.0054, "num_input_tokens_seen": 198497888, "step": 92020 }, { "epoch": 15.012234910277325, "grad_norm": 0.07993344217538834, "learning_rate": 0.00017779991087410707, "loss": 0.0045, "num_input_tokens_seen": 198508000, "step": 92025 }, { "epoch": 15.013050570962479, "grad_norm": 0.019178472459316254, "learning_rate": 0.0001777454837516268, "loss": 0.0087, "num_input_tokens_seen": 198519072, "step": 92030 }, { "epoch": 15.013866231647635, "grad_norm": 0.0293814018368721, "learning_rate": 0.00017769106316004314, "loss": 0.0073, "num_input_tokens_seen": 198530400, "step": 92035 }, { "epoch": 15.01468189233279, "grad_norm": 0.0036052404902875423, "learning_rate": 0.0001776366491004589, "loss": 0.001, "num_input_tokens_seen": 198542304, "step": 92040 }, { "epoch": 15.015497553017944, "grad_norm": 0.030474966391921043, "learning_rate": 0.00017758224157397696, "loss": 0.0054, "num_input_tokens_seen": 198552256, "step": 92045 }, { "epoch": 15.0163132137031, "grad_norm": 0.0013302334118634462, "learning_rate": 0.00017752784058169992, "loss": 0.0194, "num_input_tokens_seen": 198563104, "step": 92050 }, { "epoch": 15.017128874388254, "grad_norm": 0.029335327446460724, "learning_rate": 0.00017747344612473022, "loss": 0.0076, "num_input_tokens_seen": 198575008, "step": 92055 }, { "epoch": 15.01794453507341, "grad_norm": 0.011085507459938526, "learning_rate": 0.00017741905820417014, "loss": 0.0032, "num_input_tokens_seen": 198585824, "step": 92060 }, { "epoch": 15.018760195758565, "grad_norm": 0.01159068662673235, "learning_rate": 0.00017736467682112245, "loss": 0.0026, "num_input_tokens_seen": 198596928, "step": 92065 }, { "epoch": 15.01957585644372, "grad_norm": 0.016354776918888092, "learning_rate": 0.00017731030197668847, "loss": 0.0059, "num_input_tokens_seen": 198608160, "step": 92070 }, { "epoch": 15.020391517128875, "grad_norm": 0.020781252533197403, "learning_rate": 0.00017725593367197095, "loss": 0.0262, "num_input_tokens_seen": 198618240, "step": 92075 }, { "epoch": 15.021207177814029, "grad_norm": 0.28883105516433716, "learning_rate": 0.00017720157190807107, "loss": 0.0099, "num_input_tokens_seen": 198628832, "step": 92080 }, { "epoch": 15.022022838499185, "grad_norm": 0.30782920122146606, "learning_rate": 0.00017714721668609095, "loss": 0.0382, "num_input_tokens_seen": 198639424, "step": 92085 }, { "epoch": 15.022838499184338, "grad_norm": 0.012834534980356693, "learning_rate": 0.00017709286800713202, "loss": 0.002, "num_input_tokens_seen": 198651008, "step": 92090 }, { "epoch": 15.023654159869494, "grad_norm": 0.04640903323888779, "learning_rate": 0.00017703852587229584, "loss": 0.0078, "num_input_tokens_seen": 198661536, "step": 92095 }, { "epoch": 15.02446982055465, "grad_norm": 0.004769181367009878, "learning_rate": 0.00017698419028268358, "loss": 0.0069, "num_input_tokens_seen": 198673056, "step": 92100 }, { "epoch": 15.025285481239804, "grad_norm": 0.013276834040880203, "learning_rate": 0.00017692986123939652, "loss": 0.0022, "num_input_tokens_seen": 198683200, "step": 92105 }, { "epoch": 15.02610114192496, "grad_norm": 0.0021160836331546307, "learning_rate": 0.00017687553874353563, "loss": 0.0755, "num_input_tokens_seen": 198694944, "step": 92110 }, { "epoch": 15.026916802610113, "grad_norm": 0.02373495325446129, "learning_rate": 0.0001768212227962019, "loss": 0.0037, "num_input_tokens_seen": 198705216, "step": 92115 }, { "epoch": 15.02773246329527, "grad_norm": 0.005740254186093807, "learning_rate": 0.00017676691339849605, "loss": 0.0037, "num_input_tokens_seen": 198715712, "step": 92120 }, { "epoch": 15.028548123980425, "grad_norm": 0.004589035175740719, "learning_rate": 0.00017671261055151872, "loss": 0.0015, "num_input_tokens_seen": 198727360, "step": 92125 }, { "epoch": 15.029363784665579, "grad_norm": 0.16137884557247162, "learning_rate": 0.00017665831425637052, "loss": 0.0043, "num_input_tokens_seen": 198737792, "step": 92130 }, { "epoch": 15.030179445350734, "grad_norm": 0.0026455000042915344, "learning_rate": 0.0001766040245141517, "loss": 0.0101, "num_input_tokens_seen": 198748032, "step": 92135 }, { "epoch": 15.030995106035888, "grad_norm": 0.0011369764106348157, "learning_rate": 0.00017654974132596263, "loss": 0.0052, "num_input_tokens_seen": 198759744, "step": 92140 }, { "epoch": 15.031810766721044, "grad_norm": 0.3635886609554291, "learning_rate": 0.00017649546469290333, "loss": 0.0156, "num_input_tokens_seen": 198770944, "step": 92145 }, { "epoch": 15.0326264274062, "grad_norm": 0.1499967873096466, "learning_rate": 0.00017644119461607388, "loss": 0.0191, "num_input_tokens_seen": 198781632, "step": 92150 }, { "epoch": 15.033442088091354, "grad_norm": 0.002419485943391919, "learning_rate": 0.0001763869310965741, "loss": 0.0071, "num_input_tokens_seen": 198792896, "step": 92155 }, { "epoch": 15.03425774877651, "grad_norm": 0.05932333320379257, "learning_rate": 0.00017633267413550362, "loss": 0.0052, "num_input_tokens_seen": 198803840, "step": 92160 }, { "epoch": 15.035073409461663, "grad_norm": 0.004340953193604946, "learning_rate": 0.00017627842373396202, "loss": 0.0029, "num_input_tokens_seen": 198814944, "step": 92165 }, { "epoch": 15.035889070146819, "grad_norm": 0.005325765814632177, "learning_rate": 0.00017622417989304913, "loss": 0.0069, "num_input_tokens_seen": 198825728, "step": 92170 }, { "epoch": 15.036704730831975, "grad_norm": 0.1973496526479721, "learning_rate": 0.0001761699426138636, "loss": 0.044, "num_input_tokens_seen": 198837088, "step": 92175 }, { "epoch": 15.037520391517129, "grad_norm": 0.009179106913506985, "learning_rate": 0.00017611571189750537, "loss": 0.013, "num_input_tokens_seen": 198848288, "step": 92180 }, { "epoch": 15.038336052202284, "grad_norm": 0.0005140295252203941, "learning_rate": 0.00017606148774507274, "loss": 0.0038, "num_input_tokens_seen": 198858400, "step": 92185 }, { "epoch": 15.039151712887438, "grad_norm": 0.02172279544174671, "learning_rate": 0.0001760072701576654, "loss": 0.002, "num_input_tokens_seen": 198868224, "step": 92190 }, { "epoch": 15.039967373572594, "grad_norm": 0.004843581933528185, "learning_rate": 0.00017595305913638138, "loss": 0.0083, "num_input_tokens_seen": 198877856, "step": 92195 }, { "epoch": 15.040783034257748, "grad_norm": 0.027915403246879578, "learning_rate": 0.00017589885468232002, "loss": 0.0057, "num_input_tokens_seen": 198888672, "step": 92200 }, { "epoch": 15.041598694942904, "grad_norm": 0.02397981286048889, "learning_rate": 0.00017584465679657918, "loss": 0.0043, "num_input_tokens_seen": 198898336, "step": 92205 }, { "epoch": 15.04241435562806, "grad_norm": 0.4185062348842621, "learning_rate": 0.00017579046548025796, "loss": 0.0076, "num_input_tokens_seen": 198908896, "step": 92210 }, { "epoch": 15.043230016313213, "grad_norm": 0.46495354175567627, "learning_rate": 0.00017573628073445393, "loss": 0.0339, "num_input_tokens_seen": 198919872, "step": 92215 }, { "epoch": 15.044045676998369, "grad_norm": 0.002673432929441333, "learning_rate": 0.00017568210256026578, "loss": 0.0279, "num_input_tokens_seen": 198930912, "step": 92220 }, { "epoch": 15.044861337683523, "grad_norm": 0.037136875092983246, "learning_rate": 0.000175627930958791, "loss": 0.0284, "num_input_tokens_seen": 198940896, "step": 92225 }, { "epoch": 15.045676998368679, "grad_norm": 0.19148589670658112, "learning_rate": 0.0001755737659311278, "loss": 0.0069, "num_input_tokens_seen": 198952448, "step": 92230 }, { "epoch": 15.046492659053834, "grad_norm": 0.010085180401802063, "learning_rate": 0.00017551960747837382, "loss": 0.0125, "num_input_tokens_seen": 198963392, "step": 92235 }, { "epoch": 15.047308319738988, "grad_norm": 0.010916695930063725, "learning_rate": 0.00017546545560162663, "loss": 0.0048, "num_input_tokens_seen": 198974368, "step": 92240 }, { "epoch": 15.048123980424144, "grad_norm": 1.1057188510894775, "learning_rate": 0.00017541131030198364, "loss": 0.1614, "num_input_tokens_seen": 198985408, "step": 92245 }, { "epoch": 15.048939641109298, "grad_norm": 0.0104009248316288, "learning_rate": 0.00017535717158054226, "loss": 0.0024, "num_input_tokens_seen": 198995968, "step": 92250 }, { "epoch": 15.049755301794454, "grad_norm": 0.09020555764436722, "learning_rate": 0.00017530303943839965, "loss": 0.0092, "num_input_tokens_seen": 199006624, "step": 92255 }, { "epoch": 15.05057096247961, "grad_norm": 0.03543743118643761, "learning_rate": 0.00017524891387665282, "loss": 0.007, "num_input_tokens_seen": 199017120, "step": 92260 }, { "epoch": 15.051386623164763, "grad_norm": 0.16398119926452637, "learning_rate": 0.00017519479489639877, "loss": 0.0044, "num_input_tokens_seen": 199026016, "step": 92265 }, { "epoch": 15.052202283849919, "grad_norm": 0.001528521184809506, "learning_rate": 0.0001751406824987342, "loss": 0.0019, "num_input_tokens_seen": 199038144, "step": 92270 }, { "epoch": 15.053017944535073, "grad_norm": 0.1836228221654892, "learning_rate": 0.00017508657668475585, "loss": 0.0041, "num_input_tokens_seen": 199049440, "step": 92275 }, { "epoch": 15.053833605220229, "grad_norm": 0.0023798972833901644, "learning_rate": 0.00017503247745556, "loss": 0.0032, "num_input_tokens_seen": 199060800, "step": 92280 }, { "epoch": 15.054649265905383, "grad_norm": 0.006202168762683868, "learning_rate": 0.0001749783848122436, "loss": 0.0397, "num_input_tokens_seen": 199071488, "step": 92285 }, { "epoch": 15.055464926590538, "grad_norm": 0.010280206799507141, "learning_rate": 0.0001749242987559022, "loss": 0.0059, "num_input_tokens_seen": 199082240, "step": 92290 }, { "epoch": 15.056280587275694, "grad_norm": 0.028452815487980843, "learning_rate": 0.00017487021928763263, "loss": 0.0226, "num_input_tokens_seen": 199093888, "step": 92295 }, { "epoch": 15.057096247960848, "grad_norm": 0.012108825147151947, "learning_rate": 0.0001748161464085302, "loss": 0.0332, "num_input_tokens_seen": 199104640, "step": 92300 }, { "epoch": 15.057911908646004, "grad_norm": 0.003961328417062759, "learning_rate": 0.00017476208011969142, "loss": 0.0029, "num_input_tokens_seen": 199116448, "step": 92305 }, { "epoch": 15.058727569331158, "grad_norm": 0.05018113926053047, "learning_rate": 0.0001747080204222113, "loss": 0.0062, "num_input_tokens_seen": 199125856, "step": 92310 }, { "epoch": 15.059543230016313, "grad_norm": 2.5079758167266846, "learning_rate": 0.00017465396731718619, "loss": 0.0486, "num_input_tokens_seen": 199136352, "step": 92315 }, { "epoch": 15.060358890701469, "grad_norm": 0.015776822343468666, "learning_rate": 0.0001745999208057108, "loss": 0.004, "num_input_tokens_seen": 199147136, "step": 92320 }, { "epoch": 15.061174551386623, "grad_norm": 0.004170059226453304, "learning_rate": 0.00017454588088888117, "loss": 0.0011, "num_input_tokens_seen": 199158240, "step": 92325 }, { "epoch": 15.061990212071779, "grad_norm": 0.02163352444767952, "learning_rate": 0.00017449184756779178, "loss": 0.0061, "num_input_tokens_seen": 199168992, "step": 92330 }, { "epoch": 15.062805872756933, "grad_norm": 0.009785100817680359, "learning_rate": 0.00017443782084353837, "loss": 0.0067, "num_input_tokens_seen": 199179648, "step": 92335 }, { "epoch": 15.063621533442088, "grad_norm": 0.010475009679794312, "learning_rate": 0.0001743838007172152, "loss": 0.0418, "num_input_tokens_seen": 199190656, "step": 92340 }, { "epoch": 15.064437194127244, "grad_norm": 0.0034593320451676846, "learning_rate": 0.00017432978718991772, "loss": 0.002, "num_input_tokens_seen": 199200192, "step": 92345 }, { "epoch": 15.065252854812398, "grad_norm": 0.9293054938316345, "learning_rate": 0.00017427578026273988, "loss": 0.1699, "num_input_tokens_seen": 199211040, "step": 92350 }, { "epoch": 15.066068515497554, "grad_norm": 0.26435452699661255, "learning_rate": 0.00017422177993677696, "loss": 0.0371, "num_input_tokens_seen": 199221600, "step": 92355 }, { "epoch": 15.066884176182707, "grad_norm": 0.0033007687889039516, "learning_rate": 0.00017416778621312257, "loss": 0.0046, "num_input_tokens_seen": 199233024, "step": 92360 }, { "epoch": 15.067699836867863, "grad_norm": 0.0004432809364516288, "learning_rate": 0.00017411379909287167, "loss": 0.0066, "num_input_tokens_seen": 199243936, "step": 92365 }, { "epoch": 15.068515497553017, "grad_norm": 1.1743769645690918, "learning_rate": 0.00017405981857711772, "loss": 0.0231, "num_input_tokens_seen": 199253408, "step": 92370 }, { "epoch": 15.069331158238173, "grad_norm": 0.0009909427026286721, "learning_rate": 0.0001740058446669552, "loss": 0.0126, "num_input_tokens_seen": 199263872, "step": 92375 }, { "epoch": 15.070146818923329, "grad_norm": 0.0033899147529155016, "learning_rate": 0.00017395187736347778, "loss": 0.0018, "num_input_tokens_seen": 199273984, "step": 92380 }, { "epoch": 15.070962479608482, "grad_norm": 0.01703052967786789, "learning_rate": 0.0001738979166677792, "loss": 0.0018, "num_input_tokens_seen": 199286912, "step": 92385 }, { "epoch": 15.071778140293638, "grad_norm": 0.06401059031486511, "learning_rate": 0.00017384396258095304, "loss": 0.0031, "num_input_tokens_seen": 199298624, "step": 92390 }, { "epoch": 15.072593800978792, "grad_norm": 0.0021884478628635406, "learning_rate": 0.0001737900151040927, "loss": 0.0104, "num_input_tokens_seen": 199308864, "step": 92395 }, { "epoch": 15.073409461663948, "grad_norm": 0.11439217627048492, "learning_rate": 0.00017373607423829159, "loss": 0.0463, "num_input_tokens_seen": 199318592, "step": 92400 }, { "epoch": 15.074225122349104, "grad_norm": 0.8450055122375488, "learning_rate": 0.00017368213998464278, "loss": 0.0767, "num_input_tokens_seen": 199328416, "step": 92405 }, { "epoch": 15.075040783034257, "grad_norm": 0.9606846570968628, "learning_rate": 0.00017362821234423936, "loss": 0.0992, "num_input_tokens_seen": 199338848, "step": 92410 }, { "epoch": 15.075856443719413, "grad_norm": 0.0036511393263936043, "learning_rate": 0.00017357429131817432, "loss": 0.0014, "num_input_tokens_seen": 199350720, "step": 92415 }, { "epoch": 15.076672104404567, "grad_norm": 0.0038500288501381874, "learning_rate": 0.0001735203769075403, "loss": 0.1755, "num_input_tokens_seen": 199360992, "step": 92420 }, { "epoch": 15.077487765089723, "grad_norm": 0.007394440937787294, "learning_rate": 0.00017346646911342985, "loss": 0.0113, "num_input_tokens_seen": 199372672, "step": 92425 }, { "epoch": 15.078303425774878, "grad_norm": 0.08335992693901062, "learning_rate": 0.000173412567936936, "loss": 0.0085, "num_input_tokens_seen": 199381856, "step": 92430 }, { "epoch": 15.079119086460032, "grad_norm": 0.11409106105566025, "learning_rate": 0.0001733586733791504, "loss": 0.0046, "num_input_tokens_seen": 199393120, "step": 92435 }, { "epoch": 15.079934747145188, "grad_norm": 0.008427751250565052, "learning_rate": 0.000173304785441166, "loss": 0.1063, "num_input_tokens_seen": 199405024, "step": 92440 }, { "epoch": 15.080750407830342, "grad_norm": 0.010266950353980064, "learning_rate": 0.00017325090412407423, "loss": 0.0012, "num_input_tokens_seen": 199414976, "step": 92445 }, { "epoch": 15.081566068515498, "grad_norm": 0.13835185766220093, "learning_rate": 0.00017319702942896777, "loss": 0.0046, "num_input_tokens_seen": 199424896, "step": 92450 }, { "epoch": 15.082381729200652, "grad_norm": 0.002194441854953766, "learning_rate": 0.00017314316135693775, "loss": 0.0026, "num_input_tokens_seen": 199436768, "step": 92455 }, { "epoch": 15.083197389885807, "grad_norm": 0.00943522434681654, "learning_rate": 0.00017308929990907652, "loss": 0.0061, "num_input_tokens_seen": 199447296, "step": 92460 }, { "epoch": 15.084013050570963, "grad_norm": 0.11505699902772903, "learning_rate": 0.000173035445086475, "loss": 0.003, "num_input_tokens_seen": 199458400, "step": 92465 }, { "epoch": 15.084828711256117, "grad_norm": 0.0016115279868245125, "learning_rate": 0.0001729815968902253, "loss": 0.0068, "num_input_tokens_seen": 199470080, "step": 92470 }, { "epoch": 15.085644371941273, "grad_norm": 0.016954926773905754, "learning_rate": 0.0001729277553214181, "loss": 0.0045, "num_input_tokens_seen": 199480128, "step": 92475 }, { "epoch": 15.086460032626427, "grad_norm": 0.06096147000789642, "learning_rate": 0.00017287392038114514, "loss": 0.0033, "num_input_tokens_seen": 199490944, "step": 92480 }, { "epoch": 15.087275693311582, "grad_norm": 0.01704513281583786, "learning_rate": 0.00017282009207049686, "loss": 0.0483, "num_input_tokens_seen": 199502624, "step": 92485 }, { "epoch": 15.088091353996738, "grad_norm": 0.048019904643297195, "learning_rate": 0.00017276627039056463, "loss": 0.0037, "num_input_tokens_seen": 199513152, "step": 92490 }, { "epoch": 15.088907014681892, "grad_norm": 0.18413586914539337, "learning_rate": 0.00017271245534243912, "loss": 0.0086, "num_input_tokens_seen": 199523488, "step": 92495 }, { "epoch": 15.089722675367048, "grad_norm": 0.02133675292134285, "learning_rate": 0.00017265864692721084, "loss": 0.0136, "num_input_tokens_seen": 199533920, "step": 92500 }, { "epoch": 15.090538336052202, "grad_norm": 0.0054010353051126, "learning_rate": 0.00017260484514597035, "loss": 0.0021, "num_input_tokens_seen": 199545440, "step": 92505 }, { "epoch": 15.091353996737357, "grad_norm": 0.09720762073993683, "learning_rate": 0.00017255104999980799, "loss": 0.0039, "num_input_tokens_seen": 199555904, "step": 92510 }, { "epoch": 15.092169657422513, "grad_norm": 0.7311348915100098, "learning_rate": 0.00017249726148981399, "loss": 0.0456, "num_input_tokens_seen": 199566112, "step": 92515 }, { "epoch": 15.092985318107667, "grad_norm": 0.003744817338883877, "learning_rate": 0.00017244347961707852, "loss": 0.0405, "num_input_tokens_seen": 199578080, "step": 92520 }, { "epoch": 15.093800978792823, "grad_norm": 0.0029269305523484945, "learning_rate": 0.00017238970438269142, "loss": 0.0053, "num_input_tokens_seen": 199588864, "step": 92525 }, { "epoch": 15.094616639477977, "grad_norm": 0.8154952526092529, "learning_rate": 0.00017233593578774254, "loss": 0.0615, "num_input_tokens_seen": 199599712, "step": 92530 }, { "epoch": 15.095432300163132, "grad_norm": 0.025878529995679855, "learning_rate": 0.00017228217383332163, "loss": 0.0056, "num_input_tokens_seen": 199610144, "step": 92535 }, { "epoch": 15.096247960848286, "grad_norm": 0.12342125177383423, "learning_rate": 0.00017222841852051817, "loss": 0.0064, "num_input_tokens_seen": 199621952, "step": 92540 }, { "epoch": 15.097063621533442, "grad_norm": 0.04897353798151016, "learning_rate": 0.0001721746698504217, "loss": 0.041, "num_input_tokens_seen": 199633376, "step": 92545 }, { "epoch": 15.097879282218598, "grad_norm": 0.3269202709197998, "learning_rate": 0.0001721209278241213, "loss": 0.0105, "num_input_tokens_seen": 199643840, "step": 92550 }, { "epoch": 15.098694942903752, "grad_norm": 0.01747013069689274, "learning_rate": 0.00017206719244270636, "loss": 0.0999, "num_input_tokens_seen": 199655072, "step": 92555 }, { "epoch": 15.099510603588907, "grad_norm": 0.002357908058911562, "learning_rate": 0.00017201346370726572, "loss": 0.0009, "num_input_tokens_seen": 199667392, "step": 92560 }, { "epoch": 15.100326264274061, "grad_norm": 0.5787882804870605, "learning_rate": 0.00017195974161888833, "loss": 0.014, "num_input_tokens_seen": 199677760, "step": 92565 }, { "epoch": 15.101141924959217, "grad_norm": 0.005114361178129911, "learning_rate": 0.00017190602617866274, "loss": 0.1479, "num_input_tokens_seen": 199688960, "step": 92570 }, { "epoch": 15.101957585644373, "grad_norm": 0.09417971968650818, "learning_rate": 0.0001718523173876781, "loss": 0.0197, "num_input_tokens_seen": 199699200, "step": 92575 }, { "epoch": 15.102773246329527, "grad_norm": 0.03254089877009392, "learning_rate": 0.00017179861524702216, "loss": 0.0078, "num_input_tokens_seen": 199709440, "step": 92580 }, { "epoch": 15.103588907014682, "grad_norm": 0.003594990586861968, "learning_rate": 0.000171744919757784, "loss": 0.0046, "num_input_tokens_seen": 199720256, "step": 92585 }, { "epoch": 15.104404567699836, "grad_norm": 0.005326172802597284, "learning_rate": 0.00017169123092105115, "loss": 0.0059, "num_input_tokens_seen": 199731712, "step": 92590 }, { "epoch": 15.105220228384992, "grad_norm": 0.0024266382679343224, "learning_rate": 0.0001716375487379121, "loss": 0.0091, "num_input_tokens_seen": 199743136, "step": 92595 }, { "epoch": 15.106035889070148, "grad_norm": 0.0008427354041486979, "learning_rate": 0.00017158387320945472, "loss": 0.0254, "num_input_tokens_seen": 199754976, "step": 92600 }, { "epoch": 15.106851549755302, "grad_norm": 0.04436079040169716, "learning_rate": 0.0001715302043367668, "loss": 0.0037, "num_input_tokens_seen": 199766144, "step": 92605 }, { "epoch": 15.107667210440457, "grad_norm": 0.014464179053902626, "learning_rate": 0.00017147654212093595, "loss": 0.0039, "num_input_tokens_seen": 199777600, "step": 92610 }, { "epoch": 15.108482871125611, "grad_norm": 0.3459320068359375, "learning_rate": 0.00017142288656304977, "loss": 0.0061, "num_input_tokens_seen": 199788064, "step": 92615 }, { "epoch": 15.109298531810767, "grad_norm": 0.043542515486478806, "learning_rate": 0.0001713692376641956, "loss": 0.0866, "num_input_tokens_seen": 199799264, "step": 92620 }, { "epoch": 15.11011419249592, "grad_norm": 0.26057589054107666, "learning_rate": 0.0001713155954254607, "loss": 0.0106, "num_input_tokens_seen": 199809376, "step": 92625 }, { "epoch": 15.110929853181077, "grad_norm": 1.4094449281692505, "learning_rate": 0.00017126195984793225, "loss": 0.1149, "num_input_tokens_seen": 199819296, "step": 92630 }, { "epoch": 15.111745513866232, "grad_norm": 0.008773445151746273, "learning_rate": 0.0001712083309326972, "loss": 0.0097, "num_input_tokens_seen": 199830368, "step": 92635 }, { "epoch": 15.112561174551386, "grad_norm": 0.0020154344383627176, "learning_rate": 0.0001711547086808425, "loss": 0.0202, "num_input_tokens_seen": 199841216, "step": 92640 }, { "epoch": 15.113376835236542, "grad_norm": 0.4791233241558075, "learning_rate": 0.00017110109309345468, "loss": 0.0258, "num_input_tokens_seen": 199851712, "step": 92645 }, { "epoch": 15.114192495921696, "grad_norm": 0.005759683437645435, "learning_rate": 0.00017104748417162054, "loss": 0.0038, "num_input_tokens_seen": 199863200, "step": 92650 }, { "epoch": 15.115008156606851, "grad_norm": 0.038598403334617615, "learning_rate": 0.0001709938819164264, "loss": 0.0283, "num_input_tokens_seen": 199873664, "step": 92655 }, { "epoch": 15.115823817292007, "grad_norm": 0.012022086419165134, "learning_rate": 0.00017094028632895863, "loss": 0.0041, "num_input_tokens_seen": 199885728, "step": 92660 }, { "epoch": 15.116639477977161, "grad_norm": 1.1506651639938354, "learning_rate": 0.0001708866974103034, "loss": 0.0859, "num_input_tokens_seen": 199895968, "step": 92665 }, { "epoch": 15.117455138662317, "grad_norm": 0.1709916889667511, "learning_rate": 0.0001708331151615467, "loss": 0.0081, "num_input_tokens_seen": 199907200, "step": 92670 }, { "epoch": 15.11827079934747, "grad_norm": 0.1829390525817871, "learning_rate": 0.00017077953958377458, "loss": 0.1097, "num_input_tokens_seen": 199917792, "step": 92675 }, { "epoch": 15.119086460032626, "grad_norm": 0.00809413194656372, "learning_rate": 0.0001707259706780727, "loss": 0.003, "num_input_tokens_seen": 199929696, "step": 92680 }, { "epoch": 15.119902120717782, "grad_norm": 0.06895152479410172, "learning_rate": 0.00017067240844552672, "loss": 0.0066, "num_input_tokens_seen": 199940064, "step": 92685 }, { "epoch": 15.120717781402936, "grad_norm": 0.2220110446214676, "learning_rate": 0.00017061885288722218, "loss": 0.0039, "num_input_tokens_seen": 199949664, "step": 92690 }, { "epoch": 15.121533442088092, "grad_norm": 0.1491006314754486, "learning_rate": 0.00017056530400424446, "loss": 0.0051, "num_input_tokens_seen": 199961600, "step": 92695 }, { "epoch": 15.122349102773246, "grad_norm": 0.005414762068539858, "learning_rate": 0.00017051176179767858, "loss": 0.0365, "num_input_tokens_seen": 199973024, "step": 92700 }, { "epoch": 15.123164763458401, "grad_norm": 0.0137543436139822, "learning_rate": 0.00017045822626861017, "loss": 0.0042, "num_input_tokens_seen": 199984352, "step": 92705 }, { "epoch": 15.123980424143557, "grad_norm": 0.042066410183906555, "learning_rate": 0.00017040469741812353, "loss": 0.0057, "num_input_tokens_seen": 199995264, "step": 92710 }, { "epoch": 15.124796084828711, "grad_norm": 0.004376793280243874, "learning_rate": 0.00017035117524730398, "loss": 0.0024, "num_input_tokens_seen": 200005920, "step": 92715 }, { "epoch": 15.125611745513867, "grad_norm": 0.022374460473656654, "learning_rate": 0.00017029765975723604, "loss": 0.0115, "num_input_tokens_seen": 200016416, "step": 92720 }, { "epoch": 15.12642740619902, "grad_norm": 0.7712196707725525, "learning_rate": 0.0001702441509490043, "loss": 0.0102, "num_input_tokens_seen": 200027936, "step": 92725 }, { "epoch": 15.127243066884176, "grad_norm": 0.003630427410826087, "learning_rate": 0.00017019064882369317, "loss": 0.019, "num_input_tokens_seen": 200038432, "step": 92730 }, { "epoch": 15.12805872756933, "grad_norm": 0.8032598495483398, "learning_rate": 0.00017013715338238695, "loss": 0.0242, "num_input_tokens_seen": 200049312, "step": 92735 }, { "epoch": 15.128874388254486, "grad_norm": 0.20272521674633026, "learning_rate": 0.00017008366462616976, "loss": 0.0087, "num_input_tokens_seen": 200060576, "step": 92740 }, { "epoch": 15.129690048939642, "grad_norm": 0.0522928461432457, "learning_rate": 0.00017003018255612562, "loss": 0.0093, "num_input_tokens_seen": 200070176, "step": 92745 }, { "epoch": 15.130505709624796, "grad_norm": 0.019363846629858017, "learning_rate": 0.00016997670717333846, "loss": 0.0022, "num_input_tokens_seen": 200081888, "step": 92750 }, { "epoch": 15.131321370309951, "grad_norm": 0.0029432361479848623, "learning_rate": 0.00016992323847889195, "loss": 0.0033, "num_input_tokens_seen": 200091488, "step": 92755 }, { "epoch": 15.132137030995105, "grad_norm": 0.0115745784714818, "learning_rate": 0.00016986977647386975, "loss": 0.0096, "num_input_tokens_seen": 200102304, "step": 92760 }, { "epoch": 15.132952691680261, "grad_norm": 0.10708451271057129, "learning_rate": 0.00016981632115935536, "loss": 0.0048, "num_input_tokens_seen": 200112576, "step": 92765 }, { "epoch": 15.133768352365417, "grad_norm": 0.008816435933113098, "learning_rate": 0.00016976287253643208, "loss": 0.0845, "num_input_tokens_seen": 200123360, "step": 92770 }, { "epoch": 15.13458401305057, "grad_norm": 0.008158848620951176, "learning_rate": 0.0001697094306061831, "loss": 0.0801, "num_input_tokens_seen": 200134112, "step": 92775 }, { "epoch": 15.135399673735726, "grad_norm": 0.004564171191304922, "learning_rate": 0.00016965599536969156, "loss": 0.0143, "num_input_tokens_seen": 200144704, "step": 92780 }, { "epoch": 15.13621533442088, "grad_norm": 0.024114388972520828, "learning_rate": 0.00016960256682804032, "loss": 0.0078, "num_input_tokens_seen": 200154656, "step": 92785 }, { "epoch": 15.137030995106036, "grad_norm": 0.051260411739349365, "learning_rate": 0.00016954914498231217, "loss": 0.0209, "num_input_tokens_seen": 200167392, "step": 92790 }, { "epoch": 15.137846655791192, "grad_norm": 0.012257966212928295, "learning_rate": 0.00016949572983358986, "loss": 0.1214, "num_input_tokens_seen": 200177632, "step": 92795 }, { "epoch": 15.138662316476346, "grad_norm": 0.01086465921252966, "learning_rate": 0.0001694423213829558, "loss": 0.0015, "num_input_tokens_seen": 200188704, "step": 92800 }, { "epoch": 15.139477977161501, "grad_norm": 0.0018037537811324, "learning_rate": 0.00016938891963149232, "loss": 0.0015, "num_input_tokens_seen": 200199488, "step": 92805 }, { "epoch": 15.140293637846655, "grad_norm": 0.01932554692029953, "learning_rate": 0.00016933552458028213, "loss": 0.0031, "num_input_tokens_seen": 200210560, "step": 92810 }, { "epoch": 15.141109298531811, "grad_norm": 0.09021841734647751, "learning_rate": 0.0001692821362304066, "loss": 0.01, "num_input_tokens_seen": 200221600, "step": 92815 }, { "epoch": 15.141924959216965, "grad_norm": 0.9075613021850586, "learning_rate": 0.00016922875458294856, "loss": 0.0319, "num_input_tokens_seen": 200232928, "step": 92820 }, { "epoch": 15.14274061990212, "grad_norm": 0.027114104479551315, "learning_rate": 0.00016917537963898903, "loss": 0.0021, "num_input_tokens_seen": 200243072, "step": 92825 }, { "epoch": 15.143556280587276, "grad_norm": 0.021894177421927452, "learning_rate": 0.0001691220113996105, "loss": 0.0744, "num_input_tokens_seen": 200254336, "step": 92830 }, { "epoch": 15.14437194127243, "grad_norm": 0.0063597639091312885, "learning_rate": 0.00016906864986589377, "loss": 0.0239, "num_input_tokens_seen": 200263552, "step": 92835 }, { "epoch": 15.145187601957586, "grad_norm": 0.058058395981788635, "learning_rate": 0.00016901529503892098, "loss": 0.2007, "num_input_tokens_seen": 200274304, "step": 92840 }, { "epoch": 15.14600326264274, "grad_norm": 0.8040278553962708, "learning_rate": 0.00016896194691977284, "loss": 0.1365, "num_input_tokens_seen": 200284160, "step": 92845 }, { "epoch": 15.146818923327896, "grad_norm": 0.021565787494182587, "learning_rate": 0.00016890860550953092, "loss": 0.0133, "num_input_tokens_seen": 200294432, "step": 92850 }, { "epoch": 15.147634584013051, "grad_norm": 0.013030310161411762, "learning_rate": 0.00016885527080927616, "loss": 0.0087, "num_input_tokens_seen": 200304928, "step": 92855 }, { "epoch": 15.148450244698205, "grad_norm": 0.023472705855965614, "learning_rate": 0.00016880194282008941, "loss": 0.0227, "num_input_tokens_seen": 200316032, "step": 92860 }, { "epoch": 15.149265905383361, "grad_norm": 0.02735469676554203, "learning_rate": 0.0001687486215430515, "loss": 0.0051, "num_input_tokens_seen": 200326176, "step": 92865 }, { "epoch": 15.150081566068515, "grad_norm": 0.004422585479915142, "learning_rate": 0.0001686953069792429, "loss": 0.0071, "num_input_tokens_seen": 200336320, "step": 92870 }, { "epoch": 15.15089722675367, "grad_norm": 0.030964959412813187, "learning_rate": 0.00016864199912974427, "loss": 0.0044, "num_input_tokens_seen": 200347872, "step": 92875 }, { "epoch": 15.151712887438826, "grad_norm": 0.01169986929744482, "learning_rate": 0.00016858869799563585, "loss": 0.0029, "num_input_tokens_seen": 200359872, "step": 92880 }, { "epoch": 15.15252854812398, "grad_norm": 0.02336915023624897, "learning_rate": 0.0001685354035779979, "loss": 0.0102, "num_input_tokens_seen": 200371328, "step": 92885 }, { "epoch": 15.153344208809136, "grad_norm": 0.0023992410860955715, "learning_rate": 0.00016848211587791045, "loss": 0.0093, "num_input_tokens_seen": 200383648, "step": 92890 }, { "epoch": 15.15415986949429, "grad_norm": 0.034394633024930954, "learning_rate": 0.00016842883489645355, "loss": 0.0043, "num_input_tokens_seen": 200393792, "step": 92895 }, { "epoch": 15.154975530179446, "grad_norm": 0.0048440746031701565, "learning_rate": 0.00016837556063470688, "loss": 0.0255, "num_input_tokens_seen": 200404896, "step": 92900 }, { "epoch": 15.1557911908646, "grad_norm": 0.01845521107316017, "learning_rate": 0.0001683222930937502, "loss": 0.0501, "num_input_tokens_seen": 200415520, "step": 92905 }, { "epoch": 15.156606851549755, "grad_norm": 0.0016556401969864964, "learning_rate": 0.00016826903227466284, "loss": 0.0144, "num_input_tokens_seen": 200426592, "step": 92910 }, { "epoch": 15.15742251223491, "grad_norm": 0.014970269054174423, "learning_rate": 0.00016821577817852473, "loss": 0.0034, "num_input_tokens_seen": 200437792, "step": 92915 }, { "epoch": 15.158238172920065, "grad_norm": 0.02680991031229496, "learning_rate": 0.00016816253080641441, "loss": 0.0034, "num_input_tokens_seen": 200447552, "step": 92920 }, { "epoch": 15.15905383360522, "grad_norm": 0.011493106372654438, "learning_rate": 0.00016810929015941174, "loss": 0.0825, "num_input_tokens_seen": 200458240, "step": 92925 }, { "epoch": 15.159869494290374, "grad_norm": 0.005236038472503424, "learning_rate": 0.00016805605623859492, "loss": 0.0117, "num_input_tokens_seen": 200468832, "step": 92930 }, { "epoch": 15.16068515497553, "grad_norm": 0.017697874456644058, "learning_rate": 0.0001680028290450436, "loss": 0.0172, "num_input_tokens_seen": 200479488, "step": 92935 }, { "epoch": 15.161500815660686, "grad_norm": 0.16629110276699066, "learning_rate": 0.00016794960857983583, "loss": 0.0054, "num_input_tokens_seen": 200489920, "step": 92940 }, { "epoch": 15.16231647634584, "grad_norm": 0.06376711279153824, "learning_rate": 0.00016789639484405077, "loss": 0.0037, "num_input_tokens_seen": 200500544, "step": 92945 }, { "epoch": 15.163132137030995, "grad_norm": 0.006531179416924715, "learning_rate": 0.00016784318783876623, "loss": 0.0015, "num_input_tokens_seen": 200511008, "step": 92950 }, { "epoch": 15.16394779771615, "grad_norm": 0.009545995853841305, "learning_rate": 0.0001677899875650612, "loss": 0.0336, "num_input_tokens_seen": 200522176, "step": 92955 }, { "epoch": 15.164763458401305, "grad_norm": 0.15510085225105286, "learning_rate": 0.00016773679402401321, "loss": 0.013, "num_input_tokens_seen": 200532256, "step": 92960 }, { "epoch": 15.16557911908646, "grad_norm": 0.0063102589920163155, "learning_rate": 0.0001676836072167009, "loss": 0.0023, "num_input_tokens_seen": 200544128, "step": 92965 }, { "epoch": 15.166394779771615, "grad_norm": 1.0723806619644165, "learning_rate": 0.0001676304271442015, "loss": 0.0174, "num_input_tokens_seen": 200553280, "step": 92970 }, { "epoch": 15.16721044045677, "grad_norm": 0.023368358612060547, "learning_rate": 0.00016757725380759354, "loss": 0.0037, "num_input_tokens_seen": 200562976, "step": 92975 }, { "epoch": 15.168026101141924, "grad_norm": 0.005605877842754126, "learning_rate": 0.00016752408720795386, "loss": 0.0024, "num_input_tokens_seen": 200574016, "step": 92980 }, { "epoch": 15.16884176182708, "grad_norm": 0.09518667310476303, "learning_rate": 0.00016747092734636067, "loss": 0.0129, "num_input_tokens_seen": 200584256, "step": 92985 }, { "epoch": 15.169657422512234, "grad_norm": 0.0031309176702052355, "learning_rate": 0.0001674177742238906, "loss": 0.017, "num_input_tokens_seen": 200595136, "step": 92990 }, { "epoch": 15.17047308319739, "grad_norm": 0.013875356875360012, "learning_rate": 0.0001673646278416215, "loss": 0.0043, "num_input_tokens_seen": 200606144, "step": 92995 }, { "epoch": 15.171288743882545, "grad_norm": 0.28312602639198303, "learning_rate": 0.00016731148820063013, "loss": 0.0064, "num_input_tokens_seen": 200616736, "step": 93000 }, { "epoch": 15.1721044045677, "grad_norm": 0.13794448971748352, "learning_rate": 0.00016725835530199352, "loss": 0.017, "num_input_tokens_seen": 200626912, "step": 93005 }, { "epoch": 15.172920065252855, "grad_norm": 0.026828154921531677, "learning_rate": 0.00016720522914678843, "loss": 0.0114, "num_input_tokens_seen": 200637280, "step": 93010 }, { "epoch": 15.173735725938009, "grad_norm": 0.002230308949947357, "learning_rate": 0.00016715210973609158, "loss": 0.0031, "num_input_tokens_seen": 200647264, "step": 93015 }, { "epoch": 15.174551386623165, "grad_norm": 0.026384588330984116, "learning_rate": 0.00016709899707097948, "loss": 0.0196, "num_input_tokens_seen": 200657440, "step": 93020 }, { "epoch": 15.17536704730832, "grad_norm": 0.9383867383003235, "learning_rate": 0.0001670458911525285, "loss": 0.1603, "num_input_tokens_seen": 200667520, "step": 93025 }, { "epoch": 15.176182707993474, "grad_norm": 0.4096151292324066, "learning_rate": 0.00016699279198181493, "loss": 0.0576, "num_input_tokens_seen": 200678176, "step": 93030 }, { "epoch": 15.17699836867863, "grad_norm": 0.037542298436164856, "learning_rate": 0.00016693969955991483, "loss": 0.0018, "num_input_tokens_seen": 200689920, "step": 93035 }, { "epoch": 15.177814029363784, "grad_norm": 0.005515703931450844, "learning_rate": 0.00016688661388790434, "loss": 0.0037, "num_input_tokens_seen": 200700096, "step": 93040 }, { "epoch": 15.17862969004894, "grad_norm": 0.001807856373488903, "learning_rate": 0.00016683353496685895, "loss": 0.0264, "num_input_tokens_seen": 200711744, "step": 93045 }, { "epoch": 15.179445350734095, "grad_norm": 0.2318689078092575, "learning_rate": 0.00016678046279785497, "loss": 0.0261, "num_input_tokens_seen": 200723520, "step": 93050 }, { "epoch": 15.18026101141925, "grad_norm": 0.0435001514852047, "learning_rate": 0.00016672739738196734, "loss": 0.0049, "num_input_tokens_seen": 200734528, "step": 93055 }, { "epoch": 15.181076672104405, "grad_norm": 0.005183860193938017, "learning_rate": 0.0001666743387202721, "loss": 0.0018, "num_input_tokens_seen": 200745632, "step": 93060 }, { "epoch": 15.181892332789559, "grad_norm": 0.0012149765389040112, "learning_rate": 0.00016662128681384388, "loss": 0.0038, "num_input_tokens_seen": 200757216, "step": 93065 }, { "epoch": 15.182707993474715, "grad_norm": 0.01831172965466976, "learning_rate": 0.00016656824166375855, "loss": 0.0374, "num_input_tokens_seen": 200768352, "step": 93070 }, { "epoch": 15.18352365415987, "grad_norm": 0.041524212807416916, "learning_rate": 0.0001665152032710905, "loss": 0.0063, "num_input_tokens_seen": 200778688, "step": 93075 }, { "epoch": 15.184339314845024, "grad_norm": 0.0025538376066833735, "learning_rate": 0.0001664621716369152, "loss": 0.0033, "num_input_tokens_seen": 200789728, "step": 93080 }, { "epoch": 15.18515497553018, "grad_norm": 0.019970331341028214, "learning_rate": 0.00016640914676230677, "loss": 0.0101, "num_input_tokens_seen": 200800832, "step": 93085 }, { "epoch": 15.185970636215334, "grad_norm": 0.0014809992862865329, "learning_rate": 0.00016635612864834048, "loss": 0.0017, "num_input_tokens_seen": 200811904, "step": 93090 }, { "epoch": 15.18678629690049, "grad_norm": 0.0473366379737854, "learning_rate": 0.00016630311729609026, "loss": 0.0041, "num_input_tokens_seen": 200823200, "step": 93095 }, { "epoch": 15.187601957585644, "grad_norm": 0.02218029648065567, "learning_rate": 0.00016625011270663098, "loss": 0.0017, "num_input_tokens_seen": 200832640, "step": 93100 }, { "epoch": 15.1884176182708, "grad_norm": 0.0012344036949798465, "learning_rate": 0.00016619711488103622, "loss": 0.0018, "num_input_tokens_seen": 200843648, "step": 93105 }, { "epoch": 15.189233278955955, "grad_norm": 0.35517001152038574, "learning_rate": 0.0001661441238203807, "loss": 0.0033, "num_input_tokens_seen": 200854496, "step": 93110 }, { "epoch": 15.190048939641109, "grad_norm": 0.060565125197172165, "learning_rate": 0.00016609113952573774, "loss": 0.0023, "num_input_tokens_seen": 200865888, "step": 93115 }, { "epoch": 15.190864600326265, "grad_norm": 0.0015095153357833624, "learning_rate": 0.0001660381619981817, "loss": 0.0066, "num_input_tokens_seen": 200875680, "step": 93120 }, { "epoch": 15.191680261011419, "grad_norm": 0.1777465045452118, "learning_rate": 0.0001659851912387857, "loss": 0.0066, "num_input_tokens_seen": 200886464, "step": 93125 }, { "epoch": 15.192495921696574, "grad_norm": 0.04416933283209801, "learning_rate": 0.00016593222724862366, "loss": 0.0016, "num_input_tokens_seen": 200897536, "step": 93130 }, { "epoch": 15.19331158238173, "grad_norm": 0.986678957939148, "learning_rate": 0.0001658792700287689, "loss": 0.0949, "num_input_tokens_seen": 200908000, "step": 93135 }, { "epoch": 15.194127243066884, "grad_norm": 0.0024869258049875498, "learning_rate": 0.00016582631958029454, "loss": 0.0054, "num_input_tokens_seen": 200918944, "step": 93140 }, { "epoch": 15.19494290375204, "grad_norm": 0.001779734157025814, "learning_rate": 0.00016577337590427372, "loss": 0.0021, "num_input_tokens_seen": 200930784, "step": 93145 }, { "epoch": 15.195758564437194, "grad_norm": 0.011840629391372204, "learning_rate": 0.00016572043900177946, "loss": 0.0034, "num_input_tokens_seen": 200940864, "step": 93150 }, { "epoch": 15.19657422512235, "grad_norm": 0.0016255477676168084, "learning_rate": 0.0001656675088738846, "loss": 0.0087, "num_input_tokens_seen": 200951680, "step": 93155 }, { "epoch": 15.197389885807505, "grad_norm": 0.07022818177938461, "learning_rate": 0.00016561458552166174, "loss": 0.0038, "num_input_tokens_seen": 200961760, "step": 93160 }, { "epoch": 15.198205546492659, "grad_norm": 0.08039414137601852, "learning_rate": 0.00016556166894618352, "loss": 0.0209, "num_input_tokens_seen": 200973344, "step": 93165 }, { "epoch": 15.199021207177815, "grad_norm": 0.019707299768924713, "learning_rate": 0.00016550875914852237, "loss": 0.119, "num_input_tokens_seen": 200983520, "step": 93170 }, { "epoch": 15.199836867862969, "grad_norm": 3.3102333545684814, "learning_rate": 0.00016545585612975051, "loss": 0.0539, "num_input_tokens_seen": 200995872, "step": 93175 }, { "epoch": 15.200652528548124, "grad_norm": 0.0033410172909498215, "learning_rate": 0.00016540295989094018, "loss": 0.0043, "num_input_tokens_seen": 201007104, "step": 93180 }, { "epoch": 15.201468189233278, "grad_norm": 0.0036219051107764244, "learning_rate": 0.0001653500704331633, "loss": 0.0021, "num_input_tokens_seen": 201017728, "step": 93185 }, { "epoch": 15.202283849918434, "grad_norm": 0.02122221328318119, "learning_rate": 0.0001652971877574916, "loss": 0.0076, "num_input_tokens_seen": 201029312, "step": 93190 }, { "epoch": 15.20309951060359, "grad_norm": 0.004231644328683615, "learning_rate": 0.00016524431186499733, "loss": 0.0033, "num_input_tokens_seen": 201039360, "step": 93195 }, { "epoch": 15.203915171288743, "grad_norm": 0.5846337676048279, "learning_rate": 0.0001651914427567514, "loss": 0.1184, "num_input_tokens_seen": 201050656, "step": 93200 }, { "epoch": 15.2047308319739, "grad_norm": 0.0016318875132128596, "learning_rate": 0.000165138580433826, "loss": 0.0049, "num_input_tokens_seen": 201061792, "step": 93205 }, { "epoch": 15.205546492659053, "grad_norm": 0.05991901457309723, "learning_rate": 0.00016508572489729172, "loss": 0.0086, "num_input_tokens_seen": 201072416, "step": 93210 }, { "epoch": 15.206362153344209, "grad_norm": 0.09028225392103195, "learning_rate": 0.00016503287614822042, "loss": 0.0089, "num_input_tokens_seen": 201082016, "step": 93215 }, { "epoch": 15.207177814029365, "grad_norm": 0.020474599674344063, "learning_rate": 0.00016498003418768248, "loss": 0.0669, "num_input_tokens_seen": 201093888, "step": 93220 }, { "epoch": 15.207993474714518, "grad_norm": 1.1281850337982178, "learning_rate": 0.00016492719901674947, "loss": 0.34, "num_input_tokens_seen": 201104864, "step": 93225 }, { "epoch": 15.208809135399674, "grad_norm": 0.0008576255640946329, "learning_rate": 0.00016487437063649152, "loss": 0.0064, "num_input_tokens_seen": 201114368, "step": 93230 }, { "epoch": 15.209624796084828, "grad_norm": 0.05623749643564224, "learning_rate": 0.00016482154904797974, "loss": 0.0051, "num_input_tokens_seen": 201124864, "step": 93235 }, { "epoch": 15.210440456769984, "grad_norm": 0.0029751714318990707, "learning_rate": 0.0001647687342522845, "loss": 0.013, "num_input_tokens_seen": 201135840, "step": 93240 }, { "epoch": 15.21125611745514, "grad_norm": 0.020028823986649513, "learning_rate": 0.00016471592625047615, "loss": 0.0042, "num_input_tokens_seen": 201146528, "step": 93245 }, { "epoch": 15.212071778140293, "grad_norm": 0.0018529262160882354, "learning_rate": 0.00016466312504362485, "loss": 0.0031, "num_input_tokens_seen": 201158528, "step": 93250 }, { "epoch": 15.21288743882545, "grad_norm": 0.5555551648139954, "learning_rate": 0.00016461033063280074, "loss": 0.0101, "num_input_tokens_seen": 201169088, "step": 93255 }, { "epoch": 15.213703099510603, "grad_norm": 0.06631113588809967, "learning_rate": 0.00016455754301907376, "loss": 0.0074, "num_input_tokens_seen": 201178912, "step": 93260 }, { "epoch": 15.214518760195759, "grad_norm": 0.02033190056681633, "learning_rate": 0.00016450476220351368, "loss": 0.0068, "num_input_tokens_seen": 201188224, "step": 93265 }, { "epoch": 15.215334420880913, "grad_norm": 0.9634767174720764, "learning_rate": 0.00016445198818719025, "loss": 0.0904, "num_input_tokens_seen": 201198592, "step": 93270 }, { "epoch": 15.216150081566068, "grad_norm": 0.013013369403779507, "learning_rate": 0.00016439922097117294, "loss": 0.0786, "num_input_tokens_seen": 201209376, "step": 93275 }, { "epoch": 15.216965742251224, "grad_norm": 0.00383618101477623, "learning_rate": 0.00016434646055653112, "loss": 0.0705, "num_input_tokens_seen": 201220960, "step": 93280 }, { "epoch": 15.217781402936378, "grad_norm": 0.8239511251449585, "learning_rate": 0.0001642937069443341, "loss": 0.0225, "num_input_tokens_seen": 201231968, "step": 93285 }, { "epoch": 15.218597063621534, "grad_norm": 0.0029203027952462435, "learning_rate": 0.00016424096013565098, "loss": 0.003, "num_input_tokens_seen": 201244224, "step": 93290 }, { "epoch": 15.219412724306688, "grad_norm": 0.016987305134534836, "learning_rate": 0.00016418822013155077, "loss": 0.0018, "num_input_tokens_seen": 201255968, "step": 93295 }, { "epoch": 15.220228384991843, "grad_norm": 0.0038909113500267267, "learning_rate": 0.00016413548693310225, "loss": 0.0043, "num_input_tokens_seen": 201266176, "step": 93300 }, { "epoch": 15.221044045676999, "grad_norm": 0.0026371332351118326, "learning_rate": 0.00016408276054137417, "loss": 0.0467, "num_input_tokens_seen": 201277984, "step": 93305 }, { "epoch": 15.221859706362153, "grad_norm": 0.006667413283139467, "learning_rate": 0.00016403004095743513, "loss": 0.0964, "num_input_tokens_seen": 201288992, "step": 93310 }, { "epoch": 15.222675367047309, "grad_norm": 0.04084065556526184, "learning_rate": 0.00016397732818235344, "loss": 0.0479, "num_input_tokens_seen": 201298912, "step": 93315 }, { "epoch": 15.223491027732463, "grad_norm": 0.11293726414442062, "learning_rate": 0.0001639246222171975, "loss": 0.0083, "num_input_tokens_seen": 201308672, "step": 93320 }, { "epoch": 15.224306688417618, "grad_norm": 0.029959220439195633, "learning_rate": 0.0001638719230630355, "loss": 0.0038, "num_input_tokens_seen": 201319040, "step": 93325 }, { "epoch": 15.225122349102774, "grad_norm": 0.7915744185447693, "learning_rate": 0.0001638192307209353, "loss": 0.0493, "num_input_tokens_seen": 201329120, "step": 93330 }, { "epoch": 15.225938009787928, "grad_norm": 0.0037176923360675573, "learning_rate": 0.00016376654519196477, "loss": 0.0046, "num_input_tokens_seen": 201339456, "step": 93335 }, { "epoch": 15.226753670473084, "grad_norm": 0.1465604305267334, "learning_rate": 0.00016371386647719182, "loss": 0.0127, "num_input_tokens_seen": 201351136, "step": 93340 }, { "epoch": 15.227569331158238, "grad_norm": 0.04687599465250969, "learning_rate": 0.00016366119457768407, "loss": 0.0381, "num_input_tokens_seen": 201362752, "step": 93345 }, { "epoch": 15.228384991843393, "grad_norm": 0.020495915785431862, "learning_rate": 0.00016360852949450882, "loss": 0.0288, "num_input_tokens_seen": 201373920, "step": 93350 }, { "epoch": 15.229200652528547, "grad_norm": 0.011098107323050499, "learning_rate": 0.00016355587122873349, "loss": 0.0091, "num_input_tokens_seen": 201384160, "step": 93355 }, { "epoch": 15.230016313213703, "grad_norm": 0.011931014247238636, "learning_rate": 0.00016350321978142525, "loss": 0.0091, "num_input_tokens_seen": 201395712, "step": 93360 }, { "epoch": 15.230831973898859, "grad_norm": 0.13941709697246552, "learning_rate": 0.00016345057515365115, "loss": 0.0045, "num_input_tokens_seen": 201406976, "step": 93365 }, { "epoch": 15.231647634584013, "grad_norm": 0.1175413429737091, "learning_rate": 0.00016339793734647807, "loss": 0.0903, "num_input_tokens_seen": 201417632, "step": 93370 }, { "epoch": 15.232463295269168, "grad_norm": 0.015810322016477585, "learning_rate": 0.00016334530636097277, "loss": 0.0033, "num_input_tokens_seen": 201427904, "step": 93375 }, { "epoch": 15.233278955954322, "grad_norm": 0.0014710014220327139, "learning_rate": 0.00016329268219820192, "loss": 0.004, "num_input_tokens_seen": 201438752, "step": 93380 }, { "epoch": 15.234094616639478, "grad_norm": 0.025947077199816704, "learning_rate": 0.00016324006485923204, "loss": 0.0019, "num_input_tokens_seen": 201449440, "step": 93385 }, { "epoch": 15.234910277324634, "grad_norm": 0.005095946602523327, "learning_rate": 0.00016318745434512944, "loss": 0.0572, "num_input_tokens_seen": 201459936, "step": 93390 }, { "epoch": 15.235725938009788, "grad_norm": 0.017466919496655464, "learning_rate": 0.00016313485065696037, "loss": 0.2531, "num_input_tokens_seen": 201471776, "step": 93395 }, { "epoch": 15.236541598694943, "grad_norm": 0.0024784619454294443, "learning_rate": 0.00016308225379579088, "loss": 0.0731, "num_input_tokens_seen": 201482688, "step": 93400 }, { "epoch": 15.237357259380097, "grad_norm": 0.007899872027337551, "learning_rate": 0.0001630296637626869, "loss": 0.0041, "num_input_tokens_seen": 201492672, "step": 93405 }, { "epoch": 15.238172920065253, "grad_norm": 0.004295023623853922, "learning_rate": 0.0001629770805587143, "loss": 0.0068, "num_input_tokens_seen": 201503168, "step": 93410 }, { "epoch": 15.238988580750409, "grad_norm": 0.15483272075653076, "learning_rate": 0.0001629245041849387, "loss": 0.0061, "num_input_tokens_seen": 201513984, "step": 93415 }, { "epoch": 15.239804241435563, "grad_norm": 0.02152002602815628, "learning_rate": 0.0001628719346424256, "loss": 0.0229, "num_input_tokens_seen": 201524512, "step": 93420 }, { "epoch": 15.240619902120718, "grad_norm": 0.025621162727475166, "learning_rate": 0.00016281937193224051, "loss": 0.0058, "num_input_tokens_seen": 201535392, "step": 93425 }, { "epoch": 15.241435562805872, "grad_norm": 0.09032971411943436, "learning_rate": 0.0001627668160554485, "loss": 0.0118, "num_input_tokens_seen": 201546048, "step": 93430 }, { "epoch": 15.242251223491028, "grad_norm": 0.03607800230383873, "learning_rate": 0.00016271426701311483, "loss": 0.0105, "num_input_tokens_seen": 201556800, "step": 93435 }, { "epoch": 15.243066884176184, "grad_norm": 0.08859468251466751, "learning_rate": 0.00016266172480630436, "loss": 0.0027, "num_input_tokens_seen": 201567712, "step": 93440 }, { "epoch": 15.243882544861338, "grad_norm": 0.4996335506439209, "learning_rate": 0.0001626091894360819, "loss": 0.1255, "num_input_tokens_seen": 201579616, "step": 93445 }, { "epoch": 15.244698205546493, "grad_norm": 0.030633576214313507, "learning_rate": 0.00016255666090351245, "loss": 0.0108, "num_input_tokens_seen": 201589472, "step": 93450 }, { "epoch": 15.245513866231647, "grad_norm": 0.005273215938359499, "learning_rate": 0.00016250413920966013, "loss": 0.0135, "num_input_tokens_seen": 201600160, "step": 93455 }, { "epoch": 15.246329526916803, "grad_norm": 0.005053229629993439, "learning_rate": 0.0001624516243555898, "loss": 0.0152, "num_input_tokens_seen": 201610624, "step": 93460 }, { "epoch": 15.247145187601957, "grad_norm": 0.018323170021176338, "learning_rate": 0.00016239911634236527, "loss": 0.1602, "num_input_tokens_seen": 201620832, "step": 93465 }, { "epoch": 15.247960848287113, "grad_norm": 0.010889318771660328, "learning_rate": 0.00016234661517105115, "loss": 0.0115, "num_input_tokens_seen": 201631712, "step": 93470 }, { "epoch": 15.248776508972268, "grad_norm": 0.09475913643836975, "learning_rate": 0.00016229412084271095, "loss": 0.0798, "num_input_tokens_seen": 201643520, "step": 93475 }, { "epoch": 15.249592169657422, "grad_norm": 0.028576266020536423, "learning_rate": 0.00016224163335840897, "loss": 0.0051, "num_input_tokens_seen": 201654560, "step": 93480 }, { "epoch": 15.250407830342578, "grad_norm": 0.01022564247250557, "learning_rate": 0.00016218915271920875, "loss": 0.0806, "num_input_tokens_seen": 201666272, "step": 93485 }, { "epoch": 15.251223491027732, "grad_norm": 0.009482518769800663, "learning_rate": 0.00016213667892617394, "loss": 0.0027, "num_input_tokens_seen": 201677152, "step": 93490 }, { "epoch": 15.252039151712887, "grad_norm": 0.005312257446348667, "learning_rate": 0.00016208421198036789, "loss": 0.0026, "num_input_tokens_seen": 201688256, "step": 93495 }, { "epoch": 15.252854812398043, "grad_norm": 0.039040952920913696, "learning_rate": 0.00016203175188285397, "loss": 0.026, "num_input_tokens_seen": 201698496, "step": 93500 }, { "epoch": 15.253670473083197, "grad_norm": 0.02236112579703331, "learning_rate": 0.00016197929863469534, "loss": 0.0115, "num_input_tokens_seen": 201707552, "step": 93505 }, { "epoch": 15.254486133768353, "grad_norm": 0.00526417326182127, "learning_rate": 0.0001619268522369551, "loss": 0.0025, "num_input_tokens_seen": 201719456, "step": 93510 }, { "epoch": 15.255301794453507, "grad_norm": 1.1048693656921387, "learning_rate": 0.00016187441269069596, "loss": 0.0663, "num_input_tokens_seen": 201730336, "step": 93515 }, { "epoch": 15.256117455138662, "grad_norm": 0.024940865114331245, "learning_rate": 0.00016182197999698084, "loss": 0.0028, "num_input_tokens_seen": 201740512, "step": 93520 }, { "epoch": 15.256933115823816, "grad_norm": 0.3055764436721802, "learning_rate": 0.00016176955415687233, "loss": 0.0715, "num_input_tokens_seen": 201751808, "step": 93525 }, { "epoch": 15.257748776508972, "grad_norm": 0.021552668884396553, "learning_rate": 0.00016171713517143288, "loss": 0.0048, "num_input_tokens_seen": 201763744, "step": 93530 }, { "epoch": 15.258564437194128, "grad_norm": 0.6771237850189209, "learning_rate": 0.0001616647230417248, "loss": 0.1079, "num_input_tokens_seen": 201775552, "step": 93535 }, { "epoch": 15.259380097879282, "grad_norm": 0.008407747372984886, "learning_rate": 0.0001616123177688103, "loss": 0.0169, "num_input_tokens_seen": 201785440, "step": 93540 }, { "epoch": 15.260195758564437, "grad_norm": 0.6358749866485596, "learning_rate": 0.00016155991935375147, "loss": 0.2175, "num_input_tokens_seen": 201797152, "step": 93545 }, { "epoch": 15.261011419249591, "grad_norm": 0.1291075497865677, "learning_rate": 0.00016150752779761008, "loss": 0.0231, "num_input_tokens_seen": 201808384, "step": 93550 }, { "epoch": 15.261827079934747, "grad_norm": 0.018070222809910774, "learning_rate": 0.00016145514310144838, "loss": 0.0095, "num_input_tokens_seen": 201819328, "step": 93555 }, { "epoch": 15.262642740619903, "grad_norm": 0.010035510174930096, "learning_rate": 0.0001614027652663273, "loss": 0.0278, "num_input_tokens_seen": 201830496, "step": 93560 }, { "epoch": 15.263458401305057, "grad_norm": 0.002704473678022623, "learning_rate": 0.00016135039429330912, "loss": 0.0255, "num_input_tokens_seen": 201839232, "step": 93565 }, { "epoch": 15.264274061990212, "grad_norm": 0.033941056579351425, "learning_rate": 0.0001612980301834544, "loss": 0.015, "num_input_tokens_seen": 201849568, "step": 93570 }, { "epoch": 15.265089722675366, "grad_norm": 0.16203400492668152, "learning_rate": 0.00016124567293782517, "loss": 0.0876, "num_input_tokens_seen": 201860064, "step": 93575 }, { "epoch": 15.265905383360522, "grad_norm": 0.09509972482919693, "learning_rate": 0.00016119332255748177, "loss": 0.0125, "num_input_tokens_seen": 201870272, "step": 93580 }, { "epoch": 15.266721044045678, "grad_norm": 0.028133414685726166, "learning_rate": 0.0001611409790434858, "loss": 0.0057, "num_input_tokens_seen": 201879968, "step": 93585 }, { "epoch": 15.267536704730832, "grad_norm": 0.01676175184547901, "learning_rate": 0.00016108864239689746, "loss": 0.0402, "num_input_tokens_seen": 201889536, "step": 93590 }, { "epoch": 15.268352365415987, "grad_norm": 0.019999578595161438, "learning_rate": 0.00016103631261877799, "loss": 0.0352, "num_input_tokens_seen": 201899744, "step": 93595 }, { "epoch": 15.269168026101141, "grad_norm": 0.09407703578472137, "learning_rate": 0.0001609839897101874, "loss": 0.0083, "num_input_tokens_seen": 201910240, "step": 93600 }, { "epoch": 15.269983686786297, "grad_norm": 0.28219860792160034, "learning_rate": 0.00016093167367218665, "loss": 0.0136, "num_input_tokens_seen": 201920832, "step": 93605 }, { "epoch": 15.270799347471453, "grad_norm": 0.13837242126464844, "learning_rate": 0.0001608793645058353, "loss": 0.0118, "num_input_tokens_seen": 201931936, "step": 93610 }, { "epoch": 15.271615008156607, "grad_norm": 0.014441683888435364, "learning_rate": 0.0001608270622121942, "loss": 0.0076, "num_input_tokens_seen": 201941888, "step": 93615 }, { "epoch": 15.272430668841762, "grad_norm": 0.054423991590738297, "learning_rate": 0.00016077476679232262, "loss": 0.0095, "num_input_tokens_seen": 201953120, "step": 93620 }, { "epoch": 15.273246329526916, "grad_norm": 0.01078310888260603, "learning_rate": 0.00016072247824728086, "loss": 0.0045, "num_input_tokens_seen": 201964672, "step": 93625 }, { "epoch": 15.274061990212072, "grad_norm": 0.02499009482562542, "learning_rate": 0.00016067019657812852, "loss": 0.003, "num_input_tokens_seen": 201976000, "step": 93630 }, { "epoch": 15.274877650897226, "grad_norm": 0.053357820957899094, "learning_rate": 0.0001606179217859251, "loss": 0.0425, "num_input_tokens_seen": 201986656, "step": 93635 }, { "epoch": 15.275693311582382, "grad_norm": 0.012285932898521423, "learning_rate": 0.00016056565387173005, "loss": 0.0034, "num_input_tokens_seen": 201996928, "step": 93640 }, { "epoch": 15.276508972267537, "grad_norm": 0.007187076844274998, "learning_rate": 0.0001605133928366026, "loss": 0.0102, "num_input_tokens_seen": 202007712, "step": 93645 }, { "epoch": 15.277324632952691, "grad_norm": 0.8301703333854675, "learning_rate": 0.00016046113868160194, "loss": 0.0899, "num_input_tokens_seen": 202018656, "step": 93650 }, { "epoch": 15.278140293637847, "grad_norm": 0.21546700596809387, "learning_rate": 0.00016040889140778703, "loss": 0.0961, "num_input_tokens_seen": 202029184, "step": 93655 }, { "epoch": 15.278955954323001, "grad_norm": 0.06725151836872101, "learning_rate": 0.00016035665101621672, "loss": 0.0104, "num_input_tokens_seen": 202038976, "step": 93660 }, { "epoch": 15.279771615008157, "grad_norm": 0.1801171600818634, "learning_rate": 0.00016030441750794976, "loss": 0.0165, "num_input_tokens_seen": 202047904, "step": 93665 }, { "epoch": 15.280587275693312, "grad_norm": 0.08822453767061234, "learning_rate": 0.00016025219088404468, "loss": 0.0105, "num_input_tokens_seen": 202058400, "step": 93670 }, { "epoch": 15.281402936378466, "grad_norm": 0.03565447777509689, "learning_rate": 0.00016019997114555983, "loss": 0.0052, "num_input_tokens_seen": 202068896, "step": 93675 }, { "epoch": 15.282218597063622, "grad_norm": 0.020789122208952904, "learning_rate": 0.000160147758293554, "loss": 0.004, "num_input_tokens_seen": 202080000, "step": 93680 }, { "epoch": 15.283034257748776, "grad_norm": 0.009524316526949406, "learning_rate": 0.00016009555232908456, "loss": 0.0102, "num_input_tokens_seen": 202091392, "step": 93685 }, { "epoch": 15.283849918433932, "grad_norm": 0.22591069340705872, "learning_rate": 0.00016004335325321033, "loss": 0.0852, "num_input_tokens_seen": 202102048, "step": 93690 }, { "epoch": 15.284665579119087, "grad_norm": 0.003290201537311077, "learning_rate": 0.00015999116106698848, "loss": 0.0033, "num_input_tokens_seen": 202113472, "step": 93695 }, { "epoch": 15.285481239804241, "grad_norm": 0.010243604891002178, "learning_rate": 0.0001599389757714774, "loss": 0.0023, "num_input_tokens_seen": 202124032, "step": 93700 }, { "epoch": 15.286296900489397, "grad_norm": 0.00821051187813282, "learning_rate": 0.0001598867973677341, "loss": 0.0025, "num_input_tokens_seen": 202134784, "step": 93705 }, { "epoch": 15.28711256117455, "grad_norm": 0.008992733433842659, "learning_rate": 0.00015983462585681657, "loss": 0.0012, "num_input_tokens_seen": 202146368, "step": 93710 }, { "epoch": 15.287928221859707, "grad_norm": 0.010321368463337421, "learning_rate": 0.00015978246123978158, "loss": 0.013, "num_input_tokens_seen": 202156096, "step": 93715 }, { "epoch": 15.28874388254486, "grad_norm": 0.02746463753283024, "learning_rate": 0.0001597303035176869, "loss": 0.0059, "num_input_tokens_seen": 202166368, "step": 93720 }, { "epoch": 15.289559543230016, "grad_norm": 0.0024974963162094355, "learning_rate": 0.00015967815269158904, "loss": 0.0028, "num_input_tokens_seen": 202177120, "step": 93725 }, { "epoch": 15.290375203915172, "grad_norm": 0.016931302845478058, "learning_rate": 0.0001596260087625454, "loss": 0.0028, "num_input_tokens_seen": 202187680, "step": 93730 }, { "epoch": 15.291190864600326, "grad_norm": 0.01744828186929226, "learning_rate": 0.0001595738717316122, "loss": 0.1882, "num_input_tokens_seen": 202198560, "step": 93735 }, { "epoch": 15.292006525285482, "grad_norm": 1.3138394355773926, "learning_rate": 0.00015952174159984667, "loss": 0.0844, "num_input_tokens_seen": 202209952, "step": 93740 }, { "epoch": 15.292822185970635, "grad_norm": 0.01487385667860508, "learning_rate": 0.0001594696183683046, "loss": 0.007, "num_input_tokens_seen": 202221504, "step": 93745 }, { "epoch": 15.293637846655791, "grad_norm": 0.02325529046356678, "learning_rate": 0.00015941750203804305, "loss": 0.0147, "num_input_tokens_seen": 202232832, "step": 93750 }, { "epoch": 15.294453507340947, "grad_norm": 0.0203610360622406, "learning_rate": 0.0001593653926101176, "loss": 0.0054, "num_input_tokens_seen": 202243328, "step": 93755 }, { "epoch": 15.2952691680261, "grad_norm": 0.008362491615116596, "learning_rate": 0.00015931329008558477, "loss": 0.0098, "num_input_tokens_seen": 202255104, "step": 93760 }, { "epoch": 15.296084828711257, "grad_norm": 0.021021800115704536, "learning_rate": 0.00015926119446550024, "loss": 0.0132, "num_input_tokens_seen": 202265984, "step": 93765 }, { "epoch": 15.29690048939641, "grad_norm": 0.0397496335208416, "learning_rate": 0.0001592091057509199, "loss": 0.1376, "num_input_tokens_seen": 202276928, "step": 93770 }, { "epoch": 15.297716150081566, "grad_norm": 0.0030437472742050886, "learning_rate": 0.00015915702394289933, "loss": 0.1071, "num_input_tokens_seen": 202287584, "step": 93775 }, { "epoch": 15.298531810766722, "grad_norm": 0.050418753176927567, "learning_rate": 0.00015910494904249411, "loss": 0.0037, "num_input_tokens_seen": 202298752, "step": 93780 }, { "epoch": 15.299347471451876, "grad_norm": 0.02001696079969406, "learning_rate": 0.0001590528810507595, "loss": 0.0054, "num_input_tokens_seen": 202308704, "step": 93785 }, { "epoch": 15.300163132137031, "grad_norm": 0.012709655798971653, "learning_rate": 0.00015900081996875082, "loss": 0.02, "num_input_tokens_seen": 202320192, "step": 93790 }, { "epoch": 15.300978792822185, "grad_norm": 0.004497983958572149, "learning_rate": 0.0001589487657975231, "loss": 0.0101, "num_input_tokens_seen": 202330944, "step": 93795 }, { "epoch": 15.301794453507341, "grad_norm": 0.010057850740849972, "learning_rate": 0.00015889671853813126, "loss": 0.0094, "num_input_tokens_seen": 202340800, "step": 93800 }, { "epoch": 15.302610114192497, "grad_norm": 0.0044313217513263226, "learning_rate": 0.0001588446781916302, "loss": 0.0039, "num_input_tokens_seen": 202352704, "step": 93805 }, { "epoch": 15.30342577487765, "grad_norm": 0.012907215394079685, "learning_rate": 0.00015879264475907447, "loss": 0.0088, "num_input_tokens_seen": 202363648, "step": 93810 }, { "epoch": 15.304241435562806, "grad_norm": 0.04030029475688934, "learning_rate": 0.00015874061824151865, "loss": 0.0095, "num_input_tokens_seen": 202375776, "step": 93815 }, { "epoch": 15.30505709624796, "grad_norm": 0.012292114086449146, "learning_rate": 0.00015868859864001693, "loss": 0.007, "num_input_tokens_seen": 202386784, "step": 93820 }, { "epoch": 15.305872756933116, "grad_norm": 0.05473395809531212, "learning_rate": 0.00015863658595562414, "loss": 0.0188, "num_input_tokens_seen": 202397536, "step": 93825 }, { "epoch": 15.30668841761827, "grad_norm": 0.019811108708381653, "learning_rate": 0.00015858458018939365, "loss": 0.0047, "num_input_tokens_seen": 202407104, "step": 93830 }, { "epoch": 15.307504078303426, "grad_norm": 0.021202294155955315, "learning_rate": 0.00015853258134238007, "loss": 0.0086, "num_input_tokens_seen": 202417568, "step": 93835 }, { "epoch": 15.308319738988581, "grad_norm": 0.04574183002114296, "learning_rate": 0.0001584805894156366, "loss": 0.0017, "num_input_tokens_seen": 202428960, "step": 93840 }, { "epoch": 15.309135399673735, "grad_norm": 0.010427230037748814, "learning_rate": 0.0001584286044102175, "loss": 0.0037, "num_input_tokens_seen": 202440640, "step": 93845 }, { "epoch": 15.309951060358891, "grad_norm": 0.007195925805717707, "learning_rate": 0.00015837662632717575, "loss": 0.0044, "num_input_tokens_seen": 202449984, "step": 93850 }, { "epoch": 15.310766721044045, "grad_norm": 0.006297925487160683, "learning_rate": 0.00015832465516756538, "loss": 0.0037, "num_input_tokens_seen": 202460992, "step": 93855 }, { "epoch": 15.3115823817292, "grad_norm": 0.011007649824023247, "learning_rate": 0.00015827269093243902, "loss": 0.0087, "num_input_tokens_seen": 202472736, "step": 93860 }, { "epoch": 15.312398042414356, "grad_norm": 0.7102978825569153, "learning_rate": 0.0001582207336228504, "loss": 0.0406, "num_input_tokens_seen": 202482528, "step": 93865 }, { "epoch": 15.31321370309951, "grad_norm": 0.034770816564559937, "learning_rate": 0.00015816878323985184, "loss": 0.0133, "num_input_tokens_seen": 202493408, "step": 93870 }, { "epoch": 15.314029363784666, "grad_norm": 0.030466092750430107, "learning_rate": 0.0001581168397844967, "loss": 0.0039, "num_input_tokens_seen": 202503968, "step": 93875 }, { "epoch": 15.31484502446982, "grad_norm": 0.010595270432531834, "learning_rate": 0.0001580649032578375, "loss": 0.1128, "num_input_tokens_seen": 202514528, "step": 93880 }, { "epoch": 15.315660685154976, "grad_norm": 0.0174619872123003, "learning_rate": 0.00015801297366092689, "loss": 0.0107, "num_input_tokens_seen": 202526560, "step": 93885 }, { "epoch": 15.31647634584013, "grad_norm": 0.0036752589512616396, "learning_rate": 0.00015796105099481712, "loss": 0.0205, "num_input_tokens_seen": 202536704, "step": 93890 }, { "epoch": 15.317292006525285, "grad_norm": 0.044251494109630585, "learning_rate": 0.00015790913526056061, "loss": 0.0032, "num_input_tokens_seen": 202546848, "step": 93895 }, { "epoch": 15.318107667210441, "grad_norm": 0.010758808813989162, "learning_rate": 0.00015785722645920942, "loss": 0.0025, "num_input_tokens_seen": 202557376, "step": 93900 }, { "epoch": 15.318923327895595, "grad_norm": 0.02921767719089985, "learning_rate": 0.00015780532459181557, "loss": 0.1465, "num_input_tokens_seen": 202569344, "step": 93905 }, { "epoch": 15.31973898858075, "grad_norm": 0.07249391078948975, "learning_rate": 0.00015775342965943095, "loss": 0.0547, "num_input_tokens_seen": 202578624, "step": 93910 }, { "epoch": 15.320554649265905, "grad_norm": 0.04899735748767853, "learning_rate": 0.00015770154166310724, "loss": 0.0042, "num_input_tokens_seen": 202589120, "step": 93915 }, { "epoch": 15.32137030995106, "grad_norm": 0.021752165630459785, "learning_rate": 0.00015764966060389602, "loss": 0.003, "num_input_tokens_seen": 202599072, "step": 93920 }, { "epoch": 15.322185970636216, "grad_norm": 0.66218101978302, "learning_rate": 0.00015759778648284873, "loss": 0.0181, "num_input_tokens_seen": 202609728, "step": 93925 }, { "epoch": 15.32300163132137, "grad_norm": 0.008354924619197845, "learning_rate": 0.00015754591930101664, "loss": 0.004, "num_input_tokens_seen": 202620608, "step": 93930 }, { "epoch": 15.323817292006526, "grad_norm": 0.11938784271478653, "learning_rate": 0.00015749405905945095, "loss": 0.0808, "num_input_tokens_seen": 202631904, "step": 93935 }, { "epoch": 15.32463295269168, "grad_norm": 0.001243383507244289, "learning_rate": 0.00015744220575920266, "loss": 0.0019, "num_input_tokens_seen": 202643424, "step": 93940 }, { "epoch": 15.325448613376835, "grad_norm": 0.3083398640155792, "learning_rate": 0.00015739035940132262, "loss": 0.0137, "num_input_tokens_seen": 202654848, "step": 93945 }, { "epoch": 15.326264274061991, "grad_norm": 0.023957649245858192, "learning_rate": 0.0001573385199868616, "loss": 0.0072, "num_input_tokens_seen": 202665312, "step": 93950 }, { "epoch": 15.327079934747145, "grad_norm": 0.0029676971025764942, "learning_rate": 0.00015728668751687015, "loss": 0.0022, "num_input_tokens_seen": 202675552, "step": 93955 }, { "epoch": 15.3278955954323, "grad_norm": 0.005043042358011007, "learning_rate": 0.00015723486199239878, "loss": 0.0026, "num_input_tokens_seen": 202685760, "step": 93960 }, { "epoch": 15.328711256117455, "grad_norm": 0.23348653316497803, "learning_rate": 0.00015718304341449759, "loss": 0.0057, "num_input_tokens_seen": 202695968, "step": 93965 }, { "epoch": 15.32952691680261, "grad_norm": 0.003099653869867325, "learning_rate": 0.00015713123178421717, "loss": 0.0107, "num_input_tokens_seen": 202706912, "step": 93970 }, { "epoch": 15.330342577487766, "grad_norm": 0.030132077634334564, "learning_rate": 0.00015707942710260704, "loss": 0.0218, "num_input_tokens_seen": 202717984, "step": 93975 }, { "epoch": 15.33115823817292, "grad_norm": 0.17657029628753662, "learning_rate": 0.00015702762937071747, "loss": 0.0398, "num_input_tokens_seen": 202728704, "step": 93980 }, { "epoch": 15.331973898858076, "grad_norm": 0.002543839393183589, "learning_rate": 0.00015697583858959813, "loss": 0.1112, "num_input_tokens_seen": 202739040, "step": 93985 }, { "epoch": 15.33278955954323, "grad_norm": 0.002317660255357623, "learning_rate": 0.00015692405476029853, "loss": 0.0206, "num_input_tokens_seen": 202750048, "step": 93990 }, { "epoch": 15.333605220228385, "grad_norm": 0.005843766499310732, "learning_rate": 0.00015687227788386822, "loss": 0.0497, "num_input_tokens_seen": 202760576, "step": 93995 }, { "epoch": 15.33442088091354, "grad_norm": 0.021233415231108665, "learning_rate": 0.00015682050796135644, "loss": 0.0895, "num_input_tokens_seen": 202770240, "step": 94000 }, { "epoch": 15.335236541598695, "grad_norm": 0.5034717917442322, "learning_rate": 0.0001567687449938125, "loss": 0.0272, "num_input_tokens_seen": 202780640, "step": 94005 }, { "epoch": 15.33605220228385, "grad_norm": 0.014063271693885326, "learning_rate": 0.0001567169889822853, "loss": 0.0554, "num_input_tokens_seen": 202790784, "step": 94010 }, { "epoch": 15.336867862969005, "grad_norm": 0.055124759674072266, "learning_rate": 0.00015666523992782384, "loss": 0.0468, "num_input_tokens_seen": 202801120, "step": 94015 }, { "epoch": 15.33768352365416, "grad_norm": 0.003551836358383298, "learning_rate": 0.00015661349783147678, "loss": 0.0728, "num_input_tokens_seen": 202812224, "step": 94020 }, { "epoch": 15.338499184339314, "grad_norm": 0.6280102729797363, "learning_rate": 0.00015656176269429283, "loss": 0.1172, "num_input_tokens_seen": 202821536, "step": 94025 }, { "epoch": 15.33931484502447, "grad_norm": 0.01257323194295168, "learning_rate": 0.00015651003451732048, "loss": 0.0031, "num_input_tokens_seen": 202831776, "step": 94030 }, { "epoch": 15.340130505709626, "grad_norm": 0.029040977358818054, "learning_rate": 0.00015645831330160804, "loss": 0.006, "num_input_tokens_seen": 202842272, "step": 94035 }, { "epoch": 15.34094616639478, "grad_norm": 0.008295672945678234, "learning_rate": 0.00015640659904820364, "loss": 0.0057, "num_input_tokens_seen": 202853088, "step": 94040 }, { "epoch": 15.341761827079935, "grad_norm": 0.006166861858218908, "learning_rate": 0.00015635489175815537, "loss": 0.0084, "num_input_tokens_seen": 202865056, "step": 94045 }, { "epoch": 15.34257748776509, "grad_norm": 0.004824750125408173, "learning_rate": 0.0001563031914325112, "loss": 0.0804, "num_input_tokens_seen": 202876256, "step": 94050 }, { "epoch": 15.343393148450245, "grad_norm": 0.2541813552379608, "learning_rate": 0.00015625149807231892, "loss": 0.0173, "num_input_tokens_seen": 202887040, "step": 94055 }, { "epoch": 15.3442088091354, "grad_norm": 0.020611584186553955, "learning_rate": 0.00015619981167862602, "loss": 0.0034, "num_input_tokens_seen": 202898240, "step": 94060 }, { "epoch": 15.345024469820554, "grad_norm": 0.03855913504958153, "learning_rate": 0.00015614813225248015, "loss": 0.0057, "num_input_tokens_seen": 202909568, "step": 94065 }, { "epoch": 15.34584013050571, "grad_norm": 0.023286335170269012, "learning_rate": 0.00015609645979492855, "loss": 0.0041, "num_input_tokens_seen": 202921632, "step": 94070 }, { "epoch": 15.346655791190864, "grad_norm": 0.12060810625553131, "learning_rate": 0.00015604479430701845, "loss": 0.0345, "num_input_tokens_seen": 202932576, "step": 94075 }, { "epoch": 15.34747145187602, "grad_norm": 0.0034178285859525204, "learning_rate": 0.00015599313578979696, "loss": 0.0111, "num_input_tokens_seen": 202943264, "step": 94080 }, { "epoch": 15.348287112561174, "grad_norm": 0.12140186876058578, "learning_rate": 0.00015594148424431076, "loss": 0.0048, "num_input_tokens_seen": 202955040, "step": 94085 }, { "epoch": 15.34910277324633, "grad_norm": 0.40168941020965576, "learning_rate": 0.00015588983967160724, "loss": 0.0265, "num_input_tokens_seen": 202966656, "step": 94090 }, { "epoch": 15.349918433931485, "grad_norm": 0.009765084832906723, "learning_rate": 0.0001558382020727323, "loss": 0.1218, "num_input_tokens_seen": 202977216, "step": 94095 }, { "epoch": 15.350734094616639, "grad_norm": 0.03378693014383316, "learning_rate": 0.00015578657144873316, "loss": 0.0034, "num_input_tokens_seen": 202988224, "step": 94100 }, { "epoch": 15.351549755301795, "grad_norm": 0.07392531633377075, "learning_rate": 0.00015573494780065543, "loss": 0.0081, "num_input_tokens_seen": 202998848, "step": 94105 }, { "epoch": 15.352365415986949, "grad_norm": 0.14011482894420624, "learning_rate": 0.00015568333112954592, "loss": 0.0705, "num_input_tokens_seen": 203009664, "step": 94110 }, { "epoch": 15.353181076672104, "grad_norm": 0.15435102581977844, "learning_rate": 0.00015563172143645044, "loss": 0.0176, "num_input_tokens_seen": 203020448, "step": 94115 }, { "epoch": 15.35399673735726, "grad_norm": 0.005808738525956869, "learning_rate": 0.00015558011872241506, "loss": 0.0248, "num_input_tokens_seen": 203031456, "step": 94120 }, { "epoch": 15.354812398042414, "grad_norm": 0.014651612378656864, "learning_rate": 0.00015552852298848546, "loss": 0.0449, "num_input_tokens_seen": 203043872, "step": 94125 }, { "epoch": 15.35562805872757, "grad_norm": 0.043832726776599884, "learning_rate": 0.00015547693423570736, "loss": 0.0109, "num_input_tokens_seen": 203055136, "step": 94130 }, { "epoch": 15.356443719412724, "grad_norm": 0.0008435246418230236, "learning_rate": 0.00015542535246512623, "loss": 0.0044, "num_input_tokens_seen": 203066048, "step": 94135 }, { "epoch": 15.35725938009788, "grad_norm": 0.08758535981178284, "learning_rate": 0.00015537377767778742, "loss": 0.0403, "num_input_tokens_seen": 203076640, "step": 94140 }, { "epoch": 15.358075040783035, "grad_norm": 0.06614108383655548, "learning_rate": 0.00015532220987473627, "loss": 0.0129, "num_input_tokens_seen": 203088128, "step": 94145 }, { "epoch": 15.358890701468189, "grad_norm": 0.02279900386929512, "learning_rate": 0.00015527064905701776, "loss": 0.0032, "num_input_tokens_seen": 203099200, "step": 94150 }, { "epoch": 15.359706362153345, "grad_norm": 0.00944602396339178, "learning_rate": 0.00015521909522567685, "loss": 0.0026, "num_input_tokens_seen": 203109984, "step": 94155 }, { "epoch": 15.360522022838499, "grad_norm": 1.0137381553649902, "learning_rate": 0.0001551675483817584, "loss": 0.0573, "num_input_tokens_seen": 203119680, "step": 94160 }, { "epoch": 15.361337683523654, "grad_norm": 0.0651179626584053, "learning_rate": 0.00015511600852630698, "loss": 0.0122, "num_input_tokens_seen": 203130208, "step": 94165 }, { "epoch": 15.362153344208808, "grad_norm": 0.06507917493581772, "learning_rate": 0.0001550644756603672, "loss": 0.0681, "num_input_tokens_seen": 203141568, "step": 94170 }, { "epoch": 15.362969004893964, "grad_norm": 0.011280824430286884, "learning_rate": 0.00015501294978498344, "loss": 0.0056, "num_input_tokens_seen": 203151264, "step": 94175 }, { "epoch": 15.36378466557912, "grad_norm": 0.017381692305207253, "learning_rate": 0.0001549614309011998, "loss": 0.0077, "num_input_tokens_seen": 203162592, "step": 94180 }, { "epoch": 15.364600326264274, "grad_norm": 0.03149690851569176, "learning_rate": 0.00015490991901006052, "loss": 0.0615, "num_input_tokens_seen": 203173280, "step": 94185 }, { "epoch": 15.36541598694943, "grad_norm": 0.010247171856462955, "learning_rate": 0.00015485841411260937, "loss": 0.0021, "num_input_tokens_seen": 203183808, "step": 94190 }, { "epoch": 15.366231647634583, "grad_norm": 0.00801602378487587, "learning_rate": 0.00015480691620989062, "loss": 0.006, "num_input_tokens_seen": 203195360, "step": 94195 }, { "epoch": 15.367047308319739, "grad_norm": 0.0101214200258255, "learning_rate": 0.00015475542530294728, "loss": 0.0067, "num_input_tokens_seen": 203205792, "step": 94200 }, { "epoch": 15.367862969004895, "grad_norm": 0.39725321531295776, "learning_rate": 0.00015470394139282357, "loss": 0.0278, "num_input_tokens_seen": 203217408, "step": 94205 }, { "epoch": 15.368678629690049, "grad_norm": 0.15786515176296234, "learning_rate": 0.0001546524644805622, "loss": 0.0077, "num_input_tokens_seen": 203228608, "step": 94210 }, { "epoch": 15.369494290375204, "grad_norm": 0.041214123368263245, "learning_rate": 0.00015460099456720706, "loss": 0.0039, "num_input_tokens_seen": 203239456, "step": 94215 }, { "epoch": 15.370309951060358, "grad_norm": 0.0028494198340922594, "learning_rate": 0.0001545495316538006, "loss": 0.0789, "num_input_tokens_seen": 203250848, "step": 94220 }, { "epoch": 15.371125611745514, "grad_norm": 0.01823677122592926, "learning_rate": 0.0001544980757413864, "loss": 0.0037, "num_input_tokens_seen": 203260608, "step": 94225 }, { "epoch": 15.37194127243067, "grad_norm": 0.07509520649909973, "learning_rate": 0.00015444662683100676, "loss": 0.005, "num_input_tokens_seen": 203272480, "step": 94230 }, { "epoch": 15.372756933115824, "grad_norm": 0.08808441460132599, "learning_rate": 0.00015439518492370486, "loss": 0.0087, "num_input_tokens_seen": 203283040, "step": 94235 }, { "epoch": 15.37357259380098, "grad_norm": 0.03848183900117874, "learning_rate": 0.00015434375002052264, "loss": 0.0029, "num_input_tokens_seen": 203293024, "step": 94240 }, { "epoch": 15.374388254486133, "grad_norm": 0.0050722090527415276, "learning_rate": 0.00015429232212250317, "loss": 0.0052, "num_input_tokens_seen": 203303616, "step": 94245 }, { "epoch": 15.375203915171289, "grad_norm": 0.020570583641529083, "learning_rate": 0.00015424090123068802, "loss": 0.0043, "num_input_tokens_seen": 203314336, "step": 94250 }, { "epoch": 15.376019575856443, "grad_norm": 0.0038783657364547253, "learning_rate": 0.00015418948734611976, "loss": 0.1566, "num_input_tokens_seen": 203323104, "step": 94255 }, { "epoch": 15.376835236541599, "grad_norm": 0.14448069036006927, "learning_rate": 0.0001541380804698403, "loss": 0.0239, "num_input_tokens_seen": 203334144, "step": 94260 }, { "epoch": 15.377650897226754, "grad_norm": 0.14540328085422516, "learning_rate": 0.00015408668060289132, "loss": 0.0117, "num_input_tokens_seen": 203344960, "step": 94265 }, { "epoch": 15.378466557911908, "grad_norm": 0.004196932539343834, "learning_rate": 0.00015403528774631463, "loss": 0.1772, "num_input_tokens_seen": 203356128, "step": 94270 }, { "epoch": 15.379282218597064, "grad_norm": 0.04792657494544983, "learning_rate": 0.00015398390190115175, "loss": 0.0191, "num_input_tokens_seen": 203366048, "step": 94275 }, { "epoch": 15.380097879282218, "grad_norm": 0.03220883384346962, "learning_rate": 0.00015393252306844402, "loss": 0.0033, "num_input_tokens_seen": 203376192, "step": 94280 }, { "epoch": 15.380913539967374, "grad_norm": 0.015784654766321182, "learning_rate": 0.00015388115124923267, "loss": 0.0027, "num_input_tokens_seen": 203387520, "step": 94285 }, { "epoch": 15.38172920065253, "grad_norm": 0.009768318384885788, "learning_rate": 0.00015382978644455896, "loss": 0.0055, "num_input_tokens_seen": 203397888, "step": 94290 }, { "epoch": 15.382544861337683, "grad_norm": 0.004610124044120312, "learning_rate": 0.00015377842865546372, "loss": 0.0165, "num_input_tokens_seen": 203408032, "step": 94295 }, { "epoch": 15.383360522022839, "grad_norm": 0.1311192363500595, "learning_rate": 0.0001537270778829879, "loss": 0.1143, "num_input_tokens_seen": 203418784, "step": 94300 }, { "epoch": 15.384176182707993, "grad_norm": 0.00880477111786604, "learning_rate": 0.00015367573412817186, "loss": 0.0272, "num_input_tokens_seen": 203429664, "step": 94305 }, { "epoch": 15.384991843393149, "grad_norm": 0.0029072389006614685, "learning_rate": 0.0001536243973920568, "loss": 0.0048, "num_input_tokens_seen": 203439552, "step": 94310 }, { "epoch": 15.385807504078304, "grad_norm": 0.006674721371382475, "learning_rate": 0.00015357306767568242, "loss": 0.0028, "num_input_tokens_seen": 203450528, "step": 94315 }, { "epoch": 15.386623164763458, "grad_norm": 0.0066412282176315784, "learning_rate": 0.00015352174498008963, "loss": 0.0173, "num_input_tokens_seen": 203460800, "step": 94320 }, { "epoch": 15.387438825448614, "grad_norm": 0.030233990401029587, "learning_rate": 0.00015347042930631788, "loss": 0.0046, "num_input_tokens_seen": 203473056, "step": 94325 }, { "epoch": 15.388254486133768, "grad_norm": 0.701711118221283, "learning_rate": 0.0001534191206554078, "loss": 0.0622, "num_input_tokens_seen": 203483904, "step": 94330 }, { "epoch": 15.389070146818923, "grad_norm": 0.0024088085629045963, "learning_rate": 0.00015336781902839858, "loss": 0.002, "num_input_tokens_seen": 203492608, "step": 94335 }, { "epoch": 15.38988580750408, "grad_norm": 0.172325998544693, "learning_rate": 0.00015331652442633053, "loss": 0.0124, "num_input_tokens_seen": 203504192, "step": 94340 }, { "epoch": 15.390701468189233, "grad_norm": 0.03991619870066643, "learning_rate": 0.00015326523685024263, "loss": 0.0034, "num_input_tokens_seen": 203515296, "step": 94345 }, { "epoch": 15.391517128874389, "grad_norm": 0.06536217778921127, "learning_rate": 0.0001532139563011749, "loss": 0.0147, "num_input_tokens_seen": 203526304, "step": 94350 }, { "epoch": 15.392332789559543, "grad_norm": 0.007389438804239035, "learning_rate": 0.00015316268278016594, "loss": 0.0045, "num_input_tokens_seen": 203537312, "step": 94355 }, { "epoch": 15.393148450244698, "grad_norm": 0.021807361394166946, "learning_rate": 0.00015311141628825554, "loss": 0.0059, "num_input_tokens_seen": 203549632, "step": 94360 }, { "epoch": 15.393964110929852, "grad_norm": 0.08653487265110016, "learning_rate": 0.000153060156826482, "loss": 0.0838, "num_input_tokens_seen": 203560928, "step": 94365 }, { "epoch": 15.394779771615008, "grad_norm": 0.014106176793575287, "learning_rate": 0.0001530089043958849, "loss": 0.003, "num_input_tokens_seen": 203573280, "step": 94370 }, { "epoch": 15.395595432300164, "grad_norm": 0.0045098853297531605, "learning_rate": 0.00015295765899750214, "loss": 0.0069, "num_input_tokens_seen": 203584192, "step": 94375 }, { "epoch": 15.396411092985318, "grad_norm": 0.05391095206141472, "learning_rate": 0.00015290642063237302, "loss": 0.0041, "num_input_tokens_seen": 203594880, "step": 94380 }, { "epoch": 15.397226753670473, "grad_norm": 0.07209216058254242, "learning_rate": 0.0001528551893015353, "loss": 0.0088, "num_input_tokens_seen": 203606176, "step": 94385 }, { "epoch": 15.398042414355627, "grad_norm": 0.30466848611831665, "learning_rate": 0.00015280396500602783, "loss": 0.029, "num_input_tokens_seen": 203616864, "step": 94390 }, { "epoch": 15.398858075040783, "grad_norm": 0.003423368791118264, "learning_rate": 0.00015275274774688817, "loss": 0.0336, "num_input_tokens_seen": 203627424, "step": 94395 }, { "epoch": 15.399673735725939, "grad_norm": 0.044894564896821976, "learning_rate": 0.00015270153752515474, "loss": 0.0072, "num_input_tokens_seen": 203638816, "step": 94400 }, { "epoch": 15.400489396411093, "grad_norm": 0.10705263912677765, "learning_rate": 0.00015265033434186525, "loss": 0.0377, "num_input_tokens_seen": 203649632, "step": 94405 }, { "epoch": 15.401305057096248, "grad_norm": 0.014901341870427132, "learning_rate": 0.00015259913819805736, "loss": 0.0056, "num_input_tokens_seen": 203660832, "step": 94410 }, { "epoch": 15.402120717781402, "grad_norm": 0.00327356462366879, "learning_rate": 0.0001525479490947687, "loss": 0.0137, "num_input_tokens_seen": 203671968, "step": 94415 }, { "epoch": 15.402936378466558, "grad_norm": 0.06755118072032928, "learning_rate": 0.00015249676703303654, "loss": 0.0057, "num_input_tokens_seen": 203683552, "step": 94420 }, { "epoch": 15.403752039151712, "grad_norm": 0.0630609467625618, "learning_rate": 0.0001524455920138983, "loss": 0.0055, "num_input_tokens_seen": 203694592, "step": 94425 }, { "epoch": 15.404567699836868, "grad_norm": 0.0015592732233926654, "learning_rate": 0.00015239442403839105, "loss": 0.0036, "num_input_tokens_seen": 203704192, "step": 94430 }, { "epoch": 15.405383360522023, "grad_norm": 0.049530018121004105, "learning_rate": 0.0001523432631075517, "loss": 0.0077, "num_input_tokens_seen": 203714464, "step": 94435 }, { "epoch": 15.406199021207177, "grad_norm": 0.009484097361564636, "learning_rate": 0.00015229210922241721, "loss": 0.0026, "num_input_tokens_seen": 203726016, "step": 94440 }, { "epoch": 15.407014681892333, "grad_norm": 0.004205542150884867, "learning_rate": 0.0001522409623840242, "loss": 0.0167, "num_input_tokens_seen": 203737920, "step": 94445 }, { "epoch": 15.407830342577487, "grad_norm": 0.01891661249101162, "learning_rate": 0.00015218982259340908, "loss": 0.0034, "num_input_tokens_seen": 203747744, "step": 94450 }, { "epoch": 15.408646003262643, "grad_norm": 0.01589014008641243, "learning_rate": 0.0001521386898516088, "loss": 0.0192, "num_input_tokens_seen": 203759392, "step": 94455 }, { "epoch": 15.409461663947798, "grad_norm": 0.0013193151680752635, "learning_rate": 0.0001520875641596589, "loss": 0.0035, "num_input_tokens_seen": 203768960, "step": 94460 }, { "epoch": 15.410277324632952, "grad_norm": 0.0024471550714224577, "learning_rate": 0.0001520364455185962, "loss": 0.0033, "num_input_tokens_seen": 203781632, "step": 94465 }, { "epoch": 15.411092985318108, "grad_norm": 0.010977689176797867, "learning_rate": 0.00015198533392945602, "loss": 0.0018, "num_input_tokens_seen": 203792288, "step": 94470 }, { "epoch": 15.411908646003262, "grad_norm": 0.006150348577648401, "learning_rate": 0.00015193422939327488, "loss": 0.2144, "num_input_tokens_seen": 203804032, "step": 94475 }, { "epoch": 15.412724306688418, "grad_norm": 0.01531371008604765, "learning_rate": 0.00015188313191108783, "loss": 0.0056, "num_input_tokens_seen": 203814880, "step": 94480 }, { "epoch": 15.413539967373573, "grad_norm": 0.0015796285588294268, "learning_rate": 0.00015183204148393103, "loss": 0.0029, "num_input_tokens_seen": 203826176, "step": 94485 }, { "epoch": 15.414355628058727, "grad_norm": 0.029708055779337883, "learning_rate": 0.00015178095811283927, "loss": 0.0145, "num_input_tokens_seen": 203837632, "step": 94490 }, { "epoch": 15.415171288743883, "grad_norm": 0.0020372539293020964, "learning_rate": 0.00015172988179884846, "loss": 0.0247, "num_input_tokens_seen": 203849920, "step": 94495 }, { "epoch": 15.415986949429037, "grad_norm": 0.0008212000248022377, "learning_rate": 0.0001516788125429931, "loss": 0.0148, "num_input_tokens_seen": 203860512, "step": 94500 }, { "epoch": 15.416802610114193, "grad_norm": 0.19552737474441528, "learning_rate": 0.0001516277503463086, "loss": 0.0633, "num_input_tokens_seen": 203872128, "step": 94505 }, { "epoch": 15.417618270799348, "grad_norm": 0.007861997000873089, "learning_rate": 0.00015157669520982975, "loss": 0.0029, "num_input_tokens_seen": 203883488, "step": 94510 }, { "epoch": 15.418433931484502, "grad_norm": 0.0019350452348589897, "learning_rate": 0.0001515256471345911, "loss": 0.0075, "num_input_tokens_seen": 203893760, "step": 94515 }, { "epoch": 15.419249592169658, "grad_norm": 0.22429165244102478, "learning_rate": 0.00015147460612162733, "loss": 0.0234, "num_input_tokens_seen": 203904384, "step": 94520 }, { "epoch": 15.420065252854812, "grad_norm": 0.03595906123518944, "learning_rate": 0.00015142357217197278, "loss": 0.0087, "num_input_tokens_seen": 203916096, "step": 94525 }, { "epoch": 15.420880913539968, "grad_norm": 0.34652552008628845, "learning_rate": 0.00015137254528666178, "loss": 0.0168, "num_input_tokens_seen": 203925504, "step": 94530 }, { "epoch": 15.421696574225122, "grad_norm": 0.009729431010782719, "learning_rate": 0.0001513215254667284, "loss": 0.1057, "num_input_tokens_seen": 203936896, "step": 94535 }, { "epoch": 15.422512234910277, "grad_norm": 0.07300486415624619, "learning_rate": 0.00015127051271320664, "loss": 0.004, "num_input_tokens_seen": 203947552, "step": 94540 }, { "epoch": 15.423327895595433, "grad_norm": 0.18068827688694, "learning_rate": 0.00015121950702713029, "loss": 0.0376, "num_input_tokens_seen": 203959104, "step": 94545 }, { "epoch": 15.424143556280587, "grad_norm": 0.030452514067292213, "learning_rate": 0.00015116850840953311, "loss": 0.0236, "num_input_tokens_seen": 203969824, "step": 94550 }, { "epoch": 15.424959216965743, "grad_norm": 0.04277221858501434, "learning_rate": 0.00015111751686144864, "loss": 0.0401, "num_input_tokens_seen": 203980768, "step": 94555 }, { "epoch": 15.425774877650896, "grad_norm": 0.8214955925941467, "learning_rate": 0.00015106653238391028, "loss": 0.0849, "num_input_tokens_seen": 203991616, "step": 94560 }, { "epoch": 15.426590538336052, "grad_norm": 0.49074503779411316, "learning_rate": 0.00015101555497795127, "loss": 0.0127, "num_input_tokens_seen": 204002304, "step": 94565 }, { "epoch": 15.427406199021208, "grad_norm": 0.9176173806190491, "learning_rate": 0.00015096458464460482, "loss": 0.0556, "num_input_tokens_seen": 204012640, "step": 94570 }, { "epoch": 15.428221859706362, "grad_norm": 0.0021187884267419577, "learning_rate": 0.0001509136213849038, "loss": 0.0046, "num_input_tokens_seen": 204023520, "step": 94575 }, { "epoch": 15.429037520391518, "grad_norm": 0.17080815136432648, "learning_rate": 0.00015086266519988108, "loss": 0.0143, "num_input_tokens_seen": 204034848, "step": 94580 }, { "epoch": 15.429853181076671, "grad_norm": 0.09362485259771347, "learning_rate": 0.00015081171609056937, "loss": 0.0134, "num_input_tokens_seen": 204046048, "step": 94585 }, { "epoch": 15.430668841761827, "grad_norm": 0.0071033271960914135, "learning_rate": 0.00015076077405800126, "loss": 0.0069, "num_input_tokens_seen": 204056992, "step": 94590 }, { "epoch": 15.431484502446983, "grad_norm": 0.029995357617735863, "learning_rate": 0.0001507098391032089, "loss": 0.0252, "num_input_tokens_seen": 204067168, "step": 94595 }, { "epoch": 15.432300163132137, "grad_norm": 0.8844358921051025, "learning_rate": 0.00015065891122722507, "loss": 0.0414, "num_input_tokens_seen": 204077504, "step": 94600 }, { "epoch": 15.433115823817293, "grad_norm": 0.22242112457752228, "learning_rate": 0.00015060799043108126, "loss": 0.0123, "num_input_tokens_seen": 204087840, "step": 94605 }, { "epoch": 15.433931484502446, "grad_norm": 0.11149289458990097, "learning_rate": 0.00015055707671581008, "loss": 0.0072, "num_input_tokens_seen": 204098912, "step": 94610 }, { "epoch": 15.434747145187602, "grad_norm": 0.7047805190086365, "learning_rate": 0.00015050617008244272, "loss": 0.0424, "num_input_tokens_seen": 204109792, "step": 94615 }, { "epoch": 15.435562805872756, "grad_norm": 0.9532516002655029, "learning_rate": 0.00015045527053201137, "loss": 0.024, "num_input_tokens_seen": 204120128, "step": 94620 }, { "epoch": 15.436378466557912, "grad_norm": 0.023955071344971657, "learning_rate": 0.00015040437806554735, "loss": 0.0075, "num_input_tokens_seen": 204130720, "step": 94625 }, { "epoch": 15.437194127243067, "grad_norm": 0.006448355969041586, "learning_rate": 0.00015035349268408216, "loss": 0.0039, "num_input_tokens_seen": 204141504, "step": 94630 }, { "epoch": 15.438009787928221, "grad_norm": 0.05781847611069679, "learning_rate": 0.00015030261438864694, "loss": 0.0768, "num_input_tokens_seen": 204151776, "step": 94635 }, { "epoch": 15.438825448613377, "grad_norm": 0.011851697228848934, "learning_rate": 0.0001502517431802729, "loss": 0.0072, "num_input_tokens_seen": 204162880, "step": 94640 }, { "epoch": 15.439641109298531, "grad_norm": 0.11821319162845612, "learning_rate": 0.00015020087905999097, "loss": 0.005, "num_input_tokens_seen": 204173792, "step": 94645 }, { "epoch": 15.440456769983687, "grad_norm": 0.024995019659399986, "learning_rate": 0.00015015002202883193, "loss": 0.0052, "num_input_tokens_seen": 204183872, "step": 94650 }, { "epoch": 15.441272430668842, "grad_norm": 0.035056278109550476, "learning_rate": 0.00015009917208782657, "loss": 0.0348, "num_input_tokens_seen": 204194976, "step": 94655 }, { "epoch": 15.442088091353996, "grad_norm": 0.005104908253997564, "learning_rate": 0.00015004832923800533, "loss": 0.0673, "num_input_tokens_seen": 204206528, "step": 94660 }, { "epoch": 15.442903752039152, "grad_norm": 0.013268248178064823, "learning_rate": 0.00014999749348039866, "loss": 0.0076, "num_input_tokens_seen": 204218432, "step": 94665 }, { "epoch": 15.443719412724306, "grad_norm": 0.023536432534456253, "learning_rate": 0.0001499466648160368, "loss": 0.0145, "num_input_tokens_seen": 204228640, "step": 94670 }, { "epoch": 15.444535073409462, "grad_norm": 0.03660520166158676, "learning_rate": 0.00014989584324594986, "loss": 0.0489, "num_input_tokens_seen": 204240672, "step": 94675 }, { "epoch": 15.445350734094617, "grad_norm": 0.008136703632771969, "learning_rate": 0.00014984502877116773, "loss": 0.0021, "num_input_tokens_seen": 204252576, "step": 94680 }, { "epoch": 15.446166394779771, "grad_norm": 0.014154862612485886, "learning_rate": 0.00014979422139272037, "loss": 0.0017, "num_input_tokens_seen": 204262272, "step": 94685 }, { "epoch": 15.446982055464927, "grad_norm": 0.00247193849645555, "learning_rate": 0.00014974342111163735, "loss": 0.0096, "num_input_tokens_seen": 204273920, "step": 94690 }, { "epoch": 15.447797716150081, "grad_norm": 0.01128793228417635, "learning_rate": 0.00014969262792894822, "loss": 0.0917, "num_input_tokens_seen": 204284736, "step": 94695 }, { "epoch": 15.448613376835237, "grad_norm": 0.004287285264581442, "learning_rate": 0.0001496418418456824, "loss": 0.0129, "num_input_tokens_seen": 204295168, "step": 94700 }, { "epoch": 15.449429037520392, "grad_norm": 0.00537848798558116, "learning_rate": 0.0001495910628628691, "loss": 0.054, "num_input_tokens_seen": 204306368, "step": 94705 }, { "epoch": 15.450244698205546, "grad_norm": 0.019754892215132713, "learning_rate": 0.00014954029098153748, "loss": 0.0057, "num_input_tokens_seen": 204317056, "step": 94710 }, { "epoch": 15.451060358890702, "grad_norm": 0.005110419820994139, "learning_rate": 0.00014948952620271643, "loss": 0.0043, "num_input_tokens_seen": 204328384, "step": 94715 }, { "epoch": 15.451876019575856, "grad_norm": 0.0034630547743290663, "learning_rate": 0.00014943876852743475, "loss": 0.0141, "num_input_tokens_seen": 204338592, "step": 94720 }, { "epoch": 15.452691680261012, "grad_norm": 1.6319184303283691, "learning_rate": 0.00014938801795672102, "loss": 0.0447, "num_input_tokens_seen": 204349600, "step": 94725 }, { "epoch": 15.453507340946166, "grad_norm": 0.07469076663255692, "learning_rate": 0.00014933727449160423, "loss": 0.005, "num_input_tokens_seen": 204359712, "step": 94730 }, { "epoch": 15.454323001631321, "grad_norm": 0.006328065879642963, "learning_rate": 0.00014928653813311204, "loss": 0.0028, "num_input_tokens_seen": 204370816, "step": 94735 }, { "epoch": 15.455138662316477, "grad_norm": 0.021682793274521828, "learning_rate": 0.00014923580888227329, "loss": 0.0043, "num_input_tokens_seen": 204381216, "step": 94740 }, { "epoch": 15.455954323001631, "grad_norm": 0.13365812599658966, "learning_rate": 0.00014918508674011582, "loss": 0.0046, "num_input_tokens_seen": 204392032, "step": 94745 }, { "epoch": 15.456769983686787, "grad_norm": 0.0022824727930128574, "learning_rate": 0.0001491343717076676, "loss": 0.0049, "num_input_tokens_seen": 204402880, "step": 94750 }, { "epoch": 15.45758564437194, "grad_norm": 0.025725001469254494, "learning_rate": 0.00014908366378595645, "loss": 0.0139, "num_input_tokens_seen": 204413856, "step": 94755 }, { "epoch": 15.458401305057096, "grad_norm": 0.02584800310432911, "learning_rate": 0.00014903296297601, "loss": 0.0023, "num_input_tokens_seen": 204423872, "step": 94760 }, { "epoch": 15.459216965742252, "grad_norm": 0.008857563138008118, "learning_rate": 0.00014898226927885584, "loss": 0.0008, "num_input_tokens_seen": 204434720, "step": 94765 }, { "epoch": 15.460032626427406, "grad_norm": 0.01981770619750023, "learning_rate": 0.00014893158269552127, "loss": 0.0057, "num_input_tokens_seen": 204445600, "step": 94770 }, { "epoch": 15.460848287112562, "grad_norm": 0.0045865741558372974, "learning_rate": 0.00014888090322703353, "loss": 0.003, "num_input_tokens_seen": 204457376, "step": 94775 }, { "epoch": 15.461663947797716, "grad_norm": 0.04247048869729042, "learning_rate": 0.00014883023087441965, "loss": 0.0029, "num_input_tokens_seen": 204468192, "step": 94780 }, { "epoch": 15.462479608482871, "grad_norm": 0.05159754678606987, "learning_rate": 0.0001487795656387067, "loss": 0.0028, "num_input_tokens_seen": 204478912, "step": 94785 }, { "epoch": 15.463295269168025, "grad_norm": 0.004477463662624359, "learning_rate": 0.00014872890752092144, "loss": 0.0103, "num_input_tokens_seen": 204489600, "step": 94790 }, { "epoch": 15.464110929853181, "grad_norm": 0.04609847068786621, "learning_rate": 0.00014867825652209045, "loss": 0.0037, "num_input_tokens_seen": 204501568, "step": 94795 }, { "epoch": 15.464926590538337, "grad_norm": 0.24107162654399872, "learning_rate": 0.00014862761264324025, "loss": 0.0192, "num_input_tokens_seen": 204512000, "step": 94800 }, { "epoch": 15.46574225122349, "grad_norm": 0.2036074697971344, "learning_rate": 0.00014857697588539727, "loss": 0.0132, "num_input_tokens_seen": 204521568, "step": 94805 }, { "epoch": 15.466557911908646, "grad_norm": 0.0027138374280184507, "learning_rate": 0.00014852634624958766, "loss": 0.0049, "num_input_tokens_seen": 204532736, "step": 94810 }, { "epoch": 15.4673735725938, "grad_norm": 0.014516768977046013, "learning_rate": 0.00014847572373683749, "loss": 0.0037, "num_input_tokens_seen": 204543904, "step": 94815 }, { "epoch": 15.468189233278956, "grad_norm": 0.09488122910261154, "learning_rate": 0.00014842510834817274, "loss": 0.1597, "num_input_tokens_seen": 204555936, "step": 94820 }, { "epoch": 15.469004893964112, "grad_norm": 0.002272210316732526, "learning_rate": 0.00014837450008461922, "loss": 0.002, "num_input_tokens_seen": 204566720, "step": 94825 }, { "epoch": 15.469820554649266, "grad_norm": 0.012722480110824108, "learning_rate": 0.00014832389894720233, "loss": 0.0027, "num_input_tokens_seen": 204576928, "step": 94830 }, { "epoch": 15.470636215334421, "grad_norm": 0.11957122385501862, "learning_rate": 0.00014827330493694807, "loss": 0.0049, "num_input_tokens_seen": 204587840, "step": 94835 }, { "epoch": 15.471451876019575, "grad_norm": 0.006982048507779837, "learning_rate": 0.0001482227180548812, "loss": 0.0028, "num_input_tokens_seen": 204597856, "step": 94840 }, { "epoch": 15.47226753670473, "grad_norm": 0.4383132755756378, "learning_rate": 0.00014817213830202748, "loss": 0.0204, "num_input_tokens_seen": 204608256, "step": 94845 }, { "epoch": 15.473083197389887, "grad_norm": 0.005097550339996815, "learning_rate": 0.00014812156567941143, "loss": 0.0057, "num_input_tokens_seen": 204619328, "step": 94850 }, { "epoch": 15.47389885807504, "grad_norm": 0.011928047984838486, "learning_rate": 0.00014807100018805853, "loss": 0.144, "num_input_tokens_seen": 204629280, "step": 94855 }, { "epoch": 15.474714518760196, "grad_norm": 0.05958898365497589, "learning_rate": 0.00014802044182899294, "loss": 0.0049, "num_input_tokens_seen": 204640000, "step": 94860 }, { "epoch": 15.47553017944535, "grad_norm": 0.29844918847084045, "learning_rate": 0.00014796989060323997, "loss": 0.0623, "num_input_tokens_seen": 204650528, "step": 94865 }, { "epoch": 15.476345840130506, "grad_norm": 0.05278659611940384, "learning_rate": 0.00014791934651182338, "loss": 0.0189, "num_input_tokens_seen": 204660832, "step": 94870 }, { "epoch": 15.477161500815662, "grad_norm": 0.005115514155477285, "learning_rate": 0.0001478688095557682, "loss": 0.0095, "num_input_tokens_seen": 204671936, "step": 94875 }, { "epoch": 15.477977161500815, "grad_norm": 0.7572254538536072, "learning_rate": 0.00014781827973609803, "loss": 0.0503, "num_input_tokens_seen": 204682464, "step": 94880 }, { "epoch": 15.478792822185971, "grad_norm": 0.010641224682331085, "learning_rate": 0.00014776775705383733, "loss": 0.0048, "num_input_tokens_seen": 204693376, "step": 94885 }, { "epoch": 15.479608482871125, "grad_norm": 0.002268557669594884, "learning_rate": 0.00014771724151000986, "loss": 0.0929, "num_input_tokens_seen": 204703200, "step": 94890 }, { "epoch": 15.48042414355628, "grad_norm": 0.0021116656716912985, "learning_rate": 0.00014766673310563945, "loss": 0.0079, "num_input_tokens_seen": 204714240, "step": 94895 }, { "epoch": 15.481239804241435, "grad_norm": 0.06048103794455528, "learning_rate": 0.0001476162318417496, "loss": 0.0887, "num_input_tokens_seen": 204724928, "step": 94900 }, { "epoch": 15.48205546492659, "grad_norm": 0.0091262087225914, "learning_rate": 0.00014756573771936382, "loss": 0.008, "num_input_tokens_seen": 204735744, "step": 94905 }, { "epoch": 15.482871125611746, "grad_norm": 0.0009744201670400798, "learning_rate": 0.0001475152507395055, "loss": 0.0224, "num_input_tokens_seen": 204746848, "step": 94910 }, { "epoch": 15.4836867862969, "grad_norm": 0.00341918901540339, "learning_rate": 0.00014746477090319781, "loss": 0.0014, "num_input_tokens_seen": 204756672, "step": 94915 }, { "epoch": 15.484502446982056, "grad_norm": 0.6979557275772095, "learning_rate": 0.00014741429821146375, "loss": 0.0396, "num_input_tokens_seen": 204768288, "step": 94920 }, { "epoch": 15.48531810766721, "grad_norm": 0.01023927889764309, "learning_rate": 0.00014736383266532622, "loss": 0.0081, "num_input_tokens_seen": 204778976, "step": 94925 }, { "epoch": 15.486133768352365, "grad_norm": 0.2636188268661499, "learning_rate": 0.00014731337426580792, "loss": 0.0056, "num_input_tokens_seen": 204788608, "step": 94930 }, { "epoch": 15.486949429037521, "grad_norm": 0.11239346116781235, "learning_rate": 0.0001472629230139314, "loss": 0.0099, "num_input_tokens_seen": 204799104, "step": 94935 }, { "epoch": 15.487765089722675, "grad_norm": 0.015417056158185005, "learning_rate": 0.00014721247891071954, "loss": 0.0731, "num_input_tokens_seen": 204809312, "step": 94940 }, { "epoch": 15.48858075040783, "grad_norm": 0.004904933273792267, "learning_rate": 0.00014716204195719396, "loss": 0.0015, "num_input_tokens_seen": 204821600, "step": 94945 }, { "epoch": 15.489396411092985, "grad_norm": 0.024617401883006096, "learning_rate": 0.00014711161215437757, "loss": 0.0114, "num_input_tokens_seen": 204831552, "step": 94950 }, { "epoch": 15.49021207177814, "grad_norm": 0.010465862229466438, "learning_rate": 0.00014706118950329173, "loss": 0.0016, "num_input_tokens_seen": 204843616, "step": 94955 }, { "epoch": 15.491027732463296, "grad_norm": 0.029554126784205437, "learning_rate": 0.00014701077400495894, "loss": 0.0431, "num_input_tokens_seen": 204854272, "step": 94960 }, { "epoch": 15.49184339314845, "grad_norm": 0.03670038282871246, "learning_rate": 0.00014696036566040028, "loss": 0.014, "num_input_tokens_seen": 204863904, "step": 94965 }, { "epoch": 15.492659053833606, "grad_norm": 0.0063223764300346375, "learning_rate": 0.00014690996447063798, "loss": 0.0046, "num_input_tokens_seen": 204875424, "step": 94970 }, { "epoch": 15.49347471451876, "grad_norm": 0.039299722760915756, "learning_rate": 0.00014685957043669283, "loss": 0.0038, "num_input_tokens_seen": 204886560, "step": 94975 }, { "epoch": 15.494290375203915, "grad_norm": 0.015654640272259712, "learning_rate": 0.00014680918355958683, "loss": 0.0936, "num_input_tokens_seen": 204897504, "step": 94980 }, { "epoch": 15.49510603588907, "grad_norm": 0.11566969007253647, "learning_rate": 0.00014675880384034046, "loss": 0.2411, "num_input_tokens_seen": 204908160, "step": 94985 }, { "epoch": 15.495921696574225, "grad_norm": 0.5235056281089783, "learning_rate": 0.00014670843127997542, "loss": 0.0187, "num_input_tokens_seen": 204918336, "step": 94990 }, { "epoch": 15.49673735725938, "grad_norm": 0.01573905535042286, "learning_rate": 0.0001466580658795118, "loss": 0.0078, "num_input_tokens_seen": 204928672, "step": 94995 }, { "epoch": 15.497553017944535, "grad_norm": 0.026887550950050354, "learning_rate": 0.00014660770763997105, "loss": 0.0139, "num_input_tokens_seen": 204940288, "step": 95000 }, { "epoch": 15.49836867862969, "grad_norm": 0.04700768366456032, "learning_rate": 0.00014655735656237312, "loss": 0.0025, "num_input_tokens_seen": 204950240, "step": 95005 }, { "epoch": 15.499184339314844, "grad_norm": 0.761813223361969, "learning_rate": 0.00014650701264773907, "loss": 0.1209, "num_input_tokens_seen": 204960640, "step": 95010 }, { "epoch": 15.5, "grad_norm": 0.21248912811279297, "learning_rate": 0.0001464566758970885, "loss": 0.0104, "num_input_tokens_seen": 204970624, "step": 95015 }, { "epoch": 15.500815660685156, "grad_norm": 0.052297309041023254, "learning_rate": 0.00014640634631144206, "loss": 0.005, "num_input_tokens_seen": 204981856, "step": 95020 }, { "epoch": 15.50163132137031, "grad_norm": 0.01444829162210226, "learning_rate": 0.00014635602389181956, "loss": 0.0031, "num_input_tokens_seen": 204992544, "step": 95025 }, { "epoch": 15.502446982055465, "grad_norm": 0.015513258054852486, "learning_rate": 0.00014630570863924088, "loss": 0.0029, "num_input_tokens_seen": 205005056, "step": 95030 }, { "epoch": 15.50326264274062, "grad_norm": 0.00217048148624599, "learning_rate": 0.0001462554005547257, "loss": 0.0033, "num_input_tokens_seen": 205015264, "step": 95035 }, { "epoch": 15.504078303425775, "grad_norm": 2.784677505493164, "learning_rate": 0.00014620509963929362, "loss": 0.0815, "num_input_tokens_seen": 205025792, "step": 95040 }, { "epoch": 15.50489396411093, "grad_norm": 0.003909597638994455, "learning_rate": 0.00014615480589396396, "loss": 0.0058, "num_input_tokens_seen": 205035712, "step": 95045 }, { "epoch": 15.505709624796085, "grad_norm": 0.05657457187771797, "learning_rate": 0.0001461045193197561, "loss": 0.0806, "num_input_tokens_seen": 205047232, "step": 95050 }, { "epoch": 15.50652528548124, "grad_norm": 0.008316060528159142, "learning_rate": 0.00014605423991768908, "loss": 0.0022, "num_input_tokens_seen": 205058240, "step": 95055 }, { "epoch": 15.507340946166394, "grad_norm": 0.0024814011994749308, "learning_rate": 0.00014600396768878188, "loss": 0.1083, "num_input_tokens_seen": 205069536, "step": 95060 }, { "epoch": 15.50815660685155, "grad_norm": 1.057434320449829, "learning_rate": 0.0001459537026340534, "loss": 0.0192, "num_input_tokens_seen": 205080608, "step": 95065 }, { "epoch": 15.508972267536706, "grad_norm": 1.5728540420532227, "learning_rate": 0.0001459034447545222, "loss": 0.1528, "num_input_tokens_seen": 205090624, "step": 95070 }, { "epoch": 15.50978792822186, "grad_norm": 0.006273490842431784, "learning_rate": 0.00014585319405120695, "loss": 0.0024, "num_input_tokens_seen": 205101856, "step": 95075 }, { "epoch": 15.510603588907015, "grad_norm": 0.004266352392733097, "learning_rate": 0.0001458029505251258, "loss": 0.0743, "num_input_tokens_seen": 205112096, "step": 95080 }, { "epoch": 15.51141924959217, "grad_norm": 0.014332637190818787, "learning_rate": 0.0001457527141772975, "loss": 0.0018, "num_input_tokens_seen": 205123616, "step": 95085 }, { "epoch": 15.512234910277325, "grad_norm": 0.0016954129096120596, "learning_rate": 0.00014570248500873963, "loss": 0.0335, "num_input_tokens_seen": 205133888, "step": 95090 }, { "epoch": 15.513050570962479, "grad_norm": 0.008394996635615826, "learning_rate": 0.00014565226302047058, "loss": 0.0096, "num_input_tokens_seen": 205145536, "step": 95095 }, { "epoch": 15.513866231647635, "grad_norm": 0.018150197342038155, "learning_rate": 0.00014560204821350764, "loss": 0.0027, "num_input_tokens_seen": 205156704, "step": 95100 }, { "epoch": 15.51468189233279, "grad_norm": 0.01957550458610058, "learning_rate": 0.00014555184058886905, "loss": 0.0014, "num_input_tokens_seen": 205167072, "step": 95105 }, { "epoch": 15.515497553017944, "grad_norm": 0.045719701796770096, "learning_rate": 0.00014550164014757183, "loss": 0.0031, "num_input_tokens_seen": 205178560, "step": 95110 }, { "epoch": 15.5163132137031, "grad_norm": 0.010288912802934647, "learning_rate": 0.00014545144689063382, "loss": 0.0786, "num_input_tokens_seen": 205189664, "step": 95115 }, { "epoch": 15.517128874388254, "grad_norm": 0.00576343759894371, "learning_rate": 0.0001454012608190718, "loss": 0.0038, "num_input_tokens_seen": 205201152, "step": 95120 }, { "epoch": 15.51794453507341, "grad_norm": 0.030536195263266563, "learning_rate": 0.0001453510819339033, "loss": 0.0495, "num_input_tokens_seen": 205211872, "step": 95125 }, { "epoch": 15.518760195758565, "grad_norm": 0.005196613259613514, "learning_rate": 0.0001453009102361447, "loss": 0.0107, "num_input_tokens_seen": 205222528, "step": 95130 }, { "epoch": 15.51957585644372, "grad_norm": 0.009649330750107765, "learning_rate": 0.0001452507457268135, "loss": 0.012, "num_input_tokens_seen": 205233248, "step": 95135 }, { "epoch": 15.520391517128875, "grad_norm": 0.11522848159074783, "learning_rate": 0.00014520058840692562, "loss": 0.0085, "num_input_tokens_seen": 205244480, "step": 95140 }, { "epoch": 15.521207177814029, "grad_norm": 0.06710866093635559, "learning_rate": 0.00014515043827749812, "loss": 0.1878, "num_input_tokens_seen": 205256000, "step": 95145 }, { "epoch": 15.522022838499185, "grad_norm": 0.0015318681253120303, "learning_rate": 0.0001451002953395471, "loss": 0.0351, "num_input_tokens_seen": 205267776, "step": 95150 }, { "epoch": 15.522838499184338, "grad_norm": 0.03392402455210686, "learning_rate": 0.00014505015959408884, "loss": 0.0894, "num_input_tokens_seen": 205279424, "step": 95155 }, { "epoch": 15.523654159869494, "grad_norm": 0.03276089206337929, "learning_rate": 0.00014500003104213932, "loss": 0.013, "num_input_tokens_seen": 205290368, "step": 95160 }, { "epoch": 15.52446982055465, "grad_norm": 0.7798789739608765, "learning_rate": 0.0001449499096847146, "loss": 0.0258, "num_input_tokens_seen": 205300640, "step": 95165 }, { "epoch": 15.525285481239804, "grad_norm": 0.349316269159317, "learning_rate": 0.00014489979552283035, "loss": 0.0139, "num_input_tokens_seen": 205311904, "step": 95170 }, { "epoch": 15.52610114192496, "grad_norm": 0.03591860458254814, "learning_rate": 0.0001448496885575022, "loss": 0.0046, "num_input_tokens_seen": 205321728, "step": 95175 }, { "epoch": 15.526916802610113, "grad_norm": 0.047168806195259094, "learning_rate": 0.00014479958878974564, "loss": 0.0045, "num_input_tokens_seen": 205333536, "step": 95180 }, { "epoch": 15.52773246329527, "grad_norm": 0.17809604108333588, "learning_rate": 0.00014474949622057603, "loss": 0.0061, "num_input_tokens_seen": 205345248, "step": 95185 }, { "epoch": 15.528548123980425, "grad_norm": 0.0030888488981872797, "learning_rate": 0.00014469941085100857, "loss": 0.0041, "num_input_tokens_seen": 205356992, "step": 95190 }, { "epoch": 15.529363784665579, "grad_norm": 0.013002474792301655, "learning_rate": 0.00014464933268205826, "loss": 0.0091, "num_input_tokens_seen": 205368384, "step": 95195 }, { "epoch": 15.530179445350734, "grad_norm": 0.026545340195298195, "learning_rate": 0.00014459926171474002, "loss": 0.0294, "num_input_tokens_seen": 205379744, "step": 95200 }, { "epoch": 15.530995106035888, "grad_norm": 0.010294906795024872, "learning_rate": 0.0001445491979500686, "loss": 0.0656, "num_input_tokens_seen": 205390336, "step": 95205 }, { "epoch": 15.531810766721044, "grad_norm": 0.11178641021251678, "learning_rate": 0.0001444991413890586, "loss": 0.1713, "num_input_tokens_seen": 205400800, "step": 95210 }, { "epoch": 15.5326264274062, "grad_norm": 0.01834450662136078, "learning_rate": 0.00014444909203272438, "loss": 0.085, "num_input_tokens_seen": 205412416, "step": 95215 }, { "epoch": 15.533442088091354, "grad_norm": 0.0027292254380881786, "learning_rate": 0.0001443990498820806, "loss": 0.0527, "num_input_tokens_seen": 205423552, "step": 95220 }, { "epoch": 15.53425774877651, "grad_norm": 0.07576511800289154, "learning_rate": 0.0001443490149381409, "loss": 0.0066, "num_input_tokens_seen": 205435584, "step": 95225 }, { "epoch": 15.535073409461663, "grad_norm": 0.0196362491697073, "learning_rate": 0.0001442989872019199, "loss": 0.0044, "num_input_tokens_seen": 205447328, "step": 95230 }, { "epoch": 15.535889070146819, "grad_norm": 0.003983140457421541, "learning_rate": 0.00014424896667443083, "loss": 0.0979, "num_input_tokens_seen": 205457024, "step": 95235 }, { "epoch": 15.536704730831975, "grad_norm": 0.16658292710781097, "learning_rate": 0.00014419895335668809, "loss": 0.0141, "num_input_tokens_seen": 205468864, "step": 95240 }, { "epoch": 15.537520391517129, "grad_norm": 0.062435559928417206, "learning_rate": 0.00014414894724970462, "loss": 0.0054, "num_input_tokens_seen": 205480000, "step": 95245 }, { "epoch": 15.538336052202284, "grad_norm": 0.018457751721143723, "learning_rate": 0.00014409894835449444, "loss": 0.0026, "num_input_tokens_seen": 205490528, "step": 95250 }, { "epoch": 15.539151712887438, "grad_norm": 0.00772630563005805, "learning_rate": 0.00014404895667207028, "loss": 0.0201, "num_input_tokens_seen": 205500448, "step": 95255 }, { "epoch": 15.539967373572594, "grad_norm": 0.46286696195602417, "learning_rate": 0.00014399897220344576, "loss": 0.0259, "num_input_tokens_seen": 205512288, "step": 95260 }, { "epoch": 15.540783034257748, "grad_norm": 0.1748085916042328, "learning_rate": 0.00014394899494963364, "loss": 0.0351, "num_input_tokens_seen": 205523456, "step": 95265 }, { "epoch": 15.541598694942904, "grad_norm": 0.013476897962391376, "learning_rate": 0.00014389902491164681, "loss": 0.0167, "num_input_tokens_seen": 205534112, "step": 95270 }, { "epoch": 15.54241435562806, "grad_norm": 0.058507081121206284, "learning_rate": 0.00014384906209049804, "loss": 0.0304, "num_input_tokens_seen": 205544832, "step": 95275 }, { "epoch": 15.543230016313213, "grad_norm": 0.46414992213249207, "learning_rate": 0.0001437991064871998, "loss": 0.0145, "num_input_tokens_seen": 205553408, "step": 95280 }, { "epoch": 15.544045676998369, "grad_norm": 0.0059353322722017765, "learning_rate": 0.0001437491581027645, "loss": 0.0016, "num_input_tokens_seen": 205564064, "step": 95285 }, { "epoch": 15.544861337683523, "grad_norm": 0.014222140423953533, "learning_rate": 0.00014369921693820447, "loss": 0.0187, "num_input_tokens_seen": 205573920, "step": 95290 }, { "epoch": 15.545676998368679, "grad_norm": 0.1142835021018982, "learning_rate": 0.00014364928299453184, "loss": 0.0096, "num_input_tokens_seen": 205582528, "step": 95295 }, { "epoch": 15.546492659053834, "grad_norm": 0.006916298530995846, "learning_rate": 0.00014359935627275856, "loss": 0.0339, "num_input_tokens_seen": 205593984, "step": 95300 }, { "epoch": 15.547308319738988, "grad_norm": 0.008835156448185444, "learning_rate": 0.00014354943677389643, "loss": 0.0745, "num_input_tokens_seen": 205605664, "step": 95305 }, { "epoch": 15.548123980424144, "grad_norm": 0.007379354443401098, "learning_rate": 0.00014349952449895715, "loss": 0.0026, "num_input_tokens_seen": 205616224, "step": 95310 }, { "epoch": 15.548939641109298, "grad_norm": 0.6989656090736389, "learning_rate": 0.00014344961944895223, "loss": 0.0854, "num_input_tokens_seen": 205626656, "step": 95315 }, { "epoch": 15.549755301794454, "grad_norm": 0.05918343365192413, "learning_rate": 0.00014339972162489317, "loss": 0.0248, "num_input_tokens_seen": 205638368, "step": 95320 }, { "epoch": 15.550570962479608, "grad_norm": 0.07005748897790909, "learning_rate": 0.0001433498310277911, "loss": 0.0059, "num_input_tokens_seen": 205649568, "step": 95325 }, { "epoch": 15.551386623164763, "grad_norm": 0.03827957063913345, "learning_rate": 0.0001432999476586571, "loss": 0.0042, "num_input_tokens_seen": 205660864, "step": 95330 }, { "epoch": 15.552202283849919, "grad_norm": 0.004585604183375835, "learning_rate": 0.00014325007151850218, "loss": 0.0197, "num_input_tokens_seen": 205672608, "step": 95335 }, { "epoch": 15.553017944535073, "grad_norm": 0.024896059185266495, "learning_rate": 0.00014320020260833716, "loss": 0.0885, "num_input_tokens_seen": 205684160, "step": 95340 }, { "epoch": 15.553833605220229, "grad_norm": 0.6745076775550842, "learning_rate": 0.00014315034092917268, "loss": 0.2251, "num_input_tokens_seen": 205694432, "step": 95345 }, { "epoch": 15.554649265905383, "grad_norm": 0.4475511908531189, "learning_rate": 0.00014310048648201917, "loss": 0.0234, "num_input_tokens_seen": 205703808, "step": 95350 }, { "epoch": 15.555464926590538, "grad_norm": 0.013264917768537998, "learning_rate": 0.0001430506392678871, "loss": 0.0227, "num_input_tokens_seen": 205713408, "step": 95355 }, { "epoch": 15.556280587275694, "grad_norm": 0.004234666470438242, "learning_rate": 0.00014300079928778646, "loss": 0.0035, "num_input_tokens_seen": 205725056, "step": 95360 }, { "epoch": 15.557096247960848, "grad_norm": 0.010939162224531174, "learning_rate": 0.00014295096654272772, "loss": 0.0045, "num_input_tokens_seen": 205736576, "step": 95365 }, { "epoch": 15.557911908646004, "grad_norm": 0.0070751383900642395, "learning_rate": 0.00014290114103372058, "loss": 0.0869, "num_input_tokens_seen": 205747744, "step": 95370 }, { "epoch": 15.558727569331158, "grad_norm": 0.1485825479030609, "learning_rate": 0.00014285132276177482, "loss": 0.0113, "num_input_tokens_seen": 205758848, "step": 95375 }, { "epoch": 15.559543230016313, "grad_norm": 0.2433205246925354, "learning_rate": 0.00014280151172790006, "loss": 0.1152, "num_input_tokens_seen": 205769856, "step": 95380 }, { "epoch": 15.560358890701469, "grad_norm": 0.004385693464428186, "learning_rate": 0.00014275170793310582, "loss": 0.0083, "num_input_tokens_seen": 205780896, "step": 95385 }, { "epoch": 15.561174551386623, "grad_norm": 0.011068751104176044, "learning_rate": 0.00014270191137840145, "loss": 0.0032, "num_input_tokens_seen": 205789664, "step": 95390 }, { "epoch": 15.561990212071779, "grad_norm": 0.042384933680295944, "learning_rate": 0.00014265212206479604, "loss": 0.1064, "num_input_tokens_seen": 205800544, "step": 95395 }, { "epoch": 15.562805872756933, "grad_norm": 0.04173944145441055, "learning_rate": 0.00014260233999329873, "loss": 0.1057, "num_input_tokens_seen": 205811456, "step": 95400 }, { "epoch": 15.563621533442088, "grad_norm": 0.00545839685946703, "learning_rate": 0.00014255256516491845, "loss": 0.022, "num_input_tokens_seen": 205822432, "step": 95405 }, { "epoch": 15.564437194127244, "grad_norm": 0.01669471710920334, "learning_rate": 0.00014250279758066387, "loss": 0.0044, "num_input_tokens_seen": 205833248, "step": 95410 }, { "epoch": 15.565252854812398, "grad_norm": 0.04778437316417694, "learning_rate": 0.00014245303724154358, "loss": 0.0125, "num_input_tokens_seen": 205843584, "step": 95415 }, { "epoch": 15.566068515497554, "grad_norm": 0.00581990834325552, "learning_rate": 0.00014240328414856607, "loss": 0.014, "num_input_tokens_seen": 205853984, "step": 95420 }, { "epoch": 15.566884176182707, "grad_norm": 0.015163683332502842, "learning_rate": 0.00014235353830273966, "loss": 0.0413, "num_input_tokens_seen": 205864448, "step": 95425 }, { "epoch": 15.567699836867863, "grad_norm": 0.007937601767480373, "learning_rate": 0.00014230379970507252, "loss": 0.0661, "num_input_tokens_seen": 205876224, "step": 95430 }, { "epoch": 15.568515497553017, "grad_norm": 0.26424267888069153, "learning_rate": 0.00014225406835657262, "loss": 0.0125, "num_input_tokens_seen": 205885952, "step": 95435 }, { "epoch": 15.569331158238173, "grad_norm": 0.3381975293159485, "learning_rate": 0.00014220434425824785, "loss": 0.0276, "num_input_tokens_seen": 205896352, "step": 95440 }, { "epoch": 15.570146818923329, "grad_norm": 0.0034016945865005255, "learning_rate": 0.00014215462741110597, "loss": 0.0343, "num_input_tokens_seen": 205908832, "step": 95445 }, { "epoch": 15.570962479608482, "grad_norm": 0.7144123911857605, "learning_rate": 0.00014210491781615453, "loss": 0.0106, "num_input_tokens_seen": 205919648, "step": 95450 }, { "epoch": 15.571778140293638, "grad_norm": 0.017977062612771988, "learning_rate": 0.00014205521547440092, "loss": 0.0176, "num_input_tokens_seen": 205929248, "step": 95455 }, { "epoch": 15.572593800978792, "grad_norm": 0.006979598198086023, "learning_rate": 0.00014200552038685249, "loss": 0.0076, "num_input_tokens_seen": 205940192, "step": 95460 }, { "epoch": 15.573409461663948, "grad_norm": 0.6939419507980347, "learning_rate": 0.00014195583255451633, "loss": 0.1169, "num_input_tokens_seen": 205950944, "step": 95465 }, { "epoch": 15.574225122349104, "grad_norm": 0.0033475421369075775, "learning_rate": 0.00014190615197839929, "loss": 0.0042, "num_input_tokens_seen": 205962400, "step": 95470 }, { "epoch": 15.575040783034257, "grad_norm": 0.14744523167610168, "learning_rate": 0.00014185647865950861, "loss": 0.0063, "num_input_tokens_seen": 205973632, "step": 95475 }, { "epoch": 15.575856443719413, "grad_norm": 0.022903164848685265, "learning_rate": 0.00014180681259885048, "loss": 0.0036, "num_input_tokens_seen": 205984064, "step": 95480 }, { "epoch": 15.576672104404567, "grad_norm": 0.002572441939264536, "learning_rate": 0.000141757153797432, "loss": 0.018, "num_input_tokens_seen": 205994240, "step": 95485 }, { "epoch": 15.577487765089723, "grad_norm": 0.03215838968753815, "learning_rate": 0.00014170750225625888, "loss": 0.0043, "num_input_tokens_seen": 206002304, "step": 95490 }, { "epoch": 15.578303425774878, "grad_norm": 0.004546868149191141, "learning_rate": 0.00014165785797633812, "loss": 0.018, "num_input_tokens_seen": 206014112, "step": 95495 }, { "epoch": 15.579119086460032, "grad_norm": 0.03626079857349396, "learning_rate": 0.00014160822095867515, "loss": 0.0048, "num_input_tokens_seen": 206024896, "step": 95500 }, { "epoch": 15.579934747145188, "grad_norm": 0.0058312248438596725, "learning_rate": 0.00014155859120427633, "loss": 0.0218, "num_input_tokens_seen": 206034688, "step": 95505 }, { "epoch": 15.580750407830342, "grad_norm": 0.1801530122756958, "learning_rate": 0.00014150896871414743, "loss": 0.0048, "num_input_tokens_seen": 206045184, "step": 95510 }, { "epoch": 15.581566068515498, "grad_norm": 0.02728962153196335, "learning_rate": 0.00014145935348929407, "loss": 0.0022, "num_input_tokens_seen": 206054400, "step": 95515 }, { "epoch": 15.582381729200652, "grad_norm": 0.00474175252020359, "learning_rate": 0.0001414097455307217, "loss": 0.0068, "num_input_tokens_seen": 206064704, "step": 95520 }, { "epoch": 15.583197389885807, "grad_norm": 0.008055509999394417, "learning_rate": 0.00014136014483943576, "loss": 0.0025, "num_input_tokens_seen": 206074688, "step": 95525 }, { "epoch": 15.584013050570963, "grad_norm": 0.02255905419588089, "learning_rate": 0.0001413105514164415, "loss": 0.0059, "num_input_tokens_seen": 206085920, "step": 95530 }, { "epoch": 15.584828711256117, "grad_norm": 0.019128749147057533, "learning_rate": 0.0001412609652627439, "loss": 0.0205, "num_input_tokens_seen": 206096384, "step": 95535 }, { "epoch": 15.585644371941273, "grad_norm": 0.11031868308782578, "learning_rate": 0.00014121138637934795, "loss": 0.0078, "num_input_tokens_seen": 206106816, "step": 95540 }, { "epoch": 15.586460032626427, "grad_norm": 0.0037778846453875303, "learning_rate": 0.00014116181476725838, "loss": 0.0027, "num_input_tokens_seen": 206117824, "step": 95545 }, { "epoch": 15.587275693311582, "grad_norm": 0.012948816642165184, "learning_rate": 0.00014111225042747987, "loss": 0.0243, "num_input_tokens_seen": 206127520, "step": 95550 }, { "epoch": 15.588091353996738, "grad_norm": 0.01513448916375637, "learning_rate": 0.00014106269336101692, "loss": 0.0077, "num_input_tokens_seen": 206137888, "step": 95555 }, { "epoch": 15.588907014681892, "grad_norm": 0.012556265108287334, "learning_rate": 0.0001410131435688738, "loss": 0.0036, "num_input_tokens_seen": 206148992, "step": 95560 }, { "epoch": 15.589722675367048, "grad_norm": 0.002665876876562834, "learning_rate": 0.00014096360105205475, "loss": 0.0032, "num_input_tokens_seen": 206159648, "step": 95565 }, { "epoch": 15.590538336052202, "grad_norm": 0.019830789417028427, "learning_rate": 0.00014091406581156373, "loss": 0.0287, "num_input_tokens_seen": 206169504, "step": 95570 }, { "epoch": 15.591353996737357, "grad_norm": 0.003747944487258792, "learning_rate": 0.00014086453784840463, "loss": 0.0016, "num_input_tokens_seen": 206179488, "step": 95575 }, { "epoch": 15.592169657422513, "grad_norm": 0.05996265262365341, "learning_rate": 0.00014081501716358154, "loss": 0.0035, "num_input_tokens_seen": 206189920, "step": 95580 }, { "epoch": 15.592985318107667, "grad_norm": 0.08386008441448212, "learning_rate": 0.0001407655037580975, "loss": 0.0059, "num_input_tokens_seen": 206200608, "step": 95585 }, { "epoch": 15.593800978792823, "grad_norm": 0.006318517494946718, "learning_rate": 0.0001407159976329565, "loss": 0.0082, "num_input_tokens_seen": 206210816, "step": 95590 }, { "epoch": 15.594616639477977, "grad_norm": 0.04145456850528717, "learning_rate": 0.00014066649878916133, "loss": 0.0052, "num_input_tokens_seen": 206222880, "step": 95595 }, { "epoch": 15.595432300163132, "grad_norm": 0.04293735697865486, "learning_rate": 0.00014061700722771569, "loss": 0.0045, "num_input_tokens_seen": 206233248, "step": 95600 }, { "epoch": 15.596247960848288, "grad_norm": 0.03934451565146446, "learning_rate": 0.000140567522949622, "loss": 0.0023, "num_input_tokens_seen": 206244672, "step": 95605 }, { "epoch": 15.597063621533442, "grad_norm": 0.07162196189165115, "learning_rate": 0.00014051804595588375, "loss": 0.0053, "num_input_tokens_seen": 206255392, "step": 95610 }, { "epoch": 15.597879282218598, "grad_norm": 0.0015586281660944223, "learning_rate": 0.00014046857624750304, "loss": 0.0557, "num_input_tokens_seen": 206267200, "step": 95615 }, { "epoch": 15.598694942903752, "grad_norm": 0.19091452658176422, "learning_rate": 0.00014041911382548305, "loss": 0.0065, "num_input_tokens_seen": 206278368, "step": 95620 }, { "epoch": 15.599510603588907, "grad_norm": 0.010266013443470001, "learning_rate": 0.00014036965869082551, "loss": 0.0022, "num_input_tokens_seen": 206288576, "step": 95625 }, { "epoch": 15.600326264274061, "grad_norm": 0.0075054774060845375, "learning_rate": 0.00014032021084453344, "loss": 0.0028, "num_input_tokens_seen": 206299488, "step": 95630 }, { "epoch": 15.601141924959217, "grad_norm": 0.0018897277768701315, "learning_rate": 0.0001402707702876082, "loss": 0.0013, "num_input_tokens_seen": 206310016, "step": 95635 }, { "epoch": 15.601957585644373, "grad_norm": 0.012948420830070972, "learning_rate": 0.0001402213370210525, "loss": 0.0037, "num_input_tokens_seen": 206321760, "step": 95640 }, { "epoch": 15.602773246329527, "grad_norm": 0.004016253165900707, "learning_rate": 0.00014017191104586751, "loss": 0.0533, "num_input_tokens_seen": 206333664, "step": 95645 }, { "epoch": 15.603588907014682, "grad_norm": 0.001181950094178319, "learning_rate": 0.00014012249236305542, "loss": 0.0035, "num_input_tokens_seen": 206344416, "step": 95650 }, { "epoch": 15.604404567699836, "grad_norm": 0.011298573575913906, "learning_rate": 0.00014007308097361749, "loss": 0.0026, "num_input_tokens_seen": 206354592, "step": 95655 }, { "epoch": 15.605220228384992, "grad_norm": 0.0010861607734113932, "learning_rate": 0.00014002367687855516, "loss": 0.002, "num_input_tokens_seen": 206365920, "step": 95660 }, { "epoch": 15.606035889070148, "grad_norm": 0.007202661130577326, "learning_rate": 0.00013997428007886975, "loss": 0.0034, "num_input_tokens_seen": 206376960, "step": 95665 }, { "epoch": 15.606851549755302, "grad_norm": 0.010722985491156578, "learning_rate": 0.00013992489057556223, "loss": 0.0122, "num_input_tokens_seen": 206388448, "step": 95670 }, { "epoch": 15.607667210440457, "grad_norm": 0.013042837381362915, "learning_rate": 0.00013987550836963358, "loss": 0.0183, "num_input_tokens_seen": 206399584, "step": 95675 }, { "epoch": 15.608482871125611, "grad_norm": 0.0021078349091112614, "learning_rate": 0.0001398261334620846, "loss": 0.0052, "num_input_tokens_seen": 206410784, "step": 95680 }, { "epoch": 15.609298531810767, "grad_norm": 0.009603566490113735, "learning_rate": 0.00013977676585391597, "loss": 0.1108, "num_input_tokens_seen": 206422688, "step": 95685 }, { "epoch": 15.61011419249592, "grad_norm": 0.004575230646878481, "learning_rate": 0.00013972740554612817, "loss": 0.004, "num_input_tokens_seen": 206433760, "step": 95690 }, { "epoch": 15.610929853181077, "grad_norm": 0.0028347403276711702, "learning_rate": 0.0001396780525397215, "loss": 0.0341, "num_input_tokens_seen": 206443520, "step": 95695 }, { "epoch": 15.611745513866232, "grad_norm": 0.0210505910217762, "learning_rate": 0.00013962870683569605, "loss": 0.01, "num_input_tokens_seen": 206454848, "step": 95700 }, { "epoch": 15.612561174551386, "grad_norm": 0.004142044112086296, "learning_rate": 0.00013957936843505238, "loss": 0.0017, "num_input_tokens_seen": 206466208, "step": 95705 }, { "epoch": 15.613376835236542, "grad_norm": 0.043968360871076584, "learning_rate": 0.00013953003733878965, "loss": 0.0023, "num_input_tokens_seen": 206476832, "step": 95710 }, { "epoch": 15.614192495921696, "grad_norm": 0.003359774360433221, "learning_rate": 0.0001394807135479083, "loss": 0.0015, "num_input_tokens_seen": 206488256, "step": 95715 }, { "epoch": 15.615008156606851, "grad_norm": 0.0018455464160069823, "learning_rate": 0.0001394313970634074, "loss": 0.0014, "num_input_tokens_seen": 206499040, "step": 95720 }, { "epoch": 15.615823817292007, "grad_norm": 0.09072218090295792, "learning_rate": 0.0001393820878862869, "loss": 0.0187, "num_input_tokens_seen": 206508864, "step": 95725 }, { "epoch": 15.616639477977161, "grad_norm": 0.08390726149082184, "learning_rate": 0.00013933278601754563, "loss": 0.0064, "num_input_tokens_seen": 206519968, "step": 95730 }, { "epoch": 15.617455138662317, "grad_norm": 0.005438143387436867, "learning_rate": 0.00013928349145818326, "loss": 0.0015, "num_input_tokens_seen": 206529920, "step": 95735 }, { "epoch": 15.61827079934747, "grad_norm": 0.0037689022719860077, "learning_rate": 0.00013923420420919823, "loss": 0.0075, "num_input_tokens_seen": 206539680, "step": 95740 }, { "epoch": 15.619086460032626, "grad_norm": 0.0005843095132149756, "learning_rate": 0.00013918492427159002, "loss": 0.0028, "num_input_tokens_seen": 206550912, "step": 95745 }, { "epoch": 15.619902120717782, "grad_norm": 0.009634661488234997, "learning_rate": 0.00013913565164635672, "loss": 0.0053, "num_input_tokens_seen": 206561920, "step": 95750 }, { "epoch": 15.620717781402936, "grad_norm": 0.027437442913651466, "learning_rate": 0.00013908638633449756, "loss": 0.0107, "num_input_tokens_seen": 206572864, "step": 95755 }, { "epoch": 15.621533442088092, "grad_norm": 0.00839274562895298, "learning_rate": 0.00013903712833701032, "loss": 0.004, "num_input_tokens_seen": 206582816, "step": 95760 }, { "epoch": 15.622349102773246, "grad_norm": 0.0017398009076714516, "learning_rate": 0.0001389878776548939, "loss": 0.002, "num_input_tokens_seen": 206593472, "step": 95765 }, { "epoch": 15.623164763458401, "grad_norm": 0.4918045699596405, "learning_rate": 0.00013893863428914583, "loss": 0.0126, "num_input_tokens_seen": 206604288, "step": 95770 }, { "epoch": 15.623980424143557, "grad_norm": 0.00416945293545723, "learning_rate": 0.00013888939824076464, "loss": 0.0238, "num_input_tokens_seen": 206614976, "step": 95775 }, { "epoch": 15.624796084828711, "grad_norm": 0.020162688568234444, "learning_rate": 0.00013884016951074758, "loss": 0.0633, "num_input_tokens_seen": 206626944, "step": 95780 }, { "epoch": 15.625611745513867, "grad_norm": 0.025522055104374886, "learning_rate": 0.00013879094810009284, "loss": 0.0031, "num_input_tokens_seen": 206638016, "step": 95785 }, { "epoch": 15.62642740619902, "grad_norm": 0.6935184001922607, "learning_rate": 0.00013874173400979772, "loss": 0.1426, "num_input_tokens_seen": 206648800, "step": 95790 }, { "epoch": 15.627243066884176, "grad_norm": 0.008708599954843521, "learning_rate": 0.00013869252724085974, "loss": 0.0025, "num_input_tokens_seen": 206659936, "step": 95795 }, { "epoch": 15.62805872756933, "grad_norm": 0.020554156973958015, "learning_rate": 0.00013864332779427597, "loss": 0.0436, "num_input_tokens_seen": 206670432, "step": 95800 }, { "epoch": 15.628874388254486, "grad_norm": 0.005696819629520178, "learning_rate": 0.00013859413567104357, "loss": 0.0026, "num_input_tokens_seen": 206680192, "step": 95805 }, { "epoch": 15.629690048939642, "grad_norm": 0.013314320705831051, "learning_rate": 0.00013854495087215951, "loss": 0.1316, "num_input_tokens_seen": 206690400, "step": 95810 }, { "epoch": 15.630505709624796, "grad_norm": 0.021477151662111282, "learning_rate": 0.00013849577339862057, "loss": 0.0036, "num_input_tokens_seen": 206699680, "step": 95815 }, { "epoch": 15.631321370309951, "grad_norm": 0.009091731160879135, "learning_rate": 0.00013844660325142334, "loss": 0.0225, "num_input_tokens_seen": 206709888, "step": 95820 }, { "epoch": 15.632137030995105, "grad_norm": 0.007731936872005463, "learning_rate": 0.00013839744043156438, "loss": 0.0544, "num_input_tokens_seen": 206720928, "step": 95825 }, { "epoch": 15.632952691680261, "grad_norm": 0.040000490844249725, "learning_rate": 0.00013834828494004004, "loss": 0.0042, "num_input_tokens_seen": 206732064, "step": 95830 }, { "epoch": 15.633768352365417, "grad_norm": 0.004892498720437288, "learning_rate": 0.0001382991367778465, "loss": 0.022, "num_input_tokens_seen": 206742752, "step": 95835 }, { "epoch": 15.63458401305057, "grad_norm": 0.030733227729797363, "learning_rate": 0.00013824999594597975, "loss": 0.0088, "num_input_tokens_seen": 206753472, "step": 95840 }, { "epoch": 15.635399673735726, "grad_norm": 1.7156044244766235, "learning_rate": 0.00013820086244543562, "loss": 0.0621, "num_input_tokens_seen": 206765504, "step": 95845 }, { "epoch": 15.63621533442088, "grad_norm": 0.01411938015371561, "learning_rate": 0.00013815173627721027, "loss": 0.0103, "num_input_tokens_seen": 206776000, "step": 95850 }, { "epoch": 15.637030995106036, "grad_norm": 0.35875430703163147, "learning_rate": 0.00013810261744229873, "loss": 0.0099, "num_input_tokens_seen": 206787040, "step": 95855 }, { "epoch": 15.63784665579119, "grad_norm": 0.498166561126709, "learning_rate": 0.00013805350594169708, "loss": 0.0079, "num_input_tokens_seen": 206798336, "step": 95860 }, { "epoch": 15.638662316476346, "grad_norm": 0.0032771669793874025, "learning_rate": 0.0001380044017764, "loss": 0.0029, "num_input_tokens_seen": 206808512, "step": 95865 }, { "epoch": 15.639477977161501, "grad_norm": 0.0017199410358443856, "learning_rate": 0.0001379553049474032, "loss": 0.0062, "num_input_tokens_seen": 206820512, "step": 95870 }, { "epoch": 15.640293637846655, "grad_norm": 0.006401192396879196, "learning_rate": 0.00013790621545570114, "loss": 0.0032, "num_input_tokens_seen": 206832320, "step": 95875 }, { "epoch": 15.641109298531811, "grad_norm": 0.4542958438396454, "learning_rate": 0.00013785713330228928, "loss": 0.0107, "num_input_tokens_seen": 206841472, "step": 95880 }, { "epoch": 15.641924959216965, "grad_norm": 0.005210032686591148, "learning_rate": 0.00013780805848816175, "loss": 0.0022, "num_input_tokens_seen": 206851200, "step": 95885 }, { "epoch": 15.64274061990212, "grad_norm": 0.00542182894423604, "learning_rate": 0.0001377589910143135, "loss": 0.0739, "num_input_tokens_seen": 206861056, "step": 95890 }, { "epoch": 15.643556280587276, "grad_norm": 0.002829355886206031, "learning_rate": 0.00013770993088173884, "loss": 0.1166, "num_input_tokens_seen": 206871872, "step": 95895 }, { "epoch": 15.64437194127243, "grad_norm": 0.021954821422696114, "learning_rate": 0.000137660878091432, "loss": 0.0047, "num_input_tokens_seen": 206881824, "step": 95900 }, { "epoch": 15.645187601957586, "grad_norm": 0.05870498716831207, "learning_rate": 0.0001376118326443872, "loss": 0.016, "num_input_tokens_seen": 206891840, "step": 95905 }, { "epoch": 15.64600326264274, "grad_norm": 0.003055767621845007, "learning_rate": 0.00013756279454159827, "loss": 0.0463, "num_input_tokens_seen": 206903168, "step": 95910 }, { "epoch": 15.646818923327896, "grad_norm": 0.045317795127630234, "learning_rate": 0.0001375137637840591, "loss": 0.1065, "num_input_tokens_seen": 206914976, "step": 95915 }, { "epoch": 15.647634584013051, "grad_norm": 0.002622077474370599, "learning_rate": 0.00013746474037276335, "loss": 0.0044, "num_input_tokens_seen": 206926784, "step": 95920 }, { "epoch": 15.648450244698205, "grad_norm": 0.037151820957660675, "learning_rate": 0.0001374157243087046, "loss": 0.0084, "num_input_tokens_seen": 206937504, "step": 95925 }, { "epoch": 15.649265905383361, "grad_norm": 0.0036785327829420567, "learning_rate": 0.00013736671559287612, "loss": 0.0092, "num_input_tokens_seen": 206948000, "step": 95930 }, { "epoch": 15.650081566068515, "grad_norm": 0.06395067274570465, "learning_rate": 0.0001373177142262712, "loss": 0.0181, "num_input_tokens_seen": 206958432, "step": 95935 }, { "epoch": 15.65089722675367, "grad_norm": 0.011749418452382088, "learning_rate": 0.0001372687202098829, "loss": 0.0085, "num_input_tokens_seen": 206969280, "step": 95940 }, { "epoch": 15.651712887438826, "grad_norm": 0.0022289990447461605, "learning_rate": 0.00013721973354470412, "loss": 0.0011, "num_input_tokens_seen": 206980416, "step": 95945 }, { "epoch": 15.65252854812398, "grad_norm": 0.00578713696449995, "learning_rate": 0.00013717075423172765, "loss": 0.1329, "num_input_tokens_seen": 206991264, "step": 95950 }, { "epoch": 15.653344208809136, "grad_norm": 0.08570286631584167, "learning_rate": 0.00013712178227194617, "loss": 0.0766, "num_input_tokens_seen": 207000640, "step": 95955 }, { "epoch": 15.65415986949429, "grad_norm": 0.23701781034469604, "learning_rate": 0.00013707281766635204, "loss": 0.0051, "num_input_tokens_seen": 207013056, "step": 95960 }, { "epoch": 15.654975530179446, "grad_norm": 0.006764712277799845, "learning_rate": 0.00013702386041593772, "loss": 0.0043, "num_input_tokens_seen": 207023776, "step": 95965 }, { "epoch": 15.655791190864601, "grad_norm": 0.059138741344213486, "learning_rate": 0.00013697491052169536, "loss": 0.0071, "num_input_tokens_seen": 207034336, "step": 95970 }, { "epoch": 15.656606851549755, "grad_norm": 0.002485127653926611, "learning_rate": 0.00013692596798461692, "loss": 0.0166, "num_input_tokens_seen": 207044608, "step": 95975 }, { "epoch": 15.65742251223491, "grad_norm": 0.006188517902046442, "learning_rate": 0.00013687703280569437, "loss": 0.0019, "num_input_tokens_seen": 207054464, "step": 95980 }, { "epoch": 15.658238172920065, "grad_norm": 0.0017350531416013837, "learning_rate": 0.0001368281049859194, "loss": 0.0067, "num_input_tokens_seen": 207065248, "step": 95985 }, { "epoch": 15.65905383360522, "grad_norm": 0.018194345757365227, "learning_rate": 0.0001367791845262834, "loss": 0.0854, "num_input_tokens_seen": 207075072, "step": 95990 }, { "epoch": 15.659869494290374, "grad_norm": 0.0034470162354409695, "learning_rate": 0.0001367302714277784, "loss": 0.0061, "num_input_tokens_seen": 207085312, "step": 95995 }, { "epoch": 15.66068515497553, "grad_norm": 0.002840275876224041, "learning_rate": 0.00013668136569139488, "loss": 0.0034, "num_input_tokens_seen": 207095808, "step": 96000 }, { "epoch": 15.661500815660686, "grad_norm": 1.662861704826355, "learning_rate": 0.00013663246731812463, "loss": 0.1123, "num_input_tokens_seen": 207105536, "step": 96005 }, { "epoch": 15.66231647634584, "grad_norm": 0.6768800616264343, "learning_rate": 0.00013658357630895834, "loss": 0.1115, "num_input_tokens_seen": 207114656, "step": 96010 }, { "epoch": 15.663132137030995, "grad_norm": 0.0028156812768429518, "learning_rate": 0.00013653469266488688, "loss": 0.0037, "num_input_tokens_seen": 207125280, "step": 96015 }, { "epoch": 15.66394779771615, "grad_norm": 0.02756207063794136, "learning_rate": 0.000136485816386901, "loss": 0.0147, "num_input_tokens_seen": 207136384, "step": 96020 }, { "epoch": 15.664763458401305, "grad_norm": 0.004363304004073143, "learning_rate": 0.00013643694747599123, "loss": 0.004, "num_input_tokens_seen": 207147648, "step": 96025 }, { "epoch": 15.66557911908646, "grad_norm": 0.004354171920567751, "learning_rate": 0.0001363880859331479, "loss": 0.0688, "num_input_tokens_seen": 207158560, "step": 96030 }, { "epoch": 15.666394779771615, "grad_norm": 0.49409517645835876, "learning_rate": 0.00013633923175936124, "loss": 0.0131, "num_input_tokens_seen": 207170048, "step": 96035 }, { "epoch": 15.66721044045677, "grad_norm": 0.026044761762022972, "learning_rate": 0.00013629038495562145, "loss": 0.0068, "num_input_tokens_seen": 207179456, "step": 96040 }, { "epoch": 15.668026101141924, "grad_norm": 0.016294756904244423, "learning_rate": 0.00013624154552291834, "loss": 0.0044, "num_input_tokens_seen": 207190496, "step": 96045 }, { "epoch": 15.66884176182708, "grad_norm": 0.001970576820895076, "learning_rate": 0.00013619271346224183, "loss": 0.1582, "num_input_tokens_seen": 207200320, "step": 96050 }, { "epoch": 15.669657422512234, "grad_norm": 0.07763614505529404, "learning_rate": 0.0001361438887745815, "loss": 0.125, "num_input_tokens_seen": 207211392, "step": 96055 }, { "epoch": 15.67047308319739, "grad_norm": 0.28726714849472046, "learning_rate": 0.0001360950714609268, "loss": 0.0056, "num_input_tokens_seen": 207222560, "step": 96060 }, { "epoch": 15.671288743882545, "grad_norm": 0.0055867829360067844, "learning_rate": 0.00013604626152226719, "loss": 0.0046, "num_input_tokens_seen": 207233184, "step": 96065 }, { "epoch": 15.6721044045677, "grad_norm": 0.22191260755062103, "learning_rate": 0.00013599745895959175, "loss": 0.0174, "num_input_tokens_seen": 207243680, "step": 96070 }, { "epoch": 15.672920065252855, "grad_norm": 0.31636282801628113, "learning_rate": 0.00013594866377388958, "loss": 0.0663, "num_input_tokens_seen": 207254208, "step": 96075 }, { "epoch": 15.673735725938009, "grad_norm": 0.023122340440750122, "learning_rate": 0.0001358998759661496, "loss": 0.0018, "num_input_tokens_seen": 207266016, "step": 96080 }, { "epoch": 15.674551386623165, "grad_norm": 0.03378022834658623, "learning_rate": 0.00013585109553736053, "loss": 0.0092, "num_input_tokens_seen": 207276416, "step": 96085 }, { "epoch": 15.67536704730832, "grad_norm": 0.012239009141921997, "learning_rate": 0.00013580232248851094, "loss": 0.005, "num_input_tokens_seen": 207286592, "step": 96090 }, { "epoch": 15.676182707993474, "grad_norm": 0.02011961117386818, "learning_rate": 0.00013575355682058932, "loss": 0.0035, "num_input_tokens_seen": 207297568, "step": 96095 }, { "epoch": 15.67699836867863, "grad_norm": 0.0059354305267333984, "learning_rate": 0.0001357047985345839, "loss": 0.0018, "num_input_tokens_seen": 207307616, "step": 96100 }, { "epoch": 15.677814029363784, "grad_norm": 0.010483992286026478, "learning_rate": 0.00013565604763148294, "loss": 0.0059, "num_input_tokens_seen": 207318592, "step": 96105 }, { "epoch": 15.67862969004894, "grad_norm": 0.04286354407668114, "learning_rate": 0.00013560730411227417, "loss": 0.0024, "num_input_tokens_seen": 207330816, "step": 96110 }, { "epoch": 15.679445350734095, "grad_norm": 0.008749879896640778, "learning_rate": 0.000135558567977946, "loss": 0.1119, "num_input_tokens_seen": 207341312, "step": 96115 }, { "epoch": 15.68026101141925, "grad_norm": 0.00973444152623415, "learning_rate": 0.00013550983922948546, "loss": 0.0025, "num_input_tokens_seen": 207352864, "step": 96120 }, { "epoch": 15.681076672104405, "grad_norm": 0.00944803562015295, "learning_rate": 0.00013546111786788073, "loss": 0.0013, "num_input_tokens_seen": 207363296, "step": 96125 }, { "epoch": 15.681892332789559, "grad_norm": 0.013702768832445145, "learning_rate": 0.00013541240389411857, "loss": 0.0493, "num_input_tokens_seen": 207373472, "step": 96130 }, { "epoch": 15.682707993474715, "grad_norm": 0.017592385411262512, "learning_rate": 0.00013536369730918668, "loss": 0.0026, "num_input_tokens_seen": 207383520, "step": 96135 }, { "epoch": 15.68352365415987, "grad_norm": 0.07516640424728394, "learning_rate": 0.00013531499811407212, "loss": 0.1292, "num_input_tokens_seen": 207394624, "step": 96140 }, { "epoch": 15.684339314845024, "grad_norm": 0.036877475678920746, "learning_rate": 0.00013526630630976172, "loss": 0.0052, "num_input_tokens_seen": 207407328, "step": 96145 }, { "epoch": 15.68515497553018, "grad_norm": 0.0028708872850984335, "learning_rate": 0.00013521762189724228, "loss": 0.003, "num_input_tokens_seen": 207418080, "step": 96150 }, { "epoch": 15.685970636215334, "grad_norm": 0.010629983618855476, "learning_rate": 0.00013516894487750053, "loss": 0.0116, "num_input_tokens_seen": 207429664, "step": 96155 }, { "epoch": 15.68678629690049, "grad_norm": 0.02227703481912613, "learning_rate": 0.00013512027525152293, "loss": 0.0081, "num_input_tokens_seen": 207439744, "step": 96160 }, { "epoch": 15.687601957585644, "grad_norm": 0.039497148245573044, "learning_rate": 0.00013507161302029586, "loss": 0.015, "num_input_tokens_seen": 207450016, "step": 96165 }, { "epoch": 15.6884176182708, "grad_norm": 0.020852066576480865, "learning_rate": 0.00013502295818480548, "loss": 0.0137, "num_input_tokens_seen": 207461568, "step": 96170 }, { "epoch": 15.689233278955955, "grad_norm": 0.03393710032105446, "learning_rate": 0.00013497431074603784, "loss": 0.0078, "num_input_tokens_seen": 207470912, "step": 96175 }, { "epoch": 15.690048939641109, "grad_norm": 0.08285146206617355, "learning_rate": 0.00013492567070497885, "loss": 0.0046, "num_input_tokens_seen": 207482752, "step": 96180 }, { "epoch": 15.690864600326265, "grad_norm": 0.018778374418616295, "learning_rate": 0.0001348770380626143, "loss": 0.0021, "num_input_tokens_seen": 207494464, "step": 96185 }, { "epoch": 15.691680261011419, "grad_norm": 0.011069892905652523, "learning_rate": 0.00013482841281992975, "loss": 0.0043, "num_input_tokens_seen": 207506016, "step": 96190 }, { "epoch": 15.692495921696574, "grad_norm": 0.003060832154005766, "learning_rate": 0.00013477979497791064, "loss": 0.0016, "num_input_tokens_seen": 207517280, "step": 96195 }, { "epoch": 15.69331158238173, "grad_norm": 0.00635279668495059, "learning_rate": 0.00013473118453754236, "loss": 0.0014, "num_input_tokens_seen": 207527712, "step": 96200 }, { "epoch": 15.694127243066884, "grad_norm": 0.021024027839303017, "learning_rate": 0.00013468258149981, "loss": 0.0377, "num_input_tokens_seen": 207537568, "step": 96205 }, { "epoch": 15.69494290375204, "grad_norm": 0.002454231260344386, "learning_rate": 0.00013463398586569854, "loss": 0.0018, "num_input_tokens_seen": 207548768, "step": 96210 }, { "epoch": 15.695758564437194, "grad_norm": 0.030305100604891777, "learning_rate": 0.00013458539763619272, "loss": 0.032, "num_input_tokens_seen": 207560480, "step": 96215 }, { "epoch": 15.69657422512235, "grad_norm": 0.009657066315412521, "learning_rate": 0.00013453681681227763, "loss": 0.0019, "num_input_tokens_seen": 207571680, "step": 96220 }, { "epoch": 15.697389885807503, "grad_norm": 0.1387793868780136, "learning_rate": 0.0001344882433949373, "loss": 0.0137, "num_input_tokens_seen": 207581280, "step": 96225 }, { "epoch": 15.698205546492659, "grad_norm": 0.05323566868901253, "learning_rate": 0.00013443967738515673, "loss": 0.02, "num_input_tokens_seen": 207592768, "step": 96230 }, { "epoch": 15.699021207177815, "grad_norm": 0.32300618290901184, "learning_rate": 0.00013439111878391953, "loss": 0.0206, "num_input_tokens_seen": 207602848, "step": 96235 }, { "epoch": 15.699836867862969, "grad_norm": 0.005715306848287582, "learning_rate": 0.00013434256759221037, "loss": 0.0012, "num_input_tokens_seen": 207612576, "step": 96240 }, { "epoch": 15.700652528548124, "grad_norm": 0.005758438725024462, "learning_rate": 0.00013429402381101268, "loss": 0.0039, "num_input_tokens_seen": 207622304, "step": 96245 }, { "epoch": 15.701468189233278, "grad_norm": 0.005090982653200626, "learning_rate": 0.00013424548744131088, "loss": 0.0023, "num_input_tokens_seen": 207633088, "step": 96250 }, { "epoch": 15.702283849918434, "grad_norm": 0.013610446825623512, "learning_rate": 0.00013419695848408792, "loss": 0.0062, "num_input_tokens_seen": 207643808, "step": 96255 }, { "epoch": 15.70309951060359, "grad_norm": 0.4166752099990845, "learning_rate": 0.00013414843694032792, "loss": 0.0258, "num_input_tokens_seen": 207653664, "step": 96260 }, { "epoch": 15.703915171288743, "grad_norm": 0.0034570132847875357, "learning_rate": 0.00013409992281101368, "loss": 0.014, "num_input_tokens_seen": 207664704, "step": 96265 }, { "epoch": 15.7047308319739, "grad_norm": 0.004271363373845816, "learning_rate": 0.000134051416097129, "loss": 0.0048, "num_input_tokens_seen": 207676352, "step": 96270 }, { "epoch": 15.705546492659053, "grad_norm": 0.1572641134262085, "learning_rate": 0.00013400291679965633, "loss": 0.0851, "num_input_tokens_seen": 207688128, "step": 96275 }, { "epoch": 15.706362153344209, "grad_norm": 0.01544899120926857, "learning_rate": 0.000133954424919579, "loss": 0.0026, "num_input_tokens_seen": 207699200, "step": 96280 }, { "epoch": 15.707177814029365, "grad_norm": 0.02416606806218624, "learning_rate": 0.00013390594045787957, "loss": 0.0027, "num_input_tokens_seen": 207709248, "step": 96285 }, { "epoch": 15.707993474714518, "grad_norm": 0.004076474346220493, "learning_rate": 0.00013385746341554067, "loss": 0.0053, "num_input_tokens_seen": 207720160, "step": 96290 }, { "epoch": 15.708809135399674, "grad_norm": 0.5935994386672974, "learning_rate": 0.0001338089937935448, "loss": 0.032, "num_input_tokens_seen": 207732160, "step": 96295 }, { "epoch": 15.709624796084828, "grad_norm": 0.008171262219548225, "learning_rate": 0.0001337605315928742, "loss": 0.0055, "num_input_tokens_seen": 207741920, "step": 96300 }, { "epoch": 15.710440456769984, "grad_norm": 0.004785776603966951, "learning_rate": 0.00013371207681451102, "loss": 0.0021, "num_input_tokens_seen": 207753152, "step": 96305 }, { "epoch": 15.71125611745514, "grad_norm": 0.0020426535047590733, "learning_rate": 0.00013366362945943733, "loss": 0.0027, "num_input_tokens_seen": 207764928, "step": 96310 }, { "epoch": 15.712071778140293, "grad_norm": 0.11794447898864746, "learning_rate": 0.00013361518952863488, "loss": 0.0117, "num_input_tokens_seen": 207775872, "step": 96315 }, { "epoch": 15.71288743882545, "grad_norm": 0.005704896058887243, "learning_rate": 0.00013356675702308541, "loss": 0.0032, "num_input_tokens_seen": 207786720, "step": 96320 }, { "epoch": 15.713703099510603, "grad_norm": 0.07015540450811386, "learning_rate": 0.00013351833194377044, "loss": 0.0982, "num_input_tokens_seen": 207797760, "step": 96325 }, { "epoch": 15.714518760195759, "grad_norm": 0.007592173758894205, "learning_rate": 0.00013346991429167128, "loss": 0.0026, "num_input_tokens_seen": 207809280, "step": 96330 }, { "epoch": 15.715334420880914, "grad_norm": 0.007777614053338766, "learning_rate": 0.00013342150406776953, "loss": 0.0014, "num_input_tokens_seen": 207820832, "step": 96335 }, { "epoch": 15.716150081566068, "grad_norm": 1.1334835290908813, "learning_rate": 0.00013337310127304575, "loss": 0.096, "num_input_tokens_seen": 207831584, "step": 96340 }, { "epoch": 15.716965742251224, "grad_norm": 0.2663789987564087, "learning_rate": 0.0001333247059084815, "loss": 0.0097, "num_input_tokens_seen": 207843520, "step": 96345 }, { "epoch": 15.717781402936378, "grad_norm": 0.23745083808898926, "learning_rate": 0.00013327631797505697, "loss": 0.0104, "num_input_tokens_seen": 207854176, "step": 96350 }, { "epoch": 15.718597063621534, "grad_norm": 0.003714781953021884, "learning_rate": 0.00013322793747375333, "loss": 0.0037, "num_input_tokens_seen": 207865248, "step": 96355 }, { "epoch": 15.719412724306688, "grad_norm": 0.018075041472911835, "learning_rate": 0.00013317956440555051, "loss": 0.1312, "num_input_tokens_seen": 207875008, "step": 96360 }, { "epoch": 15.720228384991843, "grad_norm": 0.009823007509112358, "learning_rate": 0.00013313119877142947, "loss": 0.0269, "num_input_tokens_seen": 207884480, "step": 96365 }, { "epoch": 15.721044045676999, "grad_norm": 0.053147438913583755, "learning_rate": 0.00013308284057236984, "loss": 0.0047, "num_input_tokens_seen": 207894752, "step": 96370 }, { "epoch": 15.721859706362153, "grad_norm": 0.05107883736491203, "learning_rate": 0.00013303448980935218, "loss": 0.007, "num_input_tokens_seen": 207905440, "step": 96375 }, { "epoch": 15.722675367047309, "grad_norm": 0.006013005506247282, "learning_rate": 0.00013298614648335583, "loss": 0.0022, "num_input_tokens_seen": 207916032, "step": 96380 }, { "epoch": 15.723491027732463, "grad_norm": 0.183906689286232, "learning_rate": 0.0001329378105953611, "loss": 0.0059, "num_input_tokens_seen": 207924864, "step": 96385 }, { "epoch": 15.724306688417618, "grad_norm": 0.004951221868395805, "learning_rate": 0.00013288948214634698, "loss": 0.0653, "num_input_tokens_seen": 207936128, "step": 96390 }, { "epoch": 15.725122349102774, "grad_norm": 0.031489670276641846, "learning_rate": 0.00013284116113729356, "loss": 0.005, "num_input_tokens_seen": 207946624, "step": 96395 }, { "epoch": 15.725938009787928, "grad_norm": 0.025610938668251038, "learning_rate": 0.00013279284756917943, "loss": 0.0048, "num_input_tokens_seen": 207958464, "step": 96400 }, { "epoch": 15.726753670473084, "grad_norm": 0.001836855080910027, "learning_rate": 0.00013274454144298438, "loss": 0.0035, "num_input_tokens_seen": 207967648, "step": 96405 }, { "epoch": 15.727569331158238, "grad_norm": 0.005809025373309851, "learning_rate": 0.00013269624275968683, "loss": 0.0043, "num_input_tokens_seen": 207978240, "step": 96410 }, { "epoch": 15.728384991843393, "grad_norm": 1.105023980140686, "learning_rate": 0.00013264795152026615, "loss": 0.0999, "num_input_tokens_seen": 207989856, "step": 96415 }, { "epoch": 15.729200652528547, "grad_norm": 0.005033268127590418, "learning_rate": 0.00013259966772570048, "loss": 0.0784, "num_input_tokens_seen": 207998592, "step": 96420 }, { "epoch": 15.730016313213703, "grad_norm": 0.016424372792243958, "learning_rate": 0.00013255139137696874, "loss": 0.0016, "num_input_tokens_seen": 208008928, "step": 96425 }, { "epoch": 15.730831973898859, "grad_norm": 0.0566287524998188, "learning_rate": 0.0001325031224750492, "loss": 0.0048, "num_input_tokens_seen": 208020032, "step": 96430 }, { "epoch": 15.731647634584013, "grad_norm": 0.28063589334487915, "learning_rate": 0.0001324548610209201, "loss": 0.0087, "num_input_tokens_seen": 208031328, "step": 96435 }, { "epoch": 15.732463295269168, "grad_norm": 0.06952225416898727, "learning_rate": 0.00013240660701555951, "loss": 0.0414, "num_input_tokens_seen": 208042656, "step": 96440 }, { "epoch": 15.733278955954322, "grad_norm": 0.01839137077331543, "learning_rate": 0.00013235836045994532, "loss": 0.007, "num_input_tokens_seen": 208052896, "step": 96445 }, { "epoch": 15.734094616639478, "grad_norm": 0.024102121591567993, "learning_rate": 0.00013231012135505538, "loss": 0.0265, "num_input_tokens_seen": 208063520, "step": 96450 }, { "epoch": 15.734910277324634, "grad_norm": 0.04609030485153198, "learning_rate": 0.00013226188970186725, "loss": 0.0056, "num_input_tokens_seen": 208075296, "step": 96455 }, { "epoch": 15.735725938009788, "grad_norm": 0.022327857092022896, "learning_rate": 0.0001322136655013585, "loss": 0.0029, "num_input_tokens_seen": 208084704, "step": 96460 }, { "epoch": 15.736541598694943, "grad_norm": 1.0370302200317383, "learning_rate": 0.00013216544875450633, "loss": 0.056, "num_input_tokens_seen": 208096256, "step": 96465 }, { "epoch": 15.737357259380097, "grad_norm": 0.0039937132969498634, "learning_rate": 0.00013211723946228798, "loss": 0.0015, "num_input_tokens_seen": 208106656, "step": 96470 }, { "epoch": 15.738172920065253, "grad_norm": 0.03214043006300926, "learning_rate": 0.00013206903762568028, "loss": 0.0037, "num_input_tokens_seen": 208117856, "step": 96475 }, { "epoch": 15.738988580750409, "grad_norm": 0.5465738773345947, "learning_rate": 0.00013202084324566066, "loss": 0.0202, "num_input_tokens_seen": 208129984, "step": 96480 }, { "epoch": 15.739804241435563, "grad_norm": 0.09564939886331558, "learning_rate": 0.0001319726563232051, "loss": 0.0041, "num_input_tokens_seen": 208140672, "step": 96485 }, { "epoch": 15.740619902120718, "grad_norm": 0.002084150677546859, "learning_rate": 0.00013192447685929088, "loss": 0.0556, "num_input_tokens_seen": 208151840, "step": 96490 }, { "epoch": 15.741435562805872, "grad_norm": 0.04926175996661186, "learning_rate": 0.00013187630485489378, "loss": 0.0048, "num_input_tokens_seen": 208163136, "step": 96495 }, { "epoch": 15.742251223491028, "grad_norm": 0.003923801239579916, "learning_rate": 0.0001318281403109906, "loss": 0.0055, "num_input_tokens_seen": 208173344, "step": 96500 }, { "epoch": 15.743066884176184, "grad_norm": 0.19115930795669556, "learning_rate": 0.00013177998322855695, "loss": 0.1396, "num_input_tokens_seen": 208184800, "step": 96505 }, { "epoch": 15.743882544861338, "grad_norm": 0.07015623152256012, "learning_rate": 0.00013173183360856938, "loss": 0.0037, "num_input_tokens_seen": 208196064, "step": 96510 }, { "epoch": 15.744698205546493, "grad_norm": 0.06715485453605652, "learning_rate": 0.00013168369145200303, "loss": 0.0037, "num_input_tokens_seen": 208206432, "step": 96515 }, { "epoch": 15.745513866231647, "grad_norm": 0.02284327708184719, "learning_rate": 0.0001316355567598343, "loss": 0.0026, "num_input_tokens_seen": 208217888, "step": 96520 }, { "epoch": 15.746329526916803, "grad_norm": 0.013283081352710724, "learning_rate": 0.00013158742953303792, "loss": 0.0824, "num_input_tokens_seen": 208229696, "step": 96525 }, { "epoch": 15.747145187601957, "grad_norm": 0.01185830868780613, "learning_rate": 0.00013153930977258987, "loss": 0.0045, "num_input_tokens_seen": 208238784, "step": 96530 }, { "epoch": 15.747960848287113, "grad_norm": 0.0028059978503733873, "learning_rate": 0.0001314911974794651, "loss": 0.0416, "num_input_tokens_seen": 208249472, "step": 96535 }, { "epoch": 15.748776508972268, "grad_norm": 0.046009503304958344, "learning_rate": 0.00013144309265463873, "loss": 0.0452, "num_input_tokens_seen": 208259424, "step": 96540 }, { "epoch": 15.749592169657422, "grad_norm": 1.3836567401885986, "learning_rate": 0.00013139499529908562, "loss": 0.0734, "num_input_tokens_seen": 208270816, "step": 96545 }, { "epoch": 15.750407830342578, "grad_norm": 0.0923125371336937, "learning_rate": 0.00013134690541378053, "loss": 0.0228, "num_input_tokens_seen": 208281408, "step": 96550 }, { "epoch": 15.751223491027732, "grad_norm": 0.020593542605638504, "learning_rate": 0.00013129882299969803, "loss": 0.1001, "num_input_tokens_seen": 208291392, "step": 96555 }, { "epoch": 15.752039151712887, "grad_norm": 0.036182329058647156, "learning_rate": 0.00013125074805781268, "loss": 0.0097, "num_input_tokens_seen": 208303520, "step": 96560 }, { "epoch": 15.752854812398043, "grad_norm": 0.0177278034389019, "learning_rate": 0.0001312026805890987, "loss": 0.0105, "num_input_tokens_seen": 208315360, "step": 96565 }, { "epoch": 15.753670473083197, "grad_norm": 0.002483449410647154, "learning_rate": 0.00013115462059453022, "loss": 0.0042, "num_input_tokens_seen": 208326464, "step": 96570 }, { "epoch": 15.754486133768353, "grad_norm": 0.026654629036784172, "learning_rate": 0.00013110656807508125, "loss": 0.0041, "num_input_tokens_seen": 208337792, "step": 96575 }, { "epoch": 15.755301794453507, "grad_norm": 0.03444868326187134, "learning_rate": 0.0001310585230317257, "loss": 0.0128, "num_input_tokens_seen": 208348672, "step": 96580 }, { "epoch": 15.756117455138662, "grad_norm": 0.004291105549782515, "learning_rate": 0.0001310104854654372, "loss": 0.0182, "num_input_tokens_seen": 208360704, "step": 96585 }, { "epoch": 15.756933115823816, "grad_norm": 0.12416823208332062, "learning_rate": 0.0001309624553771893, "loss": 0.0078, "num_input_tokens_seen": 208370752, "step": 96590 }, { "epoch": 15.757748776508972, "grad_norm": 0.00854440126568079, "learning_rate": 0.00013091443276795544, "loss": 0.0025, "num_input_tokens_seen": 208382976, "step": 96595 }, { "epoch": 15.758564437194128, "grad_norm": 0.0758536234498024, "learning_rate": 0.00013086641763870876, "loss": 0.0124, "num_input_tokens_seen": 208394496, "step": 96600 }, { "epoch": 15.759380097879282, "grad_norm": 0.0860213190317154, "learning_rate": 0.00013081840999042244, "loss": 0.0046, "num_input_tokens_seen": 208405856, "step": 96605 }, { "epoch": 15.760195758564437, "grad_norm": 0.006144104525446892, "learning_rate": 0.0001307704098240694, "loss": 0.0179, "num_input_tokens_seen": 208415520, "step": 96610 }, { "epoch": 15.761011419249591, "grad_norm": 0.29862090945243835, "learning_rate": 0.0001307224171406224, "loss": 0.0132, "num_input_tokens_seen": 208424448, "step": 96615 }, { "epoch": 15.761827079934747, "grad_norm": 0.05420632287859917, "learning_rate": 0.0001306744319410539, "loss": 0.0076, "num_input_tokens_seen": 208435552, "step": 96620 }, { "epoch": 15.762642740619903, "grad_norm": 0.010102360509335995, "learning_rate": 0.00013062645422633683, "loss": 0.0038, "num_input_tokens_seen": 208445376, "step": 96625 }, { "epoch": 15.763458401305057, "grad_norm": 0.1289862096309662, "learning_rate": 0.000130578483997443, "loss": 0.0041, "num_input_tokens_seen": 208455680, "step": 96630 }, { "epoch": 15.764274061990212, "grad_norm": 0.005380489397794008, "learning_rate": 0.00013053052125534497, "loss": 0.0042, "num_input_tokens_seen": 208465280, "step": 96635 }, { "epoch": 15.765089722675366, "grad_norm": 0.8622177839279175, "learning_rate": 0.00013048256600101465, "loss": 0.1143, "num_input_tokens_seen": 208476768, "step": 96640 }, { "epoch": 15.765905383360522, "grad_norm": 0.2440263032913208, "learning_rate": 0.00013043461823542387, "loss": 0.0061, "num_input_tokens_seen": 208487552, "step": 96645 }, { "epoch": 15.766721044045678, "grad_norm": 0.006261552218347788, "learning_rate": 0.0001303866779595444, "loss": 0.0046, "num_input_tokens_seen": 208499200, "step": 96650 }, { "epoch": 15.767536704730832, "grad_norm": 0.05640345811843872, "learning_rate": 0.0001303387451743478, "loss": 0.0185, "num_input_tokens_seen": 208510400, "step": 96655 }, { "epoch": 15.768352365415987, "grad_norm": 0.017750395461916924, "learning_rate": 0.00013029081988080545, "loss": 0.0023, "num_input_tokens_seen": 208521984, "step": 96660 }, { "epoch": 15.769168026101141, "grad_norm": 0.01157664880156517, "learning_rate": 0.00013024290207988866, "loss": 0.1625, "num_input_tokens_seen": 208532736, "step": 96665 }, { "epoch": 15.769983686786297, "grad_norm": 0.030493445694446564, "learning_rate": 0.00013019499177256848, "loss": 0.0022, "num_input_tokens_seen": 208543744, "step": 96670 }, { "epoch": 15.770799347471453, "grad_norm": 0.02414344623684883, "learning_rate": 0.00013014708895981597, "loss": 0.0096, "num_input_tokens_seen": 208554848, "step": 96675 }, { "epoch": 15.771615008156607, "grad_norm": 0.010344515554606915, "learning_rate": 0.00013009919364260193, "loss": 0.0034, "num_input_tokens_seen": 208565952, "step": 96680 }, { "epoch": 15.772430668841762, "grad_norm": 0.12728098034858704, "learning_rate": 0.0001300513058218969, "loss": 0.0036, "num_input_tokens_seen": 208576896, "step": 96685 }, { "epoch": 15.773246329526916, "grad_norm": 0.11622268706560135, "learning_rate": 0.0001300034254986715, "loss": 0.0071, "num_input_tokens_seen": 208587008, "step": 96690 }, { "epoch": 15.774061990212072, "grad_norm": 0.03894001245498657, "learning_rate": 0.00012995555267389608, "loss": 0.0094, "num_input_tokens_seen": 208598272, "step": 96695 }, { "epoch": 15.774877650897226, "grad_norm": 0.01585175096988678, "learning_rate": 0.0001299076873485408, "loss": 0.0058, "num_input_tokens_seen": 208609216, "step": 96700 }, { "epoch": 15.775693311582382, "grad_norm": 0.004255862440913916, "learning_rate": 0.00012985982952357577, "loss": 0.0011, "num_input_tokens_seen": 208620448, "step": 96705 }, { "epoch": 15.776508972267537, "grad_norm": 0.07645806670188904, "learning_rate": 0.00012981197919997078, "loss": 0.0053, "num_input_tokens_seen": 208631104, "step": 96710 }, { "epoch": 15.777324632952691, "grad_norm": 0.004870134871453047, "learning_rate": 0.00012976413637869573, "loss": 0.0142, "num_input_tokens_seen": 208641824, "step": 96715 }, { "epoch": 15.778140293637847, "grad_norm": 0.05121361464262009, "learning_rate": 0.00012971630106072007, "loss": 0.008, "num_input_tokens_seen": 208651968, "step": 96720 }, { "epoch": 15.778955954323001, "grad_norm": 0.005336792673915625, "learning_rate": 0.00012966847324701337, "loss": 0.0019, "num_input_tokens_seen": 208661152, "step": 96725 }, { "epoch": 15.779771615008157, "grad_norm": 0.010330567136406898, "learning_rate": 0.0001296206529385448, "loss": 0.0633, "num_input_tokens_seen": 208671392, "step": 96730 }, { "epoch": 15.780587275693312, "grad_norm": 0.9588799476623535, "learning_rate": 0.00012957284013628357, "loss": 0.0663, "num_input_tokens_seen": 208680832, "step": 96735 }, { "epoch": 15.781402936378466, "grad_norm": 0.009097116068005562, "learning_rate": 0.00012952503484119866, "loss": 0.0083, "num_input_tokens_seen": 208691904, "step": 96740 }, { "epoch": 15.782218597063622, "grad_norm": 0.3552965521812439, "learning_rate": 0.0001294772370542589, "loss": 0.0175, "num_input_tokens_seen": 208702944, "step": 96745 }, { "epoch": 15.783034257748776, "grad_norm": 0.06464696675539017, "learning_rate": 0.00012942944677643282, "loss": 0.0178, "num_input_tokens_seen": 208713600, "step": 96750 }, { "epoch": 15.783849918433932, "grad_norm": 0.007371923886239529, "learning_rate": 0.0001293816640086894, "loss": 0.0157, "num_input_tokens_seen": 208725504, "step": 96755 }, { "epoch": 15.784665579119086, "grad_norm": 0.04986845701932907, "learning_rate": 0.00012933388875199643, "loss": 0.0033, "num_input_tokens_seen": 208736256, "step": 96760 }, { "epoch": 15.785481239804241, "grad_norm": 1.3820956945419312, "learning_rate": 0.00012928612100732257, "loss": 0.0242, "num_input_tokens_seen": 208747968, "step": 96765 }, { "epoch": 15.786296900489397, "grad_norm": 0.013739545829594135, "learning_rate": 0.00012923836077563576, "loss": 0.0047, "num_input_tokens_seen": 208759072, "step": 96770 }, { "epoch": 15.78711256117455, "grad_norm": 0.017670713365077972, "learning_rate": 0.0001291906080579039, "loss": 0.0707, "num_input_tokens_seen": 208769472, "step": 96775 }, { "epoch": 15.787928221859707, "grad_norm": 0.025673670694231987, "learning_rate": 0.0001291428628550948, "loss": 0.0056, "num_input_tokens_seen": 208780608, "step": 96780 }, { "epoch": 15.78874388254486, "grad_norm": 0.004640126135200262, "learning_rate": 0.000129095125168176, "loss": 0.0055, "num_input_tokens_seen": 208792608, "step": 96785 }, { "epoch": 15.789559543230016, "grad_norm": 0.004782696720212698, "learning_rate": 0.00012904739499811508, "loss": 0.0719, "num_input_tokens_seen": 208803680, "step": 96790 }, { "epoch": 15.790375203915172, "grad_norm": 0.3421061336994171, "learning_rate": 0.00012899967234587922, "loss": 0.0304, "num_input_tokens_seen": 208815712, "step": 96795 }, { "epoch": 15.791190864600326, "grad_norm": 0.025700274854898453, "learning_rate": 0.00012895195721243568, "loss": 0.0122, "num_input_tokens_seen": 208827552, "step": 96800 }, { "epoch": 15.792006525285482, "grad_norm": 0.005519390571862459, "learning_rate": 0.00012890424959875147, "loss": 0.048, "num_input_tokens_seen": 208838208, "step": 96805 }, { "epoch": 15.792822185970635, "grad_norm": 0.04856310039758682, "learning_rate": 0.0001288565495057934, "loss": 0.0105, "num_input_tokens_seen": 208849504, "step": 96810 }, { "epoch": 15.793637846655791, "grad_norm": 0.03747653588652611, "learning_rate": 0.00012880885693452814, "loss": 0.0019, "num_input_tokens_seen": 208861152, "step": 96815 }, { "epoch": 15.794453507340947, "grad_norm": 0.003426379757001996, "learning_rate": 0.0001287611718859223, "loss": 0.0023, "num_input_tokens_seen": 208872800, "step": 96820 }, { "epoch": 15.7952691680261, "grad_norm": 0.0056681083515286446, "learning_rate": 0.00012871349436094226, "loss": 0.0031, "num_input_tokens_seen": 208882048, "step": 96825 }, { "epoch": 15.796084828711257, "grad_norm": 0.00908627174794674, "learning_rate": 0.0001286658243605543, "loss": 0.0084, "num_input_tokens_seen": 208892992, "step": 96830 }, { "epoch": 15.79690048939641, "grad_norm": 0.07815408706665039, "learning_rate": 0.00012861816188572444, "loss": 0.0761, "num_input_tokens_seen": 208903840, "step": 96835 }, { "epoch": 15.797716150081566, "grad_norm": 0.03831071779131889, "learning_rate": 0.00012857050693741866, "loss": 0.0042, "num_input_tokens_seen": 208915200, "step": 96840 }, { "epoch": 15.798531810766722, "grad_norm": 0.029482483863830566, "learning_rate": 0.00012852285951660275, "loss": 0.0072, "num_input_tokens_seen": 208926144, "step": 96845 }, { "epoch": 15.799347471451876, "grad_norm": 0.005080622620880604, "learning_rate": 0.00012847521962424237, "loss": 0.0034, "num_input_tokens_seen": 208936864, "step": 96850 }, { "epoch": 15.800163132137031, "grad_norm": 0.0042381929233670235, "learning_rate": 0.00012842758726130281, "loss": 0.003, "num_input_tokens_seen": 208948320, "step": 96855 }, { "epoch": 15.800978792822185, "grad_norm": 2.045799732208252, "learning_rate": 0.0001283799624287499, "loss": 0.1059, "num_input_tokens_seen": 208958976, "step": 96860 }, { "epoch": 15.801794453507341, "grad_norm": 0.020309513434767723, "learning_rate": 0.00012833234512754817, "loss": 0.0034, "num_input_tokens_seen": 208970624, "step": 96865 }, { "epoch": 15.802610114192497, "grad_norm": 0.0032519439700990915, "learning_rate": 0.0001282847353586632, "loss": 0.0029, "num_input_tokens_seen": 208981344, "step": 96870 }, { "epoch": 15.80342577487765, "grad_norm": 0.0018996887374669313, "learning_rate": 0.0001282371331230594, "loss": 0.0565, "num_input_tokens_seen": 208993280, "step": 96875 }, { "epoch": 15.804241435562806, "grad_norm": 0.011625824496150017, "learning_rate": 0.00012818953842170193, "loss": 0.0285, "num_input_tokens_seen": 209002720, "step": 96880 }, { "epoch": 15.80505709624796, "grad_norm": 0.017657577991485596, "learning_rate": 0.0001281419512555549, "loss": 0.0189, "num_input_tokens_seen": 209013568, "step": 96885 }, { "epoch": 15.805872756933116, "grad_norm": 0.16607539355754852, "learning_rate": 0.00012809437162558324, "loss": 0.0106, "num_input_tokens_seen": 209024256, "step": 96890 }, { "epoch": 15.80668841761827, "grad_norm": 0.20681478083133698, "learning_rate": 0.00012804679953275068, "loss": 0.0187, "num_input_tokens_seen": 209035552, "step": 96895 }, { "epoch": 15.807504078303426, "grad_norm": 0.053800273686647415, "learning_rate": 0.00012799923497802185, "loss": 0.0073, "num_input_tokens_seen": 209047200, "step": 96900 }, { "epoch": 15.808319738988581, "grad_norm": 0.012709928676486015, "learning_rate": 0.00012795167796236012, "loss": 0.0191, "num_input_tokens_seen": 209056352, "step": 96905 }, { "epoch": 15.809135399673735, "grad_norm": 0.003476071171462536, "learning_rate": 0.00012790412848672977, "loss": 0.0057, "num_input_tokens_seen": 209068416, "step": 96910 }, { "epoch": 15.809951060358891, "grad_norm": 0.01998192071914673, "learning_rate": 0.0001278565865520943, "loss": 0.1314, "num_input_tokens_seen": 209078720, "step": 96915 }, { "epoch": 15.810766721044045, "grad_norm": 0.006971122231334448, "learning_rate": 0.00012780905215941724, "loss": 0.0042, "num_input_tokens_seen": 209089120, "step": 96920 }, { "epoch": 15.8115823817292, "grad_norm": 0.025958050042390823, "learning_rate": 0.00012776152530966184, "loss": 0.0465, "num_input_tokens_seen": 209099968, "step": 96925 }, { "epoch": 15.812398042414356, "grad_norm": 0.1817023605108261, "learning_rate": 0.0001277140060037914, "loss": 0.0117, "num_input_tokens_seen": 209110784, "step": 96930 }, { "epoch": 15.81321370309951, "grad_norm": 0.0055557494051754475, "learning_rate": 0.00012766649424276888, "loss": 0.0068, "num_input_tokens_seen": 209121536, "step": 96935 }, { "epoch": 15.814029363784666, "grad_norm": 0.0026944964192807674, "learning_rate": 0.00012761899002755716, "loss": 0.0032, "num_input_tokens_seen": 209131136, "step": 96940 }, { "epoch": 15.81484502446982, "grad_norm": 0.14713174104690552, "learning_rate": 0.00012757149335911906, "loss": 0.0603, "num_input_tokens_seen": 209142240, "step": 96945 }, { "epoch": 15.815660685154976, "grad_norm": 0.01389075256884098, "learning_rate": 0.00012752400423841708, "loss": 0.0093, "num_input_tokens_seen": 209152704, "step": 96950 }, { "epoch": 15.81647634584013, "grad_norm": 0.12502925097942352, "learning_rate": 0.0001274765226664137, "loss": 0.032, "num_input_tokens_seen": 209162688, "step": 96955 }, { "epoch": 15.817292006525285, "grad_norm": 0.014377755112946033, "learning_rate": 0.00012742904864407095, "loss": 0.0022, "num_input_tokens_seen": 209174112, "step": 96960 }, { "epoch": 15.818107667210441, "grad_norm": 0.004015770740807056, "learning_rate": 0.0001273815821723515, "loss": 0.0029, "num_input_tokens_seen": 209184608, "step": 96965 }, { "epoch": 15.818923327895595, "grad_norm": 0.0704021230340004, "learning_rate": 0.00012733412325221673, "loss": 0.0035, "num_input_tokens_seen": 209195392, "step": 96970 }, { "epoch": 15.81973898858075, "grad_norm": 0.023486455902457237, "learning_rate": 0.00012728667188462893, "loss": 0.0023, "num_input_tokens_seen": 209205376, "step": 96975 }, { "epoch": 15.820554649265905, "grad_norm": 0.06093943119049072, "learning_rate": 0.00012723922807054934, "loss": 0.1324, "num_input_tokens_seen": 209215424, "step": 96980 }, { "epoch": 15.82137030995106, "grad_norm": 0.0031446374487131834, "learning_rate": 0.00012719179181093992, "loss": 0.1274, "num_input_tokens_seen": 209227008, "step": 96985 }, { "epoch": 15.822185970636216, "grad_norm": 0.03403894975781441, "learning_rate": 0.00012714436310676147, "loss": 0.012, "num_input_tokens_seen": 209236896, "step": 96990 }, { "epoch": 15.82300163132137, "grad_norm": 0.004726029001176357, "learning_rate": 0.00012709694195897587, "loss": 0.0359, "num_input_tokens_seen": 209247360, "step": 96995 }, { "epoch": 15.823817292006526, "grad_norm": 0.01124323345720768, "learning_rate": 0.00012704952836854345, "loss": 0.0057, "num_input_tokens_seen": 209257856, "step": 97000 }, { "epoch": 15.82463295269168, "grad_norm": 0.0033708312548696995, "learning_rate": 0.00012700212233642577, "loss": 0.0028, "num_input_tokens_seen": 209267872, "step": 97005 }, { "epoch": 15.825448613376835, "grad_norm": 0.0017069539753720164, "learning_rate": 0.00012695472386358293, "loss": 0.0028, "num_input_tokens_seen": 209278592, "step": 97010 }, { "epoch": 15.826264274061991, "grad_norm": 0.0476064458489418, "learning_rate": 0.00012690733295097617, "loss": 0.0034, "num_input_tokens_seen": 209289056, "step": 97015 }, { "epoch": 15.827079934747145, "grad_norm": 0.028568781912326813, "learning_rate": 0.00012685994959956532, "loss": 0.0089, "num_input_tokens_seen": 209299872, "step": 97020 }, { "epoch": 15.8278955954323, "grad_norm": 0.004579820670187473, "learning_rate": 0.00012681257381031124, "loss": 0.0856, "num_input_tokens_seen": 209310528, "step": 97025 }, { "epoch": 15.828711256117455, "grad_norm": 0.04857729747891426, "learning_rate": 0.00012676520558417347, "loss": 0.004, "num_input_tokens_seen": 209322304, "step": 97030 }, { "epoch": 15.82952691680261, "grad_norm": 0.42051494121551514, "learning_rate": 0.00012671784492211262, "loss": 0.012, "num_input_tokens_seen": 209333888, "step": 97035 }, { "epoch": 15.830342577487766, "grad_norm": 0.03346581384539604, "learning_rate": 0.00012667049182508788, "loss": 0.1237, "num_input_tokens_seen": 209343360, "step": 97040 }, { "epoch": 15.83115823817292, "grad_norm": 0.06645258516073227, "learning_rate": 0.00012662314629405936, "loss": 0.0036, "num_input_tokens_seen": 209354368, "step": 97045 }, { "epoch": 15.831973898858076, "grad_norm": 0.011081513948738575, "learning_rate": 0.00012657580832998644, "loss": 0.0052, "num_input_tokens_seen": 209364384, "step": 97050 }, { "epoch": 15.83278955954323, "grad_norm": 0.5147018432617188, "learning_rate": 0.0001265284779338285, "loss": 0.0389, "num_input_tokens_seen": 209374080, "step": 97055 }, { "epoch": 15.833605220228385, "grad_norm": 0.04169975966215134, "learning_rate": 0.00012648115510654473, "loss": 0.1404, "num_input_tokens_seen": 209385088, "step": 97060 }, { "epoch": 15.83442088091354, "grad_norm": 0.04843230918049812, "learning_rate": 0.00012643383984909423, "loss": 0.0067, "num_input_tokens_seen": 209395808, "step": 97065 }, { "epoch": 15.835236541598695, "grad_norm": 0.0028321431018412113, "learning_rate": 0.0001263865321624358, "loss": 0.0395, "num_input_tokens_seen": 209407936, "step": 97070 }, { "epoch": 15.83605220228385, "grad_norm": 0.010693809948861599, "learning_rate": 0.0001263392320475283, "loss": 0.0417, "num_input_tokens_seen": 209418944, "step": 97075 }, { "epoch": 15.836867862969005, "grad_norm": 0.016398241743445396, "learning_rate": 0.0001262919395053303, "loss": 0.0037, "num_input_tokens_seen": 209429024, "step": 97080 }, { "epoch": 15.83768352365416, "grad_norm": 0.006453553680330515, "learning_rate": 0.0001262446545368002, "loss": 0.0106, "num_input_tokens_seen": 209440416, "step": 97085 }, { "epoch": 15.838499184339314, "grad_norm": 0.03695518523454666, "learning_rate": 0.0001261973771428963, "loss": 0.0039, "num_input_tokens_seen": 209452416, "step": 97090 }, { "epoch": 15.83931484502447, "grad_norm": 0.13572728633880615, "learning_rate": 0.00012615010732457677, "loss": 0.0109, "num_input_tokens_seen": 209464352, "step": 97095 }, { "epoch": 15.840130505709626, "grad_norm": 0.004834496416151524, "learning_rate": 0.00012610284508279956, "loss": 0.124, "num_input_tokens_seen": 209475200, "step": 97100 }, { "epoch": 15.84094616639478, "grad_norm": 0.0015597307356074452, "learning_rate": 0.00012605559041852245, "loss": 0.033, "num_input_tokens_seen": 209485920, "step": 97105 }, { "epoch": 15.841761827079935, "grad_norm": 0.006651945877820253, "learning_rate": 0.0001260083433327034, "loss": 0.0059, "num_input_tokens_seen": 209497504, "step": 97110 }, { "epoch": 15.84257748776509, "grad_norm": 0.008571265265345573, "learning_rate": 0.00012596110382629943, "loss": 0.0022, "num_input_tokens_seen": 209508256, "step": 97115 }, { "epoch": 15.843393148450245, "grad_norm": 0.0227603018283844, "learning_rate": 0.0001259138719002685, "loss": 0.1314, "num_input_tokens_seen": 209517472, "step": 97120 }, { "epoch": 15.844208809135399, "grad_norm": 0.0076928939670324326, "learning_rate": 0.0001258666475555672, "loss": 0.0101, "num_input_tokens_seen": 209530176, "step": 97125 }, { "epoch": 15.845024469820554, "grad_norm": 0.0635632798075676, "learning_rate": 0.00012581943079315323, "loss": 0.0055, "num_input_tokens_seen": 209539968, "step": 97130 }, { "epoch": 15.84584013050571, "grad_norm": 1.1265257596969604, "learning_rate": 0.00012577222161398288, "loss": 0.1015, "num_input_tokens_seen": 209550336, "step": 97135 }, { "epoch": 15.846655791190864, "grad_norm": 0.0043081240728497505, "learning_rate": 0.00012572502001901347, "loss": 0.0116, "num_input_tokens_seen": 209560896, "step": 97140 }, { "epoch": 15.84747145187602, "grad_norm": 0.007782109547406435, "learning_rate": 0.00012567782600920107, "loss": 0.0102, "num_input_tokens_seen": 209572288, "step": 97145 }, { "epoch": 15.848287112561174, "grad_norm": 0.006718491669744253, "learning_rate": 0.0001256306395855027, "loss": 0.0222, "num_input_tokens_seen": 209583008, "step": 97150 }, { "epoch": 15.84910277324633, "grad_norm": 1.5292181968688965, "learning_rate": 0.000125583460748874, "loss": 0.0318, "num_input_tokens_seen": 209593088, "step": 97155 }, { "epoch": 15.849918433931485, "grad_norm": 0.034812018275260925, "learning_rate": 0.00012553628950027175, "loss": 0.0152, "num_input_tokens_seen": 209602560, "step": 97160 }, { "epoch": 15.850734094616639, "grad_norm": 0.11004146188497543, "learning_rate": 0.00012548912584065135, "loss": 0.0059, "num_input_tokens_seen": 209612928, "step": 97165 }, { "epoch": 15.851549755301795, "grad_norm": 0.08165767788887024, "learning_rate": 0.00012544196977096905, "loss": 0.0035, "num_input_tokens_seen": 209623424, "step": 97170 }, { "epoch": 15.852365415986949, "grad_norm": 0.009296312928199768, "learning_rate": 0.00012539482129218045, "loss": 0.0034, "num_input_tokens_seen": 209633536, "step": 97175 }, { "epoch": 15.853181076672104, "grad_norm": 0.4064804017543793, "learning_rate": 0.00012534768040524098, "loss": 0.0133, "num_input_tokens_seen": 209645184, "step": 97180 }, { "epoch": 15.85399673735726, "grad_norm": 0.02962566912174225, "learning_rate": 0.000125300547111106, "loss": 0.0038, "num_input_tokens_seen": 209656160, "step": 97185 }, { "epoch": 15.854812398042414, "grad_norm": 0.11291283369064331, "learning_rate": 0.00012525342141073083, "loss": 0.0039, "num_input_tokens_seen": 209667328, "step": 97190 }, { "epoch": 15.85562805872757, "grad_norm": 0.008448568172752857, "learning_rate": 0.00012520630330507042, "loss": 0.0806, "num_input_tokens_seen": 209678880, "step": 97195 }, { "epoch": 15.856443719412724, "grad_norm": 0.9808346033096313, "learning_rate": 0.0001251591927950798, "loss": 0.0265, "num_input_tokens_seen": 209689568, "step": 97200 }, { "epoch": 15.85725938009788, "grad_norm": 0.4561034142971039, "learning_rate": 0.00012511208988171362, "loss": 0.0598, "num_input_tokens_seen": 209699808, "step": 97205 }, { "epoch": 15.858075040783035, "grad_norm": 0.06088285520672798, "learning_rate": 0.0001250649945659265, "loss": 0.0062, "num_input_tokens_seen": 209711072, "step": 97210 }, { "epoch": 15.858890701468189, "grad_norm": 0.0050840117037296295, "learning_rate": 0.00012501790684867292, "loss": 0.0779, "num_input_tokens_seen": 209722016, "step": 97215 }, { "epoch": 15.859706362153345, "grad_norm": 0.26799535751342773, "learning_rate": 0.0001249708267309072, "loss": 0.0103, "num_input_tokens_seen": 209731168, "step": 97220 }, { "epoch": 15.860522022838499, "grad_norm": 0.008156484924256802, "learning_rate": 0.00012492375421358336, "loss": 0.0055, "num_input_tokens_seen": 209740704, "step": 97225 }, { "epoch": 15.861337683523654, "grad_norm": 0.010354150086641312, "learning_rate": 0.00012487668929765555, "loss": 0.019, "num_input_tokens_seen": 209752160, "step": 97230 }, { "epoch": 15.86215334420881, "grad_norm": 0.008712238632142544, "learning_rate": 0.00012482963198407742, "loss": 0.0044, "num_input_tokens_seen": 209764192, "step": 97235 }, { "epoch": 15.862969004893964, "grad_norm": 0.056229643523693085, "learning_rate": 0.00012478258227380262, "loss": 0.0063, "num_input_tokens_seen": 209774208, "step": 97240 }, { "epoch": 15.86378466557912, "grad_norm": 0.0064669763669371605, "learning_rate": 0.0001247355401677851, "loss": 0.0018, "num_input_tokens_seen": 209783744, "step": 97245 }, { "epoch": 15.864600326264274, "grad_norm": 0.007030479609966278, "learning_rate": 0.00012468850566697758, "loss": 0.0082, "num_input_tokens_seen": 209794912, "step": 97250 }, { "epoch": 15.86541598694943, "grad_norm": 0.031139055266976357, "learning_rate": 0.00012464147877233394, "loss": 0.0489, "num_input_tokens_seen": 209806304, "step": 97255 }, { "epoch": 15.866231647634583, "grad_norm": 0.23773415386676788, "learning_rate": 0.00012459445948480663, "loss": 0.0046, "num_input_tokens_seen": 209816032, "step": 97260 }, { "epoch": 15.867047308319739, "grad_norm": 0.00116371747571975, "learning_rate": 0.0001245474478053491, "loss": 0.009, "num_input_tokens_seen": 209827680, "step": 97265 }, { "epoch": 15.867862969004895, "grad_norm": 1.0092875957489014, "learning_rate": 0.00012450044373491355, "loss": 0.0437, "num_input_tokens_seen": 209838560, "step": 97270 }, { "epoch": 15.868678629690049, "grad_norm": 0.02437322959303856, "learning_rate": 0.00012445344727445303, "loss": 0.0356, "num_input_tokens_seen": 209848800, "step": 97275 }, { "epoch": 15.869494290375204, "grad_norm": 0.00395537493750453, "learning_rate": 0.00012440645842491977, "loss": 0.0018, "num_input_tokens_seen": 209858304, "step": 97280 }, { "epoch": 15.870309951060358, "grad_norm": 0.31160348653793335, "learning_rate": 0.0001243594771872661, "loss": 0.1063, "num_input_tokens_seen": 209868032, "step": 97285 }, { "epoch": 15.871125611745514, "grad_norm": 0.006852383259683847, "learning_rate": 0.00012431250356244422, "loss": 0.0023, "num_input_tokens_seen": 209879712, "step": 97290 }, { "epoch": 15.87194127243067, "grad_norm": 0.009777972474694252, "learning_rate": 0.000124265537551406, "loss": 0.0066, "num_input_tokens_seen": 209890144, "step": 97295 }, { "epoch": 15.872756933115824, "grad_norm": 0.08918800950050354, "learning_rate": 0.00012421857915510332, "loss": 0.0922, "num_input_tokens_seen": 209900736, "step": 97300 }, { "epoch": 15.87357259380098, "grad_norm": 0.010353722609579563, "learning_rate": 0.00012417162837448787, "loss": 0.0023, "num_input_tokens_seen": 209912224, "step": 97305 }, { "epoch": 15.874388254486133, "grad_norm": 0.1301814764738083, "learning_rate": 0.0001241246852105111, "loss": 0.0636, "num_input_tokens_seen": 209923616, "step": 97310 }, { "epoch": 15.875203915171289, "grad_norm": 0.13054613769054413, "learning_rate": 0.00012407774966412445, "loss": 0.0302, "num_input_tokens_seen": 209934400, "step": 97315 }, { "epoch": 15.876019575856443, "grad_norm": 0.044631995260715485, "learning_rate": 0.0001240308217362791, "loss": 0.0072, "num_input_tokens_seen": 209945696, "step": 97320 }, { "epoch": 15.876835236541599, "grad_norm": 0.23582838475704193, "learning_rate": 0.0001239839014279261, "loss": 0.0085, "num_input_tokens_seen": 209956800, "step": 97325 }, { "epoch": 15.877650897226754, "grad_norm": 0.014151723124086857, "learning_rate": 0.0001239369887400163, "loss": 0.0273, "num_input_tokens_seen": 209968224, "step": 97330 }, { "epoch": 15.878466557911908, "grad_norm": 0.012680897489190102, "learning_rate": 0.0001238900836735005, "loss": 0.0187, "num_input_tokens_seen": 209978656, "step": 97335 }, { "epoch": 15.879282218597064, "grad_norm": 0.009599206037819386, "learning_rate": 0.00012384318622932932, "loss": 0.005, "num_input_tokens_seen": 209989056, "step": 97340 }, { "epoch": 15.880097879282218, "grad_norm": 0.023121481761336327, "learning_rate": 0.00012379629640845314, "loss": 0.0032, "num_input_tokens_seen": 209999712, "step": 97345 }, { "epoch": 15.880913539967374, "grad_norm": 0.0306924469769001, "learning_rate": 0.0001237494142118223, "loss": 0.0039, "num_input_tokens_seen": 210009088, "step": 97350 }, { "epoch": 15.88172920065253, "grad_norm": 0.0051122503355145454, "learning_rate": 0.00012370253964038685, "loss": 0.0014, "num_input_tokens_seen": 210020032, "step": 97355 }, { "epoch": 15.882544861337683, "grad_norm": 0.49188414216041565, "learning_rate": 0.0001236556726950968, "loss": 0.0304, "num_input_tokens_seen": 210030464, "step": 97360 }, { "epoch": 15.883360522022839, "grad_norm": 0.9735483527183533, "learning_rate": 0.000123608813376902, "loss": 0.0246, "num_input_tokens_seen": 210040480, "step": 97365 }, { "epoch": 15.884176182707993, "grad_norm": 0.07198720425367355, "learning_rate": 0.00012356196168675205, "loss": 0.0051, "num_input_tokens_seen": 210051424, "step": 97370 }, { "epoch": 15.884991843393149, "grad_norm": 0.021851979196071625, "learning_rate": 0.00012351511762559653, "loss": 0.0024, "num_input_tokens_seen": 210061312, "step": 97375 }, { "epoch": 15.885807504078304, "grad_norm": 0.7693410515785217, "learning_rate": 0.0001234682811943847, "loss": 0.1455, "num_input_tokens_seen": 210071616, "step": 97380 }, { "epoch": 15.886623164763458, "grad_norm": 0.013447145000100136, "learning_rate": 0.00012342145239406573, "loss": 0.0171, "num_input_tokens_seen": 210083040, "step": 97385 }, { "epoch": 15.887438825448614, "grad_norm": 0.001750773866660893, "learning_rate": 0.00012337463122558885, "loss": 0.0028, "num_input_tokens_seen": 210093664, "step": 97390 }, { "epoch": 15.888254486133768, "grad_norm": 0.10174345225095749, "learning_rate": 0.00012332781768990286, "loss": 0.0141, "num_input_tokens_seen": 210104352, "step": 97395 }, { "epoch": 15.889070146818923, "grad_norm": 0.8297173380851746, "learning_rate": 0.00012328101178795648, "loss": 0.06, "num_input_tokens_seen": 210114656, "step": 97400 }, { "epoch": 15.88988580750408, "grad_norm": 0.28521132469177246, "learning_rate": 0.0001232342135206983, "loss": 0.0106, "num_input_tokens_seen": 210125728, "step": 97405 }, { "epoch": 15.890701468189233, "grad_norm": 0.8682891726493835, "learning_rate": 0.0001231874228890768, "loss": 0.1177, "num_input_tokens_seen": 210136928, "step": 97410 }, { "epoch": 15.891517128874389, "grad_norm": 0.2116265743970871, "learning_rate": 0.00012314063989404012, "loss": 0.206, "num_input_tokens_seen": 210146752, "step": 97415 }, { "epoch": 15.892332789559543, "grad_norm": 0.05007200315594673, "learning_rate": 0.00012309386453653647, "loss": 0.006, "num_input_tokens_seen": 210156352, "step": 97420 }, { "epoch": 15.893148450244698, "grad_norm": 0.012831252068281174, "learning_rate": 0.00012304709681751385, "loss": 0.0077, "num_input_tokens_seen": 210166912, "step": 97425 }, { "epoch": 15.893964110929852, "grad_norm": 0.006236710119992495, "learning_rate": 0.00012300033673792, "loss": 0.003, "num_input_tokens_seen": 210178272, "step": 97430 }, { "epoch": 15.894779771615008, "grad_norm": 0.008272969163954258, "learning_rate": 0.00012295358429870252, "loss": 0.0065, "num_input_tokens_seen": 210189728, "step": 97435 }, { "epoch": 15.895595432300164, "grad_norm": 0.008013091050088406, "learning_rate": 0.000122906839500809, "loss": 0.0027, "num_input_tokens_seen": 210200160, "step": 97440 }, { "epoch": 15.896411092985318, "grad_norm": 0.033426105976104736, "learning_rate": 0.0001228601023451868, "loss": 0.0031, "num_input_tokens_seen": 210210560, "step": 97445 }, { "epoch": 15.897226753670473, "grad_norm": 0.03067987598478794, "learning_rate": 0.00012281337283278298, "loss": 0.0084, "num_input_tokens_seen": 210221376, "step": 97450 }, { "epoch": 15.898042414355627, "grad_norm": 0.016450131312012672, "learning_rate": 0.0001227666509645447, "loss": 0.005, "num_input_tokens_seen": 210233568, "step": 97455 }, { "epoch": 15.898858075040783, "grad_norm": 0.03008451499044895, "learning_rate": 0.00012271993674141878, "loss": 0.0023, "num_input_tokens_seen": 210243808, "step": 97460 }, { "epoch": 15.899673735725939, "grad_norm": 0.12227435410022736, "learning_rate": 0.000122673230164352, "loss": 0.0059, "num_input_tokens_seen": 210253952, "step": 97465 }, { "epoch": 15.900489396411093, "grad_norm": 0.006045287940651178, "learning_rate": 0.00012262653123429085, "loss": 0.0032, "num_input_tokens_seen": 210265248, "step": 97470 }, { "epoch": 15.901305057096248, "grad_norm": 0.07498399168252945, "learning_rate": 0.0001225798399521818, "loss": 0.0081, "num_input_tokens_seen": 210275264, "step": 97475 }, { "epoch": 15.902120717781402, "grad_norm": 0.007311370689421892, "learning_rate": 0.00012253315631897106, "loss": 0.0737, "num_input_tokens_seen": 210286240, "step": 97480 }, { "epoch": 15.902936378466558, "grad_norm": 1.721595048904419, "learning_rate": 0.00012248648033560473, "loss": 0.0228, "num_input_tokens_seen": 210297056, "step": 97485 }, { "epoch": 15.903752039151712, "grad_norm": 0.015165494754910469, "learning_rate": 0.00012243981200302885, "loss": 0.0022, "num_input_tokens_seen": 210307968, "step": 97490 }, { "epoch": 15.904567699836868, "grad_norm": 0.0031194828916341066, "learning_rate": 0.00012239315132218898, "loss": 0.0077, "num_input_tokens_seen": 210318816, "step": 97495 }, { "epoch": 15.905383360522023, "grad_norm": 0.01592426188290119, "learning_rate": 0.00012234649829403116, "loss": 0.0305, "num_input_tokens_seen": 210328832, "step": 97500 }, { "epoch": 15.906199021207177, "grad_norm": 0.033693429082632065, "learning_rate": 0.0001222998529195004, "loss": 0.0077, "num_input_tokens_seen": 210339392, "step": 97505 }, { "epoch": 15.907014681892333, "grad_norm": 0.011216314509510994, "learning_rate": 0.00012225321519954258, "loss": 0.0042, "num_input_tokens_seen": 210349696, "step": 97510 }, { "epoch": 15.907830342577487, "grad_norm": 0.2592774033546448, "learning_rate": 0.00012220658513510224, "loss": 0.1646, "num_input_tokens_seen": 210359552, "step": 97515 }, { "epoch": 15.908646003262643, "grad_norm": 0.022455226629972458, "learning_rate": 0.00012215996272712498, "loss": 0.0021, "num_input_tokens_seen": 210372160, "step": 97520 }, { "epoch": 15.909461663947798, "grad_norm": 0.006272462662309408, "learning_rate": 0.00012211334797655515, "loss": 0.0223, "num_input_tokens_seen": 210382848, "step": 97525 }, { "epoch": 15.910277324632952, "grad_norm": 0.0010491888970136642, "learning_rate": 0.00012206674088433784, "loss": 0.0119, "num_input_tokens_seen": 210392448, "step": 97530 }, { "epoch": 15.911092985318108, "grad_norm": 0.009957434609532356, "learning_rate": 0.00012202014145141749, "loss": 0.0029, "num_input_tokens_seen": 210403296, "step": 97535 }, { "epoch": 15.911908646003262, "grad_norm": 0.0256801787763834, "learning_rate": 0.00012197354967873847, "loss": 0.0266, "num_input_tokens_seen": 210414368, "step": 97540 }, { "epoch": 15.912724306688418, "grad_norm": 0.006286490708589554, "learning_rate": 0.00012192696556724497, "loss": 0.0113, "num_input_tokens_seen": 210425568, "step": 97545 }, { "epoch": 15.913539967373573, "grad_norm": 0.05345306918025017, "learning_rate": 0.00012188038911788119, "loss": 0.1055, "num_input_tokens_seen": 210436576, "step": 97550 }, { "epoch": 15.914355628058727, "grad_norm": 0.017919493839144707, "learning_rate": 0.00012183382033159101, "loss": 0.0084, "num_input_tokens_seen": 210446144, "step": 97555 }, { "epoch": 15.915171288743883, "grad_norm": 0.05290329456329346, "learning_rate": 0.00012178725920931816, "loss": 0.0062, "num_input_tokens_seen": 210457600, "step": 97560 }, { "epoch": 15.915986949429037, "grad_norm": 0.000804480048827827, "learning_rate": 0.0001217407057520063, "loss": 0.0092, "num_input_tokens_seen": 210469984, "step": 97565 }, { "epoch": 15.916802610114193, "grad_norm": 0.5227135419845581, "learning_rate": 0.0001216941599605989, "loss": 0.0188, "num_input_tokens_seen": 210481600, "step": 97570 }, { "epoch": 15.917618270799348, "grad_norm": 0.006612697616219521, "learning_rate": 0.00012164762183603928, "loss": 0.002, "num_input_tokens_seen": 210492384, "step": 97575 }, { "epoch": 15.918433931484502, "grad_norm": 0.004800699185580015, "learning_rate": 0.00012160109137927061, "loss": 0.0017, "num_input_tokens_seen": 210504032, "step": 97580 }, { "epoch": 15.919249592169658, "grad_norm": 0.008400495164096355, "learning_rate": 0.00012155456859123582, "loss": 0.0033, "num_input_tokens_seen": 210513888, "step": 97585 }, { "epoch": 15.920065252854812, "grad_norm": 0.011940158903598785, "learning_rate": 0.00012150805347287774, "loss": 0.0022, "num_input_tokens_seen": 210525120, "step": 97590 }, { "epoch": 15.920880913539968, "grad_norm": 0.030431652441620827, "learning_rate": 0.00012146154602513915, "loss": 0.0027, "num_input_tokens_seen": 210536224, "step": 97595 }, { "epoch": 15.921696574225122, "grad_norm": 0.07820103317499161, "learning_rate": 0.00012141504624896244, "loss": 0.0289, "num_input_tokens_seen": 210547072, "step": 97600 }, { "epoch": 15.922512234910277, "grad_norm": 0.03417756035923958, "learning_rate": 0.0001213685541452903, "loss": 0.0041, "num_input_tokens_seen": 210557568, "step": 97605 }, { "epoch": 15.923327895595433, "grad_norm": 0.12447967380285263, "learning_rate": 0.00012132206971506449, "loss": 0.0034, "num_input_tokens_seen": 210568224, "step": 97610 }, { "epoch": 15.924143556280587, "grad_norm": 0.03131439536809921, "learning_rate": 0.00012127559295922764, "loss": 0.0046, "num_input_tokens_seen": 210578144, "step": 97615 }, { "epoch": 15.924959216965743, "grad_norm": 0.006115563679486513, "learning_rate": 0.00012122912387872098, "loss": 0.0032, "num_input_tokens_seen": 210589440, "step": 97620 }, { "epoch": 15.925774877650896, "grad_norm": 0.2525862455368042, "learning_rate": 0.000121182662474487, "loss": 0.0444, "num_input_tokens_seen": 210599168, "step": 97625 }, { "epoch": 15.926590538336052, "grad_norm": 0.0015870665665715933, "learning_rate": 0.00012113620874746656, "loss": 0.0028, "num_input_tokens_seen": 210608384, "step": 97630 }, { "epoch": 15.927406199021208, "grad_norm": 0.014097308740019798, "learning_rate": 0.00012108976269860183, "loss": 0.0293, "num_input_tokens_seen": 210618912, "step": 97635 }, { "epoch": 15.928221859706362, "grad_norm": 0.004535838030278683, "learning_rate": 0.00012104332432883342, "loss": 0.002, "num_input_tokens_seen": 210629312, "step": 97640 }, { "epoch": 15.929037520391518, "grad_norm": 0.00039765218389220536, "learning_rate": 0.0001209968936391031, "loss": 0.0012, "num_input_tokens_seen": 210640000, "step": 97645 }, { "epoch": 15.929853181076671, "grad_norm": 0.9562644958496094, "learning_rate": 0.00012095047063035119, "loss": 0.0349, "num_input_tokens_seen": 210651104, "step": 97650 }, { "epoch": 15.930668841761827, "grad_norm": 0.3894575834274292, "learning_rate": 0.00012090405530351916, "loss": 0.0451, "num_input_tokens_seen": 210661920, "step": 97655 }, { "epoch": 15.931484502446983, "grad_norm": 0.031039129942655563, "learning_rate": 0.0001208576476595471, "loss": 0.0097, "num_input_tokens_seen": 210672480, "step": 97660 }, { "epoch": 15.932300163132137, "grad_norm": 0.04668223112821579, "learning_rate": 0.00012081124769937607, "loss": 0.0056, "num_input_tokens_seen": 210682944, "step": 97665 }, { "epoch": 15.933115823817293, "grad_norm": 0.011268621310591698, "learning_rate": 0.00012076485542394583, "loss": 0.0115, "num_input_tokens_seen": 210693568, "step": 97670 }, { "epoch": 15.933931484502446, "grad_norm": 0.002161469077691436, "learning_rate": 0.00012071847083419708, "loss": 0.0031, "num_input_tokens_seen": 210705632, "step": 97675 }, { "epoch": 15.934747145187602, "grad_norm": 0.0266000647097826, "learning_rate": 0.00012067209393106959, "loss": 0.0028, "num_input_tokens_seen": 210716768, "step": 97680 }, { "epoch": 15.935562805872756, "grad_norm": 0.01039186306297779, "learning_rate": 0.00012062572471550337, "loss": 0.0431, "num_input_tokens_seen": 210727968, "step": 97685 }, { "epoch": 15.936378466557912, "grad_norm": 0.010303999297320843, "learning_rate": 0.00012057936318843816, "loss": 0.099, "num_input_tokens_seen": 210738528, "step": 97690 }, { "epoch": 15.937194127243067, "grad_norm": 0.0054239025339484215, "learning_rate": 0.00012053300935081341, "loss": 0.0105, "num_input_tokens_seen": 210749248, "step": 97695 }, { "epoch": 15.938009787928221, "grad_norm": 0.030438082292675972, "learning_rate": 0.00012048666320356865, "loss": 0.0033, "num_input_tokens_seen": 210759808, "step": 97700 }, { "epoch": 15.938825448613377, "grad_norm": 0.0023649618960916996, "learning_rate": 0.0001204403247476431, "loss": 0.0034, "num_input_tokens_seen": 210770560, "step": 97705 }, { "epoch": 15.939641109298531, "grad_norm": 0.007579031866043806, "learning_rate": 0.00012039399398397588, "loss": 0.003, "num_input_tokens_seen": 210780608, "step": 97710 }, { "epoch": 15.940456769983687, "grad_norm": 0.008803587406873703, "learning_rate": 0.00012034767091350591, "loss": 0.0036, "num_input_tokens_seen": 210792864, "step": 97715 }, { "epoch": 15.941272430668842, "grad_norm": 0.0021561270114034414, "learning_rate": 0.00012030135553717204, "loss": 0.0066, "num_input_tokens_seen": 210803232, "step": 97720 }, { "epoch": 15.942088091353996, "grad_norm": 0.04169066250324249, "learning_rate": 0.00012025504785591273, "loss": 0.0248, "num_input_tokens_seen": 210813312, "step": 97725 }, { "epoch": 15.942903752039152, "grad_norm": 0.0005412654136307538, "learning_rate": 0.00012020874787066688, "loss": 0.0548, "num_input_tokens_seen": 210822912, "step": 97730 }, { "epoch": 15.943719412724306, "grad_norm": 0.01384122297167778, "learning_rate": 0.00012016245558237232, "loss": 0.0019, "num_input_tokens_seen": 210835008, "step": 97735 }, { "epoch": 15.944535073409462, "grad_norm": 0.14727036654949188, "learning_rate": 0.0001201161709919677, "loss": 0.0099, "num_input_tokens_seen": 210846144, "step": 97740 }, { "epoch": 15.945350734094617, "grad_norm": 0.14274829626083374, "learning_rate": 0.00012006989410039055, "loss": 0.0063, "num_input_tokens_seen": 210857664, "step": 97745 }, { "epoch": 15.946166394779771, "grad_norm": 0.27936190366744995, "learning_rate": 0.00012002362490857921, "loss": 0.1303, "num_input_tokens_seen": 210868768, "step": 97750 }, { "epoch": 15.946982055464927, "grad_norm": 0.020314475521445274, "learning_rate": 0.00011997736341747085, "loss": 0.0065, "num_input_tokens_seen": 210879680, "step": 97755 }, { "epoch": 15.947797716150081, "grad_norm": 0.012945069931447506, "learning_rate": 0.00011993110962800363, "loss": 0.0026, "num_input_tokens_seen": 210889888, "step": 97760 }, { "epoch": 15.948613376835237, "grad_norm": 0.9098187685012817, "learning_rate": 0.00011988486354111433, "loss": 0.0649, "num_input_tokens_seen": 210900768, "step": 97765 }, { "epoch": 15.949429037520392, "grad_norm": 0.002324692439287901, "learning_rate": 0.0001198386251577408, "loss": 0.0028, "num_input_tokens_seen": 210910848, "step": 97770 }, { "epoch": 15.950244698205546, "grad_norm": 0.029050586745142937, "learning_rate": 0.00011979239447881945, "loss": 0.0021, "num_input_tokens_seen": 210920960, "step": 97775 }, { "epoch": 15.951060358890702, "grad_norm": 0.02958422712981701, "learning_rate": 0.00011974617150528788, "loss": 0.0661, "num_input_tokens_seen": 210932864, "step": 97780 }, { "epoch": 15.951876019575856, "grad_norm": 0.014732618816196918, "learning_rate": 0.00011969995623808221, "loss": 0.0028, "num_input_tokens_seen": 210944192, "step": 97785 }, { "epoch": 15.952691680261012, "grad_norm": 0.0340808629989624, "learning_rate": 0.00011965374867813972, "loss": 0.0046, "num_input_tokens_seen": 210954560, "step": 97790 }, { "epoch": 15.953507340946166, "grad_norm": 0.011251387186348438, "learning_rate": 0.00011960754882639619, "loss": 0.0015, "num_input_tokens_seen": 210965920, "step": 97795 }, { "epoch": 15.954323001631321, "grad_norm": 0.009405967779457569, "learning_rate": 0.00011956135668378853, "loss": 0.0089, "num_input_tokens_seen": 210977280, "step": 97800 }, { "epoch": 15.955138662316477, "grad_norm": 0.03605485334992409, "learning_rate": 0.00011951517225125231, "loss": 0.0039, "num_input_tokens_seen": 210987712, "step": 97805 }, { "epoch": 15.955954323001631, "grad_norm": 0.009923968464136124, "learning_rate": 0.00011946899552972395, "loss": 0.0042, "num_input_tokens_seen": 210997728, "step": 97810 }, { "epoch": 15.956769983686787, "grad_norm": 0.006833584979176521, "learning_rate": 0.00011942282652013914, "loss": 0.005, "num_input_tokens_seen": 211008576, "step": 97815 }, { "epoch": 15.95758564437194, "grad_norm": 0.00566292367875576, "learning_rate": 0.00011937666522343354, "loss": 0.0032, "num_input_tokens_seen": 211018784, "step": 97820 }, { "epoch": 15.958401305057096, "grad_norm": 0.012962086126208305, "learning_rate": 0.0001193305116405427, "loss": 0.0472, "num_input_tokens_seen": 211030688, "step": 97825 }, { "epoch": 15.959216965742252, "grad_norm": 0.02679431065917015, "learning_rate": 0.00011928436577240193, "loss": 0.0025, "num_input_tokens_seen": 211041664, "step": 97830 }, { "epoch": 15.960032626427406, "grad_norm": 0.0019437966402620077, "learning_rate": 0.00011923822761994646, "loss": 0.001, "num_input_tokens_seen": 211052960, "step": 97835 }, { "epoch": 15.960848287112562, "grad_norm": 0.06853377819061279, "learning_rate": 0.00011919209718411134, "loss": 0.0047, "num_input_tokens_seen": 211064736, "step": 97840 }, { "epoch": 15.961663947797716, "grad_norm": 0.0066619026474654675, "learning_rate": 0.00011914597446583147, "loss": 0.0026, "num_input_tokens_seen": 211075712, "step": 97845 }, { "epoch": 15.962479608482871, "grad_norm": 0.008050302974879742, "learning_rate": 0.00011909985946604157, "loss": 0.0013, "num_input_tokens_seen": 211086080, "step": 97850 }, { "epoch": 15.963295269168025, "grad_norm": 0.0067341625690460205, "learning_rate": 0.00011905375218567621, "loss": 0.014, "num_input_tokens_seen": 211097280, "step": 97855 }, { "epoch": 15.964110929853181, "grad_norm": 0.005524253007024527, "learning_rate": 0.00011900765262566988, "loss": 0.0442, "num_input_tokens_seen": 211107104, "step": 97860 }, { "epoch": 15.964926590538337, "grad_norm": 0.005729553289711475, "learning_rate": 0.00011896156078695675, "loss": 0.0082, "num_input_tokens_seen": 211117056, "step": 97865 }, { "epoch": 15.96574225122349, "grad_norm": 0.024632873013615608, "learning_rate": 0.00011891547667047082, "loss": 0.0053, "num_input_tokens_seen": 211128192, "step": 97870 }, { "epoch": 15.966557911908646, "grad_norm": 0.034333083778619766, "learning_rate": 0.00011886940027714649, "loss": 0.002, "num_input_tokens_seen": 211139392, "step": 97875 }, { "epoch": 15.9673735725938, "grad_norm": 0.0021403080318123102, "learning_rate": 0.00011882333160791697, "loss": 0.0024, "num_input_tokens_seen": 211149696, "step": 97880 }, { "epoch": 15.968189233278956, "grad_norm": 0.024381868541240692, "learning_rate": 0.00011877727066371646, "loss": 0.0044, "num_input_tokens_seen": 211161120, "step": 97885 }, { "epoch": 15.969004893964112, "grad_norm": 0.0076603880152106285, "learning_rate": 0.00011873121744547794, "loss": 0.005, "num_input_tokens_seen": 211171712, "step": 97890 }, { "epoch": 15.969820554649266, "grad_norm": 0.007282884791493416, "learning_rate": 0.00011868517195413525, "loss": 0.0011, "num_input_tokens_seen": 211182944, "step": 97895 }, { "epoch": 15.970636215334421, "grad_norm": 0.15059101581573486, "learning_rate": 0.00011863913419062095, "loss": 0.0068, "num_input_tokens_seen": 211193536, "step": 97900 }, { "epoch": 15.971451876019575, "grad_norm": 0.031285881996154785, "learning_rate": 0.00011859310415586871, "loss": 0.0474, "num_input_tokens_seen": 211204544, "step": 97905 }, { "epoch": 15.97226753670473, "grad_norm": 0.10427773743867874, "learning_rate": 0.00011854708185081076, "loss": 0.0835, "num_input_tokens_seen": 211214592, "step": 97910 }, { "epoch": 15.973083197389887, "grad_norm": 0.06792265176773071, "learning_rate": 0.00011850106727638026, "loss": 0.0035, "num_input_tokens_seen": 211224928, "step": 97915 }, { "epoch": 15.97389885807504, "grad_norm": 0.036905232816934586, "learning_rate": 0.00011845506043350956, "loss": 0.1094, "num_input_tokens_seen": 211236640, "step": 97920 }, { "epoch": 15.974714518760196, "grad_norm": 0.020171459764242172, "learning_rate": 0.00011840906132313117, "loss": 0.0027, "num_input_tokens_seen": 211246848, "step": 97925 }, { "epoch": 15.97553017944535, "grad_norm": 0.004613845609128475, "learning_rate": 0.00011836306994617718, "loss": 0.0434, "num_input_tokens_seen": 211256640, "step": 97930 }, { "epoch": 15.976345840130506, "grad_norm": 0.13685919344425201, "learning_rate": 0.00011831708630357968, "loss": 0.0043, "num_input_tokens_seen": 211266848, "step": 97935 }, { "epoch": 15.977161500815662, "grad_norm": 0.045827798545360565, "learning_rate": 0.0001182711103962707, "loss": 0.0044, "num_input_tokens_seen": 211277504, "step": 97940 }, { "epoch": 15.977977161500815, "grad_norm": 0.32249608635902405, "learning_rate": 0.00011822514222518188, "loss": 0.0325, "num_input_tokens_seen": 211288672, "step": 97945 }, { "epoch": 15.978792822185971, "grad_norm": 0.09804785251617432, "learning_rate": 0.00011817918179124487, "loss": 0.2345, "num_input_tokens_seen": 211299808, "step": 97950 }, { "epoch": 15.979608482871125, "grad_norm": 0.014171267859637737, "learning_rate": 0.00011813322909539115, "loss": 0.0044, "num_input_tokens_seen": 211309088, "step": 97955 }, { "epoch": 15.98042414355628, "grad_norm": 0.0012629975099116564, "learning_rate": 0.0001180872841385519, "loss": 0.0023, "num_input_tokens_seen": 211318656, "step": 97960 }, { "epoch": 15.981239804241435, "grad_norm": 0.011100394651293755, "learning_rate": 0.00011804134692165841, "loss": 0.0027, "num_input_tokens_seen": 211328640, "step": 97965 }, { "epoch": 15.98205546492659, "grad_norm": 0.006437437143176794, "learning_rate": 0.00011799541744564151, "loss": 0.0049, "num_input_tokens_seen": 211340160, "step": 97970 }, { "epoch": 15.982871125611746, "grad_norm": 0.022876478731632233, "learning_rate": 0.00011794949571143215, "loss": 0.0012, "num_input_tokens_seen": 211351008, "step": 97975 }, { "epoch": 15.9836867862969, "grad_norm": 0.005354802589863539, "learning_rate": 0.00011790358171996086, "loss": 0.0803, "num_input_tokens_seen": 211363392, "step": 97980 }, { "epoch": 15.984502446982056, "grad_norm": 0.0010631013428792357, "learning_rate": 0.00011785767547215825, "loss": 0.1709, "num_input_tokens_seen": 211372832, "step": 97985 }, { "epoch": 15.98531810766721, "grad_norm": 0.042163535952568054, "learning_rate": 0.00011781177696895462, "loss": 0.0086, "num_input_tokens_seen": 211382784, "step": 97990 }, { "epoch": 15.986133768352365, "grad_norm": 0.006655391305685043, "learning_rate": 0.00011776588621128015, "loss": 0.0047, "num_input_tokens_seen": 211395168, "step": 97995 }, { "epoch": 15.986949429037521, "grad_norm": 0.021443411707878113, "learning_rate": 0.00011772000320006493, "loss": 0.0034, "num_input_tokens_seen": 211406464, "step": 98000 }, { "epoch": 15.987765089722675, "grad_norm": 0.1270068734884262, "learning_rate": 0.00011767412793623878, "loss": 0.0029, "num_input_tokens_seen": 211417408, "step": 98005 }, { "epoch": 15.98858075040783, "grad_norm": 0.003618381218984723, "learning_rate": 0.00011762826042073144, "loss": 0.0057, "num_input_tokens_seen": 211427968, "step": 98010 }, { "epoch": 15.989396411092985, "grad_norm": 0.06153593584895134, "learning_rate": 0.00011758240065447234, "loss": 0.0496, "num_input_tokens_seen": 211440256, "step": 98015 }, { "epoch": 15.99021207177814, "grad_norm": 0.01717258431017399, "learning_rate": 0.00011753654863839114, "loss": 0.0035, "num_input_tokens_seen": 211451552, "step": 98020 }, { "epoch": 15.991027732463294, "grad_norm": 0.004304427187889814, "learning_rate": 0.00011749070437341702, "loss": 0.0015, "num_input_tokens_seen": 211462272, "step": 98025 }, { "epoch": 15.99184339314845, "grad_norm": 1.1380677223205566, "learning_rate": 0.00011744486786047898, "loss": 0.0691, "num_input_tokens_seen": 211473760, "step": 98030 }, { "epoch": 15.992659053833606, "grad_norm": 0.01103879138827324, "learning_rate": 0.00011739903910050603, "loss": 0.0842, "num_input_tokens_seen": 211483968, "step": 98035 }, { "epoch": 15.99347471451876, "grad_norm": 0.008627597242593765, "learning_rate": 0.00011735321809442689, "loss": 0.1849, "num_input_tokens_seen": 211494656, "step": 98040 }, { "epoch": 15.994290375203915, "grad_norm": 0.02655285969376564, "learning_rate": 0.00011730740484317021, "loss": 0.0069, "num_input_tokens_seen": 211506080, "step": 98045 }, { "epoch": 15.99510603588907, "grad_norm": 0.029401404783129692, "learning_rate": 0.00011726159934766445, "loss": 0.115, "num_input_tokens_seen": 211517088, "step": 98050 }, { "epoch": 15.995921696574225, "grad_norm": 0.06734762340784073, "learning_rate": 0.00011721580160883794, "loss": 0.0178, "num_input_tokens_seen": 211527616, "step": 98055 }, { "epoch": 15.99673735725938, "grad_norm": 0.016371045261621475, "learning_rate": 0.00011717001162761881, "loss": 0.0046, "num_input_tokens_seen": 211539200, "step": 98060 }, { "epoch": 15.997553017944535, "grad_norm": 0.006909481715410948, "learning_rate": 0.000117124229404935, "loss": 0.0044, "num_input_tokens_seen": 211550720, "step": 98065 }, { "epoch": 15.99836867862969, "grad_norm": 0.10381163656711578, "learning_rate": 0.00011707845494171443, "loss": 0.009, "num_input_tokens_seen": 211561280, "step": 98070 }, { "epoch": 15.999184339314844, "grad_norm": 0.029712708666920662, "learning_rate": 0.00011703268823888475, "loss": 0.0069, "num_input_tokens_seen": 211571968, "step": 98075 }, { "epoch": 16.0, "grad_norm": 0.0011337876785546541, "learning_rate": 0.00011698692929737348, "loss": 0.0468, "num_input_tokens_seen": 211581728, "step": 98080 }, { "epoch": 16.0, "eval_loss": 0.211046501994133, "eval_runtime": 104.6486, "eval_samples_per_second": 26.04, "eval_steps_per_second": 6.517, "num_input_tokens_seen": 211581728, "step": 98080 }, { "epoch": 16.000815660685156, "grad_norm": 0.014061979949474335, "learning_rate": 0.00011694117811810795, "loss": 0.0179, "num_input_tokens_seen": 211591360, "step": 98085 }, { "epoch": 16.00163132137031, "grad_norm": 0.06118741258978844, "learning_rate": 0.00011689543470201536, "loss": 0.0089, "num_input_tokens_seen": 211602752, "step": 98090 }, { "epoch": 16.002446982055464, "grad_norm": 0.014648097567260265, "learning_rate": 0.00011684969905002286, "loss": 0.0029, "num_input_tokens_seen": 211613600, "step": 98095 }, { "epoch": 16.00326264274062, "grad_norm": 0.07661961764097214, "learning_rate": 0.00011680397116305719, "loss": 0.0138, "num_input_tokens_seen": 211623872, "step": 98100 }, { "epoch": 16.004078303425775, "grad_norm": 0.025017227977514267, "learning_rate": 0.00011675825104204523, "loss": 0.008, "num_input_tokens_seen": 211633504, "step": 98105 }, { "epoch": 16.00489396411093, "grad_norm": 0.010437053628265858, "learning_rate": 0.00011671253868791343, "loss": 0.0542, "num_input_tokens_seen": 211644416, "step": 98110 }, { "epoch": 16.005709624796086, "grad_norm": 0.002585688605904579, "learning_rate": 0.00011666683410158829, "loss": 0.0035, "num_input_tokens_seen": 211654848, "step": 98115 }, { "epoch": 16.00652528548124, "grad_norm": 0.0050298604182899, "learning_rate": 0.0001166211372839961, "loss": 0.0013, "num_input_tokens_seen": 211666432, "step": 98120 }, { "epoch": 16.007340946166394, "grad_norm": 0.0019909690599888563, "learning_rate": 0.00011657544823606286, "loss": 0.0087, "num_input_tokens_seen": 211677120, "step": 98125 }, { "epoch": 16.00815660685155, "grad_norm": 0.02870413288474083, "learning_rate": 0.00011652976695871459, "loss": 0.0047, "num_input_tokens_seen": 211687264, "step": 98130 }, { "epoch": 16.008972267536706, "grad_norm": 0.10732894390821457, "learning_rate": 0.00011648409345287691, "loss": 0.016, "num_input_tokens_seen": 211696544, "step": 98135 }, { "epoch": 16.00978792822186, "grad_norm": 0.03988833725452423, "learning_rate": 0.00011643842771947588, "loss": 0.0039, "num_input_tokens_seen": 211708352, "step": 98140 }, { "epoch": 16.010603588907014, "grad_norm": 0.07157090306282043, "learning_rate": 0.00011639276975943641, "loss": 0.0096, "num_input_tokens_seen": 211717792, "step": 98145 }, { "epoch": 16.01141924959217, "grad_norm": 0.06820016354322433, "learning_rate": 0.00011634711957368438, "loss": 0.0072, "num_input_tokens_seen": 211727680, "step": 98150 }, { "epoch": 16.012234910277325, "grad_norm": 0.07080288231372833, "learning_rate": 0.00011630147716314443, "loss": 0.0098, "num_input_tokens_seen": 211737760, "step": 98155 }, { "epoch": 16.01305057096248, "grad_norm": 0.1349984109401703, "learning_rate": 0.00011625584252874189, "loss": 0.0157, "num_input_tokens_seen": 211748320, "step": 98160 }, { "epoch": 16.013866231647636, "grad_norm": 0.0020269739907234907, "learning_rate": 0.00011621021567140156, "loss": 0.0042, "num_input_tokens_seen": 211760192, "step": 98165 }, { "epoch": 16.01468189233279, "grad_norm": 0.023259898647665977, "learning_rate": 0.00011616459659204803, "loss": 0.0033, "num_input_tokens_seen": 211770112, "step": 98170 }, { "epoch": 16.015497553017944, "grad_norm": 0.0028729154728353024, "learning_rate": 0.00011611898529160591, "loss": 0.0071, "num_input_tokens_seen": 211779968, "step": 98175 }, { "epoch": 16.0163132137031, "grad_norm": 0.02291432023048401, "learning_rate": 0.00011607338177099952, "loss": 0.0463, "num_input_tokens_seen": 211789760, "step": 98180 }, { "epoch": 16.017128874388256, "grad_norm": 0.0037261280231177807, "learning_rate": 0.00011602778603115311, "loss": 0.0028, "num_input_tokens_seen": 211801504, "step": 98185 }, { "epoch": 16.017944535073408, "grad_norm": 1.0579633712768555, "learning_rate": 0.00011598219807299076, "loss": 0.0356, "num_input_tokens_seen": 211812160, "step": 98190 }, { "epoch": 16.018760195758563, "grad_norm": 0.015229511074721813, "learning_rate": 0.00011593661789743626, "loss": 0.0378, "num_input_tokens_seen": 211823008, "step": 98195 }, { "epoch": 16.01957585644372, "grad_norm": 0.035843051970005035, "learning_rate": 0.00011589104550541346, "loss": 0.0031, "num_input_tokens_seen": 211832640, "step": 98200 }, { "epoch": 16.020391517128875, "grad_norm": 0.03807327523827553, "learning_rate": 0.00011584548089784585, "loss": 0.1038, "num_input_tokens_seen": 211843648, "step": 98205 }, { "epoch": 16.02120717781403, "grad_norm": 0.0585746243596077, "learning_rate": 0.00011579992407565698, "loss": 0.0035, "num_input_tokens_seen": 211854976, "step": 98210 }, { "epoch": 16.022022838499183, "grad_norm": 0.006157930940389633, "learning_rate": 0.00011575437503976998, "loss": 0.005, "num_input_tokens_seen": 211866144, "step": 98215 }, { "epoch": 16.02283849918434, "grad_norm": 0.0419309064745903, "learning_rate": 0.00011570883379110803, "loss": 0.0032, "num_input_tokens_seen": 211876640, "step": 98220 }, { "epoch": 16.023654159869494, "grad_norm": 0.022299962118268013, "learning_rate": 0.00011566330033059407, "loss": 0.0916, "num_input_tokens_seen": 211887968, "step": 98225 }, { "epoch": 16.02446982055465, "grad_norm": 0.035330675542354584, "learning_rate": 0.00011561777465915091, "loss": 0.0146, "num_input_tokens_seen": 211897952, "step": 98230 }, { "epoch": 16.025285481239806, "grad_norm": 0.29575732350349426, "learning_rate": 0.00011557225677770116, "loss": 0.0461, "num_input_tokens_seen": 211908064, "step": 98235 }, { "epoch": 16.026101141924958, "grad_norm": 0.22786028683185577, "learning_rate": 0.00011552674668716723, "loss": 0.0105, "num_input_tokens_seen": 211919360, "step": 98240 }, { "epoch": 16.026916802610113, "grad_norm": 0.020476827397942543, "learning_rate": 0.00011548124438847174, "loss": 0.0016, "num_input_tokens_seen": 211929472, "step": 98245 }, { "epoch": 16.02773246329527, "grad_norm": 0.001144716516137123, "learning_rate": 0.0001154357498825363, "loss": 0.0047, "num_input_tokens_seen": 211940096, "step": 98250 }, { "epoch": 16.028548123980425, "grad_norm": 0.05208508297801018, "learning_rate": 0.00011539026317028361, "loss": 0.0049, "num_input_tokens_seen": 211951200, "step": 98255 }, { "epoch": 16.02936378466558, "grad_norm": 0.009101410396397114, "learning_rate": 0.00011534478425263484, "loss": 0.0176, "num_input_tokens_seen": 211961696, "step": 98260 }, { "epoch": 16.030179445350733, "grad_norm": 0.0007650189218111336, "learning_rate": 0.00011529931313051222, "loss": 0.001, "num_input_tokens_seen": 211971840, "step": 98265 }, { "epoch": 16.03099510603589, "grad_norm": 0.0010098203783854842, "learning_rate": 0.00011525384980483683, "loss": 0.018, "num_input_tokens_seen": 211982432, "step": 98270 }, { "epoch": 16.031810766721044, "grad_norm": 0.03951151296496391, "learning_rate": 0.00011520839427653052, "loss": 0.0024, "num_input_tokens_seen": 211993952, "step": 98275 }, { "epoch": 16.0326264274062, "grad_norm": 0.019800737500190735, "learning_rate": 0.00011516294654651393, "loss": 0.0079, "num_input_tokens_seen": 212004768, "step": 98280 }, { "epoch": 16.033442088091356, "grad_norm": 0.004405012354254723, "learning_rate": 0.00011511750661570875, "loss": 0.039, "num_input_tokens_seen": 212014592, "step": 98285 }, { "epoch": 16.034257748776508, "grad_norm": 0.9602201581001282, "learning_rate": 0.00011507207448503526, "loss": 0.0109, "num_input_tokens_seen": 212025952, "step": 98290 }, { "epoch": 16.035073409461663, "grad_norm": 0.005322612822055817, "learning_rate": 0.00011502665015541481, "loss": 0.024, "num_input_tokens_seen": 212035392, "step": 98295 }, { "epoch": 16.03588907014682, "grad_norm": 0.030692705884575844, "learning_rate": 0.0001149812336277673, "loss": 0.0195, "num_input_tokens_seen": 212045888, "step": 98300 }, { "epoch": 16.036704730831975, "grad_norm": 0.08591753989458084, "learning_rate": 0.00011493582490301374, "loss": 0.013, "num_input_tokens_seen": 212055488, "step": 98305 }, { "epoch": 16.03752039151713, "grad_norm": 0.0017643326427787542, "learning_rate": 0.00011489042398207416, "loss": 0.0152, "num_input_tokens_seen": 212067200, "step": 98310 }, { "epoch": 16.038336052202283, "grad_norm": 0.002909943927079439, "learning_rate": 0.00011484503086586867, "loss": 0.1149, "num_input_tokens_seen": 212077632, "step": 98315 }, { "epoch": 16.03915171288744, "grad_norm": 0.02796890400350094, "learning_rate": 0.00011479964555531725, "loss": 0.0028, "num_input_tokens_seen": 212088768, "step": 98320 }, { "epoch": 16.039967373572594, "grad_norm": 0.0020413952879607677, "learning_rate": 0.00011475426805133965, "loss": 0.0026, "num_input_tokens_seen": 212098880, "step": 98325 }, { "epoch": 16.04078303425775, "grad_norm": 0.0013046185486018658, "learning_rate": 0.00011470889835485554, "loss": 0.0025, "num_input_tokens_seen": 212110976, "step": 98330 }, { "epoch": 16.041598694942905, "grad_norm": 0.002638821257278323, "learning_rate": 0.0001146635364667844, "loss": 0.1091, "num_input_tokens_seen": 212123040, "step": 98335 }, { "epoch": 16.042414355628058, "grad_norm": 0.16568031907081604, "learning_rate": 0.0001146181823880455, "loss": 0.0059, "num_input_tokens_seen": 212134048, "step": 98340 }, { "epoch": 16.043230016313213, "grad_norm": 0.010371629148721695, "learning_rate": 0.00011457283611955804, "loss": 0.0044, "num_input_tokens_seen": 212144384, "step": 98345 }, { "epoch": 16.04404567699837, "grad_norm": 0.006838376168161631, "learning_rate": 0.00011452749766224102, "loss": 0.0033, "num_input_tokens_seen": 212154624, "step": 98350 }, { "epoch": 16.044861337683525, "grad_norm": 0.001064163283444941, "learning_rate": 0.00011448216701701309, "loss": 0.0146, "num_input_tokens_seen": 212165472, "step": 98355 }, { "epoch": 16.045676998368677, "grad_norm": 0.001959593966603279, "learning_rate": 0.00011443684418479344, "loss": 0.0069, "num_input_tokens_seen": 212176032, "step": 98360 }, { "epoch": 16.046492659053833, "grad_norm": 0.00421307235956192, "learning_rate": 0.00011439152916649992, "loss": 0.0032, "num_input_tokens_seen": 212187232, "step": 98365 }, { "epoch": 16.04730831973899, "grad_norm": 0.0011043678969144821, "learning_rate": 0.00011434622196305156, "loss": 0.005, "num_input_tokens_seen": 212197728, "step": 98370 }, { "epoch": 16.048123980424144, "grad_norm": 0.00984360370784998, "learning_rate": 0.00011430092257536596, "loss": 0.0835, "num_input_tokens_seen": 212207200, "step": 98375 }, { "epoch": 16.0489396411093, "grad_norm": 0.02689846232533455, "learning_rate": 0.00011425563100436175, "loss": 0.0025, "num_input_tokens_seen": 212217600, "step": 98380 }, { "epoch": 16.049755301794452, "grad_norm": 0.001654561492614448, "learning_rate": 0.00011421034725095625, "loss": 0.0023, "num_input_tokens_seen": 212228768, "step": 98385 }, { "epoch": 16.050570962479608, "grad_norm": 0.020587975159287453, "learning_rate": 0.00011416507131606773, "loss": 0.0028, "num_input_tokens_seen": 212239488, "step": 98390 }, { "epoch": 16.051386623164763, "grad_norm": 0.029053471982479095, "learning_rate": 0.00011411980320061322, "loss": 0.0272, "num_input_tokens_seen": 212250848, "step": 98395 }, { "epoch": 16.05220228384992, "grad_norm": 0.0015529135707765818, "learning_rate": 0.00011407454290551073, "loss": 0.0189, "num_input_tokens_seen": 212262688, "step": 98400 }, { "epoch": 16.053017944535075, "grad_norm": 0.005250139161944389, "learning_rate": 0.00011402929043167692, "loss": 0.0025, "num_input_tokens_seen": 212273504, "step": 98405 }, { "epoch": 16.053833605220227, "grad_norm": 0.01583044044673443, "learning_rate": 0.00011398404578002946, "loss": 0.0095, "num_input_tokens_seen": 212285056, "step": 98410 }, { "epoch": 16.054649265905383, "grad_norm": 0.010609490796923637, "learning_rate": 0.00011393880895148473, "loss": 0.0545, "num_input_tokens_seen": 212295040, "step": 98415 }, { "epoch": 16.05546492659054, "grad_norm": 0.0019070047419518232, "learning_rate": 0.00011389357994696003, "loss": 0.017, "num_input_tokens_seen": 212305376, "step": 98420 }, { "epoch": 16.056280587275694, "grad_norm": 0.0015133727574720979, "learning_rate": 0.00011384835876737154, "loss": 0.0763, "num_input_tokens_seen": 212316416, "step": 98425 }, { "epoch": 16.05709624796085, "grad_norm": 0.0030139454174786806, "learning_rate": 0.00011380314541363612, "loss": 0.0101, "num_input_tokens_seen": 212327488, "step": 98430 }, { "epoch": 16.057911908646002, "grad_norm": 0.22676359117031097, "learning_rate": 0.00011375793988666966, "loss": 0.0061, "num_input_tokens_seen": 212337152, "step": 98435 }, { "epoch": 16.058727569331158, "grad_norm": 0.0027484793681651354, "learning_rate": 0.0001137127421873888, "loss": 0.0045, "num_input_tokens_seen": 212347296, "step": 98440 }, { "epoch": 16.059543230016313, "grad_norm": 0.4476744830608368, "learning_rate": 0.000113667552316709, "loss": 0.0125, "num_input_tokens_seen": 212358784, "step": 98445 }, { "epoch": 16.06035889070147, "grad_norm": 0.0030025746673345566, "learning_rate": 0.00011362237027554645, "loss": 0.0816, "num_input_tokens_seen": 212369824, "step": 98450 }, { "epoch": 16.061174551386625, "grad_norm": 0.4363758862018585, "learning_rate": 0.00011357719606481675, "loss": 0.0123, "num_input_tokens_seen": 212381856, "step": 98455 }, { "epoch": 16.061990212071777, "grad_norm": 0.02422868274152279, "learning_rate": 0.00011353202968543535, "loss": 0.0118, "num_input_tokens_seen": 212392576, "step": 98460 }, { "epoch": 16.062805872756933, "grad_norm": 0.0016975492471829057, "learning_rate": 0.00011348687113831768, "loss": 0.0028, "num_input_tokens_seen": 212403744, "step": 98465 }, { "epoch": 16.063621533442088, "grad_norm": 0.012342218309640884, "learning_rate": 0.00011344172042437889, "loss": 0.0016, "num_input_tokens_seen": 212415296, "step": 98470 }, { "epoch": 16.064437194127244, "grad_norm": 0.007630425039678812, "learning_rate": 0.00011339657754453398, "loss": 0.0034, "num_input_tokens_seen": 212426080, "step": 98475 }, { "epoch": 16.0652528548124, "grad_norm": 0.001513462164439261, "learning_rate": 0.00011335144249969793, "loss": 0.028, "num_input_tokens_seen": 212437248, "step": 98480 }, { "epoch": 16.06606851549755, "grad_norm": 0.09210729598999023, "learning_rate": 0.00011330631529078533, "loss": 0.0829, "num_input_tokens_seen": 212448192, "step": 98485 }, { "epoch": 16.066884176182707, "grad_norm": 0.001121697248890996, "learning_rate": 0.00011326119591871087, "loss": 0.0433, "num_input_tokens_seen": 212459680, "step": 98490 }, { "epoch": 16.067699836867863, "grad_norm": 0.01716712862253189, "learning_rate": 0.00011321608438438885, "loss": 0.0032, "num_input_tokens_seen": 212469632, "step": 98495 }, { "epoch": 16.06851549755302, "grad_norm": 0.058946046978235245, "learning_rate": 0.00011317098068873339, "loss": 0.0485, "num_input_tokens_seen": 212481056, "step": 98500 }, { "epoch": 16.069331158238175, "grad_norm": 0.005601988639682531, "learning_rate": 0.000113125884832659, "loss": 0.0019, "num_input_tokens_seen": 212490752, "step": 98505 }, { "epoch": 16.070146818923327, "grad_norm": 0.07200689613819122, "learning_rate": 0.00011308079681707911, "loss": 0.0049, "num_input_tokens_seen": 212500672, "step": 98510 }, { "epoch": 16.070962479608482, "grad_norm": 0.00815859530121088, "learning_rate": 0.00011303571664290801, "loss": 0.0097, "num_input_tokens_seen": 212511680, "step": 98515 }, { "epoch": 16.071778140293638, "grad_norm": 0.003553292015567422, "learning_rate": 0.0001129906443110587, "loss": 0.0022, "num_input_tokens_seen": 212521696, "step": 98520 }, { "epoch": 16.072593800978794, "grad_norm": 0.052878763526678085, "learning_rate": 0.0001129455798224452, "loss": 0.0586, "num_input_tokens_seen": 212532000, "step": 98525 }, { "epoch": 16.07340946166395, "grad_norm": 0.018652718514204025, "learning_rate": 0.00011290052317798027, "loss": 0.0062, "num_input_tokens_seen": 212543200, "step": 98530 }, { "epoch": 16.0742251223491, "grad_norm": 0.005968988873064518, "learning_rate": 0.00011285547437857763, "loss": 0.0565, "num_input_tokens_seen": 212553280, "step": 98535 }, { "epoch": 16.075040783034257, "grad_norm": 0.005674042273312807, "learning_rate": 0.00011281043342514957, "loss": 0.001, "num_input_tokens_seen": 212563776, "step": 98540 }, { "epoch": 16.075856443719413, "grad_norm": 0.0037803344894200563, "learning_rate": 0.0001127654003186096, "loss": 0.0193, "num_input_tokens_seen": 212575296, "step": 98545 }, { "epoch": 16.07667210440457, "grad_norm": 0.022578131407499313, "learning_rate": 0.00011272037505986976, "loss": 0.0029, "num_input_tokens_seen": 212586784, "step": 98550 }, { "epoch": 16.07748776508972, "grad_norm": 0.1368393748998642, "learning_rate": 0.00011267535764984293, "loss": 0.0128, "num_input_tokens_seen": 212597376, "step": 98555 }, { "epoch": 16.078303425774877, "grad_norm": 0.0061044637113809586, "learning_rate": 0.00011263034808944134, "loss": 0.002, "num_input_tokens_seen": 212607328, "step": 98560 }, { "epoch": 16.079119086460032, "grad_norm": 0.007938819006085396, "learning_rate": 0.00011258534637957718, "loss": 0.1201, "num_input_tokens_seen": 212618464, "step": 98565 }, { "epoch": 16.079934747145188, "grad_norm": 0.2532525360584259, "learning_rate": 0.0001125403525211624, "loss": 0.0112, "num_input_tokens_seen": 212628064, "step": 98570 }, { "epoch": 16.080750407830344, "grad_norm": 0.026719998568296432, "learning_rate": 0.00011249536651510894, "loss": 0.0045, "num_input_tokens_seen": 212638816, "step": 98575 }, { "epoch": 16.081566068515496, "grad_norm": 0.020720263943076134, "learning_rate": 0.00011245038836232846, "loss": 0.0111, "num_input_tokens_seen": 212649120, "step": 98580 }, { "epoch": 16.08238172920065, "grad_norm": 0.11096493899822235, "learning_rate": 0.0001124054180637325, "loss": 0.0365, "num_input_tokens_seen": 212659936, "step": 98585 }, { "epoch": 16.083197389885807, "grad_norm": 0.0662073865532875, "learning_rate": 0.00011236045562023245, "loss": 0.0065, "num_input_tokens_seen": 212671040, "step": 98590 }, { "epoch": 16.084013050570963, "grad_norm": 0.04565121978521347, "learning_rate": 0.00011231550103273952, "loss": 0.0086, "num_input_tokens_seen": 212681312, "step": 98595 }, { "epoch": 16.08482871125612, "grad_norm": 0.017612259835004807, "learning_rate": 0.00011227055430216476, "loss": 0.0611, "num_input_tokens_seen": 212691904, "step": 98600 }, { "epoch": 16.08564437194127, "grad_norm": 0.011116662994027138, "learning_rate": 0.00011222561542941906, "loss": 0.0022, "num_input_tokens_seen": 212702592, "step": 98605 }, { "epoch": 16.086460032626427, "grad_norm": 0.0011180120054632425, "learning_rate": 0.00011218068441541323, "loss": 0.0051, "num_input_tokens_seen": 212713632, "step": 98610 }, { "epoch": 16.087275693311582, "grad_norm": 0.09455166012048721, "learning_rate": 0.0001121357612610578, "loss": 0.0032, "num_input_tokens_seen": 212723904, "step": 98615 }, { "epoch": 16.088091353996738, "grad_norm": 0.012023817747831345, "learning_rate": 0.0001120908459672632, "loss": 0.0019, "num_input_tokens_seen": 212734560, "step": 98620 }, { "epoch": 16.088907014681894, "grad_norm": 0.1238924115896225, "learning_rate": 0.00011204593853493978, "loss": 0.0606, "num_input_tokens_seen": 212745632, "step": 98625 }, { "epoch": 16.089722675367046, "grad_norm": 0.34778541326522827, "learning_rate": 0.00011200103896499748, "loss": 0.0264, "num_input_tokens_seen": 212756672, "step": 98630 }, { "epoch": 16.0905383360522, "grad_norm": 0.001408744021318853, "learning_rate": 0.00011195614725834636, "loss": 0.0049, "num_input_tokens_seen": 212767392, "step": 98635 }, { "epoch": 16.091353996737357, "grad_norm": 0.013653202913701534, "learning_rate": 0.0001119112634158962, "loss": 0.0023, "num_input_tokens_seen": 212778624, "step": 98640 }, { "epoch": 16.092169657422513, "grad_norm": 0.12523357570171356, "learning_rate": 0.00011186638743855643, "loss": 0.0087, "num_input_tokens_seen": 212789824, "step": 98645 }, { "epoch": 16.09298531810767, "grad_norm": 0.00870191864669323, "learning_rate": 0.00011182151932723706, "loss": 0.0037, "num_input_tokens_seen": 212800352, "step": 98650 }, { "epoch": 16.09380097879282, "grad_norm": 0.02469947375357151, "learning_rate": 0.00011177665908284667, "loss": 0.0075, "num_input_tokens_seen": 212812096, "step": 98655 }, { "epoch": 16.094616639477977, "grad_norm": 0.0050477394834160805, "learning_rate": 0.00011173180670629496, "loss": 0.0037, "num_input_tokens_seen": 212822080, "step": 98660 }, { "epoch": 16.095432300163132, "grad_norm": 0.010724669322371483, "learning_rate": 0.00011168696219849078, "loss": 0.0348, "num_input_tokens_seen": 212832320, "step": 98665 }, { "epoch": 16.096247960848288, "grad_norm": 0.14807988703250885, "learning_rate": 0.00011164212556034287, "loss": 0.0053, "num_input_tokens_seen": 212842848, "step": 98670 }, { "epoch": 16.097063621533444, "grad_norm": 0.015191467478871346, "learning_rate": 0.00011159729679275999, "loss": 0.0042, "num_input_tokens_seen": 212854656, "step": 98675 }, { "epoch": 16.097879282218596, "grad_norm": 0.06117101013660431, "learning_rate": 0.00011155247589665057, "loss": 0.0073, "num_input_tokens_seen": 212866688, "step": 98680 }, { "epoch": 16.09869494290375, "grad_norm": 0.02610846422612667, "learning_rate": 0.00011150766287292302, "loss": 0.0042, "num_input_tokens_seen": 212876160, "step": 98685 }, { "epoch": 16.099510603588907, "grad_norm": 0.01927991397678852, "learning_rate": 0.00011146285772248555, "loss": 0.0013, "num_input_tokens_seen": 212887680, "step": 98690 }, { "epoch": 16.100326264274063, "grad_norm": 0.49013498425483704, "learning_rate": 0.00011141806044624614, "loss": 0.0202, "num_input_tokens_seen": 212898240, "step": 98695 }, { "epoch": 16.10114192495922, "grad_norm": 0.004540109541267157, "learning_rate": 0.00011137327104511268, "loss": 0.0294, "num_input_tokens_seen": 212908832, "step": 98700 }, { "epoch": 16.10195758564437, "grad_norm": 0.00426593329757452, "learning_rate": 0.00011132848951999286, "loss": 0.004, "num_input_tokens_seen": 212919456, "step": 98705 }, { "epoch": 16.102773246329527, "grad_norm": 0.0026505249552428722, "learning_rate": 0.00011128371587179431, "loss": 0.0156, "num_input_tokens_seen": 212929696, "step": 98710 }, { "epoch": 16.103588907014682, "grad_norm": 0.2665514349937439, "learning_rate": 0.00011123895010142437, "loss": 0.0077, "num_input_tokens_seen": 212940320, "step": 98715 }, { "epoch": 16.104404567699838, "grad_norm": 0.006377922371029854, "learning_rate": 0.00011119419220979033, "loss": 0.0039, "num_input_tokens_seen": 212951200, "step": 98720 }, { "epoch": 16.10522022838499, "grad_norm": 0.003125737654045224, "learning_rate": 0.00011114944219779916, "loss": 0.003, "num_input_tokens_seen": 212962592, "step": 98725 }, { "epoch": 16.106035889070146, "grad_norm": 0.1174820140004158, "learning_rate": 0.00011110470006635781, "loss": 0.0178, "num_input_tokens_seen": 212973696, "step": 98730 }, { "epoch": 16.1068515497553, "grad_norm": 0.0018968537915498018, "learning_rate": 0.00011105996581637312, "loss": 0.0031, "num_input_tokens_seen": 212984448, "step": 98735 }, { "epoch": 16.107667210440457, "grad_norm": 0.0030298312194645405, "learning_rate": 0.00011101523944875163, "loss": 0.0017, "num_input_tokens_seen": 212994720, "step": 98740 }, { "epoch": 16.108482871125613, "grad_norm": 0.03181707113981247, "learning_rate": 0.00011097052096439974, "loss": 0.0029, "num_input_tokens_seen": 213004928, "step": 98745 }, { "epoch": 16.109298531810765, "grad_norm": 0.08727334439754486, "learning_rate": 0.00011092581036422378, "loss": 0.0036, "num_input_tokens_seen": 213016416, "step": 98750 }, { "epoch": 16.11011419249592, "grad_norm": 0.15821963548660278, "learning_rate": 0.00011088110764912984, "loss": 0.082, "num_input_tokens_seen": 213026304, "step": 98755 }, { "epoch": 16.110929853181077, "grad_norm": 0.06720523536205292, "learning_rate": 0.00011083641282002387, "loss": 0.0081, "num_input_tokens_seen": 213037280, "step": 98760 }, { "epoch": 16.111745513866232, "grad_norm": 0.004281187895685434, "learning_rate": 0.00011079172587781172, "loss": 0.0032, "num_input_tokens_seen": 213048416, "step": 98765 }, { "epoch": 16.112561174551388, "grad_norm": 0.03786589950323105, "learning_rate": 0.00011074704682339897, "loss": 0.0068, "num_input_tokens_seen": 213059680, "step": 98770 }, { "epoch": 16.11337683523654, "grad_norm": 0.022931082174181938, "learning_rate": 0.00011070237565769097, "loss": 0.0017, "num_input_tokens_seen": 213070368, "step": 98775 }, { "epoch": 16.114192495921696, "grad_norm": 0.0015584161737933755, "learning_rate": 0.0001106577123815935, "loss": 0.0078, "num_input_tokens_seen": 213080608, "step": 98780 }, { "epoch": 16.11500815660685, "grad_norm": 0.01906164549291134, "learning_rate": 0.0001106130569960111, "loss": 0.0053, "num_input_tokens_seen": 213092032, "step": 98785 }, { "epoch": 16.115823817292007, "grad_norm": 0.04040228947997093, "learning_rate": 0.00011056840950184921, "loss": 0.0099, "num_input_tokens_seen": 213103360, "step": 98790 }, { "epoch": 16.116639477977163, "grad_norm": 0.02088017575442791, "learning_rate": 0.00011052376990001256, "loss": 0.0034, "num_input_tokens_seen": 213115168, "step": 98795 }, { "epoch": 16.117455138662315, "grad_norm": 0.0036294125020503998, "learning_rate": 0.00011047913819140576, "loss": 0.0784, "num_input_tokens_seen": 213126208, "step": 98800 }, { "epoch": 16.11827079934747, "grad_norm": 0.029621530324220657, "learning_rate": 0.00011043451437693342, "loss": 0.0105, "num_input_tokens_seen": 213137632, "step": 98805 }, { "epoch": 16.119086460032626, "grad_norm": 0.010897582396864891, "learning_rate": 0.00011038989845749981, "loss": 0.0038, "num_input_tokens_seen": 213149056, "step": 98810 }, { "epoch": 16.119902120717782, "grad_norm": 0.01271396316587925, "learning_rate": 0.00011034529043400915, "loss": 0.008, "num_input_tokens_seen": 213160704, "step": 98815 }, { "epoch": 16.120717781402938, "grad_norm": 0.0027830717153847218, "learning_rate": 0.00011030069030736551, "loss": 0.1097, "num_input_tokens_seen": 213171424, "step": 98820 }, { "epoch": 16.12153344208809, "grad_norm": 0.035669613629579544, "learning_rate": 0.0001102560980784727, "loss": 0.003, "num_input_tokens_seen": 213181664, "step": 98825 }, { "epoch": 16.122349102773246, "grad_norm": 0.006291791796684265, "learning_rate": 0.00011021151374823457, "loss": 0.0021, "num_input_tokens_seen": 213192928, "step": 98830 }, { "epoch": 16.1231647634584, "grad_norm": 0.0034950568806380033, "learning_rate": 0.00011016693731755456, "loss": 0.0036, "num_input_tokens_seen": 213205088, "step": 98835 }, { "epoch": 16.123980424143557, "grad_norm": 0.00854918546974659, "learning_rate": 0.00011012236878733606, "loss": 0.0035, "num_input_tokens_seen": 213215904, "step": 98840 }, { "epoch": 16.124796084828713, "grad_norm": 0.0064804647117853165, "learning_rate": 0.00011007780815848239, "loss": 0.1154, "num_input_tokens_seen": 213225504, "step": 98845 }, { "epoch": 16.125611745513865, "grad_norm": 0.045591238886117935, "learning_rate": 0.00011003325543189663, "loss": 0.005, "num_input_tokens_seen": 213236864, "step": 98850 }, { "epoch": 16.12642740619902, "grad_norm": 0.001109926961362362, "learning_rate": 0.0001099887106084816, "loss": 0.0171, "num_input_tokens_seen": 213248000, "step": 98855 }, { "epoch": 16.127243066884176, "grad_norm": 0.0029370267875492573, "learning_rate": 0.00010994417368914011, "loss": 0.0351, "num_input_tokens_seen": 213258496, "step": 98860 }, { "epoch": 16.128058727569332, "grad_norm": 0.017867419868707657, "learning_rate": 0.00010989964467477481, "loss": 0.0262, "num_input_tokens_seen": 213268384, "step": 98865 }, { "epoch": 16.128874388254488, "grad_norm": 0.0008110353955999017, "learning_rate": 0.00010985512356628807, "loss": 0.0017, "num_input_tokens_seen": 213280192, "step": 98870 }, { "epoch": 16.12969004893964, "grad_norm": 0.010616148822009563, "learning_rate": 0.00010981061036458218, "loss": 0.0041, "num_input_tokens_seen": 213291456, "step": 98875 }, { "epoch": 16.130505709624796, "grad_norm": 0.09281786531209946, "learning_rate": 0.00010976610507055906, "loss": 0.0097, "num_input_tokens_seen": 213301696, "step": 98880 }, { "epoch": 16.13132137030995, "grad_norm": 0.029799096286296844, "learning_rate": 0.00010972160768512123, "loss": 0.0092, "num_input_tokens_seen": 213310720, "step": 98885 }, { "epoch": 16.132137030995107, "grad_norm": 0.000842552341055125, "learning_rate": 0.00010967711820916982, "loss": 0.0013, "num_input_tokens_seen": 213322176, "step": 98890 }, { "epoch": 16.13295269168026, "grad_norm": 0.005548515822738409, "learning_rate": 0.00010963263664360706, "loss": 0.0014, "num_input_tokens_seen": 213331680, "step": 98895 }, { "epoch": 16.133768352365415, "grad_norm": 0.05913207679986954, "learning_rate": 0.00010958816298933383, "loss": 0.0017, "num_input_tokens_seen": 213344096, "step": 98900 }, { "epoch": 16.13458401305057, "grad_norm": 0.019280293956398964, "learning_rate": 0.00010954369724725205, "loss": 0.0081, "num_input_tokens_seen": 213354432, "step": 98905 }, { "epoch": 16.135399673735726, "grad_norm": 0.014653909020125866, "learning_rate": 0.00010949923941826229, "loss": 0.0101, "num_input_tokens_seen": 213364384, "step": 98910 }, { "epoch": 16.136215334420882, "grad_norm": 0.10136594623327255, "learning_rate": 0.0001094547895032661, "loss": 0.0713, "num_input_tokens_seen": 213374848, "step": 98915 }, { "epoch": 16.137030995106034, "grad_norm": 0.016417449340224266, "learning_rate": 0.00010941034750316375, "loss": 0.0441, "num_input_tokens_seen": 213385472, "step": 98920 }, { "epoch": 16.13784665579119, "grad_norm": 0.006173157598823309, "learning_rate": 0.00010936591341885648, "loss": 0.0012, "num_input_tokens_seen": 213397248, "step": 98925 }, { "epoch": 16.138662316476346, "grad_norm": 0.035272568464279175, "learning_rate": 0.0001093214872512443, "loss": 0.0029, "num_input_tokens_seen": 213407296, "step": 98930 }, { "epoch": 16.1394779771615, "grad_norm": 0.0009733866318129003, "learning_rate": 0.00010927706900122791, "loss": 0.0016, "num_input_tokens_seen": 213416352, "step": 98935 }, { "epoch": 16.140293637846657, "grad_norm": 0.011634573340415955, "learning_rate": 0.00010923265866970739, "loss": 0.0102, "num_input_tokens_seen": 213427104, "step": 98940 }, { "epoch": 16.14110929853181, "grad_norm": 0.008579501882195473, "learning_rate": 0.00010918825625758273, "loss": 0.0022, "num_input_tokens_seen": 213436448, "step": 98945 }, { "epoch": 16.141924959216965, "grad_norm": 0.021616380661725998, "learning_rate": 0.00010914386176575386, "loss": 0.0023, "num_input_tokens_seen": 213447328, "step": 98950 }, { "epoch": 16.14274061990212, "grad_norm": 0.0032613142393529415, "learning_rate": 0.00010909947519512048, "loss": 0.0075, "num_input_tokens_seen": 213457824, "step": 98955 }, { "epoch": 16.143556280587276, "grad_norm": 0.4297551214694977, "learning_rate": 0.00010905509654658208, "loss": 0.0197, "num_input_tokens_seen": 213468928, "step": 98960 }, { "epoch": 16.144371941272432, "grad_norm": 0.007163587026298046, "learning_rate": 0.00010901072582103816, "loss": 0.0035, "num_input_tokens_seen": 213479904, "step": 98965 }, { "epoch": 16.145187601957584, "grad_norm": 0.006671950686722994, "learning_rate": 0.00010896636301938784, "loss": 0.0027, "num_input_tokens_seen": 213488512, "step": 98970 }, { "epoch": 16.14600326264274, "grad_norm": 0.005038075149059296, "learning_rate": 0.00010892200814253023, "loss": 0.0031, "num_input_tokens_seen": 213500352, "step": 98975 }, { "epoch": 16.146818923327896, "grad_norm": 0.058990173041820526, "learning_rate": 0.00010887766119136427, "loss": 0.0048, "num_input_tokens_seen": 213511136, "step": 98980 }, { "epoch": 16.14763458401305, "grad_norm": 0.039940159767866135, "learning_rate": 0.00010883332216678853, "loss": 0.0053, "num_input_tokens_seen": 213522144, "step": 98985 }, { "epoch": 16.148450244698207, "grad_norm": 0.0576813779771328, "learning_rate": 0.00010878899106970203, "loss": 0.0649, "num_input_tokens_seen": 213532224, "step": 98990 }, { "epoch": 16.14926590538336, "grad_norm": 0.0011376934126019478, "learning_rate": 0.00010874466790100268, "loss": 0.0007, "num_input_tokens_seen": 213542240, "step": 98995 }, { "epoch": 16.150081566068515, "grad_norm": 0.003912642132490873, "learning_rate": 0.00010870035266158918, "loss": 0.001, "num_input_tokens_seen": 213551744, "step": 99000 }, { "epoch": 16.15089722675367, "grad_norm": 0.001231255941092968, "learning_rate": 0.00010865604535235918, "loss": 0.0056, "num_input_tokens_seen": 213561600, "step": 99005 }, { "epoch": 16.151712887438826, "grad_norm": 0.037317562848329544, "learning_rate": 0.0001086117459742112, "loss": 0.0018, "num_input_tokens_seen": 213573184, "step": 99010 }, { "epoch": 16.152528548123982, "grad_norm": 0.0021097564604133368, "learning_rate": 0.00010856745452804234, "loss": 0.0019, "num_input_tokens_seen": 213584640, "step": 99015 }, { "epoch": 16.153344208809134, "grad_norm": 0.10271336138248444, "learning_rate": 0.0001085231710147509, "loss": 0.0062, "num_input_tokens_seen": 213595488, "step": 99020 }, { "epoch": 16.15415986949429, "grad_norm": 0.008951042778789997, "learning_rate": 0.00010847889543523376, "loss": 0.0037, "num_input_tokens_seen": 213605856, "step": 99025 }, { "epoch": 16.154975530179446, "grad_norm": 0.27626246213912964, "learning_rate": 0.00010843462779038876, "loss": 0.0043, "num_input_tokens_seen": 213617760, "step": 99030 }, { "epoch": 16.1557911908646, "grad_norm": 0.001341187977232039, "learning_rate": 0.00010839036808111246, "loss": 0.0066, "num_input_tokens_seen": 213628544, "step": 99035 }, { "epoch": 16.156606851549757, "grad_norm": 0.003060397459194064, "learning_rate": 0.00010834611630830244, "loss": 0.0068, "num_input_tokens_seen": 213639936, "step": 99040 }, { "epoch": 16.15742251223491, "grad_norm": 0.7268686890602112, "learning_rate": 0.00010830187247285489, "loss": 0.0508, "num_input_tokens_seen": 213650400, "step": 99045 }, { "epoch": 16.158238172920065, "grad_norm": 0.07604926824569702, "learning_rate": 0.00010825763657566717, "loss": 0.002, "num_input_tokens_seen": 213661088, "step": 99050 }, { "epoch": 16.15905383360522, "grad_norm": 0.003925354219973087, "learning_rate": 0.00010821340861763506, "loss": 0.0026, "num_input_tokens_seen": 213671744, "step": 99055 }, { "epoch": 16.159869494290376, "grad_norm": 0.043311867862939835, "learning_rate": 0.00010816918859965552, "loss": 0.0016, "num_input_tokens_seen": 213681408, "step": 99060 }, { "epoch": 16.160685154975532, "grad_norm": 0.009146024473011494, "learning_rate": 0.00010812497652262421, "loss": 0.0018, "num_input_tokens_seen": 213692704, "step": 99065 }, { "epoch": 16.161500815660684, "grad_norm": 0.01568761095404625, "learning_rate": 0.00010808077238743763, "loss": 0.023, "num_input_tokens_seen": 213703104, "step": 99070 }, { "epoch": 16.16231647634584, "grad_norm": 0.010364172048866749, "learning_rate": 0.00010803657619499107, "loss": 0.0049, "num_input_tokens_seen": 213714944, "step": 99075 }, { "epoch": 16.163132137030995, "grad_norm": 0.9936302900314331, "learning_rate": 0.00010799238794618077, "loss": 0.1279, "num_input_tokens_seen": 213726400, "step": 99080 }, { "epoch": 16.16394779771615, "grad_norm": 0.022238511592149734, "learning_rate": 0.00010794820764190194, "loss": 0.0023, "num_input_tokens_seen": 213737216, "step": 99085 }, { "epoch": 16.164763458401303, "grad_norm": 0.010459660552442074, "learning_rate": 0.00010790403528305004, "loss": 0.0086, "num_input_tokens_seen": 213749792, "step": 99090 }, { "epoch": 16.16557911908646, "grad_norm": 0.023092826828360558, "learning_rate": 0.0001078598708705203, "loss": 0.0024, "num_input_tokens_seen": 213760704, "step": 99095 }, { "epoch": 16.166394779771615, "grad_norm": 0.0017964895814657211, "learning_rate": 0.00010781571440520777, "loss": 0.0026, "num_input_tokens_seen": 213770912, "step": 99100 }, { "epoch": 16.16721044045677, "grad_norm": 0.005506117828190327, "learning_rate": 0.00010777156588800724, "loss": 0.0017, "num_input_tokens_seen": 213781088, "step": 99105 }, { "epoch": 16.168026101141926, "grad_norm": 1.1772229671478271, "learning_rate": 0.00010772742531981356, "loss": 0.0967, "num_input_tokens_seen": 213792160, "step": 99110 }, { "epoch": 16.16884176182708, "grad_norm": 1.1142627000808716, "learning_rate": 0.00010768329270152122, "loss": 0.0804, "num_input_tokens_seen": 213802560, "step": 99115 }, { "epoch": 16.169657422512234, "grad_norm": 0.10168357938528061, "learning_rate": 0.00010763916803402463, "loss": 0.0035, "num_input_tokens_seen": 213813728, "step": 99120 }, { "epoch": 16.17047308319739, "grad_norm": 0.0059912665747106075, "learning_rate": 0.00010759505131821806, "loss": 0.0024, "num_input_tokens_seen": 213822176, "step": 99125 }, { "epoch": 16.171288743882545, "grad_norm": 0.014900967478752136, "learning_rate": 0.00010755094255499542, "loss": 0.0069, "num_input_tokens_seen": 213833376, "step": 99130 }, { "epoch": 16.1721044045677, "grad_norm": 0.02179352380335331, "learning_rate": 0.00010750684174525111, "loss": 0.0079, "num_input_tokens_seen": 213844704, "step": 99135 }, { "epoch": 16.172920065252853, "grad_norm": 0.299467533826828, "learning_rate": 0.00010746274888987822, "loss": 0.1355, "num_input_tokens_seen": 213854560, "step": 99140 }, { "epoch": 16.17373572593801, "grad_norm": 0.020754924044013023, "learning_rate": 0.00010741866398977101, "loss": 0.0018, "num_input_tokens_seen": 213866304, "step": 99145 }, { "epoch": 16.174551386623165, "grad_norm": 0.025870757177472115, "learning_rate": 0.00010737458704582232, "loss": 0.0019, "num_input_tokens_seen": 213878176, "step": 99150 }, { "epoch": 16.17536704730832, "grad_norm": 0.06583768874406815, "learning_rate": 0.00010733051805892602, "loss": 0.0046, "num_input_tokens_seen": 213890176, "step": 99155 }, { "epoch": 16.176182707993476, "grad_norm": 0.0025543500669300556, "learning_rate": 0.00010728645702997458, "loss": 0.002, "num_input_tokens_seen": 213900416, "step": 99160 }, { "epoch": 16.17699836867863, "grad_norm": 0.006583359092473984, "learning_rate": 0.00010724240395986156, "loss": 0.0054, "num_input_tokens_seen": 213911232, "step": 99165 }, { "epoch": 16.177814029363784, "grad_norm": 0.024948759004473686, "learning_rate": 0.00010719835884947921, "loss": 0.0018, "num_input_tokens_seen": 213921824, "step": 99170 }, { "epoch": 16.17862969004894, "grad_norm": 0.0032384039368480444, "learning_rate": 0.00010715432169972067, "loss": 0.001, "num_input_tokens_seen": 213932608, "step": 99175 }, { "epoch": 16.179445350734095, "grad_norm": 0.05808654800057411, "learning_rate": 0.00010711029251147791, "loss": 0.0654, "num_input_tokens_seen": 213944000, "step": 99180 }, { "epoch": 16.18026101141925, "grad_norm": 0.005605350248515606, "learning_rate": 0.00010706627128564378, "loss": 0.0022, "num_input_tokens_seen": 213954720, "step": 99185 }, { "epoch": 16.181076672104403, "grad_norm": 0.0045409901067614555, "learning_rate": 0.00010702225802310983, "loss": 0.0022, "num_input_tokens_seen": 213964480, "step": 99190 }, { "epoch": 16.18189233278956, "grad_norm": 0.052849821746349335, "learning_rate": 0.00010697825272476847, "loss": 0.0044, "num_input_tokens_seen": 213974624, "step": 99195 }, { "epoch": 16.182707993474715, "grad_norm": 0.0035770973190665245, "learning_rate": 0.00010693425539151141, "loss": 0.0027, "num_input_tokens_seen": 213985280, "step": 99200 }, { "epoch": 16.18352365415987, "grad_norm": 0.006668663118034601, "learning_rate": 0.00010689026602423036, "loss": 0.0038, "num_input_tokens_seen": 213995680, "step": 99205 }, { "epoch": 16.184339314845026, "grad_norm": 1.0718663930892944, "learning_rate": 0.00010684628462381673, "loss": 0.0448, "num_input_tokens_seen": 214006176, "step": 99210 }, { "epoch": 16.18515497553018, "grad_norm": 0.22310368716716766, "learning_rate": 0.00010680231119116185, "loss": 0.0054, "num_input_tokens_seen": 214016992, "step": 99215 }, { "epoch": 16.185970636215334, "grad_norm": 0.015842298045754433, "learning_rate": 0.00010675834572715698, "loss": 0.0017, "num_input_tokens_seen": 214028288, "step": 99220 }, { "epoch": 16.18678629690049, "grad_norm": 0.09696901589632034, "learning_rate": 0.00010671438823269314, "loss": 0.1403, "num_input_tokens_seen": 214038880, "step": 99225 }, { "epoch": 16.187601957585645, "grad_norm": 0.011777103878557682, "learning_rate": 0.00010667043870866105, "loss": 0.0029, "num_input_tokens_seen": 214050080, "step": 99230 }, { "epoch": 16.1884176182708, "grad_norm": 0.04588920250535011, "learning_rate": 0.00010662649715595157, "loss": 0.0025, "num_input_tokens_seen": 214060512, "step": 99235 }, { "epoch": 16.189233278955953, "grad_norm": 0.023716576397418976, "learning_rate": 0.00010658256357545509, "loss": 0.0175, "num_input_tokens_seen": 214071296, "step": 99240 }, { "epoch": 16.19004893964111, "grad_norm": 0.002402663230895996, "learning_rate": 0.00010653863796806213, "loss": 0.0034, "num_input_tokens_seen": 214080960, "step": 99245 }, { "epoch": 16.190864600326265, "grad_norm": 0.007612693123519421, "learning_rate": 0.00010649472033466273, "loss": 0.0074, "num_input_tokens_seen": 214091360, "step": 99250 }, { "epoch": 16.19168026101142, "grad_norm": 0.23627546429634094, "learning_rate": 0.00010645081067614703, "loss": 0.0066, "num_input_tokens_seen": 214102464, "step": 99255 }, { "epoch": 16.192495921696572, "grad_norm": 0.055140238255262375, "learning_rate": 0.00010640690899340494, "loss": 0.0279, "num_input_tokens_seen": 214112800, "step": 99260 }, { "epoch": 16.193311582381728, "grad_norm": 0.5487972497940063, "learning_rate": 0.00010636301528732612, "loss": 0.0162, "num_input_tokens_seen": 214122464, "step": 99265 }, { "epoch": 16.194127243066884, "grad_norm": 0.042633190751075745, "learning_rate": 0.00010631912955880018, "loss": 0.0027, "num_input_tokens_seen": 214132096, "step": 99270 }, { "epoch": 16.19494290375204, "grad_norm": 0.005254723131656647, "learning_rate": 0.00010627525180871633, "loss": 0.004, "num_input_tokens_seen": 214142528, "step": 99275 }, { "epoch": 16.195758564437195, "grad_norm": 0.02609480731189251, "learning_rate": 0.00010623138203796429, "loss": 0.0024, "num_input_tokens_seen": 214153664, "step": 99280 }, { "epoch": 16.196574225122347, "grad_norm": 0.03706686198711395, "learning_rate": 0.00010618752024743255, "loss": 0.0079, "num_input_tokens_seen": 214164416, "step": 99285 }, { "epoch": 16.197389885807503, "grad_norm": 0.007206575945019722, "learning_rate": 0.00010614366643801055, "loss": 0.0696, "num_input_tokens_seen": 214176096, "step": 99290 }, { "epoch": 16.19820554649266, "grad_norm": 0.016659243032336235, "learning_rate": 0.00010609982061058654, "loss": 0.0024, "num_input_tokens_seen": 214186848, "step": 99295 }, { "epoch": 16.199021207177815, "grad_norm": 0.011172675527632236, "learning_rate": 0.0001060559827660495, "loss": 0.0034, "num_input_tokens_seen": 214198432, "step": 99300 }, { "epoch": 16.19983686786297, "grad_norm": 0.21045157313346863, "learning_rate": 0.0001060121529052877, "loss": 0.0066, "num_input_tokens_seen": 214209920, "step": 99305 }, { "epoch": 16.200652528548122, "grad_norm": 0.04318992793560028, "learning_rate": 0.0001059683310291894, "loss": 0.0189, "num_input_tokens_seen": 214220416, "step": 99310 }, { "epoch": 16.201468189233278, "grad_norm": 0.029243484139442444, "learning_rate": 0.00010592451713864282, "loss": 0.0025, "num_input_tokens_seen": 214231488, "step": 99315 }, { "epoch": 16.202283849918434, "grad_norm": 0.02107887901365757, "learning_rate": 0.00010588071123453574, "loss": 0.0039, "num_input_tokens_seen": 214243520, "step": 99320 }, { "epoch": 16.20309951060359, "grad_norm": 0.009316026233136654, "learning_rate": 0.00010583691331775608, "loss": 0.0012, "num_input_tokens_seen": 214254688, "step": 99325 }, { "epoch": 16.203915171288745, "grad_norm": 0.01135186105966568, "learning_rate": 0.0001057931233891914, "loss": 0.0027, "num_input_tokens_seen": 214265792, "step": 99330 }, { "epoch": 16.204730831973897, "grad_norm": 0.00273346365429461, "learning_rate": 0.00010574934144972908, "loss": 0.1014, "num_input_tokens_seen": 214277312, "step": 99335 }, { "epoch": 16.205546492659053, "grad_norm": 0.1742607057094574, "learning_rate": 0.00010570556750025656, "loss": 0.0029, "num_input_tokens_seen": 214288608, "step": 99340 }, { "epoch": 16.20636215334421, "grad_norm": 0.008680494502186775, "learning_rate": 0.00010566180154166094, "loss": 0.0064, "num_input_tokens_seen": 214299520, "step": 99345 }, { "epoch": 16.207177814029365, "grad_norm": 3.7642643451690674, "learning_rate": 0.00010561804357482912, "loss": 0.0251, "num_input_tokens_seen": 214311072, "step": 99350 }, { "epoch": 16.20799347471452, "grad_norm": 0.0069508980959653854, "learning_rate": 0.00010557429360064796, "loss": 0.0058, "num_input_tokens_seen": 214320928, "step": 99355 }, { "epoch": 16.208809135399672, "grad_norm": 0.019175725057721138, "learning_rate": 0.00010553055162000414, "loss": 0.0032, "num_input_tokens_seen": 214332064, "step": 99360 }, { "epoch": 16.209624796084828, "grad_norm": 0.012586962431669235, "learning_rate": 0.0001054868176337841, "loss": 0.0016, "num_input_tokens_seen": 214342880, "step": 99365 }, { "epoch": 16.210440456769984, "grad_norm": 0.00920949224382639, "learning_rate": 0.00010544309164287418, "loss": 0.0018, "num_input_tokens_seen": 214354496, "step": 99370 }, { "epoch": 16.21125611745514, "grad_norm": 0.005372723564505577, "learning_rate": 0.00010539937364816049, "loss": 0.087, "num_input_tokens_seen": 214365216, "step": 99375 }, { "epoch": 16.212071778140295, "grad_norm": 0.3136146664619446, "learning_rate": 0.00010535566365052913, "loss": 0.0132, "num_input_tokens_seen": 214374240, "step": 99380 }, { "epoch": 16.212887438825447, "grad_norm": 0.04971996322274208, "learning_rate": 0.00010531196165086587, "loss": 0.0026, "num_input_tokens_seen": 214383456, "step": 99385 }, { "epoch": 16.213703099510603, "grad_norm": 0.002650562673807144, "learning_rate": 0.00010526826765005642, "loss": 0.0567, "num_input_tokens_seen": 214394304, "step": 99390 }, { "epoch": 16.21451876019576, "grad_norm": 0.30796241760253906, "learning_rate": 0.00010522458164898624, "loss": 0.0102, "num_input_tokens_seen": 214404704, "step": 99395 }, { "epoch": 16.215334420880914, "grad_norm": 0.027413716539740562, "learning_rate": 0.00010518090364854077, "loss": 0.0016, "num_input_tokens_seen": 214415168, "step": 99400 }, { "epoch": 16.21615008156607, "grad_norm": 0.06792109459638596, "learning_rate": 0.00010513723364960497, "loss": 0.0441, "num_input_tokens_seen": 214425888, "step": 99405 }, { "epoch": 16.216965742251222, "grad_norm": 0.022429026663303375, "learning_rate": 0.00010509357165306422, "loss": 0.0035, "num_input_tokens_seen": 214436448, "step": 99410 }, { "epoch": 16.217781402936378, "grad_norm": 0.010088340379297733, "learning_rate": 0.00010504991765980321, "loss": 0.0061, "num_input_tokens_seen": 214445248, "step": 99415 }, { "epoch": 16.218597063621534, "grad_norm": 0.12079522013664246, "learning_rate": 0.00010500627167070665, "loss": 0.0097, "num_input_tokens_seen": 214456352, "step": 99420 }, { "epoch": 16.21941272430669, "grad_norm": 0.004956993740051985, "learning_rate": 0.00010496263368665904, "loss": 0.0021, "num_input_tokens_seen": 214466976, "step": 99425 }, { "epoch": 16.22022838499184, "grad_norm": 0.0074790776707232, "learning_rate": 0.00010491900370854484, "loss": 0.0013, "num_input_tokens_seen": 214477952, "step": 99430 }, { "epoch": 16.221044045676997, "grad_norm": 0.004937389399856329, "learning_rate": 0.0001048753817372482, "loss": 0.0056, "num_input_tokens_seen": 214488320, "step": 99435 }, { "epoch": 16.221859706362153, "grad_norm": 0.008123103529214859, "learning_rate": 0.00010483176777365322, "loss": 0.0055, "num_input_tokens_seen": 214498944, "step": 99440 }, { "epoch": 16.22267536704731, "grad_norm": 0.005231812130659819, "learning_rate": 0.00010478816181864376, "loss": 0.0049, "num_input_tokens_seen": 214510112, "step": 99445 }, { "epoch": 16.223491027732464, "grad_norm": 0.0036407143343240023, "learning_rate": 0.0001047445638731036, "loss": 0.0343, "num_input_tokens_seen": 214520224, "step": 99450 }, { "epoch": 16.224306688417617, "grad_norm": 0.022329874336719513, "learning_rate": 0.00010470097393791622, "loss": 0.0035, "num_input_tokens_seen": 214531296, "step": 99455 }, { "epoch": 16.225122349102772, "grad_norm": 0.0054546622559428215, "learning_rate": 0.00010465739201396512, "loss": 0.0012, "num_input_tokens_seen": 214543104, "step": 99460 }, { "epoch": 16.225938009787928, "grad_norm": 0.014620702713727951, "learning_rate": 0.00010461381810213344, "loss": 0.0013, "num_input_tokens_seen": 214554464, "step": 99465 }, { "epoch": 16.226753670473084, "grad_norm": 0.03210921213030815, "learning_rate": 0.00010457025220330435, "loss": 0.005, "num_input_tokens_seen": 214564768, "step": 99470 }, { "epoch": 16.22756933115824, "grad_norm": 0.017705243080854416, "learning_rate": 0.00010452669431836076, "loss": 0.02, "num_input_tokens_seen": 214576576, "step": 99475 }, { "epoch": 16.22838499184339, "grad_norm": 0.004085623659193516, "learning_rate": 0.00010448314444818541, "loss": 0.0019, "num_input_tokens_seen": 214588192, "step": 99480 }, { "epoch": 16.229200652528547, "grad_norm": 0.01895381323993206, "learning_rate": 0.00010443960259366081, "loss": 0.0052, "num_input_tokens_seen": 214597408, "step": 99485 }, { "epoch": 16.230016313213703, "grad_norm": 0.03669867664575577, "learning_rate": 0.00010439606875566954, "loss": 0.0028, "num_input_tokens_seen": 214609088, "step": 99490 }, { "epoch": 16.23083197389886, "grad_norm": 0.006370703689754009, "learning_rate": 0.00010435254293509378, "loss": 0.0042, "num_input_tokens_seen": 214620512, "step": 99495 }, { "epoch": 16.231647634584014, "grad_norm": 0.003387035569176078, "learning_rate": 0.00010430902513281565, "loss": 0.0049, "num_input_tokens_seen": 214630784, "step": 99500 }, { "epoch": 16.232463295269167, "grad_norm": 0.0021967643406242132, "learning_rate": 0.00010426551534971706, "loss": 0.0056, "num_input_tokens_seen": 214640896, "step": 99505 }, { "epoch": 16.233278955954322, "grad_norm": 1.206979751586914, "learning_rate": 0.00010422201358667987, "loss": 0.034, "num_input_tokens_seen": 214651680, "step": 99510 }, { "epoch": 16.234094616639478, "grad_norm": 0.011937047354876995, "learning_rate": 0.00010417851984458565, "loss": 0.0035, "num_input_tokens_seen": 214662752, "step": 99515 }, { "epoch": 16.234910277324634, "grad_norm": 0.01807512901723385, "learning_rate": 0.00010413503412431568, "loss": 0.0097, "num_input_tokens_seen": 214673952, "step": 99520 }, { "epoch": 16.23572593800979, "grad_norm": 0.01114695519208908, "learning_rate": 0.00010409155642675178, "loss": 0.0129, "num_input_tokens_seen": 214684832, "step": 99525 }, { "epoch": 16.23654159869494, "grad_norm": 0.0014167737681418657, "learning_rate": 0.00010404808675277444, "loss": 0.0024, "num_input_tokens_seen": 214696352, "step": 99530 }, { "epoch": 16.237357259380097, "grad_norm": 0.009098142385482788, "learning_rate": 0.00010400462510326513, "loss": 0.0019, "num_input_tokens_seen": 214706720, "step": 99535 }, { "epoch": 16.238172920065253, "grad_norm": 0.014389188028872013, "learning_rate": 0.00010396117147910422, "loss": 0.0276, "num_input_tokens_seen": 214718144, "step": 99540 }, { "epoch": 16.23898858075041, "grad_norm": 0.0053284564055502415, "learning_rate": 0.00010391772588117288, "loss": 0.023, "num_input_tokens_seen": 214728128, "step": 99545 }, { "epoch": 16.239804241435564, "grad_norm": 0.009899914264678955, "learning_rate": 0.000103874288310351, "loss": 0.0012, "num_input_tokens_seen": 214738816, "step": 99550 }, { "epoch": 16.240619902120716, "grad_norm": 0.004958040546625853, "learning_rate": 0.0001038308587675193, "loss": 0.0013, "num_input_tokens_seen": 214749472, "step": 99555 }, { "epoch": 16.241435562805872, "grad_norm": 0.1690913587808609, "learning_rate": 0.00010378743725355788, "loss": 0.0202, "num_input_tokens_seen": 214760160, "step": 99560 }, { "epoch": 16.242251223491028, "grad_norm": 0.7172198295593262, "learning_rate": 0.00010374402376934661, "loss": 0.0193, "num_input_tokens_seen": 214771392, "step": 99565 }, { "epoch": 16.243066884176184, "grad_norm": 0.9244903326034546, "learning_rate": 0.00010370061831576544, "loss": 0.1392, "num_input_tokens_seen": 214782400, "step": 99570 }, { "epoch": 16.24388254486134, "grad_norm": 0.0059557813219726086, "learning_rate": 0.00010365722089369395, "loss": 0.0016, "num_input_tokens_seen": 214792416, "step": 99575 }, { "epoch": 16.24469820554649, "grad_norm": 0.03628982976078987, "learning_rate": 0.00010361383150401165, "loss": 0.07, "num_input_tokens_seen": 214804128, "step": 99580 }, { "epoch": 16.245513866231647, "grad_norm": 0.010102677159011364, "learning_rate": 0.00010357045014759797, "loss": 0.0027, "num_input_tokens_seen": 214814848, "step": 99585 }, { "epoch": 16.246329526916803, "grad_norm": 0.0009897854179143906, "learning_rate": 0.00010352707682533197, "loss": 0.1197, "num_input_tokens_seen": 214825728, "step": 99590 }, { "epoch": 16.24714518760196, "grad_norm": 0.004363001324236393, "learning_rate": 0.00010348371153809277, "loss": 0.014, "num_input_tokens_seen": 214836352, "step": 99595 }, { "epoch": 16.247960848287114, "grad_norm": 1.3230475187301636, "learning_rate": 0.00010344035428675914, "loss": 0.1087, "num_input_tokens_seen": 214846816, "step": 99600 }, { "epoch": 16.248776508972266, "grad_norm": 0.028095008805394173, "learning_rate": 0.00010339700507220978, "loss": 0.0652, "num_input_tokens_seen": 214858944, "step": 99605 }, { "epoch": 16.249592169657422, "grad_norm": 0.03313996642827988, "learning_rate": 0.0001033536638953233, "loss": 0.003, "num_input_tokens_seen": 214870304, "step": 99610 }, { "epoch": 16.250407830342578, "grad_norm": 0.0027572500985115767, "learning_rate": 0.00010331033075697793, "loss": 0.0046, "num_input_tokens_seen": 214881184, "step": 99615 }, { "epoch": 16.251223491027734, "grad_norm": 0.002723895013332367, "learning_rate": 0.00010326700565805197, "loss": 0.0687, "num_input_tokens_seen": 214891392, "step": 99620 }, { "epoch": 16.252039151712886, "grad_norm": 0.05306994542479515, "learning_rate": 0.00010322368859942333, "loss": 0.004, "num_input_tokens_seen": 214903392, "step": 99625 }, { "epoch": 16.25285481239804, "grad_norm": 0.027606399729847908, "learning_rate": 0.00010318037958197024, "loss": 0.0016, "num_input_tokens_seen": 214914464, "step": 99630 }, { "epoch": 16.253670473083197, "grad_norm": 0.020546523854136467, "learning_rate": 0.0001031370786065699, "loss": 0.0022, "num_input_tokens_seen": 214925280, "step": 99635 }, { "epoch": 16.254486133768353, "grad_norm": 0.04525211825966835, "learning_rate": 0.00010309378567410039, "loss": 0.0021, "num_input_tokens_seen": 214935744, "step": 99640 }, { "epoch": 16.25530179445351, "grad_norm": 0.00319711584597826, "learning_rate": 0.00010305050078543848, "loss": 0.002, "num_input_tokens_seen": 214948064, "step": 99645 }, { "epoch": 16.25611745513866, "grad_norm": 0.006094567012041807, "learning_rate": 0.00010300722394146212, "loss": 0.0049, "num_input_tokens_seen": 214959360, "step": 99650 }, { "epoch": 16.256933115823816, "grad_norm": 0.0017145955935120583, "learning_rate": 0.00010296395514304763, "loss": 0.0225, "num_input_tokens_seen": 214970528, "step": 99655 }, { "epoch": 16.257748776508972, "grad_norm": 0.12342602759599686, "learning_rate": 0.00010292069439107254, "loss": 0.0083, "num_input_tokens_seen": 214981792, "step": 99660 }, { "epoch": 16.258564437194128, "grad_norm": 0.0071209874004125595, "learning_rate": 0.00010287744168641311, "loss": 0.0017, "num_input_tokens_seen": 214993280, "step": 99665 }, { "epoch": 16.259380097879284, "grad_norm": 0.0018977465806528926, "learning_rate": 0.00010283419702994634, "loss": 0.0329, "num_input_tokens_seen": 215004192, "step": 99670 }, { "epoch": 16.260195758564436, "grad_norm": 0.40363216400146484, "learning_rate": 0.0001027909604225481, "loss": 0.0056, "num_input_tokens_seen": 215014272, "step": 99675 }, { "epoch": 16.26101141924959, "grad_norm": 0.022581901401281357, "learning_rate": 0.00010274773186509528, "loss": 0.0016, "num_input_tokens_seen": 215023552, "step": 99680 }, { "epoch": 16.261827079934747, "grad_norm": 0.009051907807588577, "learning_rate": 0.00010270451135846332, "loss": 0.0049, "num_input_tokens_seen": 215035296, "step": 99685 }, { "epoch": 16.262642740619903, "grad_norm": 0.0025326667819172144, "learning_rate": 0.00010266129890352872, "loss": 0.1259, "num_input_tokens_seen": 215046368, "step": 99690 }, { "epoch": 16.26345840130506, "grad_norm": 0.0054851071909070015, "learning_rate": 0.00010261809450116666, "loss": 0.002, "num_input_tokens_seen": 215055936, "step": 99695 }, { "epoch": 16.26427406199021, "grad_norm": 0.062379613518714905, "learning_rate": 0.00010257489815225318, "loss": 0.1588, "num_input_tokens_seen": 215068064, "step": 99700 }, { "epoch": 16.265089722675366, "grad_norm": 0.01089994516223669, "learning_rate": 0.00010253170985766357, "loss": 0.0103, "num_input_tokens_seen": 215077600, "step": 99705 }, { "epoch": 16.265905383360522, "grad_norm": 0.006142259109765291, "learning_rate": 0.00010248852961827309, "loss": 0.0027, "num_input_tokens_seen": 215089216, "step": 99710 }, { "epoch": 16.266721044045678, "grad_norm": 0.0017639747820794582, "learning_rate": 0.00010244535743495681, "loss": 0.0018, "num_input_tokens_seen": 215100384, "step": 99715 }, { "epoch": 16.267536704730833, "grad_norm": 0.12671665847301483, "learning_rate": 0.00010240219330858969, "loss": 0.013, "num_input_tokens_seen": 215110880, "step": 99720 }, { "epoch": 16.268352365415986, "grad_norm": 0.0020797583274543285, "learning_rate": 0.00010235903724004652, "loss": 0.027, "num_input_tokens_seen": 215121120, "step": 99725 }, { "epoch": 16.26916802610114, "grad_norm": 0.008084515109658241, "learning_rate": 0.00010231588923020196, "loss": 0.0098, "num_input_tokens_seen": 215131968, "step": 99730 }, { "epoch": 16.269983686786297, "grad_norm": 0.006117525510489941, "learning_rate": 0.00010227274927993035, "loss": 0.0097, "num_input_tokens_seen": 215143456, "step": 99735 }, { "epoch": 16.270799347471453, "grad_norm": 0.0036402654368430376, "learning_rate": 0.000102229617390106, "loss": 0.0027, "num_input_tokens_seen": 215154208, "step": 99740 }, { "epoch": 16.27161500815661, "grad_norm": 0.0322255939245224, "learning_rate": 0.00010218649356160314, "loss": 0.0059, "num_input_tokens_seen": 215164672, "step": 99745 }, { "epoch": 16.27243066884176, "grad_norm": 0.001658755587413907, "learning_rate": 0.00010214337779529548, "loss": 0.0091, "num_input_tokens_seen": 215175392, "step": 99750 }, { "epoch": 16.273246329526916, "grad_norm": 0.0018133209086954594, "learning_rate": 0.00010210027009205719, "loss": 0.0025, "num_input_tokens_seen": 215186752, "step": 99755 }, { "epoch": 16.274061990212072, "grad_norm": 1.2181055545806885, "learning_rate": 0.00010205717045276153, "loss": 0.0346, "num_input_tokens_seen": 215199296, "step": 99760 }, { "epoch": 16.274877650897228, "grad_norm": 0.00502747343853116, "learning_rate": 0.00010201407887828234, "loss": 0.002, "num_input_tokens_seen": 215209344, "step": 99765 }, { "epoch": 16.275693311582383, "grad_norm": 0.009152781218290329, "learning_rate": 0.0001019709953694925, "loss": 0.0328, "num_input_tokens_seen": 215219680, "step": 99770 }, { "epoch": 16.276508972267536, "grad_norm": 0.001744898152537644, "learning_rate": 0.00010192791992726558, "loss": 0.007, "num_input_tokens_seen": 215230784, "step": 99775 }, { "epoch": 16.27732463295269, "grad_norm": 0.019375447183847427, "learning_rate": 0.00010188485255247415, "loss": 0.0027, "num_input_tokens_seen": 215239968, "step": 99780 }, { "epoch": 16.278140293637847, "grad_norm": 1.0153248310089111, "learning_rate": 0.00010184179324599147, "loss": 0.0938, "num_input_tokens_seen": 215248992, "step": 99785 }, { "epoch": 16.278955954323003, "grad_norm": 0.001854499103501439, "learning_rate": 0.00010179874200868966, "loss": 0.0014, "num_input_tokens_seen": 215259680, "step": 99790 }, { "epoch": 16.27977161500816, "grad_norm": 0.01951390877366066, "learning_rate": 0.00010175569884144182, "loss": 0.0676, "num_input_tokens_seen": 215268704, "step": 99795 }, { "epoch": 16.28058727569331, "grad_norm": 0.000784439267590642, "learning_rate": 0.00010171266374511962, "loss": 0.0018, "num_input_tokens_seen": 215278336, "step": 99800 }, { "epoch": 16.281402936378466, "grad_norm": 0.00939230713993311, "learning_rate": 0.00010166963672059588, "loss": 0.0012, "num_input_tokens_seen": 215289536, "step": 99805 }, { "epoch": 16.282218597063622, "grad_norm": 0.0883348137140274, "learning_rate": 0.00010162661776874193, "loss": 0.0061, "num_input_tokens_seen": 215301664, "step": 99810 }, { "epoch": 16.283034257748778, "grad_norm": 0.015548190101981163, "learning_rate": 0.00010158360689043028, "loss": 0.0026, "num_input_tokens_seen": 215311104, "step": 99815 }, { "epoch": 16.28384991843393, "grad_norm": 0.0026966894511133432, "learning_rate": 0.00010154060408653198, "loss": 0.0102, "num_input_tokens_seen": 215320128, "step": 99820 }, { "epoch": 16.284665579119086, "grad_norm": 0.010926101356744766, "learning_rate": 0.00010149760935791907, "loss": 0.0054, "num_input_tokens_seen": 215330464, "step": 99825 }, { "epoch": 16.28548123980424, "grad_norm": 0.0013593408511951566, "learning_rate": 0.00010145462270546241, "loss": 0.0045, "num_input_tokens_seen": 215341984, "step": 99830 }, { "epoch": 16.286296900489397, "grad_norm": 0.05608679726719856, "learning_rate": 0.00010141164413003351, "loss": 0.0057, "num_input_tokens_seen": 215353632, "step": 99835 }, { "epoch": 16.287112561174553, "grad_norm": 0.001737497397698462, "learning_rate": 0.00010136867363250329, "loss": 0.002, "num_input_tokens_seen": 215364640, "step": 99840 }, { "epoch": 16.287928221859705, "grad_norm": 0.002180078299716115, "learning_rate": 0.00010132571121374257, "loss": 0.0009, "num_input_tokens_seen": 215375488, "step": 99845 }, { "epoch": 16.28874388254486, "grad_norm": 0.126896932721138, "learning_rate": 0.00010128275687462212, "loss": 0.0059, "num_input_tokens_seen": 215386208, "step": 99850 }, { "epoch": 16.289559543230016, "grad_norm": 0.0912637934088707, "learning_rate": 0.0001012398106160124, "loss": 0.0161, "num_input_tokens_seen": 215396896, "step": 99855 }, { "epoch": 16.290375203915172, "grad_norm": 0.4383808374404907, "learning_rate": 0.00010119687243878379, "loss": 0.0078, "num_input_tokens_seen": 215407328, "step": 99860 }, { "epoch": 16.291190864600328, "grad_norm": 0.002988095162436366, "learning_rate": 0.00010115394234380642, "loss": 0.0107, "num_input_tokens_seen": 215419072, "step": 99865 }, { "epoch": 16.29200652528548, "grad_norm": 0.00764456856995821, "learning_rate": 0.00010111102033195041, "loss": 0.0029, "num_input_tokens_seen": 215429024, "step": 99870 }, { "epoch": 16.292822185970635, "grad_norm": 0.008290603756904602, "learning_rate": 0.00010106810640408564, "loss": 0.0029, "num_input_tokens_seen": 215439648, "step": 99875 }, { "epoch": 16.29363784665579, "grad_norm": 0.0007162063848227262, "learning_rate": 0.00010102520056108172, "loss": 0.0067, "num_input_tokens_seen": 215450560, "step": 99880 }, { "epoch": 16.294453507340947, "grad_norm": 0.007930433377623558, "learning_rate": 0.00010098230280380826, "loss": 0.0044, "num_input_tokens_seen": 215461920, "step": 99885 }, { "epoch": 16.295269168026103, "grad_norm": 0.08465391397476196, "learning_rate": 0.00010093941313313465, "loss": 0.0033, "num_input_tokens_seen": 215473056, "step": 99890 }, { "epoch": 16.296084828711255, "grad_norm": 0.012853926047682762, "learning_rate": 0.00010089653154992994, "loss": 0.0025, "num_input_tokens_seen": 215484352, "step": 99895 }, { "epoch": 16.29690048939641, "grad_norm": 0.0028023093473166227, "learning_rate": 0.00010085365805506358, "loss": 0.0015, "num_input_tokens_seen": 215494528, "step": 99900 }, { "epoch": 16.297716150081566, "grad_norm": 0.000508651603013277, "learning_rate": 0.00010081079264940391, "loss": 0.0021, "num_input_tokens_seen": 215506016, "step": 99905 }, { "epoch": 16.298531810766722, "grad_norm": 0.8173092007637024, "learning_rate": 0.00010076793533382022, "loss": 0.0089, "num_input_tokens_seen": 215517344, "step": 99910 }, { "epoch": 16.299347471451878, "grad_norm": 0.01673734560608864, "learning_rate": 0.00010072508610918046, "loss": 0.0016, "num_input_tokens_seen": 215528256, "step": 99915 }, { "epoch": 16.30016313213703, "grad_norm": 0.00841209851205349, "learning_rate": 0.00010068224497635369, "loss": 0.0014, "num_input_tokens_seen": 215538880, "step": 99920 }, { "epoch": 16.300978792822185, "grad_norm": 0.0008518766262568533, "learning_rate": 0.00010063941193620751, "loss": 0.0086, "num_input_tokens_seen": 215549696, "step": 99925 }, { "epoch": 16.30179445350734, "grad_norm": 0.0017631900263950229, "learning_rate": 0.0001005965869896105, "loss": 0.0025, "num_input_tokens_seen": 215559520, "step": 99930 }, { "epoch": 16.302610114192497, "grad_norm": 0.08385173976421356, "learning_rate": 0.00010055377013743012, "loss": 0.0023, "num_input_tokens_seen": 215569664, "step": 99935 }, { "epoch": 16.303425774877653, "grad_norm": 0.0025800170842558146, "learning_rate": 0.0001005109613805344, "loss": 0.0014, "num_input_tokens_seen": 215578432, "step": 99940 }, { "epoch": 16.304241435562805, "grad_norm": 0.0062077902257442474, "learning_rate": 0.00010046816071979087, "loss": 0.0682, "num_input_tokens_seen": 215588320, "step": 99945 }, { "epoch": 16.30505709624796, "grad_norm": 0.004471068736165762, "learning_rate": 0.0001004253681560669, "loss": 0.0015, "num_input_tokens_seen": 215598944, "step": 99950 }, { "epoch": 16.305872756933116, "grad_norm": 0.02066086232662201, "learning_rate": 0.00010038258369022974, "loss": 0.0052, "num_input_tokens_seen": 215609408, "step": 99955 }, { "epoch": 16.306688417618272, "grad_norm": 0.005451408214867115, "learning_rate": 0.00010033980732314646, "loss": 0.0056, "num_input_tokens_seen": 215619456, "step": 99960 }, { "epoch": 16.307504078303428, "grad_norm": 0.002764195902273059, "learning_rate": 0.00010029703905568399, "loss": 0.0016, "num_input_tokens_seen": 215630368, "step": 99965 }, { "epoch": 16.30831973898858, "grad_norm": 0.0019299183040857315, "learning_rate": 0.00010025427888870909, "loss": 0.0011, "num_input_tokens_seen": 215640832, "step": 99970 }, { "epoch": 16.309135399673735, "grad_norm": 1.382248044013977, "learning_rate": 0.00010021152682308837, "loss": 0.1532, "num_input_tokens_seen": 215651744, "step": 99975 }, { "epoch": 16.30995106035889, "grad_norm": 0.003220693673938513, "learning_rate": 0.00010016878285968816, "loss": 0.0017, "num_input_tokens_seen": 215661600, "step": 99980 }, { "epoch": 16.310766721044047, "grad_norm": 0.03084133379161358, "learning_rate": 0.00010012604699937483, "loss": 0.0051, "num_input_tokens_seen": 215672320, "step": 99985 }, { "epoch": 16.3115823817292, "grad_norm": 0.001754662487655878, "learning_rate": 0.00010008331924301445, "loss": 0.0182, "num_input_tokens_seen": 215682976, "step": 99990 }, { "epoch": 16.312398042414355, "grad_norm": 0.0012226407416164875, "learning_rate": 0.00010004059959147293, "loss": 0.0022, "num_input_tokens_seen": 215693024, "step": 99995 }, { "epoch": 16.31321370309951, "grad_norm": 0.04454412683844566, "learning_rate": 9.999788804561605e-05, "loss": 0.0165, "num_input_tokens_seen": 215703968, "step": 100000 }, { "epoch": 16.314029363784666, "grad_norm": 0.30733397603034973, "learning_rate": 9.995518460630937e-05, "loss": 0.0146, "num_input_tokens_seen": 215714720, "step": 100005 }, { "epoch": 16.31484502446982, "grad_norm": 0.20619358122348785, "learning_rate": 9.991248927441837e-05, "loss": 0.0153, "num_input_tokens_seen": 215726048, "step": 100010 }, { "epoch": 16.315660685154974, "grad_norm": 0.007945992983877659, "learning_rate": 9.986980205080837e-05, "loss": 0.0054, "num_input_tokens_seen": 215737248, "step": 100015 }, { "epoch": 16.31647634584013, "grad_norm": 0.0007334966212511063, "learning_rate": 9.982712293634438e-05, "loss": 0.0017, "num_input_tokens_seen": 215748160, "step": 100020 }, { "epoch": 16.317292006525285, "grad_norm": 0.008733578026294708, "learning_rate": 9.97844519318914e-05, "loss": 0.001, "num_input_tokens_seen": 215759328, "step": 100025 }, { "epoch": 16.31810766721044, "grad_norm": 0.0032867519184947014, "learning_rate": 9.974178903831427e-05, "loss": 0.0026, "num_input_tokens_seen": 215770048, "step": 100030 }, { "epoch": 16.318923327895597, "grad_norm": 0.0008559562265872955, "learning_rate": 9.969913425647747e-05, "loss": 0.149, "num_input_tokens_seen": 215781472, "step": 100035 }, { "epoch": 16.31973898858075, "grad_norm": 0.03256795182824135, "learning_rate": 9.965648758724544e-05, "loss": 0.0047, "num_input_tokens_seen": 215792896, "step": 100040 }, { "epoch": 16.320554649265905, "grad_norm": 0.005858627613633871, "learning_rate": 9.961384903148269e-05, "loss": 0.0017, "num_input_tokens_seen": 215803104, "step": 100045 }, { "epoch": 16.32137030995106, "grad_norm": 0.06610412150621414, "learning_rate": 9.957121859005324e-05, "loss": 0.0085, "num_input_tokens_seen": 215813632, "step": 100050 }, { "epoch": 16.322185970636216, "grad_norm": 0.0075034829787909985, "learning_rate": 9.952859626382099e-05, "loss": 0.0217, "num_input_tokens_seen": 215824032, "step": 100055 }, { "epoch": 16.32300163132137, "grad_norm": 0.015948081389069557, "learning_rate": 9.948598205364979e-05, "loss": 0.0194, "num_input_tokens_seen": 215835360, "step": 100060 }, { "epoch": 16.323817292006524, "grad_norm": 0.037474725395441055, "learning_rate": 9.944337596040326e-05, "loss": 0.0232, "num_input_tokens_seen": 215845824, "step": 100065 }, { "epoch": 16.32463295269168, "grad_norm": 0.0010142576647922397, "learning_rate": 9.940077798494485e-05, "loss": 0.0022, "num_input_tokens_seen": 215856544, "step": 100070 }, { "epoch": 16.325448613376835, "grad_norm": 0.017588535323739052, "learning_rate": 9.935818812813784e-05, "loss": 0.0037, "num_input_tokens_seen": 215867168, "step": 100075 }, { "epoch": 16.32626427406199, "grad_norm": 0.0065208375453948975, "learning_rate": 9.931560639084541e-05, "loss": 0.0027, "num_input_tokens_seen": 215877856, "step": 100080 }, { "epoch": 16.327079934747147, "grad_norm": 0.08464358001947403, "learning_rate": 9.927303277393051e-05, "loss": 0.0461, "num_input_tokens_seen": 215888416, "step": 100085 }, { "epoch": 16.3278955954323, "grad_norm": 0.25659850239753723, "learning_rate": 9.923046727825602e-05, "loss": 0.0049, "num_input_tokens_seen": 215899072, "step": 100090 }, { "epoch": 16.328711256117455, "grad_norm": 0.2483597993850708, "learning_rate": 9.918790990468446e-05, "loss": 0.0187, "num_input_tokens_seen": 215909472, "step": 100095 }, { "epoch": 16.32952691680261, "grad_norm": 0.02955242060124874, "learning_rate": 9.914536065407842e-05, "loss": 0.002, "num_input_tokens_seen": 215921312, "step": 100100 }, { "epoch": 16.330342577487766, "grad_norm": 0.049712058156728745, "learning_rate": 9.910281952730011e-05, "loss": 0.0018, "num_input_tokens_seen": 215932576, "step": 100105 }, { "epoch": 16.33115823817292, "grad_norm": 0.0034594545140862465, "learning_rate": 9.906028652521176e-05, "loss": 0.0038, "num_input_tokens_seen": 215943680, "step": 100110 }, { "epoch": 16.331973898858074, "grad_norm": 0.0014360647182911634, "learning_rate": 9.901776164867538e-05, "loss": 0.1218, "num_input_tokens_seen": 215955840, "step": 100115 }, { "epoch": 16.33278955954323, "grad_norm": 0.023604584857821465, "learning_rate": 9.89752448985527e-05, "loss": 0.0057, "num_input_tokens_seen": 215966720, "step": 100120 }, { "epoch": 16.333605220228385, "grad_norm": 0.0037194816395640373, "learning_rate": 9.893273627570542e-05, "loss": 0.0032, "num_input_tokens_seen": 215977536, "step": 100125 }, { "epoch": 16.33442088091354, "grad_norm": 0.02687995880842209, "learning_rate": 9.889023578099504e-05, "loss": 0.0017, "num_input_tokens_seen": 215989312, "step": 100130 }, { "epoch": 16.335236541598697, "grad_norm": 0.0725824385881424, "learning_rate": 9.884774341528285e-05, "loss": 0.0039, "num_input_tokens_seen": 215999968, "step": 100135 }, { "epoch": 16.33605220228385, "grad_norm": 0.004015383310616016, "learning_rate": 9.880525917943006e-05, "loss": 0.0012, "num_input_tokens_seen": 216011040, "step": 100140 }, { "epoch": 16.336867862969005, "grad_norm": 0.04311608895659447, "learning_rate": 9.876278307429764e-05, "loss": 0.0032, "num_input_tokens_seen": 216021760, "step": 100145 }, { "epoch": 16.33768352365416, "grad_norm": 0.0015153535641729832, "learning_rate": 9.872031510074625e-05, "loss": 0.0084, "num_input_tokens_seen": 216033792, "step": 100150 }, { "epoch": 16.338499184339316, "grad_norm": 0.001191094284877181, "learning_rate": 9.867785525963707e-05, "loss": 0.0138, "num_input_tokens_seen": 216042816, "step": 100155 }, { "epoch": 16.339314845024468, "grad_norm": 0.0022393432445824146, "learning_rate": 9.863540355182998e-05, "loss": 0.0046, "num_input_tokens_seen": 216054144, "step": 100160 }, { "epoch": 16.340130505709624, "grad_norm": 0.07019706070423126, "learning_rate": 9.859295997818585e-05, "loss": 0.0081, "num_input_tokens_seen": 216066272, "step": 100165 }, { "epoch": 16.34094616639478, "grad_norm": 0.009494810365140438, "learning_rate": 9.855052453956437e-05, "loss": 0.0046, "num_input_tokens_seen": 216077472, "step": 100170 }, { "epoch": 16.341761827079935, "grad_norm": 0.16330066323280334, "learning_rate": 9.850809723682603e-05, "loss": 0.0044, "num_input_tokens_seen": 216088416, "step": 100175 }, { "epoch": 16.34257748776509, "grad_norm": 0.02010112628340721, "learning_rate": 9.846567807083018e-05, "loss": 0.0046, "num_input_tokens_seen": 216098656, "step": 100180 }, { "epoch": 16.343393148450243, "grad_norm": 0.013957624323666096, "learning_rate": 9.842326704243682e-05, "loss": 0.0021, "num_input_tokens_seen": 216109024, "step": 100185 }, { "epoch": 16.3442088091354, "grad_norm": 0.014768439345061779, "learning_rate": 9.838086415250547e-05, "loss": 0.0035, "num_input_tokens_seen": 216120064, "step": 100190 }, { "epoch": 16.345024469820554, "grad_norm": 0.01501684170216322, "learning_rate": 9.833846940189533e-05, "loss": 0.0032, "num_input_tokens_seen": 216129952, "step": 100195 }, { "epoch": 16.34584013050571, "grad_norm": 0.0025052789133042097, "learning_rate": 9.829608279146568e-05, "loss": 0.0008, "num_input_tokens_seen": 216141408, "step": 100200 }, { "epoch": 16.346655791190866, "grad_norm": 0.0029337145388126373, "learning_rate": 9.825370432207554e-05, "loss": 0.0012, "num_input_tokens_seen": 216152608, "step": 100205 }, { "epoch": 16.347471451876018, "grad_norm": 0.792515218257904, "learning_rate": 9.821133399458371e-05, "loss": 0.0168, "num_input_tokens_seen": 216163488, "step": 100210 }, { "epoch": 16.348287112561174, "grad_norm": 0.3510511815547943, "learning_rate": 9.81689718098489e-05, "loss": 0.0157, "num_input_tokens_seen": 216173536, "step": 100215 }, { "epoch": 16.34910277324633, "grad_norm": 0.003969075623899698, "learning_rate": 9.81266177687296e-05, "loss": 0.0012, "num_input_tokens_seen": 216186368, "step": 100220 }, { "epoch": 16.349918433931485, "grad_norm": 0.0022820986341685057, "learning_rate": 9.808427187208424e-05, "loss": 0.0023, "num_input_tokens_seen": 216197728, "step": 100225 }, { "epoch": 16.35073409461664, "grad_norm": 0.03464748337864876, "learning_rate": 9.8041934120771e-05, "loss": 0.0018, "num_input_tokens_seen": 216208832, "step": 100230 }, { "epoch": 16.351549755301793, "grad_norm": 0.0020508873276412487, "learning_rate": 9.799960451564787e-05, "loss": 0.0017, "num_input_tokens_seen": 216218848, "step": 100235 }, { "epoch": 16.35236541598695, "grad_norm": 0.1778988540172577, "learning_rate": 9.795728305757267e-05, "loss": 0.1305, "num_input_tokens_seen": 216228096, "step": 100240 }, { "epoch": 16.353181076672104, "grad_norm": 0.0028658085502684116, "learning_rate": 9.791496974740321e-05, "loss": 0.0344, "num_input_tokens_seen": 216239936, "step": 100245 }, { "epoch": 16.35399673735726, "grad_norm": 0.8233887553215027, "learning_rate": 9.787266458599697e-05, "loss": 0.0392, "num_input_tokens_seen": 216250528, "step": 100250 }, { "epoch": 16.354812398042416, "grad_norm": 0.002061696257442236, "learning_rate": 9.783036757421132e-05, "loss": 0.0236, "num_input_tokens_seen": 216260928, "step": 100255 }, { "epoch": 16.355628058727568, "grad_norm": 0.01203071791678667, "learning_rate": 9.778807871290346e-05, "loss": 0.0032, "num_input_tokens_seen": 216271744, "step": 100260 }, { "epoch": 16.356443719412724, "grad_norm": 0.04068765044212341, "learning_rate": 9.774579800293026e-05, "loss": 0.0311, "num_input_tokens_seen": 216282560, "step": 100265 }, { "epoch": 16.35725938009788, "grad_norm": 0.04643794149160385, "learning_rate": 9.770352544514904e-05, "loss": 0.0059, "num_input_tokens_seen": 216292160, "step": 100270 }, { "epoch": 16.358075040783035, "grad_norm": 0.08739187568426132, "learning_rate": 9.766126104041601e-05, "loss": 0.0062, "num_input_tokens_seen": 216301952, "step": 100275 }, { "epoch": 16.35889070146819, "grad_norm": 0.12298665195703506, "learning_rate": 9.761900478958813e-05, "loss": 0.0054, "num_input_tokens_seen": 216314400, "step": 100280 }, { "epoch": 16.359706362153343, "grad_norm": 0.0008307526586577296, "learning_rate": 9.757675669352133e-05, "loss": 0.134, "num_input_tokens_seen": 216324896, "step": 100285 }, { "epoch": 16.3605220228385, "grad_norm": 0.7640464901924133, "learning_rate": 9.753451675307234e-05, "loss": 0.0357, "num_input_tokens_seen": 216336640, "step": 100290 }, { "epoch": 16.361337683523654, "grad_norm": 0.09045685827732086, "learning_rate": 9.749228496909668e-05, "loss": 0.0064, "num_input_tokens_seen": 216348192, "step": 100295 }, { "epoch": 16.36215334420881, "grad_norm": 0.04170398786664009, "learning_rate": 9.745006134245072e-05, "loss": 0.0299, "num_input_tokens_seen": 216357760, "step": 100300 }, { "epoch": 16.362969004893966, "grad_norm": 0.021265432238578796, "learning_rate": 9.740784587398965e-05, "loss": 0.107, "num_input_tokens_seen": 216368992, "step": 100305 }, { "epoch": 16.363784665579118, "grad_norm": 1.2718743085861206, "learning_rate": 9.736563856456959e-05, "loss": 0.0896, "num_input_tokens_seen": 216379168, "step": 100310 }, { "epoch": 16.364600326264274, "grad_norm": 0.00466530304402113, "learning_rate": 9.73234394150454e-05, "loss": 0.0121, "num_input_tokens_seen": 216390624, "step": 100315 }, { "epoch": 16.36541598694943, "grad_norm": 0.007446472533047199, "learning_rate": 9.728124842627278e-05, "loss": 0.0034, "num_input_tokens_seen": 216401248, "step": 100320 }, { "epoch": 16.366231647634585, "grad_norm": 0.11789732426404953, "learning_rate": 9.723906559910634e-05, "loss": 0.003, "num_input_tokens_seen": 216410720, "step": 100325 }, { "epoch": 16.36704730831974, "grad_norm": 0.2559731602668762, "learning_rate": 9.719689093440126e-05, "loss": 0.0129, "num_input_tokens_seen": 216421888, "step": 100330 }, { "epoch": 16.367862969004893, "grad_norm": 0.17895765602588654, "learning_rate": 9.715472443301215e-05, "loss": 0.0051, "num_input_tokens_seen": 216432512, "step": 100335 }, { "epoch": 16.36867862969005, "grad_norm": 0.0039016883820295334, "learning_rate": 9.711256609579367e-05, "loss": 0.0036, "num_input_tokens_seen": 216441120, "step": 100340 }, { "epoch": 16.369494290375204, "grad_norm": 0.07602254301309586, "learning_rate": 9.707041592360005e-05, "loss": 0.0063, "num_input_tokens_seen": 216451744, "step": 100345 }, { "epoch": 16.37030995106036, "grad_norm": 0.004850522615015507, "learning_rate": 9.702827391728564e-05, "loss": 0.0192, "num_input_tokens_seen": 216462880, "step": 100350 }, { "epoch": 16.371125611745512, "grad_norm": 0.0016205641441047192, "learning_rate": 9.69861400777045e-05, "loss": 0.0065, "num_input_tokens_seen": 216474240, "step": 100355 }, { "epoch": 16.371941272430668, "grad_norm": 0.004035997670143843, "learning_rate": 9.694401440571043e-05, "loss": 0.0048, "num_input_tokens_seen": 216485600, "step": 100360 }, { "epoch": 16.372756933115824, "grad_norm": 0.013447748497128487, "learning_rate": 9.690189690215728e-05, "loss": 0.0033, "num_input_tokens_seen": 216496416, "step": 100365 }, { "epoch": 16.37357259380098, "grad_norm": 0.016734566539525986, "learning_rate": 9.685978756789854e-05, "loss": 0.0018, "num_input_tokens_seen": 216506304, "step": 100370 }, { "epoch": 16.374388254486135, "grad_norm": 0.0038405403029173613, "learning_rate": 9.681768640378757e-05, "loss": 0.0029, "num_input_tokens_seen": 216517536, "step": 100375 }, { "epoch": 16.375203915171287, "grad_norm": 0.00497449329122901, "learning_rate": 9.677559341067759e-05, "loss": 0.0162, "num_input_tokens_seen": 216529056, "step": 100380 }, { "epoch": 16.376019575856443, "grad_norm": 0.05698425695300102, "learning_rate": 9.673350858942198e-05, "loss": 0.0045, "num_input_tokens_seen": 216540992, "step": 100385 }, { "epoch": 16.3768352365416, "grad_norm": 0.07416532188653946, "learning_rate": 9.669143194087315e-05, "loss": 0.0042, "num_input_tokens_seen": 216551328, "step": 100390 }, { "epoch": 16.377650897226754, "grad_norm": 0.010993343777954578, "learning_rate": 9.664936346588432e-05, "loss": 0.0032, "num_input_tokens_seen": 216562624, "step": 100395 }, { "epoch": 16.37846655791191, "grad_norm": 0.00603041797876358, "learning_rate": 9.660730316530757e-05, "loss": 0.0017, "num_input_tokens_seen": 216572960, "step": 100400 }, { "epoch": 16.379282218597062, "grad_norm": 0.1404844969511032, "learning_rate": 9.65652510399958e-05, "loss": 0.0051, "num_input_tokens_seen": 216583360, "step": 100405 }, { "epoch": 16.380097879282218, "grad_norm": 0.0013082518707960844, "learning_rate": 9.652320709080082e-05, "loss": 0.0014, "num_input_tokens_seen": 216594976, "step": 100410 }, { "epoch": 16.380913539967374, "grad_norm": 0.11330363899469376, "learning_rate": 9.648117131857509e-05, "loss": 0.0034, "num_input_tokens_seen": 216606752, "step": 100415 }, { "epoch": 16.38172920065253, "grad_norm": 0.01271946169435978, "learning_rate": 9.643914372417011e-05, "loss": 0.0048, "num_input_tokens_seen": 216617856, "step": 100420 }, { "epoch": 16.382544861337685, "grad_norm": 0.004375270567834377, "learning_rate": 9.639712430843806e-05, "loss": 0.001, "num_input_tokens_seen": 216628384, "step": 100425 }, { "epoch": 16.383360522022837, "grad_norm": 0.004039763938635588, "learning_rate": 9.635511307223005e-05, "loss": 0.0038, "num_input_tokens_seen": 216638912, "step": 100430 }, { "epoch": 16.384176182707993, "grad_norm": 1.3684515953063965, "learning_rate": 9.631311001639798e-05, "loss": 0.0495, "num_input_tokens_seen": 216649696, "step": 100435 }, { "epoch": 16.38499184339315, "grad_norm": 0.02274392731487751, "learning_rate": 9.62711151417926e-05, "loss": 0.1091, "num_input_tokens_seen": 216659840, "step": 100440 }, { "epoch": 16.385807504078304, "grad_norm": 0.002220106078311801, "learning_rate": 9.622912844926551e-05, "loss": 0.0039, "num_input_tokens_seen": 216670688, "step": 100445 }, { "epoch": 16.38662316476346, "grad_norm": 0.07055911421775818, "learning_rate": 9.618714993966704e-05, "loss": 0.0035, "num_input_tokens_seen": 216680736, "step": 100450 }, { "epoch": 16.387438825448612, "grad_norm": 0.052689433097839355, "learning_rate": 9.614517961384856e-05, "loss": 0.0031, "num_input_tokens_seen": 216690688, "step": 100455 }, { "epoch": 16.388254486133768, "grad_norm": 0.01399444043636322, "learning_rate": 9.610321747266005e-05, "loss": 0.0015, "num_input_tokens_seen": 216703392, "step": 100460 }, { "epoch": 16.389070146818923, "grad_norm": 0.4460059702396393, "learning_rate": 9.60612635169525e-05, "loss": 0.0136, "num_input_tokens_seen": 216714048, "step": 100465 }, { "epoch": 16.38988580750408, "grad_norm": 0.007544883526861668, "learning_rate": 9.601931774757561e-05, "loss": 0.0017, "num_input_tokens_seen": 216724992, "step": 100470 }, { "epoch": 16.390701468189235, "grad_norm": 0.029649809002876282, "learning_rate": 9.597738016537988e-05, "loss": 0.0013, "num_input_tokens_seen": 216735520, "step": 100475 }, { "epoch": 16.391517128874387, "grad_norm": 0.0020419061183929443, "learning_rate": 9.593545077121507e-05, "loss": 0.0039, "num_input_tokens_seen": 216746944, "step": 100480 }, { "epoch": 16.392332789559543, "grad_norm": 0.005828389432281256, "learning_rate": 9.589352956593095e-05, "loss": 0.0034, "num_input_tokens_seen": 216757632, "step": 100485 }, { "epoch": 16.3931484502447, "grad_norm": 0.00434563122689724, "learning_rate": 9.585161655037705e-05, "loss": 0.005, "num_input_tokens_seen": 216768960, "step": 100490 }, { "epoch": 16.393964110929854, "grad_norm": 0.010777014307677746, "learning_rate": 9.580971172540287e-05, "loss": 0.0045, "num_input_tokens_seen": 216780192, "step": 100495 }, { "epoch": 16.39477977161501, "grad_norm": 0.006483625154942274, "learning_rate": 9.576781509185766e-05, "loss": 0.0335, "num_input_tokens_seen": 216791232, "step": 100500 }, { "epoch": 16.395595432300162, "grad_norm": 0.007594752591103315, "learning_rate": 9.572592665059043e-05, "loss": 0.0552, "num_input_tokens_seen": 216802336, "step": 100505 }, { "epoch": 16.396411092985318, "grad_norm": 0.05979802459478378, "learning_rate": 9.568404640245022e-05, "loss": 0.0631, "num_input_tokens_seen": 216812768, "step": 100510 }, { "epoch": 16.397226753670473, "grad_norm": 0.032655466347932816, "learning_rate": 9.564217434828565e-05, "loss": 0.0182, "num_input_tokens_seen": 216824224, "step": 100515 }, { "epoch": 16.39804241435563, "grad_norm": 0.03154059126973152, "learning_rate": 9.56003104889454e-05, "loss": 0.0032, "num_input_tokens_seen": 216834432, "step": 100520 }, { "epoch": 16.39885807504078, "grad_norm": 0.015749523416161537, "learning_rate": 9.55584548252778e-05, "loss": 0.0032, "num_input_tokens_seen": 216845408, "step": 100525 }, { "epoch": 16.399673735725937, "grad_norm": 0.005066398996859789, "learning_rate": 9.55166073581314e-05, "loss": 0.0014, "num_input_tokens_seen": 216856096, "step": 100530 }, { "epoch": 16.400489396411093, "grad_norm": 0.02469783090054989, "learning_rate": 9.547476808835381e-05, "loss": 0.1235, "num_input_tokens_seen": 216865952, "step": 100535 }, { "epoch": 16.40130505709625, "grad_norm": 0.007051562890410423, "learning_rate": 9.54329370167935e-05, "loss": 0.003, "num_input_tokens_seen": 216876800, "step": 100540 }, { "epoch": 16.402120717781404, "grad_norm": 0.0009620111086405814, "learning_rate": 9.539111414429769e-05, "loss": 0.0162, "num_input_tokens_seen": 216887872, "step": 100545 }, { "epoch": 16.402936378466556, "grad_norm": 0.004180533811450005, "learning_rate": 9.53492994717145e-05, "loss": 0.0044, "num_input_tokens_seen": 216898496, "step": 100550 }, { "epoch": 16.403752039151712, "grad_norm": 0.017319774255156517, "learning_rate": 9.530749299989078e-05, "loss": 0.0038, "num_input_tokens_seen": 216908864, "step": 100555 }, { "epoch": 16.404567699836868, "grad_norm": 0.0033540851436555386, "learning_rate": 9.526569472967444e-05, "loss": 0.0019, "num_input_tokens_seen": 216920384, "step": 100560 }, { "epoch": 16.405383360522023, "grad_norm": 0.007153135724365711, "learning_rate": 9.522390466191194e-05, "loss": 0.0037, "num_input_tokens_seen": 216931360, "step": 100565 }, { "epoch": 16.40619902120718, "grad_norm": 0.0007327127968892455, "learning_rate": 9.518212279745075e-05, "loss": 0.0119, "num_input_tokens_seen": 216942464, "step": 100570 }, { "epoch": 16.40701468189233, "grad_norm": 0.017105452716350555, "learning_rate": 9.514034913713714e-05, "loss": 0.0048, "num_input_tokens_seen": 216953760, "step": 100575 }, { "epoch": 16.407830342577487, "grad_norm": 0.006722565740346909, "learning_rate": 9.509858368181812e-05, "loss": 0.0028, "num_input_tokens_seen": 216963872, "step": 100580 }, { "epoch": 16.408646003262643, "grad_norm": 0.0021115958224982023, "learning_rate": 9.505682643233993e-05, "loss": 0.0022, "num_input_tokens_seen": 216974784, "step": 100585 }, { "epoch": 16.4094616639478, "grad_norm": 0.08702608197927475, "learning_rate": 9.501507738954884e-05, "loss": 0.0082, "num_input_tokens_seen": 216985984, "step": 100590 }, { "epoch": 16.410277324632954, "grad_norm": 0.051207542419433594, "learning_rate": 9.497333655429097e-05, "loss": 0.0042, "num_input_tokens_seen": 216995328, "step": 100595 }, { "epoch": 16.411092985318106, "grad_norm": 0.000962368561886251, "learning_rate": 9.493160392741229e-05, "loss": 0.0162, "num_input_tokens_seen": 217006496, "step": 100600 }, { "epoch": 16.411908646003262, "grad_norm": 0.0014400726649910212, "learning_rate": 9.488987950975847e-05, "loss": 0.005, "num_input_tokens_seen": 217017376, "step": 100605 }, { "epoch": 16.412724306688418, "grad_norm": 0.002891391050070524, "learning_rate": 9.484816330217522e-05, "loss": 0.1372, "num_input_tokens_seen": 217027200, "step": 100610 }, { "epoch": 16.413539967373573, "grad_norm": 0.011941032484173775, "learning_rate": 9.480645530550785e-05, "loss": 0.0068, "num_input_tokens_seen": 217037152, "step": 100615 }, { "epoch": 16.41435562805873, "grad_norm": 0.01951504312455654, "learning_rate": 9.47647555206017e-05, "loss": 0.0035, "num_input_tokens_seen": 217048640, "step": 100620 }, { "epoch": 16.41517128874388, "grad_norm": 0.0011732889106497169, "learning_rate": 9.472306394830188e-05, "loss": 0.0285, "num_input_tokens_seen": 217059648, "step": 100625 }, { "epoch": 16.415986949429037, "grad_norm": 0.015243087895214558, "learning_rate": 9.46813805894533e-05, "loss": 0.0043, "num_input_tokens_seen": 217071040, "step": 100630 }, { "epoch": 16.416802610114193, "grad_norm": 0.0028607030399143696, "learning_rate": 9.46397054449007e-05, "loss": 0.0009, "num_input_tokens_seen": 217081888, "step": 100635 }, { "epoch": 16.41761827079935, "grad_norm": 0.06583647429943085, "learning_rate": 9.459803851548876e-05, "loss": 0.0066, "num_input_tokens_seen": 217093760, "step": 100640 }, { "epoch": 16.418433931484504, "grad_norm": 0.002370997332036495, "learning_rate": 9.455637980206177e-05, "loss": 0.0014, "num_input_tokens_seen": 217103200, "step": 100645 }, { "epoch": 16.419249592169656, "grad_norm": 0.0014608802739530802, "learning_rate": 9.451472930546417e-05, "loss": 0.0027, "num_input_tokens_seen": 217113536, "step": 100650 }, { "epoch": 16.420065252854812, "grad_norm": 0.013768395408987999, "learning_rate": 9.447308702653995e-05, "loss": 0.0395, "num_input_tokens_seen": 217123104, "step": 100655 }, { "epoch": 16.420880913539968, "grad_norm": 0.004126961342990398, "learning_rate": 9.443145296613303e-05, "loss": 0.0072, "num_input_tokens_seen": 217133920, "step": 100660 }, { "epoch": 16.421696574225123, "grad_norm": 0.006806209217756987, "learning_rate": 9.438982712508726e-05, "loss": 0.0079, "num_input_tokens_seen": 217144192, "step": 100665 }, { "epoch": 16.42251223491028, "grad_norm": 0.01501559279859066, "learning_rate": 9.434820950424605e-05, "loss": 0.0016, "num_input_tokens_seen": 217154240, "step": 100670 }, { "epoch": 16.42332789559543, "grad_norm": 0.005268144886940718, "learning_rate": 9.430660010445325e-05, "loss": 0.004, "num_input_tokens_seen": 217164480, "step": 100675 }, { "epoch": 16.424143556280587, "grad_norm": 0.009838788770139217, "learning_rate": 9.426499892655155e-05, "loss": 0.0098, "num_input_tokens_seen": 217174048, "step": 100680 }, { "epoch": 16.424959216965743, "grad_norm": 0.010458672419190407, "learning_rate": 9.422340597138457e-05, "loss": 0.1974, "num_input_tokens_seen": 217185920, "step": 100685 }, { "epoch": 16.4257748776509, "grad_norm": 0.2971159815788269, "learning_rate": 9.418182123979496e-05, "loss": 0.0081, "num_input_tokens_seen": 217196864, "step": 100690 }, { "epoch": 16.42659053833605, "grad_norm": 0.0010622895788401365, "learning_rate": 9.414024473262561e-05, "loss": 0.0008, "num_input_tokens_seen": 217206592, "step": 100695 }, { "epoch": 16.427406199021206, "grad_norm": 0.02269851602613926, "learning_rate": 9.409867645071901e-05, "loss": 0.0931, "num_input_tokens_seen": 217217312, "step": 100700 }, { "epoch": 16.428221859706362, "grad_norm": 0.01458274107426405, "learning_rate": 9.405711639491771e-05, "loss": 0.0411, "num_input_tokens_seen": 217229888, "step": 100705 }, { "epoch": 16.429037520391518, "grad_norm": 0.013653422705829144, "learning_rate": 9.401556456606392e-05, "loss": 0.0023, "num_input_tokens_seen": 217240384, "step": 100710 }, { "epoch": 16.429853181076673, "grad_norm": 0.1476871818304062, "learning_rate": 9.397402096499973e-05, "loss": 0.0117, "num_input_tokens_seen": 217250752, "step": 100715 }, { "epoch": 16.430668841761825, "grad_norm": 0.0003703658876474947, "learning_rate": 9.393248559256706e-05, "loss": 0.0085, "num_input_tokens_seen": 217262816, "step": 100720 }, { "epoch": 16.43148450244698, "grad_norm": 0.0041404906660318375, "learning_rate": 9.389095844960771e-05, "loss": 0.0105, "num_input_tokens_seen": 217272896, "step": 100725 }, { "epoch": 16.432300163132137, "grad_norm": 0.022594226524233818, "learning_rate": 9.384943953696329e-05, "loss": 0.0024, "num_input_tokens_seen": 217284480, "step": 100730 }, { "epoch": 16.433115823817293, "grad_norm": 0.08890705555677414, "learning_rate": 9.380792885547523e-05, "loss": 0.0032, "num_input_tokens_seen": 217295104, "step": 100735 }, { "epoch": 16.43393148450245, "grad_norm": 0.016333790495991707, "learning_rate": 9.376642640598476e-05, "loss": 0.0038, "num_input_tokens_seen": 217304800, "step": 100740 }, { "epoch": 16.4347471451876, "grad_norm": 0.02868318185210228, "learning_rate": 9.372493218933303e-05, "loss": 0.1649, "num_input_tokens_seen": 217315904, "step": 100745 }, { "epoch": 16.435562805872756, "grad_norm": 0.003543232334777713, "learning_rate": 9.368344620636094e-05, "loss": 0.0033, "num_input_tokens_seen": 217327392, "step": 100750 }, { "epoch": 16.436378466557912, "grad_norm": 0.0032591603230684996, "learning_rate": 9.364196845790924e-05, "loss": 0.0027, "num_input_tokens_seen": 217338720, "step": 100755 }, { "epoch": 16.437194127243067, "grad_norm": 0.0032866154797375202, "learning_rate": 9.360049894481854e-05, "loss": 0.0053, "num_input_tokens_seen": 217350080, "step": 100760 }, { "epoch": 16.438009787928223, "grad_norm": 0.0028755348175764084, "learning_rate": 9.355903766792929e-05, "loss": 0.0058, "num_input_tokens_seen": 217361312, "step": 100765 }, { "epoch": 16.438825448613375, "grad_norm": 0.9423817992210388, "learning_rate": 9.351758462808174e-05, "loss": 0.0156, "num_input_tokens_seen": 217371488, "step": 100770 }, { "epoch": 16.43964110929853, "grad_norm": 0.0011390353320166469, "learning_rate": 9.347613982611603e-05, "loss": 0.0063, "num_input_tokens_seen": 217382080, "step": 100775 }, { "epoch": 16.440456769983687, "grad_norm": 0.15933747589588165, "learning_rate": 9.343470326287206e-05, "loss": 0.0129, "num_input_tokens_seen": 217393376, "step": 100780 }, { "epoch": 16.441272430668842, "grad_norm": 0.028406405821442604, "learning_rate": 9.339327493918958e-05, "loss": 0.0023, "num_input_tokens_seen": 217405184, "step": 100785 }, { "epoch": 16.442088091353998, "grad_norm": 0.016658103093504906, "learning_rate": 9.335185485590807e-05, "loss": 0.0059, "num_input_tokens_seen": 217415936, "step": 100790 }, { "epoch": 16.44290375203915, "grad_norm": 0.2685297727584839, "learning_rate": 9.331044301386732e-05, "loss": 0.0054, "num_input_tokens_seen": 217425856, "step": 100795 }, { "epoch": 16.443719412724306, "grad_norm": 0.004534325562417507, "learning_rate": 9.326903941390613e-05, "loss": 0.012, "num_input_tokens_seen": 217436928, "step": 100800 }, { "epoch": 16.44453507340946, "grad_norm": 0.003292136825621128, "learning_rate": 9.322764405686412e-05, "loss": 0.0089, "num_input_tokens_seen": 217448000, "step": 100805 }, { "epoch": 16.445350734094617, "grad_norm": 0.0566856749355793, "learning_rate": 9.318625694357962e-05, "loss": 0.0074, "num_input_tokens_seen": 217458624, "step": 100810 }, { "epoch": 16.446166394779773, "grad_norm": 0.019686246290802956, "learning_rate": 9.314487807489186e-05, "loss": 0.0014, "num_input_tokens_seen": 217469504, "step": 100815 }, { "epoch": 16.446982055464925, "grad_norm": 0.36817097663879395, "learning_rate": 9.310350745163931e-05, "loss": 0.0086, "num_input_tokens_seen": 217480384, "step": 100820 }, { "epoch": 16.44779771615008, "grad_norm": 0.005643445998430252, "learning_rate": 9.306214507466032e-05, "loss": 0.0152, "num_input_tokens_seen": 217491104, "step": 100825 }, { "epoch": 16.448613376835237, "grad_norm": 0.06611660122871399, "learning_rate": 9.302079094479321e-05, "loss": 0.0035, "num_input_tokens_seen": 217502240, "step": 100830 }, { "epoch": 16.449429037520392, "grad_norm": 0.0038219010457396507, "learning_rate": 9.297944506287609e-05, "loss": 0.0027, "num_input_tokens_seen": 217512672, "step": 100835 }, { "epoch": 16.450244698205548, "grad_norm": 0.3511030972003937, "learning_rate": 9.293810742974679e-05, "loss": 0.0223, "num_input_tokens_seen": 217524128, "step": 100840 }, { "epoch": 16.4510603588907, "grad_norm": 0.012025956995785236, "learning_rate": 9.28967780462432e-05, "loss": 0.0031, "num_input_tokens_seen": 217533696, "step": 100845 }, { "epoch": 16.451876019575856, "grad_norm": 0.013197160325944424, "learning_rate": 9.28554569132028e-05, "loss": 0.0042, "num_input_tokens_seen": 217544288, "step": 100850 }, { "epoch": 16.45269168026101, "grad_norm": 0.20114360749721527, "learning_rate": 9.28141440314631e-05, "loss": 0.0132, "num_input_tokens_seen": 217553760, "step": 100855 }, { "epoch": 16.453507340946167, "grad_norm": 0.018120115622878075, "learning_rate": 9.277283940186132e-05, "loss": 0.0078, "num_input_tokens_seen": 217563584, "step": 100860 }, { "epoch": 16.454323001631323, "grad_norm": 0.05748572573065758, "learning_rate": 9.273154302523456e-05, "loss": 0.0049, "num_input_tokens_seen": 217576064, "step": 100865 }, { "epoch": 16.455138662316475, "grad_norm": 0.003015045775100589, "learning_rate": 9.269025490241972e-05, "loss": 0.0009, "num_input_tokens_seen": 217586304, "step": 100870 }, { "epoch": 16.45595432300163, "grad_norm": 0.07976733893156052, "learning_rate": 9.264897503425357e-05, "loss": 0.0101, "num_input_tokens_seen": 217598304, "step": 100875 }, { "epoch": 16.456769983686787, "grad_norm": 0.0006440498982556164, "learning_rate": 9.260770342157272e-05, "loss": 0.0042, "num_input_tokens_seen": 217609376, "step": 100880 }, { "epoch": 16.457585644371942, "grad_norm": 0.0032303736079484224, "learning_rate": 9.256644006521358e-05, "loss": 0.0238, "num_input_tokens_seen": 217619360, "step": 100885 }, { "epoch": 16.458401305057095, "grad_norm": 0.004021855536848307, "learning_rate": 9.252518496601237e-05, "loss": 0.0023, "num_input_tokens_seen": 217629888, "step": 100890 }, { "epoch": 16.45921696574225, "grad_norm": 0.024304958060383797, "learning_rate": 9.248393812480522e-05, "loss": 0.1278, "num_input_tokens_seen": 217640192, "step": 100895 }, { "epoch": 16.460032626427406, "grad_norm": 0.02928841859102249, "learning_rate": 9.244269954242806e-05, "loss": 0.0362, "num_input_tokens_seen": 217649952, "step": 100900 }, { "epoch": 16.46084828711256, "grad_norm": 0.5276224613189697, "learning_rate": 9.240146921971642e-05, "loss": 0.0188, "num_input_tokens_seen": 217659776, "step": 100905 }, { "epoch": 16.461663947797717, "grad_norm": 0.009660049341619015, "learning_rate": 9.23602471575064e-05, "loss": 0.0024, "num_input_tokens_seen": 217670048, "step": 100910 }, { "epoch": 16.46247960848287, "grad_norm": 0.0017419654177501798, "learning_rate": 9.231903335663283e-05, "loss": 0.0006, "num_input_tokens_seen": 217680352, "step": 100915 }, { "epoch": 16.463295269168025, "grad_norm": 0.001143762143328786, "learning_rate": 9.227782781793148e-05, "loss": 0.0012, "num_input_tokens_seen": 217691360, "step": 100920 }, { "epoch": 16.46411092985318, "grad_norm": 0.004461290314793587, "learning_rate": 9.223663054223692e-05, "loss": 0.0915, "num_input_tokens_seen": 217702144, "step": 100925 }, { "epoch": 16.464926590538337, "grad_norm": 0.024504657834768295, "learning_rate": 9.219544153038462e-05, "loss": 0.0097, "num_input_tokens_seen": 217711904, "step": 100930 }, { "epoch": 16.465742251223492, "grad_norm": 0.06024233251810074, "learning_rate": 9.21542607832087e-05, "loss": 0.0024, "num_input_tokens_seen": 217722688, "step": 100935 }, { "epoch": 16.466557911908644, "grad_norm": 0.000889261020347476, "learning_rate": 9.211308830154441e-05, "loss": 0.0011, "num_input_tokens_seen": 217733824, "step": 100940 }, { "epoch": 16.4673735725938, "grad_norm": 0.08880539983510971, "learning_rate": 9.20719240862255e-05, "loss": 0.0031, "num_input_tokens_seen": 217745760, "step": 100945 }, { "epoch": 16.468189233278956, "grad_norm": 0.00549895316362381, "learning_rate": 9.203076813808687e-05, "loss": 0.0016, "num_input_tokens_seen": 217757280, "step": 100950 }, { "epoch": 16.46900489396411, "grad_norm": 0.6288931369781494, "learning_rate": 9.198962045796195e-05, "loss": 0.2416, "num_input_tokens_seen": 217767584, "step": 100955 }, { "epoch": 16.469820554649267, "grad_norm": 0.006103844847530127, "learning_rate": 9.194848104668513e-05, "loss": 0.0008, "num_input_tokens_seen": 217778144, "step": 100960 }, { "epoch": 16.47063621533442, "grad_norm": 0.003826084081083536, "learning_rate": 9.190734990508998e-05, "loss": 0.0889, "num_input_tokens_seen": 217788512, "step": 100965 }, { "epoch": 16.471451876019575, "grad_norm": 0.05754251033067703, "learning_rate": 9.18662270340101e-05, "loss": 0.0027, "num_input_tokens_seen": 217800064, "step": 100970 }, { "epoch": 16.47226753670473, "grad_norm": 0.03979867324233055, "learning_rate": 9.182511243427888e-05, "loss": 0.0044, "num_input_tokens_seen": 217810528, "step": 100975 }, { "epoch": 16.473083197389887, "grad_norm": 0.008189466781914234, "learning_rate": 9.178400610672954e-05, "loss": 0.0053, "num_input_tokens_seen": 217821888, "step": 100980 }, { "epoch": 16.473898858075042, "grad_norm": 0.025714466348290443, "learning_rate": 9.174290805219521e-05, "loss": 0.0033, "num_input_tokens_seen": 217832256, "step": 100985 }, { "epoch": 16.474714518760194, "grad_norm": 0.003959680907428265, "learning_rate": 9.170181827150875e-05, "loss": 0.0025, "num_input_tokens_seen": 217842944, "step": 100990 }, { "epoch": 16.47553017944535, "grad_norm": 0.21423105895519257, "learning_rate": 9.166073676550291e-05, "loss": 0.0501, "num_input_tokens_seen": 217853632, "step": 100995 }, { "epoch": 16.476345840130506, "grad_norm": 0.5114409923553467, "learning_rate": 9.161966353501023e-05, "loss": 0.0607, "num_input_tokens_seen": 217864608, "step": 101000 }, { "epoch": 16.47716150081566, "grad_norm": 0.004166365601122379, "learning_rate": 9.157859858086315e-05, "loss": 0.0011, "num_input_tokens_seen": 217875488, "step": 101005 }, { "epoch": 16.477977161500817, "grad_norm": 0.08910607546567917, "learning_rate": 9.153754190389379e-05, "loss": 0.0048, "num_input_tokens_seen": 217883904, "step": 101010 }, { "epoch": 16.47879282218597, "grad_norm": 0.0007670016493648291, "learning_rate": 9.149649350493456e-05, "loss": 0.0048, "num_input_tokens_seen": 217894784, "step": 101015 }, { "epoch": 16.479608482871125, "grad_norm": 0.04292795807123184, "learning_rate": 9.145545338481682e-05, "loss": 0.0158, "num_input_tokens_seen": 217905248, "step": 101020 }, { "epoch": 16.48042414355628, "grad_norm": 0.0022143730893731117, "learning_rate": 9.141442154437286e-05, "loss": 0.0046, "num_input_tokens_seen": 217915328, "step": 101025 }, { "epoch": 16.481239804241437, "grad_norm": 0.0029329683165997267, "learning_rate": 9.137339798443372e-05, "loss": 0.0091, "num_input_tokens_seen": 217925440, "step": 101030 }, { "epoch": 16.482055464926592, "grad_norm": 0.0045556616969406605, "learning_rate": 9.133238270583133e-05, "loss": 0.0029, "num_input_tokens_seen": 217936160, "step": 101035 }, { "epoch": 16.482871125611744, "grad_norm": 0.0033384861890226603, "learning_rate": 9.129137570939632e-05, "loss": 0.144, "num_input_tokens_seen": 217946784, "step": 101040 }, { "epoch": 16.4836867862969, "grad_norm": 0.03504770249128342, "learning_rate": 9.125037699596039e-05, "loss": 0.0601, "num_input_tokens_seen": 217956768, "step": 101045 }, { "epoch": 16.484502446982056, "grad_norm": 0.052725981920957565, "learning_rate": 9.12093865663538e-05, "loss": 0.0048, "num_input_tokens_seen": 217967936, "step": 101050 }, { "epoch": 16.48531810766721, "grad_norm": 0.002041316358372569, "learning_rate": 9.11684044214079e-05, "loss": 0.0053, "num_input_tokens_seen": 217979360, "step": 101055 }, { "epoch": 16.486133768352367, "grad_norm": 0.004488829988986254, "learning_rate": 9.112743056195261e-05, "loss": 0.0426, "num_input_tokens_seen": 217989760, "step": 101060 }, { "epoch": 16.48694942903752, "grad_norm": 0.0008538112742826343, "learning_rate": 9.10864649888189e-05, "loss": 0.0006, "num_input_tokens_seen": 218000768, "step": 101065 }, { "epoch": 16.487765089722675, "grad_norm": 0.035585202276706696, "learning_rate": 9.104550770283648e-05, "loss": 0.1114, "num_input_tokens_seen": 218012576, "step": 101070 }, { "epoch": 16.48858075040783, "grad_norm": 0.013968314044177532, "learning_rate": 9.100455870483587e-05, "loss": 0.0028, "num_input_tokens_seen": 218021824, "step": 101075 }, { "epoch": 16.489396411092986, "grad_norm": 0.004410531837493181, "learning_rate": 9.096361799564651e-05, "loss": 0.0039, "num_input_tokens_seen": 218032128, "step": 101080 }, { "epoch": 16.49021207177814, "grad_norm": 0.006506810896098614, "learning_rate": 9.092268557609856e-05, "loss": 0.0525, "num_input_tokens_seen": 218040768, "step": 101085 }, { "epoch": 16.491027732463294, "grad_norm": 0.0070687332190573215, "learning_rate": 9.088176144702104e-05, "loss": 0.0055, "num_input_tokens_seen": 218051872, "step": 101090 }, { "epoch": 16.49184339314845, "grad_norm": 0.07511427253484726, "learning_rate": 9.084084560924394e-05, "loss": 0.0032, "num_input_tokens_seen": 218062688, "step": 101095 }, { "epoch": 16.492659053833606, "grad_norm": 0.0031643579714000225, "learning_rate": 9.079993806359587e-05, "loss": 0.0013, "num_input_tokens_seen": 218073216, "step": 101100 }, { "epoch": 16.49347471451876, "grad_norm": 0.03284528851509094, "learning_rate": 9.075903881090636e-05, "loss": 0.0043, "num_input_tokens_seen": 218084832, "step": 101105 }, { "epoch": 16.494290375203914, "grad_norm": 0.0099415248259902, "learning_rate": 9.071814785200399e-05, "loss": 0.0698, "num_input_tokens_seen": 218095872, "step": 101110 }, { "epoch": 16.49510603588907, "grad_norm": 0.0019557003397494555, "learning_rate": 9.067726518771762e-05, "loss": 0.002, "num_input_tokens_seen": 218106944, "step": 101115 }, { "epoch": 16.495921696574225, "grad_norm": 0.02333030290901661, "learning_rate": 9.063639081887576e-05, "loss": 0.0044, "num_input_tokens_seen": 218118880, "step": 101120 }, { "epoch": 16.49673735725938, "grad_norm": 0.022836850956082344, "learning_rate": 9.059552474630672e-05, "loss": 0.0033, "num_input_tokens_seen": 218129472, "step": 101125 }, { "epoch": 16.497553017944536, "grad_norm": 0.022092454135417938, "learning_rate": 9.055466697083875e-05, "loss": 0.0036, "num_input_tokens_seen": 218138880, "step": 101130 }, { "epoch": 16.49836867862969, "grad_norm": 0.005485893692821264, "learning_rate": 9.051381749329984e-05, "loss": 0.0052, "num_input_tokens_seen": 218148192, "step": 101135 }, { "epoch": 16.499184339314844, "grad_norm": 0.007085420656949282, "learning_rate": 9.04729763145179e-05, "loss": 0.0105, "num_input_tokens_seen": 218158048, "step": 101140 }, { "epoch": 16.5, "grad_norm": 0.002588339615613222, "learning_rate": 9.043214343532063e-05, "loss": 0.001, "num_input_tokens_seen": 218168352, "step": 101145 }, { "epoch": 16.500815660685156, "grad_norm": 0.07121369242668152, "learning_rate": 9.039131885653556e-05, "loss": 0.098, "num_input_tokens_seen": 218178240, "step": 101150 }, { "epoch": 16.50163132137031, "grad_norm": 0.6670370101928711, "learning_rate": 9.035050257898991e-05, "loss": 0.0528, "num_input_tokens_seen": 218189760, "step": 101155 }, { "epoch": 16.502446982055464, "grad_norm": 0.011328437365591526, "learning_rate": 9.030969460351124e-05, "loss": 0.0134, "num_input_tokens_seen": 218200352, "step": 101160 }, { "epoch": 16.50326264274062, "grad_norm": 0.00879648607224226, "learning_rate": 9.026889493092605e-05, "loss": 0.0035, "num_input_tokens_seen": 218210112, "step": 101165 }, { "epoch": 16.504078303425775, "grad_norm": 0.0035576142836362123, "learning_rate": 9.022810356206179e-05, "loss": 0.003, "num_input_tokens_seen": 218221120, "step": 101170 }, { "epoch": 16.50489396411093, "grad_norm": 0.0272084828466177, "learning_rate": 9.018732049774459e-05, "loss": 0.0064, "num_input_tokens_seen": 218231488, "step": 101175 }, { "epoch": 16.505709624796086, "grad_norm": 0.02173745632171631, "learning_rate": 9.014654573880143e-05, "loss": 0.0374, "num_input_tokens_seen": 218242784, "step": 101180 }, { "epoch": 16.50652528548124, "grad_norm": 0.0013352024834603071, "learning_rate": 9.010577928605823e-05, "loss": 0.1504, "num_input_tokens_seen": 218254272, "step": 101185 }, { "epoch": 16.507340946166394, "grad_norm": 0.018275253474712372, "learning_rate": 9.00650211403417e-05, "loss": 0.0135, "num_input_tokens_seen": 218264896, "step": 101190 }, { "epoch": 16.50815660685155, "grad_norm": 0.006663934327661991, "learning_rate": 9.002427130247726e-05, "loss": 0.0097, "num_input_tokens_seen": 218274912, "step": 101195 }, { "epoch": 16.508972267536706, "grad_norm": 0.8704914450645447, "learning_rate": 8.998352977329127e-05, "loss": 0.1257, "num_input_tokens_seen": 218286432, "step": 101200 }, { "epoch": 16.50978792822186, "grad_norm": 0.0068464456126093864, "learning_rate": 8.994279655360899e-05, "loss": 0.005, "num_input_tokens_seen": 218297184, "step": 101205 }, { "epoch": 16.510603588907014, "grad_norm": 0.014604619704186916, "learning_rate": 8.99020716442564e-05, "loss": 0.0014, "num_input_tokens_seen": 218307744, "step": 101210 }, { "epoch": 16.51141924959217, "grad_norm": 0.01995505951344967, "learning_rate": 8.986135504605831e-05, "loss": 0.0114, "num_input_tokens_seen": 218319136, "step": 101215 }, { "epoch": 16.512234910277325, "grad_norm": 0.004068124573677778, "learning_rate": 8.982064675984025e-05, "loss": 0.0152, "num_input_tokens_seen": 218330272, "step": 101220 }, { "epoch": 16.51305057096248, "grad_norm": 0.09120550006628036, "learning_rate": 8.977994678642714e-05, "loss": 0.0033, "num_input_tokens_seen": 218342528, "step": 101225 }, { "epoch": 16.513866231647633, "grad_norm": 0.01239969301968813, "learning_rate": 8.973925512664383e-05, "loss": 0.0094, "num_input_tokens_seen": 218353408, "step": 101230 }, { "epoch": 16.51468189233279, "grad_norm": 0.0026525133289396763, "learning_rate": 8.969857178131497e-05, "loss": 0.0209, "num_input_tokens_seen": 218365056, "step": 101235 }, { "epoch": 16.515497553017944, "grad_norm": 0.0774027407169342, "learning_rate": 8.965789675126501e-05, "loss": 0.0024, "num_input_tokens_seen": 218376992, "step": 101240 }, { "epoch": 16.5163132137031, "grad_norm": 0.48965615034103394, "learning_rate": 8.961723003731837e-05, "loss": 0.1666, "num_input_tokens_seen": 218387584, "step": 101245 }, { "epoch": 16.517128874388256, "grad_norm": 0.08386432379484177, "learning_rate": 8.95765716402992e-05, "loss": 0.0089, "num_input_tokens_seen": 218398400, "step": 101250 }, { "epoch": 16.517944535073408, "grad_norm": 0.21002088487148285, "learning_rate": 8.953592156103141e-05, "loss": 0.0058, "num_input_tokens_seen": 218408672, "step": 101255 }, { "epoch": 16.518760195758563, "grad_norm": 0.016797726973891258, "learning_rate": 8.949527980033889e-05, "loss": 0.0046, "num_input_tokens_seen": 218419616, "step": 101260 }, { "epoch": 16.51957585644372, "grad_norm": 0.009152274578809738, "learning_rate": 8.945464635904532e-05, "loss": 0.0025, "num_input_tokens_seen": 218430400, "step": 101265 }, { "epoch": 16.520391517128875, "grad_norm": 0.04372251778841019, "learning_rate": 8.94140212379741e-05, "loss": 0.0257, "num_input_tokens_seen": 218440608, "step": 101270 }, { "epoch": 16.52120717781403, "grad_norm": 0.002020826330408454, "learning_rate": 8.937340443794867e-05, "loss": 0.0017, "num_input_tokens_seen": 218452512, "step": 101275 }, { "epoch": 16.522022838499183, "grad_norm": 0.038087837398052216, "learning_rate": 8.933279595979205e-05, "loss": 0.0051, "num_input_tokens_seen": 218462880, "step": 101280 }, { "epoch": 16.52283849918434, "grad_norm": 0.0023950140457600355, "learning_rate": 8.929219580432735e-05, "loss": 0.0018, "num_input_tokens_seen": 218473856, "step": 101285 }, { "epoch": 16.523654159869494, "grad_norm": 0.04852251708507538, "learning_rate": 8.925160397237725e-05, "loss": 0.0033, "num_input_tokens_seen": 218485824, "step": 101290 }, { "epoch": 16.52446982055465, "grad_norm": 0.00610564136877656, "learning_rate": 8.921102046476454e-05, "loss": 0.0013, "num_input_tokens_seen": 218496288, "step": 101295 }, { "epoch": 16.525285481239806, "grad_norm": 0.010461220517754555, "learning_rate": 8.917044528231145e-05, "loss": 0.0044, "num_input_tokens_seen": 218507616, "step": 101300 }, { "epoch": 16.526101141924958, "grad_norm": 0.014666219241917133, "learning_rate": 8.912987842584075e-05, "loss": 0.0025, "num_input_tokens_seen": 218519968, "step": 101305 }, { "epoch": 16.526916802610113, "grad_norm": 0.060498714447021484, "learning_rate": 8.908931989617403e-05, "loss": 0.0045, "num_input_tokens_seen": 218532352, "step": 101310 }, { "epoch": 16.52773246329527, "grad_norm": 0.009490466676652431, "learning_rate": 8.904876969413372e-05, "loss": 0.0077, "num_input_tokens_seen": 218543616, "step": 101315 }, { "epoch": 16.528548123980425, "grad_norm": 0.32791393995285034, "learning_rate": 8.900822782054124e-05, "loss": 0.0133, "num_input_tokens_seen": 218554912, "step": 101320 }, { "epoch": 16.52936378466558, "grad_norm": 0.14628013968467712, "learning_rate": 8.896769427621848e-05, "loss": 0.0045, "num_input_tokens_seen": 218565984, "step": 101325 }, { "epoch": 16.530179445350733, "grad_norm": 0.003448865143582225, "learning_rate": 8.892716906198683e-05, "loss": 0.0013, "num_input_tokens_seen": 218576448, "step": 101330 }, { "epoch": 16.53099510603589, "grad_norm": 0.019616059958934784, "learning_rate": 8.88866521786676e-05, "loss": 0.002, "num_input_tokens_seen": 218585472, "step": 101335 }, { "epoch": 16.531810766721044, "grad_norm": 0.0266133900731802, "learning_rate": 8.884614362708188e-05, "loss": 0.0206, "num_input_tokens_seen": 218595456, "step": 101340 }, { "epoch": 16.5326264274062, "grad_norm": 0.0021647054236382246, "learning_rate": 8.88056434080507e-05, "loss": 0.0027, "num_input_tokens_seen": 218605856, "step": 101345 }, { "epoch": 16.533442088091356, "grad_norm": 0.007287389598786831, "learning_rate": 8.876515152239472e-05, "loss": 0.0111, "num_input_tokens_seen": 218617920, "step": 101350 }, { "epoch": 16.534257748776508, "grad_norm": 0.016180168837308884, "learning_rate": 8.872466797093464e-05, "loss": 0.0021, "num_input_tokens_seen": 218628992, "step": 101355 }, { "epoch": 16.535073409461663, "grad_norm": 0.07536523044109344, "learning_rate": 8.868419275449096e-05, "loss": 0.0036, "num_input_tokens_seen": 218640064, "step": 101360 }, { "epoch": 16.53588907014682, "grad_norm": 0.011844971217215061, "learning_rate": 8.864372587388387e-05, "loss": 0.0018, "num_input_tokens_seen": 218651008, "step": 101365 }, { "epoch": 16.536704730831975, "grad_norm": 0.05238876864314079, "learning_rate": 8.860326732993352e-05, "loss": 0.0019, "num_input_tokens_seen": 218660256, "step": 101370 }, { "epoch": 16.53752039151713, "grad_norm": 0.01479857787489891, "learning_rate": 8.856281712345988e-05, "loss": 0.121, "num_input_tokens_seen": 218670176, "step": 101375 }, { "epoch": 16.538336052202283, "grad_norm": 0.011312603950500488, "learning_rate": 8.852237525528262e-05, "loss": 0.0223, "num_input_tokens_seen": 218680672, "step": 101380 }, { "epoch": 16.53915171288744, "grad_norm": 0.2558872103691101, "learning_rate": 8.848194172622148e-05, "loss": 0.0171, "num_input_tokens_seen": 218691840, "step": 101385 }, { "epoch": 16.539967373572594, "grad_norm": 0.009450355544686317, "learning_rate": 8.844151653709581e-05, "loss": 0.0009, "num_input_tokens_seen": 218702304, "step": 101390 }, { "epoch": 16.54078303425775, "grad_norm": 2.262813091278076, "learning_rate": 8.840109968872495e-05, "loss": 0.0599, "num_input_tokens_seen": 218713024, "step": 101395 }, { "epoch": 16.541598694942905, "grad_norm": 0.023536687716841698, "learning_rate": 8.836069118192791e-05, "loss": 0.0083, "num_input_tokens_seen": 218723616, "step": 101400 }, { "epoch": 16.542414355628058, "grad_norm": 0.3981465697288513, "learning_rate": 8.83202910175237e-05, "loss": 0.0132, "num_input_tokens_seen": 218734048, "step": 101405 }, { "epoch": 16.543230016313213, "grad_norm": 0.48934903740882874, "learning_rate": 8.827989919633106e-05, "loss": 0.0089, "num_input_tokens_seen": 218743232, "step": 101410 }, { "epoch": 16.54404567699837, "grad_norm": 0.057192884385585785, "learning_rate": 8.82395157191685e-05, "loss": 0.0031, "num_input_tokens_seen": 218751968, "step": 101415 }, { "epoch": 16.544861337683525, "grad_norm": 0.004864667076617479, "learning_rate": 8.819914058685458e-05, "loss": 0.0168, "num_input_tokens_seen": 218762912, "step": 101420 }, { "epoch": 16.545676998368677, "grad_norm": 0.0012730599846690893, "learning_rate": 8.815877380020743e-05, "loss": 0.0028, "num_input_tokens_seen": 218772352, "step": 101425 }, { "epoch": 16.546492659053833, "grad_norm": 0.0053931367583572865, "learning_rate": 8.811841536004505e-05, "loss": 0.1, "num_input_tokens_seen": 218783264, "step": 101430 }, { "epoch": 16.54730831973899, "grad_norm": 0.023255428299307823, "learning_rate": 8.807806526718565e-05, "loss": 0.0118, "num_input_tokens_seen": 218794784, "step": 101435 }, { "epoch": 16.548123980424144, "grad_norm": 0.17642340064048767, "learning_rate": 8.803772352244683e-05, "loss": 0.0244, "num_input_tokens_seen": 218806752, "step": 101440 }, { "epoch": 16.5489396411093, "grad_norm": 0.013758941553533077, "learning_rate": 8.799739012664615e-05, "loss": 0.003, "num_input_tokens_seen": 218818016, "step": 101445 }, { "epoch": 16.549755301794452, "grad_norm": 0.06110873073339462, "learning_rate": 8.795706508060102e-05, "loss": 0.0095, "num_input_tokens_seen": 218828352, "step": 101450 }, { "epoch": 16.550570962479608, "grad_norm": 0.025419682264328003, "learning_rate": 8.791674838512864e-05, "loss": 0.0048, "num_input_tokens_seen": 218838656, "step": 101455 }, { "epoch": 16.551386623164763, "grad_norm": 0.001649940386414528, "learning_rate": 8.787644004104617e-05, "loss": 0.0022, "num_input_tokens_seen": 218848384, "step": 101460 }, { "epoch": 16.55220228384992, "grad_norm": 0.01972990855574608, "learning_rate": 8.78361400491704e-05, "loss": 0.0015, "num_input_tokens_seen": 218858624, "step": 101465 }, { "epoch": 16.553017944535075, "grad_norm": 0.0030495089013129473, "learning_rate": 8.779584841031818e-05, "loss": 0.0035, "num_input_tokens_seen": 218869280, "step": 101470 }, { "epoch": 16.553833605220227, "grad_norm": 0.026724744588136673, "learning_rate": 8.775556512530597e-05, "loss": 0.0607, "num_input_tokens_seen": 218880352, "step": 101475 }, { "epoch": 16.554649265905383, "grad_norm": 0.005022918339818716, "learning_rate": 8.771529019495022e-05, "loss": 0.0813, "num_input_tokens_seen": 218891552, "step": 101480 }, { "epoch": 16.55546492659054, "grad_norm": 0.0017528855241835117, "learning_rate": 8.767502362006713e-05, "loss": 0.0064, "num_input_tokens_seen": 218902912, "step": 101485 }, { "epoch": 16.556280587275694, "grad_norm": 0.005208577029407024, "learning_rate": 8.763476540147275e-05, "loss": 0.0239, "num_input_tokens_seen": 218914016, "step": 101490 }, { "epoch": 16.55709624796085, "grad_norm": 0.00815120991319418, "learning_rate": 8.759451553998299e-05, "loss": 0.0029, "num_input_tokens_seen": 218924960, "step": 101495 }, { "epoch": 16.557911908646002, "grad_norm": 0.007020113058388233, "learning_rate": 8.755427403641352e-05, "loss": 0.0215, "num_input_tokens_seen": 218935808, "step": 101500 }, { "epoch": 16.558727569331158, "grad_norm": 0.009867184795439243, "learning_rate": 8.751404089157993e-05, "loss": 0.002, "num_input_tokens_seen": 218945952, "step": 101505 }, { "epoch": 16.559543230016313, "grad_norm": 0.0018413146026432514, "learning_rate": 8.747381610629762e-05, "loss": 0.0015, "num_input_tokens_seen": 218957792, "step": 101510 }, { "epoch": 16.56035889070147, "grad_norm": 0.002308186376467347, "learning_rate": 8.74335996813817e-05, "loss": 0.0022, "num_input_tokens_seen": 218968576, "step": 101515 }, { "epoch": 16.561174551386625, "grad_norm": 0.0024469371419399977, "learning_rate": 8.739339161764725e-05, "loss": 0.0018, "num_input_tokens_seen": 218979104, "step": 101520 }, { "epoch": 16.561990212071777, "grad_norm": 0.0008473870693705976, "learning_rate": 8.735319191590918e-05, "loss": 0.0172, "num_input_tokens_seen": 218989376, "step": 101525 }, { "epoch": 16.562805872756933, "grad_norm": 0.1348130851984024, "learning_rate": 8.731300057698216e-05, "loss": 0.0088, "num_input_tokens_seen": 219001440, "step": 101530 }, { "epoch": 16.563621533442088, "grad_norm": 1.1705695390701294, "learning_rate": 8.727281760168055e-05, "loss": 0.0986, "num_input_tokens_seen": 219012480, "step": 101535 }, { "epoch": 16.564437194127244, "grad_norm": 0.03120153397321701, "learning_rate": 8.723264299081912e-05, "loss": 0.0034, "num_input_tokens_seen": 219022528, "step": 101540 }, { "epoch": 16.5652528548124, "grad_norm": 0.016143882647156715, "learning_rate": 8.719247674521157e-05, "loss": 0.0086, "num_input_tokens_seen": 219033440, "step": 101545 }, { "epoch": 16.56606851549755, "grad_norm": 0.0021475458052009344, "learning_rate": 8.715231886567248e-05, "loss": 0.003, "num_input_tokens_seen": 219043776, "step": 101550 }, { "epoch": 16.566884176182707, "grad_norm": 0.013650192879140377, "learning_rate": 8.711216935301508e-05, "loss": 0.0016, "num_input_tokens_seen": 219054880, "step": 101555 }, { "epoch": 16.567699836867863, "grad_norm": 0.0033268542028963566, "learning_rate": 8.70720282080536e-05, "loss": 0.0098, "num_input_tokens_seen": 219065440, "step": 101560 }, { "epoch": 16.56851549755302, "grad_norm": 0.0033438701648265123, "learning_rate": 8.703189543160106e-05, "loss": 0.0111, "num_input_tokens_seen": 219076576, "step": 101565 }, { "epoch": 16.569331158238175, "grad_norm": 0.027511749416589737, "learning_rate": 8.699177102447126e-05, "loss": 0.0124, "num_input_tokens_seen": 219087168, "step": 101570 }, { "epoch": 16.570146818923327, "grad_norm": 0.0014527264283969998, "learning_rate": 8.695165498747698e-05, "loss": 0.0014, "num_input_tokens_seen": 219096992, "step": 101575 }, { "epoch": 16.570962479608482, "grad_norm": 0.2636241614818573, "learning_rate": 8.691154732143147e-05, "loss": 0.0255, "num_input_tokens_seen": 219108224, "step": 101580 }, { "epoch": 16.571778140293638, "grad_norm": 0.045492466539144516, "learning_rate": 8.687144802714753e-05, "loss": 0.0053, "num_input_tokens_seen": 219119168, "step": 101585 }, { "epoch": 16.572593800978794, "grad_norm": 0.010311417281627655, "learning_rate": 8.683135710543777e-05, "loss": 0.0031, "num_input_tokens_seen": 219130624, "step": 101590 }, { "epoch": 16.57340946166395, "grad_norm": 0.006684463005512953, "learning_rate": 8.679127455711466e-05, "loss": 0.0302, "num_input_tokens_seen": 219140416, "step": 101595 }, { "epoch": 16.5742251223491, "grad_norm": 0.5502263307571411, "learning_rate": 8.675120038299062e-05, "loss": 0.0246, "num_input_tokens_seen": 219151904, "step": 101600 }, { "epoch": 16.575040783034257, "grad_norm": 0.03278733789920807, "learning_rate": 8.671113458387775e-05, "loss": 0.0024, "num_input_tokens_seen": 219163968, "step": 101605 }, { "epoch": 16.575856443719413, "grad_norm": 0.007764813490211964, "learning_rate": 8.667107716058798e-05, "loss": 0.0027, "num_input_tokens_seen": 219176128, "step": 101610 }, { "epoch": 16.57667210440457, "grad_norm": 0.0464925579726696, "learning_rate": 8.66310281139332e-05, "loss": 0.0015, "num_input_tokens_seen": 219187552, "step": 101615 }, { "epoch": 16.57748776508972, "grad_norm": 0.022954393178224564, "learning_rate": 8.659098744472505e-05, "loss": 0.0061, "num_input_tokens_seen": 219197312, "step": 101620 }, { "epoch": 16.578303425774877, "grad_norm": 0.00563532579690218, "learning_rate": 8.655095515377498e-05, "loss": 0.0129, "num_input_tokens_seen": 219208704, "step": 101625 }, { "epoch": 16.579119086460032, "grad_norm": 0.0018282659584656358, "learning_rate": 8.65109312418943e-05, "loss": 0.2099, "num_input_tokens_seen": 219219264, "step": 101630 }, { "epoch": 16.579934747145188, "grad_norm": 0.030916016548871994, "learning_rate": 8.647091570989413e-05, "loss": 0.021, "num_input_tokens_seen": 219229472, "step": 101635 }, { "epoch": 16.580750407830344, "grad_norm": 0.3202219605445862, "learning_rate": 8.643090855858549e-05, "loss": 0.0107, "num_input_tokens_seen": 219239168, "step": 101640 }, { "epoch": 16.581566068515496, "grad_norm": 0.054054148495197296, "learning_rate": 8.639090978877912e-05, "loss": 0.0159, "num_input_tokens_seen": 219248832, "step": 101645 }, { "epoch": 16.58238172920065, "grad_norm": 0.007812487427145243, "learning_rate": 8.635091940128548e-05, "loss": 0.0387, "num_input_tokens_seen": 219260352, "step": 101650 }, { "epoch": 16.583197389885807, "grad_norm": 0.2291809618473053, "learning_rate": 8.631093739691553e-05, "loss": 0.009, "num_input_tokens_seen": 219271296, "step": 101655 }, { "epoch": 16.584013050570963, "grad_norm": 0.5422284007072449, "learning_rate": 8.627096377647898e-05, "loss": 0.0182, "num_input_tokens_seen": 219282560, "step": 101660 }, { "epoch": 16.58482871125612, "grad_norm": 0.0009843306615948677, "learning_rate": 8.623099854078643e-05, "loss": 0.0007, "num_input_tokens_seen": 219293376, "step": 101665 }, { "epoch": 16.58564437194127, "grad_norm": 0.38116028904914856, "learning_rate": 8.619104169064734e-05, "loss": 0.0151, "num_input_tokens_seen": 219304320, "step": 101670 }, { "epoch": 16.586460032626427, "grad_norm": 0.004795018583536148, "learning_rate": 8.615109322687203e-05, "loss": 0.058, "num_input_tokens_seen": 219315872, "step": 101675 }, { "epoch": 16.587275693311582, "grad_norm": 0.019142167642712593, "learning_rate": 8.611115315026951e-05, "loss": 0.0042, "num_input_tokens_seen": 219327424, "step": 101680 }, { "epoch": 16.588091353996738, "grad_norm": 0.5726431012153625, "learning_rate": 8.607122146164986e-05, "loss": 0.0125, "num_input_tokens_seen": 219337632, "step": 101685 }, { "epoch": 16.588907014681894, "grad_norm": 0.011856688186526299, "learning_rate": 8.60312981618217e-05, "loss": 0.0027, "num_input_tokens_seen": 219348960, "step": 101690 }, { "epoch": 16.589722675367046, "grad_norm": 0.0019181864336133003, "learning_rate": 8.599138325159472e-05, "loss": 0.0047, "num_input_tokens_seen": 219359680, "step": 101695 }, { "epoch": 16.5905383360522, "grad_norm": 0.004419298842549324, "learning_rate": 8.595147673177728e-05, "loss": 0.0018, "num_input_tokens_seen": 219368768, "step": 101700 }, { "epoch": 16.591353996737357, "grad_norm": 0.00823761336505413, "learning_rate": 8.591157860317871e-05, "loss": 0.0039, "num_input_tokens_seen": 219378816, "step": 101705 }, { "epoch": 16.592169657422513, "grad_norm": 0.001990312710404396, "learning_rate": 8.587168886660707e-05, "loss": 0.0013, "num_input_tokens_seen": 219389280, "step": 101710 }, { "epoch": 16.59298531810767, "grad_norm": 0.0016580966766923666, "learning_rate": 8.583180752287123e-05, "loss": 0.0136, "num_input_tokens_seen": 219399744, "step": 101715 }, { "epoch": 16.59380097879282, "grad_norm": 1.9643034934997559, "learning_rate": 8.579193457277895e-05, "loss": 0.097, "num_input_tokens_seen": 219411168, "step": 101720 }, { "epoch": 16.594616639477977, "grad_norm": 0.028503771871328354, "learning_rate": 8.575207001713875e-05, "loss": 0.0289, "num_input_tokens_seen": 219421888, "step": 101725 }, { "epoch": 16.595432300163132, "grad_norm": 0.0008037626394070685, "learning_rate": 8.571221385675832e-05, "loss": 0.0023, "num_input_tokens_seen": 219432320, "step": 101730 }, { "epoch": 16.596247960848288, "grad_norm": 0.019681762903928757, "learning_rate": 8.567236609244544e-05, "loss": 0.0012, "num_input_tokens_seen": 219442720, "step": 101735 }, { "epoch": 16.597063621533444, "grad_norm": 0.048574842512607574, "learning_rate": 8.563252672500771e-05, "loss": 0.0036, "num_input_tokens_seen": 219452768, "step": 101740 }, { "epoch": 16.597879282218596, "grad_norm": 0.001870894804596901, "learning_rate": 8.559269575525247e-05, "loss": 0.0143, "num_input_tokens_seen": 219462752, "step": 101745 }, { "epoch": 16.59869494290375, "grad_norm": 0.9398384690284729, "learning_rate": 8.555287318398697e-05, "loss": 0.1235, "num_input_tokens_seen": 219471488, "step": 101750 }, { "epoch": 16.599510603588907, "grad_norm": 0.04680575802922249, "learning_rate": 8.551305901201822e-05, "loss": 0.0151, "num_input_tokens_seen": 219481088, "step": 101755 }, { "epoch": 16.600326264274063, "grad_norm": 0.03274672478437424, "learning_rate": 8.54732532401532e-05, "loss": 0.0039, "num_input_tokens_seen": 219491648, "step": 101760 }, { "epoch": 16.601141924959215, "grad_norm": 0.9022951126098633, "learning_rate": 8.543345586919854e-05, "loss": 0.0484, "num_input_tokens_seen": 219501408, "step": 101765 }, { "epoch": 16.60195758564437, "grad_norm": 0.008404278196394444, "learning_rate": 8.53936668999608e-05, "loss": 0.1152, "num_input_tokens_seen": 219512672, "step": 101770 }, { "epoch": 16.602773246329527, "grad_norm": 0.3932918906211853, "learning_rate": 8.535388633324625e-05, "loss": 0.0063, "num_input_tokens_seen": 219524768, "step": 101775 }, { "epoch": 16.603588907014682, "grad_norm": 0.011017882265150547, "learning_rate": 8.531411416986152e-05, "loss": 0.0017, "num_input_tokens_seen": 219536544, "step": 101780 }, { "epoch": 16.604404567699838, "grad_norm": 0.09159163385629654, "learning_rate": 8.5274350410612e-05, "loss": 0.1218, "num_input_tokens_seen": 219547968, "step": 101785 }, { "epoch": 16.605220228384994, "grad_norm": 0.0036345161497592926, "learning_rate": 8.523459505630415e-05, "loss": 0.0026, "num_input_tokens_seen": 219560032, "step": 101790 }, { "epoch": 16.606035889070146, "grad_norm": 0.11200443655252457, "learning_rate": 8.51948481077432e-05, "loss": 0.0067, "num_input_tokens_seen": 219571296, "step": 101795 }, { "epoch": 16.6068515497553, "grad_norm": 0.01962619461119175, "learning_rate": 8.515510956573507e-05, "loss": 0.0114, "num_input_tokens_seen": 219582976, "step": 101800 }, { "epoch": 16.607667210440457, "grad_norm": 0.04852467402815819, "learning_rate": 8.511537943108466e-05, "loss": 0.0049, "num_input_tokens_seen": 219593184, "step": 101805 }, { "epoch": 16.608482871125613, "grad_norm": 0.018769459798932076, "learning_rate": 8.507565770459769e-05, "loss": 0.0029, "num_input_tokens_seen": 219603424, "step": 101810 }, { "epoch": 16.609298531810765, "grad_norm": 0.0029953313060104847, "learning_rate": 8.503594438707856e-05, "loss": 0.0035, "num_input_tokens_seen": 219614976, "step": 101815 }, { "epoch": 16.61011419249592, "grad_norm": 0.04238995537161827, "learning_rate": 8.499623947933276e-05, "loss": 0.009, "num_input_tokens_seen": 219626784, "step": 101820 }, { "epoch": 16.610929853181077, "grad_norm": 0.009645356796681881, "learning_rate": 8.495654298216438e-05, "loss": 0.003, "num_input_tokens_seen": 219638752, "step": 101825 }, { "epoch": 16.611745513866232, "grad_norm": 0.013180495239794254, "learning_rate": 8.49168548963784e-05, "loss": 0.0029, "num_input_tokens_seen": 219648960, "step": 101830 }, { "epoch": 16.612561174551388, "grad_norm": 0.0010466892272233963, "learning_rate": 8.487717522277872e-05, "loss": 0.0021, "num_input_tokens_seen": 219659488, "step": 101835 }, { "epoch": 16.61337683523654, "grad_norm": 0.013838013634085655, "learning_rate": 8.483750396216988e-05, "loss": 0.0933, "num_input_tokens_seen": 219671392, "step": 101840 }, { "epoch": 16.614192495921696, "grad_norm": 0.7977277636528015, "learning_rate": 8.479784111535549e-05, "loss": 0.0786, "num_input_tokens_seen": 219681824, "step": 101845 }, { "epoch": 16.61500815660685, "grad_norm": 0.0092974454164505, "learning_rate": 8.475818668313984e-05, "loss": 0.0014, "num_input_tokens_seen": 219690400, "step": 101850 }, { "epoch": 16.615823817292007, "grad_norm": 0.003395598381757736, "learning_rate": 8.471854066632607e-05, "loss": 0.096, "num_input_tokens_seen": 219700736, "step": 101855 }, { "epoch": 16.616639477977163, "grad_norm": 0.052133284509181976, "learning_rate": 8.467890306571795e-05, "loss": 0.0071, "num_input_tokens_seen": 219711488, "step": 101860 }, { "epoch": 16.617455138662315, "grad_norm": 0.6237679719924927, "learning_rate": 8.463927388211878e-05, "loss": 0.0326, "num_input_tokens_seen": 219722336, "step": 101865 }, { "epoch": 16.61827079934747, "grad_norm": 0.001961885951459408, "learning_rate": 8.459965311633161e-05, "loss": 0.0377, "num_input_tokens_seen": 219733728, "step": 101870 }, { "epoch": 16.619086460032626, "grad_norm": 0.003230506554245949, "learning_rate": 8.456004076915952e-05, "loss": 0.0134, "num_input_tokens_seen": 219744704, "step": 101875 }, { "epoch": 16.619902120717782, "grad_norm": 0.12375766783952713, "learning_rate": 8.452043684140514e-05, "loss": 0.0103, "num_input_tokens_seen": 219755712, "step": 101880 }, { "epoch": 16.620717781402938, "grad_norm": 0.23010921478271484, "learning_rate": 8.448084133387124e-05, "loss": 0.0126, "num_input_tokens_seen": 219766752, "step": 101885 }, { "epoch": 16.62153344208809, "grad_norm": 0.007138585671782494, "learning_rate": 8.444125424736016e-05, "loss": 0.0117, "num_input_tokens_seen": 219778048, "step": 101890 }, { "epoch": 16.622349102773246, "grad_norm": 0.0013294769451022148, "learning_rate": 8.440167558267431e-05, "loss": 0.0148, "num_input_tokens_seen": 219789600, "step": 101895 }, { "epoch": 16.6231647634584, "grad_norm": 0.004931210074573755, "learning_rate": 8.436210534061567e-05, "loss": 0.0125, "num_input_tokens_seen": 219799616, "step": 101900 }, { "epoch": 16.623980424143557, "grad_norm": 0.11002521961927414, "learning_rate": 8.432254352198626e-05, "loss": 0.0054, "num_input_tokens_seen": 219812064, "step": 101905 }, { "epoch": 16.624796084828713, "grad_norm": 0.2072538286447525, "learning_rate": 8.428299012758778e-05, "loss": 0.102, "num_input_tokens_seen": 219822464, "step": 101910 }, { "epoch": 16.625611745513865, "grad_norm": 0.005426901392638683, "learning_rate": 8.424344515822197e-05, "loss": 0.004, "num_input_tokens_seen": 219833600, "step": 101915 }, { "epoch": 16.62642740619902, "grad_norm": 0.048319607973098755, "learning_rate": 8.420390861468996e-05, "loss": 0.0087, "num_input_tokens_seen": 219844416, "step": 101920 }, { "epoch": 16.627243066884176, "grad_norm": 0.007641777396202087, "learning_rate": 8.416438049779351e-05, "loss": 0.0886, "num_input_tokens_seen": 219854624, "step": 101925 }, { "epoch": 16.628058727569332, "grad_norm": 0.016032839193940163, "learning_rate": 8.412486080833315e-05, "loss": 0.0033, "num_input_tokens_seen": 219866080, "step": 101930 }, { "epoch": 16.628874388254488, "grad_norm": 0.3541107773780823, "learning_rate": 8.408534954711034e-05, "loss": 0.0206, "num_input_tokens_seen": 219876448, "step": 101935 }, { "epoch": 16.62969004893964, "grad_norm": 0.02240462228655815, "learning_rate": 8.404584671492526e-05, "loss": 0.1063, "num_input_tokens_seen": 219886368, "step": 101940 }, { "epoch": 16.630505709624796, "grad_norm": 0.0026257894933223724, "learning_rate": 8.400635231257902e-05, "loss": 0.0893, "num_input_tokens_seen": 219896064, "step": 101945 }, { "epoch": 16.63132137030995, "grad_norm": 0.020161097869277, "learning_rate": 8.396686634087159e-05, "loss": 0.0021, "num_input_tokens_seen": 219906240, "step": 101950 }, { "epoch": 16.632137030995107, "grad_norm": 0.0849241316318512, "learning_rate": 8.392738880060358e-05, "loss": 0.0024, "num_input_tokens_seen": 219918048, "step": 101955 }, { "epoch": 16.63295269168026, "grad_norm": 0.0077824220061302185, "learning_rate": 8.388791969257458e-05, "loss": 0.0171, "num_input_tokens_seen": 219928928, "step": 101960 }, { "epoch": 16.633768352365415, "grad_norm": 0.003548326203599572, "learning_rate": 8.384845901758498e-05, "loss": 0.0154, "num_input_tokens_seen": 219939552, "step": 101965 }, { "epoch": 16.63458401305057, "grad_norm": 0.00210005440749228, "learning_rate": 8.380900677643421e-05, "loss": 0.0565, "num_input_tokens_seen": 219950688, "step": 101970 }, { "epoch": 16.635399673735726, "grad_norm": 0.042507290840148926, "learning_rate": 8.376956296992195e-05, "loss": 0.0088, "num_input_tokens_seen": 219962720, "step": 101975 }, { "epoch": 16.636215334420882, "grad_norm": 0.0005390935111790895, "learning_rate": 8.373012759884746e-05, "loss": 0.0242, "num_input_tokens_seen": 219972928, "step": 101980 }, { "epoch": 16.637030995106034, "grad_norm": 0.002074267715215683, "learning_rate": 8.369070066401003e-05, "loss": 0.0027, "num_input_tokens_seen": 219983328, "step": 101985 }, { "epoch": 16.63784665579119, "grad_norm": 0.05994388088583946, "learning_rate": 8.365128216620871e-05, "loss": 0.0036, "num_input_tokens_seen": 219994048, "step": 101990 }, { "epoch": 16.638662316476346, "grad_norm": 0.007155990228056908, "learning_rate": 8.361187210624232e-05, "loss": 0.0743, "num_input_tokens_seen": 220005696, "step": 101995 }, { "epoch": 16.6394779771615, "grad_norm": 0.010875949636101723, "learning_rate": 8.357247048490957e-05, "loss": 0.0076, "num_input_tokens_seen": 220016288, "step": 102000 }, { "epoch": 16.640293637846657, "grad_norm": 0.18072858452796936, "learning_rate": 8.353307730300897e-05, "loss": 0.0166, "num_input_tokens_seen": 220027520, "step": 102005 }, { "epoch": 16.64110929853181, "grad_norm": 0.0029338616877794266, "learning_rate": 8.349369256133888e-05, "loss": 0.0029, "num_input_tokens_seen": 220039200, "step": 102010 }, { "epoch": 16.641924959216965, "grad_norm": 0.023301510140299797, "learning_rate": 8.345431626069744e-05, "loss": 0.026, "num_input_tokens_seen": 220049408, "step": 102015 }, { "epoch": 16.64274061990212, "grad_norm": 0.06360410153865814, "learning_rate": 8.34149484018828e-05, "loss": 0.0043, "num_input_tokens_seen": 220060608, "step": 102020 }, { "epoch": 16.643556280587276, "grad_norm": 0.1415073424577713, "learning_rate": 8.337558898569264e-05, "loss": 0.0062, "num_input_tokens_seen": 220070368, "step": 102025 }, { "epoch": 16.644371941272432, "grad_norm": 0.006179925054311752, "learning_rate": 8.333623801292472e-05, "loss": 0.0056, "num_input_tokens_seen": 220081856, "step": 102030 }, { "epoch": 16.645187601957584, "grad_norm": 0.04386072978377342, "learning_rate": 8.329689548437652e-05, "loss": 0.0528, "num_input_tokens_seen": 220091840, "step": 102035 }, { "epoch": 16.64600326264274, "grad_norm": 0.03572265803813934, "learning_rate": 8.325756140084533e-05, "loss": 0.0262, "num_input_tokens_seen": 220104192, "step": 102040 }, { "epoch": 16.646818923327896, "grad_norm": 0.012608006596565247, "learning_rate": 8.321823576312837e-05, "loss": 0.0303, "num_input_tokens_seen": 220113952, "step": 102045 }, { "epoch": 16.64763458401305, "grad_norm": 0.041379671543836594, "learning_rate": 8.317891857202253e-05, "loss": 0.0373, "num_input_tokens_seen": 220124512, "step": 102050 }, { "epoch": 16.648450244698207, "grad_norm": 0.19257573783397675, "learning_rate": 8.313960982832475e-05, "loss": 0.0066, "num_input_tokens_seen": 220135808, "step": 102055 }, { "epoch": 16.64926590538336, "grad_norm": 0.012700493447482586, "learning_rate": 8.310030953283154e-05, "loss": 0.0076, "num_input_tokens_seen": 220145216, "step": 102060 }, { "epoch": 16.650081566068515, "grad_norm": 0.0045757414773106575, "learning_rate": 8.30610176863394e-05, "loss": 0.0036, "num_input_tokens_seen": 220155776, "step": 102065 }, { "epoch": 16.65089722675367, "grad_norm": 0.004711855202913284, "learning_rate": 8.302173428964472e-05, "loss": 0.0011, "num_input_tokens_seen": 220166592, "step": 102070 }, { "epoch": 16.651712887438826, "grad_norm": 0.01937665417790413, "learning_rate": 8.298245934354353e-05, "loss": 0.0133, "num_input_tokens_seen": 220177696, "step": 102075 }, { "epoch": 16.652528548123982, "grad_norm": 0.020392822101712227, "learning_rate": 8.29431928488319e-05, "loss": 0.007, "num_input_tokens_seen": 220188768, "step": 102080 }, { "epoch": 16.653344208809134, "grad_norm": 0.010108470916748047, "learning_rate": 8.290393480630549e-05, "loss": 0.0016, "num_input_tokens_seen": 220199456, "step": 102085 }, { "epoch": 16.65415986949429, "grad_norm": 0.056723468005657196, "learning_rate": 8.286468521676e-05, "loss": 0.0058, "num_input_tokens_seen": 220209728, "step": 102090 }, { "epoch": 16.654975530179446, "grad_norm": 0.0012470949441194534, "learning_rate": 8.282544408099079e-05, "loss": 0.0083, "num_input_tokens_seen": 220221216, "step": 102095 }, { "epoch": 16.6557911908646, "grad_norm": 0.0020048327278345823, "learning_rate": 8.278621139979325e-05, "loss": 0.0065, "num_input_tokens_seen": 220231712, "step": 102100 }, { "epoch": 16.656606851549757, "grad_norm": 0.10065899044275284, "learning_rate": 8.274698717396234e-05, "loss": 0.1058, "num_input_tokens_seen": 220242720, "step": 102105 }, { "epoch": 16.65742251223491, "grad_norm": 0.001540284720249474, "learning_rate": 8.270777140429308e-05, "loss": 0.005, "num_input_tokens_seen": 220253920, "step": 102110 }, { "epoch": 16.658238172920065, "grad_norm": 0.009685736149549484, "learning_rate": 8.266856409158025e-05, "loss": 0.0021, "num_input_tokens_seen": 220265504, "step": 102115 }, { "epoch": 16.65905383360522, "grad_norm": 0.10091249644756317, "learning_rate": 8.262936523661835e-05, "loss": 0.0107, "num_input_tokens_seen": 220277504, "step": 102120 }, { "epoch": 16.659869494290376, "grad_norm": 0.07948818057775497, "learning_rate": 8.259017484020181e-05, "loss": 0.0024, "num_input_tokens_seen": 220288704, "step": 102125 }, { "epoch": 16.660685154975532, "grad_norm": 0.009430035017430782, "learning_rate": 8.255099290312495e-05, "loss": 0.0044, "num_input_tokens_seen": 220300224, "step": 102130 }, { "epoch": 16.661500815660684, "grad_norm": 0.37671995162963867, "learning_rate": 8.251181942618174e-05, "loss": 0.0058, "num_input_tokens_seen": 220310720, "step": 102135 }, { "epoch": 16.66231647634584, "grad_norm": 0.288059264421463, "learning_rate": 8.247265441016621e-05, "loss": 0.0132, "num_input_tokens_seen": 220321280, "step": 102140 }, { "epoch": 16.663132137030995, "grad_norm": 0.04418053850531578, "learning_rate": 8.243349785587195e-05, "loss": 0.0047, "num_input_tokens_seen": 220332096, "step": 102145 }, { "epoch": 16.66394779771615, "grad_norm": 0.0022062654606997967, "learning_rate": 8.23943497640926e-05, "loss": 0.0124, "num_input_tokens_seen": 220342816, "step": 102150 }, { "epoch": 16.664763458401303, "grad_norm": 0.00585405807942152, "learning_rate": 8.235521013562148e-05, "loss": 0.0017, "num_input_tokens_seen": 220354208, "step": 102155 }, { "epoch": 16.66557911908646, "grad_norm": 0.03241811692714691, "learning_rate": 8.231607897125188e-05, "loss": 0.0052, "num_input_tokens_seen": 220364032, "step": 102160 }, { "epoch": 16.666394779771615, "grad_norm": 1.0384999513626099, "learning_rate": 8.227695627177678e-05, "loss": 0.0408, "num_input_tokens_seen": 220375264, "step": 102165 }, { "epoch": 16.66721044045677, "grad_norm": 0.015278251841664314, "learning_rate": 8.223784203798912e-05, "loss": 0.0721, "num_input_tokens_seen": 220386400, "step": 102170 }, { "epoch": 16.668026101141926, "grad_norm": 0.0033618370071053505, "learning_rate": 8.219873627068141e-05, "loss": 0.0018, "num_input_tokens_seen": 220396704, "step": 102175 }, { "epoch": 16.66884176182708, "grad_norm": 0.02199321612715721, "learning_rate": 8.21596389706466e-05, "loss": 0.0057, "num_input_tokens_seen": 220407840, "step": 102180 }, { "epoch": 16.669657422512234, "grad_norm": 0.12732742726802826, "learning_rate": 8.212055013867654e-05, "loss": 0.0028, "num_input_tokens_seen": 220418976, "step": 102185 }, { "epoch": 16.67047308319739, "grad_norm": 0.030749700963497162, "learning_rate": 8.208146977556386e-05, "loss": 0.021, "num_input_tokens_seen": 220430144, "step": 102190 }, { "epoch": 16.671288743882545, "grad_norm": 0.08659858256578445, "learning_rate": 8.204239788210011e-05, "loss": 0.0047, "num_input_tokens_seen": 220441056, "step": 102195 }, { "epoch": 16.6721044045677, "grad_norm": 0.011828580871224403, "learning_rate": 8.200333445907766e-05, "loss": 0.0034, "num_input_tokens_seen": 220451744, "step": 102200 }, { "epoch": 16.672920065252853, "grad_norm": 0.015247615054249763, "learning_rate": 8.196427950728763e-05, "loss": 0.145, "num_input_tokens_seen": 220462624, "step": 102205 }, { "epoch": 16.67373572593801, "grad_norm": 0.0023519936949014664, "learning_rate": 8.192523302752192e-05, "loss": 0.002, "num_input_tokens_seen": 220474272, "step": 102210 }, { "epoch": 16.674551386623165, "grad_norm": 0.03897944092750549, "learning_rate": 8.188619502057176e-05, "loss": 0.0023, "num_input_tokens_seen": 220485216, "step": 102215 }, { "epoch": 16.67536704730832, "grad_norm": 0.003212972776964307, "learning_rate": 8.184716548722825e-05, "loss": 0.0013, "num_input_tokens_seen": 220497248, "step": 102220 }, { "epoch": 16.676182707993476, "grad_norm": 0.010272002778947353, "learning_rate": 8.180814442828238e-05, "loss": 0.0023, "num_input_tokens_seen": 220507840, "step": 102225 }, { "epoch": 16.67699836867863, "grad_norm": 0.03500838950276375, "learning_rate": 8.1769131844525e-05, "loss": 0.0734, "num_input_tokens_seen": 220518752, "step": 102230 }, { "epoch": 16.677814029363784, "grad_norm": 0.9756990075111389, "learning_rate": 8.173012773674671e-05, "loss": 0.0421, "num_input_tokens_seen": 220529248, "step": 102235 }, { "epoch": 16.67862969004894, "grad_norm": 0.0057732053101062775, "learning_rate": 8.169113210573803e-05, "loss": 0.0031, "num_input_tokens_seen": 220541056, "step": 102240 }, { "epoch": 16.679445350734095, "grad_norm": 0.03203001618385315, "learning_rate": 8.165214495228918e-05, "loss": 0.0036, "num_input_tokens_seen": 220551168, "step": 102245 }, { "epoch": 16.68026101141925, "grad_norm": 0.013308649882674217, "learning_rate": 8.161316627719035e-05, "loss": 0.0031, "num_input_tokens_seen": 220562272, "step": 102250 }, { "epoch": 16.681076672104403, "grad_norm": 0.581266462802887, "learning_rate": 8.157419608123145e-05, "loss": 0.0174, "num_input_tokens_seen": 220573312, "step": 102255 }, { "epoch": 16.68189233278956, "grad_norm": 0.7099611759185791, "learning_rate": 8.153523436520226e-05, "loss": 0.0144, "num_input_tokens_seen": 220584576, "step": 102260 }, { "epoch": 16.682707993474715, "grad_norm": 0.004080967511981726, "learning_rate": 8.149628112989243e-05, "loss": 0.0033, "num_input_tokens_seen": 220594336, "step": 102265 }, { "epoch": 16.68352365415987, "grad_norm": 0.022572338581085205, "learning_rate": 8.145733637609137e-05, "loss": 0.0048, "num_input_tokens_seen": 220604800, "step": 102270 }, { "epoch": 16.684339314845026, "grad_norm": 0.10570850968360901, "learning_rate": 8.141840010458835e-05, "loss": 0.058, "num_input_tokens_seen": 220615936, "step": 102275 }, { "epoch": 16.68515497553018, "grad_norm": 0.017804522067308426, "learning_rate": 8.137947231617237e-05, "loss": 0.0051, "num_input_tokens_seen": 220627072, "step": 102280 }, { "epoch": 16.685970636215334, "grad_norm": 0.9061238169670105, "learning_rate": 8.134055301163263e-05, "loss": 0.0525, "num_input_tokens_seen": 220637536, "step": 102285 }, { "epoch": 16.68678629690049, "grad_norm": 0.006700098980218172, "learning_rate": 8.130164219175745e-05, "loss": 0.0416, "num_input_tokens_seen": 220647424, "step": 102290 }, { "epoch": 16.687601957585645, "grad_norm": 1.152877688407898, "learning_rate": 8.126273985733595e-05, "loss": 0.083, "num_input_tokens_seen": 220658144, "step": 102295 }, { "epoch": 16.6884176182708, "grad_norm": 0.004121533129364252, "learning_rate": 8.122384600915594e-05, "loss": 0.0051, "num_input_tokens_seen": 220668768, "step": 102300 }, { "epoch": 16.689233278955953, "grad_norm": 0.010655397549271584, "learning_rate": 8.118496064800618e-05, "loss": 0.0294, "num_input_tokens_seen": 220679424, "step": 102305 }, { "epoch": 16.69004893964111, "grad_norm": 0.004067014437168837, "learning_rate": 8.11460837746743e-05, "loss": 0.0085, "num_input_tokens_seen": 220690112, "step": 102310 }, { "epoch": 16.690864600326265, "grad_norm": 0.06763318181037903, "learning_rate": 8.110721538994859e-05, "loss": 0.0047, "num_input_tokens_seen": 220700768, "step": 102315 }, { "epoch": 16.69168026101142, "grad_norm": 0.24559415876865387, "learning_rate": 8.106835549461633e-05, "loss": 0.0083, "num_input_tokens_seen": 220712192, "step": 102320 }, { "epoch": 16.692495921696576, "grad_norm": 0.016000831499695778, "learning_rate": 8.102950408946552e-05, "loss": 0.0158, "num_input_tokens_seen": 220723392, "step": 102325 }, { "epoch": 16.693311582381728, "grad_norm": 0.021397775039076805, "learning_rate": 8.099066117528308e-05, "loss": 0.0053, "num_input_tokens_seen": 220734304, "step": 102330 }, { "epoch": 16.694127243066884, "grad_norm": 0.047292545437812805, "learning_rate": 8.095182675285673e-05, "loss": 0.0024, "num_input_tokens_seen": 220743840, "step": 102335 }, { "epoch": 16.69494290375204, "grad_norm": 0.00583425909280777, "learning_rate": 8.091300082297293e-05, "loss": 0.1697, "num_input_tokens_seen": 220753440, "step": 102340 }, { "epoch": 16.695758564437195, "grad_norm": 0.004230791237205267, "learning_rate": 8.087418338641906e-05, "loss": 0.0015, "num_input_tokens_seen": 220764640, "step": 102345 }, { "epoch": 16.696574225122347, "grad_norm": 0.016619306057691574, "learning_rate": 8.083537444398131e-05, "loss": 0.0578, "num_input_tokens_seen": 220775008, "step": 102350 }, { "epoch": 16.697389885807503, "grad_norm": 0.015291106887161732, "learning_rate": 8.079657399644664e-05, "loss": 0.0028, "num_input_tokens_seen": 220786720, "step": 102355 }, { "epoch": 16.69820554649266, "grad_norm": 1.9769657850265503, "learning_rate": 8.07577820446011e-05, "loss": 0.0301, "num_input_tokens_seen": 220798144, "step": 102360 }, { "epoch": 16.699021207177815, "grad_norm": 2.560305118560791, "learning_rate": 8.071899858923098e-05, "loss": 0.0657, "num_input_tokens_seen": 220808864, "step": 102365 }, { "epoch": 16.69983686786297, "grad_norm": 0.004553290084004402, "learning_rate": 8.068022363112227e-05, "loss": 0.0028, "num_input_tokens_seen": 220820224, "step": 102370 }, { "epoch": 16.700652528548122, "grad_norm": 0.11440113931894302, "learning_rate": 8.064145717106075e-05, "loss": 0.0168, "num_input_tokens_seen": 220831488, "step": 102375 }, { "epoch": 16.701468189233278, "grad_norm": 0.623996376991272, "learning_rate": 8.06026992098321e-05, "loss": 0.0544, "num_input_tokens_seen": 220841728, "step": 102380 }, { "epoch": 16.702283849918434, "grad_norm": 0.06481090933084488, "learning_rate": 8.056394974822185e-05, "loss": 0.0974, "num_input_tokens_seen": 220851968, "step": 102385 }, { "epoch": 16.70309951060359, "grad_norm": 0.0017354049487039447, "learning_rate": 8.052520878701519e-05, "loss": 0.0039, "num_input_tokens_seen": 220863296, "step": 102390 }, { "epoch": 16.703915171288745, "grad_norm": 0.02590327151119709, "learning_rate": 8.04864763269973e-05, "loss": 0.0218, "num_input_tokens_seen": 220873024, "step": 102395 }, { "epoch": 16.704730831973897, "grad_norm": 0.01765533722937107, "learning_rate": 8.044775236895319e-05, "loss": 0.0796, "num_input_tokens_seen": 220884032, "step": 102400 }, { "epoch": 16.705546492659053, "grad_norm": 0.006046569440513849, "learning_rate": 8.040903691366753e-05, "loss": 0.0385, "num_input_tokens_seen": 220895264, "step": 102405 }, { "epoch": 16.70636215334421, "grad_norm": 0.005744642112404108, "learning_rate": 8.037032996192522e-05, "loss": 0.0052, "num_input_tokens_seen": 220906720, "step": 102410 }, { "epoch": 16.707177814029365, "grad_norm": 0.22247447073459625, "learning_rate": 8.033163151451028e-05, "loss": 0.006, "num_input_tokens_seen": 220918208, "step": 102415 }, { "epoch": 16.70799347471452, "grad_norm": 0.01181444525718689, "learning_rate": 8.029294157220746e-05, "loss": 0.0405, "num_input_tokens_seen": 220928928, "step": 102420 }, { "epoch": 16.708809135399672, "grad_norm": 0.003780631348490715, "learning_rate": 8.025426013580033e-05, "loss": 0.0891, "num_input_tokens_seen": 220938624, "step": 102425 }, { "epoch": 16.709624796084828, "grad_norm": 0.007309529930353165, "learning_rate": 8.021558720607342e-05, "loss": 0.0015, "num_input_tokens_seen": 220949408, "step": 102430 }, { "epoch": 16.710440456769984, "grad_norm": 0.09403771162033081, "learning_rate": 8.01769227838099e-05, "loss": 0.0073, "num_input_tokens_seen": 220960480, "step": 102435 }, { "epoch": 16.71125611745514, "grad_norm": 0.8954527378082275, "learning_rate": 8.013826686979381e-05, "loss": 0.0657, "num_input_tokens_seen": 220971520, "step": 102440 }, { "epoch": 16.712071778140295, "grad_norm": 0.009664108045399189, "learning_rate": 8.00996194648082e-05, "loss": 0.0591, "num_input_tokens_seen": 220982432, "step": 102445 }, { "epoch": 16.712887438825447, "grad_norm": 0.16935911774635315, "learning_rate": 8.006098056963668e-05, "loss": 0.0739, "num_input_tokens_seen": 220992960, "step": 102450 }, { "epoch": 16.713703099510603, "grad_norm": 0.004235012922435999, "learning_rate": 8.002235018506194e-05, "loss": 0.0441, "num_input_tokens_seen": 221003232, "step": 102455 }, { "epoch": 16.71451876019576, "grad_norm": 0.011885006912052631, "learning_rate": 7.998372831186723e-05, "loss": 0.0047, "num_input_tokens_seen": 221014048, "step": 102460 }, { "epoch": 16.715334420880914, "grad_norm": 0.12713190913200378, "learning_rate": 7.99451149508349e-05, "loss": 0.0052, "num_input_tokens_seen": 221024480, "step": 102465 }, { "epoch": 16.71615008156607, "grad_norm": 0.07059351354837418, "learning_rate": 7.990651010274791e-05, "loss": 0.0063, "num_input_tokens_seen": 221034208, "step": 102470 }, { "epoch": 16.716965742251222, "grad_norm": 0.303607702255249, "learning_rate": 7.98679137683882e-05, "loss": 0.0114, "num_input_tokens_seen": 221044544, "step": 102475 }, { "epoch": 16.717781402936378, "grad_norm": 0.5538349151611328, "learning_rate": 7.982932594853837e-05, "loss": 0.0071, "num_input_tokens_seen": 221054272, "step": 102480 }, { "epoch": 16.718597063621534, "grad_norm": 1.1779837608337402, "learning_rate": 7.979074664398012e-05, "loss": 0.0881, "num_input_tokens_seen": 221065760, "step": 102485 }, { "epoch": 16.71941272430669, "grad_norm": 0.01922541856765747, "learning_rate": 7.975217585549566e-05, "loss": 0.0063, "num_input_tokens_seen": 221077120, "step": 102490 }, { "epoch": 16.72022838499184, "grad_norm": 0.01696177013218403, "learning_rate": 7.97136135838662e-05, "loss": 0.0045, "num_input_tokens_seen": 221088064, "step": 102495 }, { "epoch": 16.721044045676997, "grad_norm": 0.0023887394927442074, "learning_rate": 7.967505982987372e-05, "loss": 0.0021, "num_input_tokens_seen": 221098528, "step": 102500 }, { "epoch": 16.721859706362153, "grad_norm": 0.004695380572229624, "learning_rate": 7.963651459429932e-05, "loss": 0.0073, "num_input_tokens_seen": 221109536, "step": 102505 }, { "epoch": 16.72267536704731, "grad_norm": 0.0361570343375206, "learning_rate": 7.959797787792428e-05, "loss": 0.0026, "num_input_tokens_seen": 221120096, "step": 102510 }, { "epoch": 16.723491027732464, "grad_norm": 0.0015288087306544185, "learning_rate": 7.955944968152951e-05, "loss": 0.0067, "num_input_tokens_seen": 221130752, "step": 102515 }, { "epoch": 16.724306688417617, "grad_norm": 0.013556277379393578, "learning_rate": 7.952093000589583e-05, "loss": 0.0932, "num_input_tokens_seen": 221141312, "step": 102520 }, { "epoch": 16.725122349102772, "grad_norm": 0.03573325648903847, "learning_rate": 7.948241885180396e-05, "loss": 0.0052, "num_input_tokens_seen": 221152960, "step": 102525 }, { "epoch": 16.725938009787928, "grad_norm": 0.002607989124953747, "learning_rate": 7.944391622003427e-05, "loss": 0.0597, "num_input_tokens_seen": 221164064, "step": 102530 }, { "epoch": 16.726753670473084, "grad_norm": 0.005784845445305109, "learning_rate": 7.94054221113672e-05, "loss": 0.0018, "num_input_tokens_seen": 221173760, "step": 102535 }, { "epoch": 16.72756933115824, "grad_norm": 0.8408197164535522, "learning_rate": 7.936693652658278e-05, "loss": 0.1328, "num_input_tokens_seen": 221184256, "step": 102540 }, { "epoch": 16.72838499184339, "grad_norm": 0.20591320097446442, "learning_rate": 7.9328459466461e-05, "loss": 0.0041, "num_input_tokens_seen": 221194976, "step": 102545 }, { "epoch": 16.729200652528547, "grad_norm": 0.03820100054144859, "learning_rate": 7.928999093178157e-05, "loss": 0.0022, "num_input_tokens_seen": 221206624, "step": 102550 }, { "epoch": 16.730016313213703, "grad_norm": 0.005452121142297983, "learning_rate": 7.925153092332438e-05, "loss": 0.005, "num_input_tokens_seen": 221218304, "step": 102555 }, { "epoch": 16.73083197389886, "grad_norm": 0.025224938988685608, "learning_rate": 7.921307944186845e-05, "loss": 0.0129, "num_input_tokens_seen": 221227584, "step": 102560 }, { "epoch": 16.731647634584014, "grad_norm": 0.2355371117591858, "learning_rate": 7.91746364881935e-05, "loss": 0.0107, "num_input_tokens_seen": 221238496, "step": 102565 }, { "epoch": 16.732463295269167, "grad_norm": 0.0017045412678271532, "learning_rate": 7.913620206307814e-05, "loss": 0.0029, "num_input_tokens_seen": 221248928, "step": 102570 }, { "epoch": 16.733278955954322, "grad_norm": 0.02752010151743889, "learning_rate": 7.909777616730185e-05, "loss": 0.0033, "num_input_tokens_seen": 221259936, "step": 102575 }, { "epoch": 16.734094616639478, "grad_norm": 0.05363456904888153, "learning_rate": 7.905935880164278e-05, "loss": 0.0016, "num_input_tokens_seen": 221269632, "step": 102580 }, { "epoch": 16.734910277324634, "grad_norm": 0.015610609203577042, "learning_rate": 7.902094996688009e-05, "loss": 0.0279, "num_input_tokens_seen": 221281536, "step": 102585 }, { "epoch": 16.73572593800979, "grad_norm": 0.002872697776183486, "learning_rate": 7.89825496637916e-05, "loss": 0.0065, "num_input_tokens_seen": 221291200, "step": 102590 }, { "epoch": 16.73654159869494, "grad_norm": 0.8114063143730164, "learning_rate": 7.894415789315612e-05, "loss": 0.1756, "num_input_tokens_seen": 221302336, "step": 102595 }, { "epoch": 16.737357259380097, "grad_norm": 1.4492063522338867, "learning_rate": 7.890577465575121e-05, "loss": 0.0404, "num_input_tokens_seen": 221314368, "step": 102600 }, { "epoch": 16.738172920065253, "grad_norm": 0.0009090484236367047, "learning_rate": 7.886739995235504e-05, "loss": 0.0015, "num_input_tokens_seen": 221325344, "step": 102605 }, { "epoch": 16.73898858075041, "grad_norm": 0.02358722873032093, "learning_rate": 7.882903378374528e-05, "loss": 0.0154, "num_input_tokens_seen": 221334688, "step": 102610 }, { "epoch": 16.739804241435564, "grad_norm": 0.0343654565513134, "learning_rate": 7.879067615069946e-05, "loss": 0.0027, "num_input_tokens_seen": 221346432, "step": 102615 }, { "epoch": 16.740619902120716, "grad_norm": 0.00938003696501255, "learning_rate": 7.875232705399488e-05, "loss": 0.0203, "num_input_tokens_seen": 221357152, "step": 102620 }, { "epoch": 16.741435562805872, "grad_norm": 0.11765291541814804, "learning_rate": 7.871398649440886e-05, "loss": 0.0043, "num_input_tokens_seen": 221367840, "step": 102625 }, { "epoch": 16.742251223491028, "grad_norm": 0.9214667677879333, "learning_rate": 7.867565447271829e-05, "loss": 0.1867, "num_input_tokens_seen": 221379456, "step": 102630 }, { "epoch": 16.743066884176184, "grad_norm": 0.08192692697048187, "learning_rate": 7.863733098970006e-05, "loss": 0.0141, "num_input_tokens_seen": 221389696, "step": 102635 }, { "epoch": 16.74388254486134, "grad_norm": 0.028926236554980278, "learning_rate": 7.85990160461309e-05, "loss": 0.0037, "num_input_tokens_seen": 221399808, "step": 102640 }, { "epoch": 16.74469820554649, "grad_norm": 0.0031544314697384834, "learning_rate": 7.856070964278722e-05, "loss": 0.0041, "num_input_tokens_seen": 221410624, "step": 102645 }, { "epoch": 16.745513866231647, "grad_norm": 0.05227188393473625, "learning_rate": 7.852241178044539e-05, "loss": 0.0622, "num_input_tokens_seen": 221421664, "step": 102650 }, { "epoch": 16.746329526916803, "grad_norm": 0.09283024072647095, "learning_rate": 7.848412245988157e-05, "loss": 0.0038, "num_input_tokens_seen": 221431136, "step": 102655 }, { "epoch": 16.74714518760196, "grad_norm": 0.11333291977643967, "learning_rate": 7.84458416818718e-05, "loss": 0.0043, "num_input_tokens_seen": 221442112, "step": 102660 }, { "epoch": 16.747960848287114, "grad_norm": 0.09067942202091217, "learning_rate": 7.840756944719174e-05, "loss": 0.0079, "num_input_tokens_seen": 221453696, "step": 102665 }, { "epoch": 16.748776508972266, "grad_norm": 0.11829224228858948, "learning_rate": 7.836930575661716e-05, "loss": 0.0059, "num_input_tokens_seen": 221465088, "step": 102670 }, { "epoch": 16.749592169657422, "grad_norm": 0.02776484377682209, "learning_rate": 7.83310506109235e-05, "loss": 0.0035, "num_input_tokens_seen": 221475552, "step": 102675 }, { "epoch": 16.750407830342578, "grad_norm": 0.19685065746307373, "learning_rate": 7.829280401088601e-05, "loss": 0.0058, "num_input_tokens_seen": 221487232, "step": 102680 }, { "epoch": 16.751223491027734, "grad_norm": 0.01178918220102787, "learning_rate": 7.82545659572798e-05, "loss": 0.0015, "num_input_tokens_seen": 221497024, "step": 102685 }, { "epoch": 16.752039151712886, "grad_norm": 0.04747878015041351, "learning_rate": 7.821633645087984e-05, "loss": 0.0144, "num_input_tokens_seen": 221507680, "step": 102690 }, { "epoch": 16.75285481239804, "grad_norm": 0.0006992795970290899, "learning_rate": 7.817811549246079e-05, "loss": 0.0034, "num_input_tokens_seen": 221519360, "step": 102695 }, { "epoch": 16.753670473083197, "grad_norm": 0.02291467972099781, "learning_rate": 7.813990308279755e-05, "loss": 0.0019, "num_input_tokens_seen": 221529536, "step": 102700 }, { "epoch": 16.754486133768353, "grad_norm": 0.03158566355705261, "learning_rate": 7.810169922266413e-05, "loss": 0.0077, "num_input_tokens_seen": 221541152, "step": 102705 }, { "epoch": 16.75530179445351, "grad_norm": 0.01382729783654213, "learning_rate": 7.806350391283507e-05, "loss": 0.0118, "num_input_tokens_seen": 221552320, "step": 102710 }, { "epoch": 16.75611745513866, "grad_norm": 0.0017946879379451275, "learning_rate": 7.80253171540844e-05, "loss": 0.0025, "num_input_tokens_seen": 221562656, "step": 102715 }, { "epoch": 16.756933115823816, "grad_norm": 0.0011893212795257568, "learning_rate": 7.798713894718602e-05, "loss": 0.008, "num_input_tokens_seen": 221573664, "step": 102720 }, { "epoch": 16.757748776508972, "grad_norm": 0.0030455668456852436, "learning_rate": 7.794896929291361e-05, "loss": 0.011, "num_input_tokens_seen": 221585728, "step": 102725 }, { "epoch": 16.758564437194128, "grad_norm": 0.16485022008419037, "learning_rate": 7.791080819204072e-05, "loss": 0.0686, "num_input_tokens_seen": 221596064, "step": 102730 }, { "epoch": 16.759380097879284, "grad_norm": 0.09982141852378845, "learning_rate": 7.78726556453408e-05, "loss": 0.0032, "num_input_tokens_seen": 221607200, "step": 102735 }, { "epoch": 16.760195758564436, "grad_norm": 0.0022347350604832172, "learning_rate": 7.783451165358696e-05, "loss": 0.001, "num_input_tokens_seen": 221618368, "step": 102740 }, { "epoch": 16.76101141924959, "grad_norm": 0.03732523322105408, "learning_rate": 7.779637621755236e-05, "loss": 0.1905, "num_input_tokens_seen": 221628032, "step": 102745 }, { "epoch": 16.761827079934747, "grad_norm": 0.009463043883442879, "learning_rate": 7.775824933800979e-05, "loss": 0.002, "num_input_tokens_seen": 221639040, "step": 102750 }, { "epoch": 16.762642740619903, "grad_norm": 0.03571273386478424, "learning_rate": 7.772013101573195e-05, "loss": 0.0016, "num_input_tokens_seen": 221649760, "step": 102755 }, { "epoch": 16.76345840130506, "grad_norm": 0.01975579746067524, "learning_rate": 7.768202125149132e-05, "loss": 0.0344, "num_input_tokens_seen": 221660224, "step": 102760 }, { "epoch": 16.76427406199021, "grad_norm": 0.00847950391471386, "learning_rate": 7.76439200460603e-05, "loss": 0.0037, "num_input_tokens_seen": 221671840, "step": 102765 }, { "epoch": 16.765089722675366, "grad_norm": 0.7757527232170105, "learning_rate": 7.7605827400211e-05, "loss": 0.0285, "num_input_tokens_seen": 221682784, "step": 102770 }, { "epoch": 16.765905383360522, "grad_norm": 0.0265356432646513, "learning_rate": 7.75677433147155e-05, "loss": 0.0048, "num_input_tokens_seen": 221693664, "step": 102775 }, { "epoch": 16.766721044045678, "grad_norm": 0.0026890330482274294, "learning_rate": 7.752966779034553e-05, "loss": 0.0021, "num_input_tokens_seen": 221704320, "step": 102780 }, { "epoch": 16.767536704730833, "grad_norm": 0.06932741403579712, "learning_rate": 7.749160082787283e-05, "loss": 0.0672, "num_input_tokens_seen": 221714560, "step": 102785 }, { "epoch": 16.768352365415986, "grad_norm": 0.18144960701465607, "learning_rate": 7.745354242806884e-05, "loss": 0.0069, "num_input_tokens_seen": 221724480, "step": 102790 }, { "epoch": 16.76916802610114, "grad_norm": 0.005833078641444445, "learning_rate": 7.741549259170483e-05, "loss": 0.0015, "num_input_tokens_seen": 221736224, "step": 102795 }, { "epoch": 16.769983686786297, "grad_norm": 0.0516800694167614, "learning_rate": 7.737745131955192e-05, "loss": 0.0068, "num_input_tokens_seen": 221747072, "step": 102800 }, { "epoch": 16.770799347471453, "grad_norm": 0.5356493592262268, "learning_rate": 7.733941861238114e-05, "loss": 0.0067, "num_input_tokens_seen": 221757024, "step": 102805 }, { "epoch": 16.77161500815661, "grad_norm": 0.032318029552698135, "learning_rate": 7.730139447096319e-05, "loss": 0.0943, "num_input_tokens_seen": 221767872, "step": 102810 }, { "epoch": 16.77243066884176, "grad_norm": 0.0072444831021130085, "learning_rate": 7.726337889606861e-05, "loss": 0.0271, "num_input_tokens_seen": 221778144, "step": 102815 }, { "epoch": 16.773246329526916, "grad_norm": 0.012703686021268368, "learning_rate": 7.722537188846817e-05, "loss": 0.0023, "num_input_tokens_seen": 221787552, "step": 102820 }, { "epoch": 16.774061990212072, "grad_norm": 0.036274489015340805, "learning_rate": 7.718737344893167e-05, "loss": 0.0122, "num_input_tokens_seen": 221797792, "step": 102825 }, { "epoch": 16.774877650897228, "grad_norm": 0.012096802704036236, "learning_rate": 7.714938357822965e-05, "loss": 0.011, "num_input_tokens_seen": 221809600, "step": 102830 }, { "epoch": 16.775693311582383, "grad_norm": 1.4317375421524048, "learning_rate": 7.711140227713154e-05, "loss": 0.0784, "num_input_tokens_seen": 221820896, "step": 102835 }, { "epoch": 16.776508972267536, "grad_norm": 0.14059321582317352, "learning_rate": 7.70734295464075e-05, "loss": 0.004, "num_input_tokens_seen": 221831616, "step": 102840 }, { "epoch": 16.77732463295269, "grad_norm": 0.0020502174738794565, "learning_rate": 7.703546538682688e-05, "loss": 0.0067, "num_input_tokens_seen": 221842752, "step": 102845 }, { "epoch": 16.778140293637847, "grad_norm": 0.07004014402627945, "learning_rate": 7.699750979915915e-05, "loss": 0.0102, "num_input_tokens_seen": 221854048, "step": 102850 }, { "epoch": 16.778955954323003, "grad_norm": 0.044225696474313736, "learning_rate": 7.695956278417349e-05, "loss": 0.0037, "num_input_tokens_seen": 221864704, "step": 102855 }, { "epoch": 16.77977161500816, "grad_norm": 0.07516263425350189, "learning_rate": 7.692162434263894e-05, "loss": 0.0021, "num_input_tokens_seen": 221875488, "step": 102860 }, { "epoch": 16.78058727569331, "grad_norm": 0.0010245087323710322, "learning_rate": 7.688369447532444e-05, "loss": 0.084, "num_input_tokens_seen": 221885952, "step": 102865 }, { "epoch": 16.781402936378466, "grad_norm": 0.02747989073395729, "learning_rate": 7.684577318299857e-05, "loss": 0.0277, "num_input_tokens_seen": 221897568, "step": 102870 }, { "epoch": 16.782218597063622, "grad_norm": 0.02529071271419525, "learning_rate": 7.680786046642996e-05, "loss": 0.0902, "num_input_tokens_seen": 221909632, "step": 102875 }, { "epoch": 16.783034257748778, "grad_norm": 0.01516773086041212, "learning_rate": 7.676995632638689e-05, "loss": 0.0079, "num_input_tokens_seen": 221919648, "step": 102880 }, { "epoch": 16.78384991843393, "grad_norm": 1.148303508758545, "learning_rate": 7.67320607636376e-05, "loss": 0.1362, "num_input_tokens_seen": 221929536, "step": 102885 }, { "epoch": 16.784665579119086, "grad_norm": 0.14677760004997253, "learning_rate": 7.669417377894999e-05, "loss": 0.0093, "num_input_tokens_seen": 221940000, "step": 102890 }, { "epoch": 16.78548123980424, "grad_norm": 0.00138580659404397, "learning_rate": 7.665629537309199e-05, "loss": 0.0095, "num_input_tokens_seen": 221951328, "step": 102895 }, { "epoch": 16.786296900489397, "grad_norm": 0.21909086406230927, "learning_rate": 7.661842554683124e-05, "loss": 0.0052, "num_input_tokens_seen": 221962816, "step": 102900 }, { "epoch": 16.787112561174553, "grad_norm": 0.2902829051017761, "learning_rate": 7.658056430093512e-05, "loss": 0.1077, "num_input_tokens_seen": 221973504, "step": 102905 }, { "epoch": 16.787928221859705, "grad_norm": 0.04394185543060303, "learning_rate": 7.654271163617105e-05, "loss": 0.0114, "num_input_tokens_seen": 221984832, "step": 102910 }, { "epoch": 16.78874388254486, "grad_norm": 0.006155358161777258, "learning_rate": 7.650486755330616e-05, "loss": 0.0025, "num_input_tokens_seen": 221995456, "step": 102915 }, { "epoch": 16.789559543230016, "grad_norm": 0.0012215788010507822, "learning_rate": 7.646703205310718e-05, "loss": 0.0047, "num_input_tokens_seen": 222005248, "step": 102920 }, { "epoch": 16.790375203915172, "grad_norm": 0.009544708766043186, "learning_rate": 7.642920513634138e-05, "loss": 0.1066, "num_input_tokens_seen": 222016896, "step": 102925 }, { "epoch": 16.791190864600328, "grad_norm": 0.12734994292259216, "learning_rate": 7.639138680377478e-05, "loss": 0.0035, "num_input_tokens_seen": 222028160, "step": 102930 }, { "epoch": 16.79200652528548, "grad_norm": 0.010416317731142044, "learning_rate": 7.63535770561744e-05, "loss": 0.0181, "num_input_tokens_seen": 222040768, "step": 102935 }, { "epoch": 16.792822185970635, "grad_norm": 0.05302588269114494, "learning_rate": 7.631577589430593e-05, "loss": 0.0044, "num_input_tokens_seen": 222050208, "step": 102940 }, { "epoch": 16.79363784665579, "grad_norm": 0.005425974261015654, "learning_rate": 7.627798331893604e-05, "loss": 0.0015, "num_input_tokens_seen": 222059872, "step": 102945 }, { "epoch": 16.794453507340947, "grad_norm": 0.015956606715917587, "learning_rate": 7.62401993308301e-05, "loss": 0.0029, "num_input_tokens_seen": 222069312, "step": 102950 }, { "epoch": 16.795269168026103, "grad_norm": 0.04568832367658615, "learning_rate": 7.620242393075432e-05, "loss": 0.0032, "num_input_tokens_seen": 222080800, "step": 102955 }, { "epoch": 16.796084828711255, "grad_norm": 0.034103989601135254, "learning_rate": 7.61646571194738e-05, "loss": 0.0122, "num_input_tokens_seen": 222092960, "step": 102960 }, { "epoch": 16.79690048939641, "grad_norm": 0.8675066828727722, "learning_rate": 7.612689889775443e-05, "loss": 0.1447, "num_input_tokens_seen": 222103936, "step": 102965 }, { "epoch": 16.797716150081566, "grad_norm": 0.013243857771158218, "learning_rate": 7.60891492663609e-05, "loss": 0.0048, "num_input_tokens_seen": 222115552, "step": 102970 }, { "epoch": 16.798531810766722, "grad_norm": 0.006596239749342203, "learning_rate": 7.605140822605883e-05, "loss": 0.0067, "num_input_tokens_seen": 222127040, "step": 102975 }, { "epoch": 16.799347471451878, "grad_norm": 0.04751402139663696, "learning_rate": 7.601367577761248e-05, "loss": 0.0177, "num_input_tokens_seen": 222139584, "step": 102980 }, { "epoch": 16.80016313213703, "grad_norm": 0.0015399554977193475, "learning_rate": 7.597595192178702e-05, "loss": 0.0569, "num_input_tokens_seen": 222149312, "step": 102985 }, { "epoch": 16.800978792822185, "grad_norm": 0.03497251868247986, "learning_rate": 7.59382366593468e-05, "loss": 0.0025, "num_input_tokens_seen": 222159616, "step": 102990 }, { "epoch": 16.80179445350734, "grad_norm": 0.07846704125404358, "learning_rate": 7.590052999105618e-05, "loss": 0.0031, "num_input_tokens_seen": 222171008, "step": 102995 }, { "epoch": 16.802610114192497, "grad_norm": 0.0031929812394082546, "learning_rate": 7.586283191767929e-05, "loss": 0.0988, "num_input_tokens_seen": 222182592, "step": 103000 }, { "epoch": 16.803425774877653, "grad_norm": 0.0017342081991955638, "learning_rate": 7.582514243998023e-05, "loss": 0.0029, "num_input_tokens_seen": 222193248, "step": 103005 }, { "epoch": 16.804241435562805, "grad_norm": 0.0043642232194542885, "learning_rate": 7.578746155872268e-05, "loss": 0.0757, "num_input_tokens_seen": 222203808, "step": 103010 }, { "epoch": 16.80505709624796, "grad_norm": 0.03254813328385353, "learning_rate": 7.574978927467046e-05, "loss": 0.085, "num_input_tokens_seen": 222214080, "step": 103015 }, { "epoch": 16.805872756933116, "grad_norm": 0.021946555003523827, "learning_rate": 7.571212558858692e-05, "loss": 0.0021, "num_input_tokens_seen": 222224224, "step": 103020 }, { "epoch": 16.806688417618272, "grad_norm": 0.13638600707054138, "learning_rate": 7.567447050123538e-05, "loss": 0.1144, "num_input_tokens_seen": 222236800, "step": 103025 }, { "epoch": 16.807504078303424, "grad_norm": 0.03147580474615097, "learning_rate": 7.563682401337901e-05, "loss": 0.0144, "num_input_tokens_seen": 222249536, "step": 103030 }, { "epoch": 16.80831973898858, "grad_norm": 0.12085145711898804, "learning_rate": 7.559918612578065e-05, "loss": 0.0106, "num_input_tokens_seen": 222259872, "step": 103035 }, { "epoch": 16.809135399673735, "grad_norm": 0.05397968739271164, "learning_rate": 7.55615568392034e-05, "loss": 0.0035, "num_input_tokens_seen": 222271360, "step": 103040 }, { "epoch": 16.80995106035889, "grad_norm": 0.02971021831035614, "learning_rate": 7.552393615440939e-05, "loss": 0.0049, "num_input_tokens_seen": 222281248, "step": 103045 }, { "epoch": 16.810766721044047, "grad_norm": 0.049052756279706955, "learning_rate": 7.548632407216155e-05, "loss": 0.0039, "num_input_tokens_seen": 222292640, "step": 103050 }, { "epoch": 16.8115823817292, "grad_norm": 0.0014121206477284431, "learning_rate": 7.544872059322161e-05, "loss": 0.0077, "num_input_tokens_seen": 222303648, "step": 103055 }, { "epoch": 16.812398042414355, "grad_norm": 0.15244875848293304, "learning_rate": 7.541112571835218e-05, "loss": 0.0068, "num_input_tokens_seen": 222314912, "step": 103060 }, { "epoch": 16.81321370309951, "grad_norm": 0.00469334376975894, "learning_rate": 7.537353944831471e-05, "loss": 0.0162, "num_input_tokens_seen": 222326016, "step": 103065 }, { "epoch": 16.814029363784666, "grad_norm": 0.012406429275870323, "learning_rate": 7.533596178387136e-05, "loss": 0.0088, "num_input_tokens_seen": 222335776, "step": 103070 }, { "epoch": 16.81484502446982, "grad_norm": 0.18729664385318756, "learning_rate": 7.529839272578326e-05, "loss": 0.0807, "num_input_tokens_seen": 222346720, "step": 103075 }, { "epoch": 16.815660685154974, "grad_norm": 0.1001511737704277, "learning_rate": 7.526083227481223e-05, "loss": 0.0103, "num_input_tokens_seen": 222357696, "step": 103080 }, { "epoch": 16.81647634584013, "grad_norm": 0.1061549186706543, "learning_rate": 7.522328043171899e-05, "loss": 0.0082, "num_input_tokens_seen": 222368864, "step": 103085 }, { "epoch": 16.817292006525285, "grad_norm": 0.009134296327829361, "learning_rate": 7.518573719726507e-05, "loss": 0.0015, "num_input_tokens_seen": 222380096, "step": 103090 }, { "epoch": 16.81810766721044, "grad_norm": 0.005731267388910055, "learning_rate": 7.514820257221088e-05, "loss": 0.006, "num_input_tokens_seen": 222390656, "step": 103095 }, { "epoch": 16.818923327895597, "grad_norm": 0.8650200963020325, "learning_rate": 7.511067655731757e-05, "loss": 0.0714, "num_input_tokens_seen": 222403488, "step": 103100 }, { "epoch": 16.81973898858075, "grad_norm": 0.0014061112888157368, "learning_rate": 7.507315915334517e-05, "loss": 0.0019, "num_input_tokens_seen": 222413184, "step": 103105 }, { "epoch": 16.820554649265905, "grad_norm": 0.005636777728796005, "learning_rate": 7.503565036105447e-05, "loss": 0.0648, "num_input_tokens_seen": 222424800, "step": 103110 }, { "epoch": 16.82137030995106, "grad_norm": 0.005421518813818693, "learning_rate": 7.49981501812052e-05, "loss": 0.0096, "num_input_tokens_seen": 222435168, "step": 103115 }, { "epoch": 16.822185970636216, "grad_norm": 0.00229824660345912, "learning_rate": 7.496065861455786e-05, "loss": 0.001, "num_input_tokens_seen": 222446144, "step": 103120 }, { "epoch": 16.82300163132137, "grad_norm": 1.015443205833435, "learning_rate": 7.492317566187167e-05, "loss": 0.1932, "num_input_tokens_seen": 222456192, "step": 103125 }, { "epoch": 16.823817292006524, "grad_norm": 1.551841139793396, "learning_rate": 7.48857013239067e-05, "loss": 0.0243, "num_input_tokens_seen": 222466432, "step": 103130 }, { "epoch": 16.82463295269168, "grad_norm": 0.005467813462018967, "learning_rate": 7.484823560142235e-05, "loss": 0.0028, "num_input_tokens_seen": 222478048, "step": 103135 }, { "epoch": 16.825448613376835, "grad_norm": 0.07930736243724823, "learning_rate": 7.481077849517776e-05, "loss": 0.0067, "num_input_tokens_seen": 222489312, "step": 103140 }, { "epoch": 16.82626427406199, "grad_norm": 0.003657180117443204, "learning_rate": 7.477333000593218e-05, "loss": 0.0073, "num_input_tokens_seen": 222499936, "step": 103145 }, { "epoch": 16.827079934747147, "grad_norm": 0.10068754851818085, "learning_rate": 7.473589013444449e-05, "loss": 0.0057, "num_input_tokens_seen": 222510816, "step": 103150 }, { "epoch": 16.8278955954323, "grad_norm": 0.29185786843299866, "learning_rate": 7.469845888147348e-05, "loss": 0.0172, "num_input_tokens_seen": 222521728, "step": 103155 }, { "epoch": 16.828711256117455, "grad_norm": 0.0023438213393092155, "learning_rate": 7.466103624777776e-05, "loss": 0.0031, "num_input_tokens_seen": 222532448, "step": 103160 }, { "epoch": 16.82952691680261, "grad_norm": 0.010557902976870537, "learning_rate": 7.462362223411568e-05, "loss": 0.0075, "num_input_tokens_seen": 222542496, "step": 103165 }, { "epoch": 16.830342577487766, "grad_norm": 0.003416226012632251, "learning_rate": 7.458621684124556e-05, "loss": 0.0009, "num_input_tokens_seen": 222553248, "step": 103170 }, { "epoch": 16.83115823817292, "grad_norm": 0.06747445464134216, "learning_rate": 7.454882006992541e-05, "loss": 0.0047, "num_input_tokens_seen": 222564288, "step": 103175 }, { "epoch": 16.831973898858074, "grad_norm": 0.003989071119576693, "learning_rate": 7.451143192091304e-05, "loss": 0.0051, "num_input_tokens_seen": 222575520, "step": 103180 }, { "epoch": 16.83278955954323, "grad_norm": 0.004910726565867662, "learning_rate": 7.447405239496646e-05, "loss": 0.0018, "num_input_tokens_seen": 222585920, "step": 103185 }, { "epoch": 16.833605220228385, "grad_norm": 0.9558630585670471, "learning_rate": 7.443668149284289e-05, "loss": 0.1849, "num_input_tokens_seen": 222596064, "step": 103190 }, { "epoch": 16.83442088091354, "grad_norm": 0.0018194529693573713, "learning_rate": 7.439931921529996e-05, "loss": 0.0134, "num_input_tokens_seen": 222607424, "step": 103195 }, { "epoch": 16.835236541598697, "grad_norm": 0.0023145615123212337, "learning_rate": 7.436196556309454e-05, "loss": 0.0205, "num_input_tokens_seen": 222617632, "step": 103200 }, { "epoch": 16.83605220228385, "grad_norm": 0.7130302786827087, "learning_rate": 7.432462053698413e-05, "loss": 0.0161, "num_input_tokens_seen": 222628352, "step": 103205 }, { "epoch": 16.836867862969005, "grad_norm": 0.1159878745675087, "learning_rate": 7.428728413772502e-05, "loss": 0.0077, "num_input_tokens_seen": 222639328, "step": 103210 }, { "epoch": 16.83768352365416, "grad_norm": 0.02864869311451912, "learning_rate": 7.42499563660744e-05, "loss": 0.0022, "num_input_tokens_seen": 222651072, "step": 103215 }, { "epoch": 16.838499184339316, "grad_norm": 0.21571122109889984, "learning_rate": 7.421263722278826e-05, "loss": 0.0052, "num_input_tokens_seen": 222663232, "step": 103220 }, { "epoch": 16.839314845024468, "grad_norm": 0.0337756909430027, "learning_rate": 7.417532670862343e-05, "loss": 0.0036, "num_input_tokens_seen": 222673184, "step": 103225 }, { "epoch": 16.840130505709624, "grad_norm": 0.00542595237493515, "learning_rate": 7.413802482433557e-05, "loss": 0.0028, "num_input_tokens_seen": 222683776, "step": 103230 }, { "epoch": 16.84094616639478, "grad_norm": 0.01951267570257187, "learning_rate": 7.41007315706811e-05, "loss": 0.0089, "num_input_tokens_seen": 222693824, "step": 103235 }, { "epoch": 16.841761827079935, "grad_norm": 0.001404472510330379, "learning_rate": 7.406344694841538e-05, "loss": 0.0027, "num_input_tokens_seen": 222705280, "step": 103240 }, { "epoch": 16.84257748776509, "grad_norm": 0.002772293286398053, "learning_rate": 7.402617095829434e-05, "loss": 0.0051, "num_input_tokens_seen": 222716352, "step": 103245 }, { "epoch": 16.843393148450243, "grad_norm": 0.12816722691059113, "learning_rate": 7.398890360107336e-05, "loss": 0.019, "num_input_tokens_seen": 222726368, "step": 103250 }, { "epoch": 16.8442088091354, "grad_norm": 0.0014327878598123789, "learning_rate": 7.395164487750766e-05, "loss": 0.0028, "num_input_tokens_seen": 222736512, "step": 103255 }, { "epoch": 16.845024469820554, "grad_norm": 0.10159187763929367, "learning_rate": 7.391439478835233e-05, "loss": 0.0048, "num_input_tokens_seen": 222748128, "step": 103260 }, { "epoch": 16.84584013050571, "grad_norm": 0.05256170406937599, "learning_rate": 7.387715333436235e-05, "loss": 0.0033, "num_input_tokens_seen": 222759616, "step": 103265 }, { "epoch": 16.846655791190866, "grad_norm": 0.012688703835010529, "learning_rate": 7.383992051629246e-05, "loss": 0.0205, "num_input_tokens_seen": 222771104, "step": 103270 }, { "epoch": 16.847471451876018, "grad_norm": 0.01922079361975193, "learning_rate": 7.380269633489717e-05, "loss": 0.003, "num_input_tokens_seen": 222782688, "step": 103275 }, { "epoch": 16.848287112561174, "grad_norm": 0.0016831837128847837, "learning_rate": 7.376548079093087e-05, "loss": 0.1106, "num_input_tokens_seen": 222794784, "step": 103280 }, { "epoch": 16.84910277324633, "grad_norm": 0.8252767324447632, "learning_rate": 7.372827388514792e-05, "loss": 0.073, "num_input_tokens_seen": 222804864, "step": 103285 }, { "epoch": 16.849918433931485, "grad_norm": 0.1496524214744568, "learning_rate": 7.369107561830218e-05, "loss": 0.0497, "num_input_tokens_seen": 222815264, "step": 103290 }, { "epoch": 16.85073409461664, "grad_norm": 0.0018246716354042292, "learning_rate": 7.365388599114764e-05, "loss": 0.0239, "num_input_tokens_seen": 222825824, "step": 103295 }, { "epoch": 16.851549755301793, "grad_norm": 0.010956132784485817, "learning_rate": 7.361670500443796e-05, "loss": 0.0037, "num_input_tokens_seen": 222836992, "step": 103300 }, { "epoch": 16.85236541598695, "grad_norm": 0.049963925033807755, "learning_rate": 7.357953265892665e-05, "loss": 0.0079, "num_input_tokens_seen": 222849152, "step": 103305 }, { "epoch": 16.853181076672104, "grad_norm": 0.021916840225458145, "learning_rate": 7.354236895536704e-05, "loss": 0.0099, "num_input_tokens_seen": 222860000, "step": 103310 }, { "epoch": 16.85399673735726, "grad_norm": 0.03210293501615524, "learning_rate": 7.350521389451231e-05, "loss": 0.0037, "num_input_tokens_seen": 222871392, "step": 103315 }, { "epoch": 16.854812398042416, "grad_norm": 0.015657193958759308, "learning_rate": 7.346806747711554e-05, "loss": 0.0035, "num_input_tokens_seen": 222881760, "step": 103320 }, { "epoch": 16.855628058727568, "grad_norm": 0.00313331326469779, "learning_rate": 7.343092970392929e-05, "loss": 0.1122, "num_input_tokens_seen": 222892032, "step": 103325 }, { "epoch": 16.856443719412724, "grad_norm": 0.1658102571964264, "learning_rate": 7.339380057570666e-05, "loss": 0.0037, "num_input_tokens_seen": 222902944, "step": 103330 }, { "epoch": 16.85725938009788, "grad_norm": 0.005717913154512644, "learning_rate": 7.335668009319962e-05, "loss": 0.0027, "num_input_tokens_seen": 222913856, "step": 103335 }, { "epoch": 16.858075040783035, "grad_norm": 0.010470336303114891, "learning_rate": 7.331956825716091e-05, "loss": 0.0928, "num_input_tokens_seen": 222924480, "step": 103340 }, { "epoch": 16.85889070146819, "grad_norm": 0.00856468454003334, "learning_rate": 7.328246506834224e-05, "loss": 0.0596, "num_input_tokens_seen": 222934592, "step": 103345 }, { "epoch": 16.859706362153343, "grad_norm": 0.005223467946052551, "learning_rate": 7.32453705274958e-05, "loss": 0.01, "num_input_tokens_seen": 222946240, "step": 103350 }, { "epoch": 16.8605220228385, "grad_norm": 1.1444042921066284, "learning_rate": 7.320828463537333e-05, "loss": 0.041, "num_input_tokens_seen": 222957568, "step": 103355 }, { "epoch": 16.861337683523654, "grad_norm": 0.22558574378490448, "learning_rate": 7.317120739272643e-05, "loss": 0.0077, "num_input_tokens_seen": 222967584, "step": 103360 }, { "epoch": 16.86215334420881, "grad_norm": 0.0284471083432436, "learning_rate": 7.313413880030645e-05, "loss": 0.0326, "num_input_tokens_seen": 222979456, "step": 103365 }, { "epoch": 16.862969004893966, "grad_norm": 0.023329202085733414, "learning_rate": 7.309707885886462e-05, "loss": 0.0045, "num_input_tokens_seen": 222990368, "step": 103370 }, { "epoch": 16.863784665579118, "grad_norm": 0.002458177739754319, "learning_rate": 7.306002756915214e-05, "loss": 0.0358, "num_input_tokens_seen": 223000928, "step": 103375 }, { "epoch": 16.864600326264274, "grad_norm": 0.0386650525033474, "learning_rate": 7.302298493191972e-05, "loss": 0.0115, "num_input_tokens_seen": 223011296, "step": 103380 }, { "epoch": 16.86541598694943, "grad_norm": 0.01256465446203947, "learning_rate": 7.298595094791826e-05, "loss": 0.004, "num_input_tokens_seen": 223022560, "step": 103385 }, { "epoch": 16.866231647634585, "grad_norm": 0.019112542271614075, "learning_rate": 7.294892561789817e-05, "loss": 0.0061, "num_input_tokens_seen": 223031936, "step": 103390 }, { "epoch": 16.86704730831974, "grad_norm": 0.009198786690831184, "learning_rate": 7.291190894260985e-05, "loss": 0.008, "num_input_tokens_seen": 223042240, "step": 103395 }, { "epoch": 16.867862969004893, "grad_norm": 0.04011957719922066, "learning_rate": 7.287490092280346e-05, "loss": 0.004, "num_input_tokens_seen": 223051808, "step": 103400 }, { "epoch": 16.86867862969005, "grad_norm": 0.3813632130622864, "learning_rate": 7.28379015592291e-05, "loss": 0.0334, "num_input_tokens_seen": 223061824, "step": 103405 }, { "epoch": 16.869494290375204, "grad_norm": 0.021078310906887054, "learning_rate": 7.280091085263657e-05, "loss": 0.0031, "num_input_tokens_seen": 223072256, "step": 103410 }, { "epoch": 16.87030995106036, "grad_norm": 0.0437936969101429, "learning_rate": 7.276392880377548e-05, "loss": 0.0053, "num_input_tokens_seen": 223083776, "step": 103415 }, { "epoch": 16.871125611745512, "grad_norm": 0.024780699983239174, "learning_rate": 7.27269554133954e-05, "loss": 0.0843, "num_input_tokens_seen": 223094016, "step": 103420 }, { "epoch": 16.871941272430668, "grad_norm": 0.004966340027749538, "learning_rate": 7.268999068224557e-05, "loss": 0.1127, "num_input_tokens_seen": 223104224, "step": 103425 }, { "epoch": 16.872756933115824, "grad_norm": 0.06812267005443573, "learning_rate": 7.265303461107519e-05, "loss": 0.0051, "num_input_tokens_seen": 223114944, "step": 103430 }, { "epoch": 16.87357259380098, "grad_norm": 0.21134552359580994, "learning_rate": 7.261608720063317e-05, "loss": 0.0197, "num_input_tokens_seen": 223125984, "step": 103435 }, { "epoch": 16.874388254486135, "grad_norm": 0.08864639699459076, "learning_rate": 7.25791484516683e-05, "loss": 0.0037, "num_input_tokens_seen": 223137504, "step": 103440 }, { "epoch": 16.875203915171287, "grad_norm": 0.00807627197355032, "learning_rate": 7.254221836492925e-05, "loss": 0.0061, "num_input_tokens_seen": 223147872, "step": 103445 }, { "epoch": 16.876019575856443, "grad_norm": 0.016674986109137535, "learning_rate": 7.250529694116436e-05, "loss": 0.0048, "num_input_tokens_seen": 223158624, "step": 103450 }, { "epoch": 16.8768352365416, "grad_norm": 0.014501569792628288, "learning_rate": 7.246838418112189e-05, "loss": 0.0021, "num_input_tokens_seen": 223168928, "step": 103455 }, { "epoch": 16.877650897226754, "grad_norm": 0.026991363614797592, "learning_rate": 7.243148008555017e-05, "loss": 0.0058, "num_input_tokens_seen": 223180640, "step": 103460 }, { "epoch": 16.87846655791191, "grad_norm": 0.034556131809949875, "learning_rate": 7.239458465519672e-05, "loss": 0.0027, "num_input_tokens_seen": 223190496, "step": 103465 }, { "epoch": 16.879282218597062, "grad_norm": 1.0521445274353027, "learning_rate": 7.235769789080954e-05, "loss": 0.0549, "num_input_tokens_seen": 223200384, "step": 103470 }, { "epoch": 16.880097879282218, "grad_norm": 0.0025549496058374643, "learning_rate": 7.232081979313615e-05, "loss": 0.0109, "num_input_tokens_seen": 223210368, "step": 103475 }, { "epoch": 16.880913539967374, "grad_norm": 0.001639281865209341, "learning_rate": 7.228395036292384e-05, "loss": 0.0039, "num_input_tokens_seen": 223220992, "step": 103480 }, { "epoch": 16.88172920065253, "grad_norm": 0.09795226901769638, "learning_rate": 7.224708960091992e-05, "loss": 0.0043, "num_input_tokens_seen": 223232096, "step": 103485 }, { "epoch": 16.882544861337685, "grad_norm": 0.038850463926792145, "learning_rate": 7.221023750787136e-05, "loss": 0.0091, "num_input_tokens_seen": 223242976, "step": 103490 }, { "epoch": 16.883360522022837, "grad_norm": 0.1187666803598404, "learning_rate": 7.217339408452505e-05, "loss": 0.0081, "num_input_tokens_seen": 223254432, "step": 103495 }, { "epoch": 16.884176182707993, "grad_norm": 0.0904577225446701, "learning_rate": 7.21365593316276e-05, "loss": 0.0082, "num_input_tokens_seen": 223264096, "step": 103500 }, { "epoch": 16.88499184339315, "grad_norm": 0.03377325087785721, "learning_rate": 7.209973324992558e-05, "loss": 0.0139, "num_input_tokens_seen": 223275072, "step": 103505 }, { "epoch": 16.885807504078304, "grad_norm": 0.0834374725818634, "learning_rate": 7.206291584016533e-05, "loss": 0.0066, "num_input_tokens_seen": 223286080, "step": 103510 }, { "epoch": 16.88662316476346, "grad_norm": 0.0063340310007333755, "learning_rate": 7.202610710309293e-05, "loss": 0.0089, "num_input_tokens_seen": 223297152, "step": 103515 }, { "epoch": 16.887438825448612, "grad_norm": 0.020802278071641922, "learning_rate": 7.198930703945439e-05, "loss": 0.0024, "num_input_tokens_seen": 223308576, "step": 103520 }, { "epoch": 16.888254486133768, "grad_norm": 0.030867574736475945, "learning_rate": 7.19525156499955e-05, "loss": 0.0046, "num_input_tokens_seen": 223319232, "step": 103525 }, { "epoch": 16.889070146818923, "grad_norm": 0.15654076635837555, "learning_rate": 7.191573293546195e-05, "loss": 0.006, "num_input_tokens_seen": 223328800, "step": 103530 }, { "epoch": 16.88988580750408, "grad_norm": 0.05379585549235344, "learning_rate": 7.187895889659906e-05, "loss": 0.0578, "num_input_tokens_seen": 223340544, "step": 103535 }, { "epoch": 16.890701468189235, "grad_norm": 0.0017426940612494946, "learning_rate": 7.184219353415228e-05, "loss": 0.0084, "num_input_tokens_seen": 223350592, "step": 103540 }, { "epoch": 16.891517128874387, "grad_norm": 0.020400412380695343, "learning_rate": 7.180543684886654e-05, "loss": 0.0027, "num_input_tokens_seen": 223362240, "step": 103545 }, { "epoch": 16.892332789559543, "grad_norm": 0.05035019665956497, "learning_rate": 7.176868884148679e-05, "loss": 0.0079, "num_input_tokens_seen": 223373184, "step": 103550 }, { "epoch": 16.8931484502447, "grad_norm": 0.24116086959838867, "learning_rate": 7.173194951275786e-05, "loss": 0.0062, "num_input_tokens_seen": 223383648, "step": 103555 }, { "epoch": 16.893964110929854, "grad_norm": 0.013390913605690002, "learning_rate": 7.169521886342417e-05, "loss": 0.0053, "num_input_tokens_seen": 223393824, "step": 103560 }, { "epoch": 16.894779771615006, "grad_norm": 0.0015389756299555302, "learning_rate": 7.165849689423043e-05, "loss": 0.0014, "num_input_tokens_seen": 223404800, "step": 103565 }, { "epoch": 16.895595432300162, "grad_norm": 0.031202999874949455, "learning_rate": 7.162178360592037e-05, "loss": 0.0062, "num_input_tokens_seen": 223416160, "step": 103570 }, { "epoch": 16.896411092985318, "grad_norm": 0.7381551861763, "learning_rate": 7.15850789992386e-05, "loss": 0.1149, "num_input_tokens_seen": 223426432, "step": 103575 }, { "epoch": 16.897226753670473, "grad_norm": 0.0009809001348912716, "learning_rate": 7.154838307492839e-05, "loss": 0.0027, "num_input_tokens_seen": 223437632, "step": 103580 }, { "epoch": 16.89804241435563, "grad_norm": 0.0020821825601160526, "learning_rate": 7.151169583373402e-05, "loss": 0.0021, "num_input_tokens_seen": 223448768, "step": 103585 }, { "epoch": 16.898858075040785, "grad_norm": 0.2029523402452469, "learning_rate": 7.147501727639844e-05, "loss": 0.0071, "num_input_tokens_seen": 223459232, "step": 103590 }, { "epoch": 16.899673735725937, "grad_norm": 0.14450368285179138, "learning_rate": 7.14383474036655e-05, "loss": 0.0089, "num_input_tokens_seen": 223469664, "step": 103595 }, { "epoch": 16.900489396411093, "grad_norm": 0.02121632918715477, "learning_rate": 7.140168621627786e-05, "loss": 0.0024, "num_input_tokens_seen": 223480064, "step": 103600 }, { "epoch": 16.90130505709625, "grad_norm": 0.40811803936958313, "learning_rate": 7.136503371497888e-05, "loss": 0.0229, "num_input_tokens_seen": 223490848, "step": 103605 }, { "epoch": 16.902120717781404, "grad_norm": 0.0015344663988798857, "learning_rate": 7.132838990051132e-05, "loss": 0.0038, "num_input_tokens_seen": 223501984, "step": 103610 }, { "epoch": 16.902936378466556, "grad_norm": 1.1569275856018066, "learning_rate": 7.129175477361766e-05, "loss": 0.0272, "num_input_tokens_seen": 223511840, "step": 103615 }, { "epoch": 16.903752039151712, "grad_norm": 0.05850755795836449, "learning_rate": 7.125512833504049e-05, "loss": 0.0057, "num_input_tokens_seen": 223522368, "step": 103620 }, { "epoch": 16.904567699836868, "grad_norm": 0.013369235210120678, "learning_rate": 7.121851058552209e-05, "loss": 0.0239, "num_input_tokens_seen": 223531680, "step": 103625 }, { "epoch": 16.905383360522023, "grad_norm": 0.12183018773794174, "learning_rate": 7.118190152580444e-05, "loss": 0.0048, "num_input_tokens_seen": 223541472, "step": 103630 }, { "epoch": 16.90619902120718, "grad_norm": 0.060237642377614975, "learning_rate": 7.114530115662959e-05, "loss": 0.0055, "num_input_tokens_seen": 223552576, "step": 103635 }, { "epoch": 16.90701468189233, "grad_norm": 0.013088440522551537, "learning_rate": 7.110870947873926e-05, "loss": 0.0023, "num_input_tokens_seen": 223562144, "step": 103640 }, { "epoch": 16.907830342577487, "grad_norm": 0.004634837154299021, "learning_rate": 7.107212649287497e-05, "loss": 0.0247, "num_input_tokens_seen": 223572704, "step": 103645 }, { "epoch": 16.908646003262643, "grad_norm": 0.030368324369192123, "learning_rate": 7.103555219977825e-05, "loss": 0.0086, "num_input_tokens_seen": 223584096, "step": 103650 }, { "epoch": 16.9094616639478, "grad_norm": 0.07008624076843262, "learning_rate": 7.099898660019016e-05, "loss": 0.0079, "num_input_tokens_seen": 223594272, "step": 103655 }, { "epoch": 16.910277324632954, "grad_norm": 0.014119603671133518, "learning_rate": 7.096242969485189e-05, "loss": 0.007, "num_input_tokens_seen": 223604576, "step": 103660 }, { "epoch": 16.911092985318106, "grad_norm": 0.0014640094013884664, "learning_rate": 7.092588148450413e-05, "loss": 0.0021, "num_input_tokens_seen": 223614176, "step": 103665 }, { "epoch": 16.911908646003262, "grad_norm": 0.001201203209348023, "learning_rate": 7.088934196988795e-05, "loss": 0.0008, "num_input_tokens_seen": 223625312, "step": 103670 }, { "epoch": 16.912724306688418, "grad_norm": 0.10635355114936829, "learning_rate": 7.085281115174335e-05, "loss": 0.0045, "num_input_tokens_seen": 223636672, "step": 103675 }, { "epoch": 16.913539967373573, "grad_norm": 0.0859210416674614, "learning_rate": 7.081628903081116e-05, "loss": 0.006, "num_input_tokens_seen": 223647552, "step": 103680 }, { "epoch": 16.91435562805873, "grad_norm": 0.007214415352791548, "learning_rate": 7.077977560783117e-05, "loss": 0.0046, "num_input_tokens_seen": 223658144, "step": 103685 }, { "epoch": 16.91517128874388, "grad_norm": 0.6557047367095947, "learning_rate": 7.074327088354371e-05, "loss": 0.0155, "num_input_tokens_seen": 223668128, "step": 103690 }, { "epoch": 16.915986949429037, "grad_norm": 0.0019038468599319458, "learning_rate": 7.070677485868821e-05, "loss": 0.051, "num_input_tokens_seen": 223678496, "step": 103695 }, { "epoch": 16.916802610114193, "grad_norm": 0.004583531059324741, "learning_rate": 7.067028753400473e-05, "loss": 0.0682, "num_input_tokens_seen": 223689024, "step": 103700 }, { "epoch": 16.91761827079935, "grad_norm": 1.5601286888122559, "learning_rate": 7.06338089102323e-05, "loss": 0.1363, "num_input_tokens_seen": 223699616, "step": 103705 }, { "epoch": 16.918433931484504, "grad_norm": 0.013203333131968975, "learning_rate": 7.05973389881106e-05, "loss": 0.002, "num_input_tokens_seen": 223711488, "step": 103710 }, { "epoch": 16.919249592169656, "grad_norm": 0.1484663337469101, "learning_rate": 7.056087776837838e-05, "loss": 0.0072, "num_input_tokens_seen": 223722912, "step": 103715 }, { "epoch": 16.920065252854812, "grad_norm": 0.1081765741109848, "learning_rate": 7.052442525177499e-05, "loss": 0.0044, "num_input_tokens_seen": 223734016, "step": 103720 }, { "epoch": 16.920880913539968, "grad_norm": 0.06587956100702286, "learning_rate": 7.048798143903873e-05, "loss": 0.0043, "num_input_tokens_seen": 223743936, "step": 103725 }, { "epoch": 16.921696574225123, "grad_norm": 0.010399832390248775, "learning_rate": 7.045154633090861e-05, "loss": 0.0011, "num_input_tokens_seen": 223756704, "step": 103730 }, { "epoch": 16.92251223491028, "grad_norm": 0.007189610973000526, "learning_rate": 7.041511992812255e-05, "loss": 0.0014, "num_input_tokens_seen": 223767712, "step": 103735 }, { "epoch": 16.92332789559543, "grad_norm": 0.17671909928321838, "learning_rate": 7.037870223141935e-05, "loss": 0.0116, "num_input_tokens_seen": 223778304, "step": 103740 }, { "epoch": 16.924143556280587, "grad_norm": 0.026814401149749756, "learning_rate": 7.034229324153652e-05, "loss": 0.0049, "num_input_tokens_seen": 223789792, "step": 103745 }, { "epoch": 16.924959216965743, "grad_norm": 0.3240191638469696, "learning_rate": 7.030589295921224e-05, "loss": 0.0142, "num_input_tokens_seen": 223800096, "step": 103750 }, { "epoch": 16.9257748776509, "grad_norm": 0.0014137339312583208, "learning_rate": 7.026950138518423e-05, "loss": 0.0009, "num_input_tokens_seen": 223811392, "step": 103755 }, { "epoch": 16.92659053833605, "grad_norm": 0.005644438322633505, "learning_rate": 7.023311852018988e-05, "loss": 0.0262, "num_input_tokens_seen": 223820992, "step": 103760 }, { "epoch": 16.927406199021206, "grad_norm": 0.03404803201556206, "learning_rate": 7.019674436496653e-05, "loss": 0.1644, "num_input_tokens_seen": 223832192, "step": 103765 }, { "epoch": 16.928221859706362, "grad_norm": 0.5003405213356018, "learning_rate": 7.01603789202515e-05, "loss": 0.0107, "num_input_tokens_seen": 223842496, "step": 103770 }, { "epoch": 16.929037520391518, "grad_norm": 0.06349480897188187, "learning_rate": 7.01240221867816e-05, "loss": 0.0077, "num_input_tokens_seen": 223852608, "step": 103775 }, { "epoch": 16.929853181076673, "grad_norm": 0.005640864372253418, "learning_rate": 7.008767416529376e-05, "loss": 0.0047, "num_input_tokens_seen": 223864288, "step": 103780 }, { "epoch": 16.930668841761825, "grad_norm": 0.003700596047565341, "learning_rate": 7.00513348565246e-05, "loss": 0.0016, "num_input_tokens_seen": 223875520, "step": 103785 }, { "epoch": 16.93148450244698, "grad_norm": 0.02476685307919979, "learning_rate": 7.001500426121055e-05, "loss": 0.0069, "num_input_tokens_seen": 223887200, "step": 103790 }, { "epoch": 16.932300163132137, "grad_norm": 0.00203868024982512, "learning_rate": 6.997868238008793e-05, "loss": 0.0021, "num_input_tokens_seen": 223896160, "step": 103795 }, { "epoch": 16.933115823817293, "grad_norm": 0.003458665916696191, "learning_rate": 6.994236921389268e-05, "loss": 0.0044, "num_input_tokens_seen": 223906752, "step": 103800 }, { "epoch": 16.93393148450245, "grad_norm": 0.00593669293448329, "learning_rate": 6.990606476336114e-05, "loss": 0.004, "num_input_tokens_seen": 223917888, "step": 103805 }, { "epoch": 16.9347471451876, "grad_norm": 0.011752564460039139, "learning_rate": 6.98697690292286e-05, "loss": 0.0035, "num_input_tokens_seen": 223928544, "step": 103810 }, { "epoch": 16.935562805872756, "grad_norm": 0.1383724957704544, "learning_rate": 6.983348201223105e-05, "loss": 0.0614, "num_input_tokens_seen": 223938080, "step": 103815 }, { "epoch": 16.936378466557912, "grad_norm": 0.0015911642694845796, "learning_rate": 6.97972037131035e-05, "loss": 0.0038, "num_input_tokens_seen": 223950112, "step": 103820 }, { "epoch": 16.937194127243067, "grad_norm": 0.007721527945250273, "learning_rate": 6.976093413258156e-05, "loss": 0.009, "num_input_tokens_seen": 223961216, "step": 103825 }, { "epoch": 16.938009787928223, "grad_norm": 0.06764552742242813, "learning_rate": 6.972467327139987e-05, "loss": 0.0041, "num_input_tokens_seen": 223973728, "step": 103830 }, { "epoch": 16.938825448613375, "grad_norm": 0.018324121832847595, "learning_rate": 6.968842113029372e-05, "loss": 0.0028, "num_input_tokens_seen": 223984352, "step": 103835 }, { "epoch": 16.93964110929853, "grad_norm": 0.016896432265639305, "learning_rate": 6.965217770999738e-05, "loss": 0.0505, "num_input_tokens_seen": 223995456, "step": 103840 }, { "epoch": 16.940456769983687, "grad_norm": 0.0049615902826189995, "learning_rate": 6.961594301124585e-05, "loss": 0.0075, "num_input_tokens_seen": 224005984, "step": 103845 }, { "epoch": 16.941272430668842, "grad_norm": 0.02980915457010269, "learning_rate": 6.957971703477301e-05, "loss": 0.0027, "num_input_tokens_seen": 224016704, "step": 103850 }, { "epoch": 16.942088091353998, "grad_norm": 0.005859128665179014, "learning_rate": 6.954349978131342e-05, "loss": 0.0028, "num_input_tokens_seen": 224026112, "step": 103855 }, { "epoch": 16.94290375203915, "grad_norm": 0.229956716299057, "learning_rate": 6.950729125160066e-05, "loss": 0.0141, "num_input_tokens_seen": 224036672, "step": 103860 }, { "epoch": 16.943719412724306, "grad_norm": 0.004111624788492918, "learning_rate": 6.947109144636898e-05, "loss": 0.0023, "num_input_tokens_seen": 224047904, "step": 103865 }, { "epoch": 16.94453507340946, "grad_norm": 1.4143182039260864, "learning_rate": 6.943490036635158e-05, "loss": 0.0211, "num_input_tokens_seen": 224058304, "step": 103870 }, { "epoch": 16.945350734094617, "grad_norm": 0.03140643239021301, "learning_rate": 6.939871801228236e-05, "loss": 0.0079, "num_input_tokens_seen": 224069024, "step": 103875 }, { "epoch": 16.946166394779773, "grad_norm": 0.035570088773965836, "learning_rate": 6.936254438489414e-05, "loss": 0.0024, "num_input_tokens_seen": 224080160, "step": 103880 }, { "epoch": 16.946982055464925, "grad_norm": 0.005088799633085728, "learning_rate": 6.932637948492038e-05, "loss": 0.0013, "num_input_tokens_seen": 224091296, "step": 103885 }, { "epoch": 16.94779771615008, "grad_norm": 0.0060318526811897755, "learning_rate": 6.929022331309392e-05, "loss": 0.0016, "num_input_tokens_seen": 224102656, "step": 103890 }, { "epoch": 16.948613376835237, "grad_norm": 0.004163091070950031, "learning_rate": 6.925407587014743e-05, "loss": 0.0283, "num_input_tokens_seen": 224111488, "step": 103895 }, { "epoch": 16.949429037520392, "grad_norm": 0.018625494092702866, "learning_rate": 6.921793715681358e-05, "loss": 0.0144, "num_input_tokens_seen": 224121280, "step": 103900 }, { "epoch": 16.950244698205548, "grad_norm": 0.005926806014031172, "learning_rate": 6.918180717382466e-05, "loss": 0.0073, "num_input_tokens_seen": 224131840, "step": 103905 }, { "epoch": 16.9510603588907, "grad_norm": 0.004623315762728453, "learning_rate": 6.914568592191301e-05, "loss": 0.023, "num_input_tokens_seen": 224143712, "step": 103910 }, { "epoch": 16.951876019575856, "grad_norm": 0.08638868480920792, "learning_rate": 6.910957340181056e-05, "loss": 0.0053, "num_input_tokens_seen": 224153664, "step": 103915 }, { "epoch": 16.95269168026101, "grad_norm": 0.011427692137658596, "learning_rate": 6.907346961424926e-05, "loss": 0.0492, "num_input_tokens_seen": 224165216, "step": 103920 }, { "epoch": 16.953507340946167, "grad_norm": 0.002139717573300004, "learning_rate": 6.903737455996073e-05, "loss": 0.0728, "num_input_tokens_seen": 224177504, "step": 103925 }, { "epoch": 16.954323001631323, "grad_norm": 0.0068588824942708015, "learning_rate": 6.900128823967655e-05, "loss": 0.0008, "num_input_tokens_seen": 224187264, "step": 103930 }, { "epoch": 16.955138662316475, "grad_norm": 0.0012632354628294706, "learning_rate": 6.896521065412803e-05, "loss": 0.0075, "num_input_tokens_seen": 224197472, "step": 103935 }, { "epoch": 16.95595432300163, "grad_norm": 0.021549273282289505, "learning_rate": 6.89291418040463e-05, "loss": 0.0271, "num_input_tokens_seen": 224208512, "step": 103940 }, { "epoch": 16.956769983686787, "grad_norm": 0.029506415128707886, "learning_rate": 6.889308169016229e-05, "loss": 0.0017, "num_input_tokens_seen": 224217568, "step": 103945 }, { "epoch": 16.957585644371942, "grad_norm": 0.007408965844660997, "learning_rate": 6.885703031320706e-05, "loss": 0.0021, "num_input_tokens_seen": 224227808, "step": 103950 }, { "epoch": 16.958401305057095, "grad_norm": 0.025822795927524567, "learning_rate": 6.882098767391087e-05, "loss": 0.0229, "num_input_tokens_seen": 224238304, "step": 103955 }, { "epoch": 16.95921696574225, "grad_norm": 0.0003688313299790025, "learning_rate": 6.878495377300453e-05, "loss": 0.0016, "num_input_tokens_seen": 224249280, "step": 103960 }, { "epoch": 16.960032626427406, "grad_norm": 0.034812621772289276, "learning_rate": 6.874892861121795e-05, "loss": 0.0017, "num_input_tokens_seen": 224259584, "step": 103965 }, { "epoch": 16.96084828711256, "grad_norm": 0.20228374004364014, "learning_rate": 6.871291218928166e-05, "loss": 0.0088, "num_input_tokens_seen": 224269312, "step": 103970 }, { "epoch": 16.961663947797717, "grad_norm": 0.003344029188156128, "learning_rate": 6.867690450792508e-05, "loss": 0.0019, "num_input_tokens_seen": 224280864, "step": 103975 }, { "epoch": 16.96247960848287, "grad_norm": 0.0290042944252491, "learning_rate": 6.864090556787838e-05, "loss": 0.0043, "num_input_tokens_seen": 224291328, "step": 103980 }, { "epoch": 16.963295269168025, "grad_norm": 0.026988210156559944, "learning_rate": 6.860491536987079e-05, "loss": 0.0177, "num_input_tokens_seen": 224302816, "step": 103985 }, { "epoch": 16.96411092985318, "grad_norm": 0.001548346015624702, "learning_rate": 6.856893391463192e-05, "loss": 0.1025, "num_input_tokens_seen": 224313440, "step": 103990 }, { "epoch": 16.964926590538337, "grad_norm": 0.006010445766150951, "learning_rate": 6.853296120289094e-05, "loss": 0.0146, "num_input_tokens_seen": 224325568, "step": 103995 }, { "epoch": 16.965742251223492, "grad_norm": 0.0020670003723353148, "learning_rate": 6.849699723537684e-05, "loss": 0.0026, "num_input_tokens_seen": 224337088, "step": 104000 }, { "epoch": 16.966557911908644, "grad_norm": 0.04562485218048096, "learning_rate": 6.84610420128185e-05, "loss": 0.0057, "num_input_tokens_seen": 224348000, "step": 104005 }, { "epoch": 16.9673735725938, "grad_norm": 0.00812728051096201, "learning_rate": 6.842509553594462e-05, "loss": 0.0024, "num_input_tokens_seen": 224359360, "step": 104010 }, { "epoch": 16.968189233278956, "grad_norm": 0.010814927518367767, "learning_rate": 6.83891578054836e-05, "loss": 0.0042, "num_input_tokens_seen": 224370368, "step": 104015 }, { "epoch": 16.96900489396411, "grad_norm": 0.07433625310659409, "learning_rate": 6.835322882216388e-05, "loss": 0.0586, "num_input_tokens_seen": 224380992, "step": 104020 }, { "epoch": 16.969820554649267, "grad_norm": 0.016932446509599686, "learning_rate": 6.831730858671353e-05, "loss": 0.0035, "num_input_tokens_seen": 224392416, "step": 104025 }, { "epoch": 16.97063621533442, "grad_norm": 0.018028758466243744, "learning_rate": 6.828139709986058e-05, "loss": 0.0027, "num_input_tokens_seen": 224401600, "step": 104030 }, { "epoch": 16.971451876019575, "grad_norm": 0.05279481038451195, "learning_rate": 6.824549436233279e-05, "loss": 0.0045, "num_input_tokens_seen": 224413088, "step": 104035 }, { "epoch": 16.97226753670473, "grad_norm": 0.003573646768927574, "learning_rate": 6.820960037485779e-05, "loss": 0.0599, "num_input_tokens_seen": 224423776, "step": 104040 }, { "epoch": 16.973083197389887, "grad_norm": 0.054679661989212036, "learning_rate": 6.8173715138163e-05, "loss": 0.0038, "num_input_tokens_seen": 224434848, "step": 104045 }, { "epoch": 16.973898858075042, "grad_norm": 0.013194920495152473, "learning_rate": 6.813783865297563e-05, "loss": 0.0035, "num_input_tokens_seen": 224444512, "step": 104050 }, { "epoch": 16.974714518760194, "grad_norm": 0.006568380165845156, "learning_rate": 6.810197092002285e-05, "loss": 0.0023, "num_input_tokens_seen": 224455840, "step": 104055 }, { "epoch": 16.97553017944535, "grad_norm": 0.0030842041596770287, "learning_rate": 6.806611194003154e-05, "loss": 0.1657, "num_input_tokens_seen": 224467488, "step": 104060 }, { "epoch": 16.976345840130506, "grad_norm": 0.003656767774373293, "learning_rate": 6.803026171372845e-05, "loss": 0.1173, "num_input_tokens_seen": 224476928, "step": 104065 }, { "epoch": 16.97716150081566, "grad_norm": 0.03733963519334793, "learning_rate": 6.799442024184005e-05, "loss": 0.0084, "num_input_tokens_seen": 224488096, "step": 104070 }, { "epoch": 16.977977161500817, "grad_norm": 0.019390324130654335, "learning_rate": 6.795858752509276e-05, "loss": 0.0099, "num_input_tokens_seen": 224500736, "step": 104075 }, { "epoch": 16.97879282218597, "grad_norm": 0.002359053585678339, "learning_rate": 6.792276356421278e-05, "loss": 0.0016, "num_input_tokens_seen": 224510336, "step": 104080 }, { "epoch": 16.979608482871125, "grad_norm": 0.0032478172797709703, "learning_rate": 6.788694835992615e-05, "loss": 0.0031, "num_input_tokens_seen": 224520928, "step": 104085 }, { "epoch": 16.98042414355628, "grad_norm": 0.19882608950138092, "learning_rate": 6.785114191295854e-05, "loss": 0.0044, "num_input_tokens_seen": 224531424, "step": 104090 }, { "epoch": 16.981239804241437, "grad_norm": 0.017898503690958023, "learning_rate": 6.78153442240359e-05, "loss": 0.0069, "num_input_tokens_seen": 224541568, "step": 104095 }, { "epoch": 16.982055464926592, "grad_norm": 0.5799863934516907, "learning_rate": 6.777955529388358e-05, "loss": 0.0132, "num_input_tokens_seen": 224551936, "step": 104100 }, { "epoch": 16.982871125611744, "grad_norm": 0.002687811618670821, "learning_rate": 6.774377512322688e-05, "loss": 0.0073, "num_input_tokens_seen": 224562176, "step": 104105 }, { "epoch": 16.9836867862969, "grad_norm": 0.024144001305103302, "learning_rate": 6.77080037127909e-05, "loss": 0.0047, "num_input_tokens_seen": 224573184, "step": 104110 }, { "epoch": 16.984502446982056, "grad_norm": 0.00421716645359993, "learning_rate": 6.767224106330067e-05, "loss": 0.0245, "num_input_tokens_seen": 224583264, "step": 104115 }, { "epoch": 16.98531810766721, "grad_norm": 0.013138786889612675, "learning_rate": 6.763648717548088e-05, "loss": 0.0107, "num_input_tokens_seen": 224593824, "step": 104120 }, { "epoch": 16.986133768352367, "grad_norm": 0.007796260993927717, "learning_rate": 6.760074205005617e-05, "loss": 0.0011, "num_input_tokens_seen": 224605536, "step": 104125 }, { "epoch": 16.98694942903752, "grad_norm": 0.0929383635520935, "learning_rate": 6.756500568775098e-05, "loss": 0.0055, "num_input_tokens_seen": 224616416, "step": 104130 }, { "epoch": 16.987765089722675, "grad_norm": 0.032929226756095886, "learning_rate": 6.752927808928955e-05, "loss": 0.0032, "num_input_tokens_seen": 224627680, "step": 104135 }, { "epoch": 16.98858075040783, "grad_norm": 0.05063315108418465, "learning_rate": 6.749355925539591e-05, "loss": 0.0044, "num_input_tokens_seen": 224638400, "step": 104140 }, { "epoch": 16.989396411092986, "grad_norm": 0.0023174448870122433, "learning_rate": 6.745784918679399e-05, "loss": 0.0017, "num_input_tokens_seen": 224648800, "step": 104145 }, { "epoch": 16.99021207177814, "grad_norm": 0.011181803420186043, "learning_rate": 6.742214788420742e-05, "loss": 0.0035, "num_input_tokens_seen": 224658816, "step": 104150 }, { "epoch": 16.991027732463294, "grad_norm": 0.13051854074001312, "learning_rate": 6.73864553483598e-05, "loss": 0.0088, "num_input_tokens_seen": 224669472, "step": 104155 }, { "epoch": 16.99184339314845, "grad_norm": 0.012974252924323082, "learning_rate": 6.735077157997448e-05, "loss": 0.0022, "num_input_tokens_seen": 224680064, "step": 104160 }, { "epoch": 16.992659053833606, "grad_norm": 0.04372046887874603, "learning_rate": 6.731509657977464e-05, "loss": 0.0065, "num_input_tokens_seen": 224690208, "step": 104165 }, { "epoch": 16.99347471451876, "grad_norm": 0.691785991191864, "learning_rate": 6.727943034848327e-05, "loss": 0.0403, "num_input_tokens_seen": 224701728, "step": 104170 }, { "epoch": 16.994290375203914, "grad_norm": 0.03455149382352829, "learning_rate": 6.72437728868232e-05, "loss": 0.0575, "num_input_tokens_seen": 224711264, "step": 104175 }, { "epoch": 16.99510603588907, "grad_norm": 0.001892270054668188, "learning_rate": 6.720812419551703e-05, "loss": 0.005, "num_input_tokens_seen": 224721984, "step": 104180 }, { "epoch": 16.995921696574225, "grad_norm": 0.6105122566223145, "learning_rate": 6.717248427528727e-05, "loss": 0.0109, "num_input_tokens_seen": 224733760, "step": 104185 }, { "epoch": 16.99673735725938, "grad_norm": 0.008856971748173237, "learning_rate": 6.713685312685619e-05, "loss": 0.0053, "num_input_tokens_seen": 224744704, "step": 104190 }, { "epoch": 16.997553017944536, "grad_norm": 0.012863608077168465, "learning_rate": 6.710123075094593e-05, "loss": 0.0065, "num_input_tokens_seen": 224756256, "step": 104195 }, { "epoch": 16.99836867862969, "grad_norm": 0.019687986001372337, "learning_rate": 6.70656171482783e-05, "loss": 0.0347, "num_input_tokens_seen": 224766560, "step": 104200 }, { "epoch": 16.999184339314844, "grad_norm": 0.014954157173633575, "learning_rate": 6.703001231957535e-05, "loss": 0.1807, "num_input_tokens_seen": 224777920, "step": 104205 }, { "epoch": 17.0, "grad_norm": 0.040260665118694305, "learning_rate": 6.699441626555824e-05, "loss": 0.0088, "num_input_tokens_seen": 224787008, "step": 104210 }, { "epoch": 17.0, "eval_loss": 0.24776047468185425, "eval_runtime": 104.6813, "eval_samples_per_second": 26.031, "eval_steps_per_second": 6.515, "num_input_tokens_seen": 224787008, "step": 104210 }, { "epoch": 17.000815660685156, "grad_norm": 0.011772292666137218, "learning_rate": 6.695882898694883e-05, "loss": 0.0116, "num_input_tokens_seen": 224796928, "step": 104215 }, { "epoch": 17.00163132137031, "grad_norm": 0.00812274869531393, "learning_rate": 6.692325048446784e-05, "loss": 0.0252, "num_input_tokens_seen": 224807392, "step": 104220 }, { "epoch": 17.002446982055464, "grad_norm": 0.19330890476703644, "learning_rate": 6.688768075883683e-05, "loss": 0.0046, "num_input_tokens_seen": 224818624, "step": 104225 }, { "epoch": 17.00326264274062, "grad_norm": 0.014338434673845768, "learning_rate": 6.685211981077616e-05, "loss": 0.1454, "num_input_tokens_seen": 224829312, "step": 104230 }, { "epoch": 17.004078303425775, "grad_norm": 0.003659123321995139, "learning_rate": 6.68165676410069e-05, "loss": 0.0015, "num_input_tokens_seen": 224840768, "step": 104235 }, { "epoch": 17.00489396411093, "grad_norm": 0.001604782068170607, "learning_rate": 6.678102425024946e-05, "loss": 0.0087, "num_input_tokens_seen": 224850656, "step": 104240 }, { "epoch": 17.005709624796086, "grad_norm": 0.0010798327857628465, "learning_rate": 6.674548963922412e-05, "loss": 0.0065, "num_input_tokens_seen": 224862080, "step": 104245 }, { "epoch": 17.00652528548124, "grad_norm": 0.353349506855011, "learning_rate": 6.670996380865101e-05, "loss": 0.0121, "num_input_tokens_seen": 224873312, "step": 104250 }, { "epoch": 17.007340946166394, "grad_norm": 0.0016907331300899386, "learning_rate": 6.667444675925022e-05, "loss": 0.0034, "num_input_tokens_seen": 224885504, "step": 104255 }, { "epoch": 17.00815660685155, "grad_norm": 0.35217127203941345, "learning_rate": 6.663893849174147e-05, "loss": 0.0079, "num_input_tokens_seen": 224896032, "step": 104260 }, { "epoch": 17.008972267536706, "grad_norm": 0.0038699081633239985, "learning_rate": 6.660343900684434e-05, "loss": 0.0126, "num_input_tokens_seen": 224907200, "step": 104265 }, { "epoch": 17.00978792822186, "grad_norm": 0.01561376266181469, "learning_rate": 6.656794830527835e-05, "loss": 0.107, "num_input_tokens_seen": 224916192, "step": 104270 }, { "epoch": 17.010603588907014, "grad_norm": 0.03557194024324417, "learning_rate": 6.653246638776273e-05, "loss": 0.0023, "num_input_tokens_seen": 224927840, "step": 104275 }, { "epoch": 17.01141924959217, "grad_norm": 0.007549258880317211, "learning_rate": 6.649699325501657e-05, "loss": 0.0049, "num_input_tokens_seen": 224938560, "step": 104280 }, { "epoch": 17.012234910277325, "grad_norm": 0.1349920779466629, "learning_rate": 6.64615289077588e-05, "loss": 0.0037, "num_input_tokens_seen": 224949280, "step": 104285 }, { "epoch": 17.01305057096248, "grad_norm": 0.013362543657422066, "learning_rate": 6.642607334670808e-05, "loss": 0.0015, "num_input_tokens_seen": 224960320, "step": 104290 }, { "epoch": 17.013866231647636, "grad_norm": 0.037102337926626205, "learning_rate": 6.639062657258305e-05, "loss": 0.0067, "num_input_tokens_seen": 224971616, "step": 104295 }, { "epoch": 17.01468189233279, "grad_norm": 0.006163390818983316, "learning_rate": 6.635518858610207e-05, "loss": 0.0031, "num_input_tokens_seen": 224981696, "step": 104300 }, { "epoch": 17.015497553017944, "grad_norm": 0.0015328828012570739, "learning_rate": 6.631975938798312e-05, "loss": 0.0194, "num_input_tokens_seen": 224992096, "step": 104305 }, { "epoch": 17.0163132137031, "grad_norm": 0.03885510191321373, "learning_rate": 6.62843389789447e-05, "loss": 0.0058, "num_input_tokens_seen": 225001536, "step": 104310 }, { "epoch": 17.017128874388256, "grad_norm": 0.004807349760085344, "learning_rate": 6.624892735970412e-05, "loss": 0.0016, "num_input_tokens_seen": 225012640, "step": 104315 }, { "epoch": 17.017944535073408, "grad_norm": 1.9273040294647217, "learning_rate": 6.621352453097951e-05, "loss": 0.0242, "num_input_tokens_seen": 225023616, "step": 104320 }, { "epoch": 17.018760195758563, "grad_norm": 0.005595046561211348, "learning_rate": 6.617813049348787e-05, "loss": 0.0015, "num_input_tokens_seen": 225034912, "step": 104325 }, { "epoch": 17.01957585644372, "grad_norm": 0.049170102924108505, "learning_rate": 6.6142745247947e-05, "loss": 0.0049, "num_input_tokens_seen": 225045600, "step": 104330 }, { "epoch": 17.020391517128875, "grad_norm": 0.13315409421920776, "learning_rate": 6.610736879507356e-05, "loss": 0.0055, "num_input_tokens_seen": 225056096, "step": 104335 }, { "epoch": 17.02120717781403, "grad_norm": 0.013612095266580582, "learning_rate": 6.607200113558493e-05, "loss": 0.0014, "num_input_tokens_seen": 225066880, "step": 104340 }, { "epoch": 17.022022838499183, "grad_norm": 0.01146923378109932, "learning_rate": 6.603664227019745e-05, "loss": 0.0026, "num_input_tokens_seen": 225078432, "step": 104345 }, { "epoch": 17.02283849918434, "grad_norm": 0.013631249777972698, "learning_rate": 6.600129219962819e-05, "loss": 0.0043, "num_input_tokens_seen": 225089312, "step": 104350 }, { "epoch": 17.023654159869494, "grad_norm": 0.13032369315624237, "learning_rate": 6.596595092459307e-05, "loss": 0.0041, "num_input_tokens_seen": 225099680, "step": 104355 }, { "epoch": 17.02446982055465, "grad_norm": 0.08544798940420151, "learning_rate": 6.593061844580878e-05, "loss": 0.0532, "num_input_tokens_seen": 225111520, "step": 104360 }, { "epoch": 17.025285481239806, "grad_norm": 0.004546268843114376, "learning_rate": 6.589529476399097e-05, "loss": 0.0051, "num_input_tokens_seen": 225121312, "step": 104365 }, { "epoch": 17.026101141924958, "grad_norm": 0.020557256415486336, "learning_rate": 6.585997987985592e-05, "loss": 0.0015, "num_input_tokens_seen": 225130720, "step": 104370 }, { "epoch": 17.026916802610113, "grad_norm": 0.0016014183638617396, "learning_rate": 6.582467379411889e-05, "loss": 0.0025, "num_input_tokens_seen": 225142240, "step": 104375 }, { "epoch": 17.02773246329527, "grad_norm": 0.015116388909518719, "learning_rate": 6.578937650749573e-05, "loss": 0.0058, "num_input_tokens_seen": 225153248, "step": 104380 }, { "epoch": 17.028548123980425, "grad_norm": 0.0017644548788666725, "learning_rate": 6.575408802070171e-05, "loss": 0.0037, "num_input_tokens_seen": 225165184, "step": 104385 }, { "epoch": 17.02936378466558, "grad_norm": 0.02056831121444702, "learning_rate": 6.571880833445198e-05, "loss": 0.0056, "num_input_tokens_seen": 225175584, "step": 104390 }, { "epoch": 17.030179445350733, "grad_norm": 0.0925116166472435, "learning_rate": 6.568353744946154e-05, "loss": 0.0056, "num_input_tokens_seen": 225186272, "step": 104395 }, { "epoch": 17.03099510603589, "grad_norm": 0.03236358240246773, "learning_rate": 6.564827536644519e-05, "loss": 0.0037, "num_input_tokens_seen": 225197632, "step": 104400 }, { "epoch": 17.031810766721044, "grad_norm": 0.06901739537715912, "learning_rate": 6.561302208611752e-05, "loss": 0.0054, "num_input_tokens_seen": 225208448, "step": 104405 }, { "epoch": 17.0326264274062, "grad_norm": 0.009202918969094753, "learning_rate": 6.557777760919303e-05, "loss": 0.0042, "num_input_tokens_seen": 225220160, "step": 104410 }, { "epoch": 17.033442088091356, "grad_norm": 0.0015428396873176098, "learning_rate": 6.554254193638598e-05, "loss": 0.0041, "num_input_tokens_seen": 225230784, "step": 104415 }, { "epoch": 17.034257748776508, "grad_norm": 0.005634005181491375, "learning_rate": 6.550731506841046e-05, "loss": 0.001, "num_input_tokens_seen": 225240608, "step": 104420 }, { "epoch": 17.035073409461663, "grad_norm": 0.023702984675765038, "learning_rate": 6.54720970059804e-05, "loss": 0.0055, "num_input_tokens_seen": 225251520, "step": 104425 }, { "epoch": 17.03588907014682, "grad_norm": 0.01368745043873787, "learning_rate": 6.543688774980944e-05, "loss": 0.0032, "num_input_tokens_seen": 225262528, "step": 104430 }, { "epoch": 17.036704730831975, "grad_norm": 0.005876247305423021, "learning_rate": 6.540168730061141e-05, "loss": 0.0085, "num_input_tokens_seen": 225273440, "step": 104435 }, { "epoch": 17.03752039151713, "grad_norm": 0.14090485870838165, "learning_rate": 6.53664956590993e-05, "loss": 0.0194, "num_input_tokens_seen": 225284128, "step": 104440 }, { "epoch": 17.038336052202283, "grad_norm": 0.015776116400957108, "learning_rate": 6.533131282598676e-05, "loss": 0.0035, "num_input_tokens_seen": 225293792, "step": 104445 }, { "epoch": 17.03915171288744, "grad_norm": 0.12819184362888336, "learning_rate": 6.529613880198638e-05, "loss": 0.0068, "num_input_tokens_seen": 225303328, "step": 104450 }, { "epoch": 17.039967373572594, "grad_norm": 0.6987629532814026, "learning_rate": 6.526097358781141e-05, "loss": 0.0253, "num_input_tokens_seen": 225314336, "step": 104455 }, { "epoch": 17.04078303425775, "grad_norm": 0.01071083452552557, "learning_rate": 6.522581718417409e-05, "loss": 0.0029, "num_input_tokens_seen": 225325280, "step": 104460 }, { "epoch": 17.041598694942905, "grad_norm": 0.002036697929725051, "learning_rate": 6.519066959178738e-05, "loss": 0.0105, "num_input_tokens_seen": 225336224, "step": 104465 }, { "epoch": 17.042414355628058, "grad_norm": 0.07784174382686615, "learning_rate": 6.515553081136311e-05, "loss": 0.0081, "num_input_tokens_seen": 225346976, "step": 104470 }, { "epoch": 17.043230016313213, "grad_norm": 0.06865884363651276, "learning_rate": 6.512040084361388e-05, "loss": 0.0044, "num_input_tokens_seen": 225358368, "step": 104475 }, { "epoch": 17.04404567699837, "grad_norm": 0.0036467344034463167, "learning_rate": 6.508527968925115e-05, "loss": 0.0839, "num_input_tokens_seen": 225368928, "step": 104480 }, { "epoch": 17.044861337683525, "grad_norm": 0.01101301982998848, "learning_rate": 6.505016734898722e-05, "loss": 0.0021, "num_input_tokens_seen": 225380512, "step": 104485 }, { "epoch": 17.045676998368677, "grad_norm": 0.0027499424759298563, "learning_rate": 6.501506382353317e-05, "loss": 0.0021, "num_input_tokens_seen": 225390912, "step": 104490 }, { "epoch": 17.046492659053833, "grad_norm": 0.0024078956339508295, "learning_rate": 6.497996911360093e-05, "loss": 0.0015, "num_input_tokens_seen": 225401888, "step": 104495 }, { "epoch": 17.04730831973899, "grad_norm": 0.001780488877557218, "learning_rate": 6.494488321990122e-05, "loss": 0.0104, "num_input_tokens_seen": 225413152, "step": 104500 }, { "epoch": 17.048123980424144, "grad_norm": 0.0019509729463607073, "learning_rate": 6.490980614314556e-05, "loss": 0.0084, "num_input_tokens_seen": 225423488, "step": 104505 }, { "epoch": 17.0489396411093, "grad_norm": 0.009638155810534954, "learning_rate": 6.487473788404446e-05, "loss": 0.0022, "num_input_tokens_seen": 225433472, "step": 104510 }, { "epoch": 17.049755301794452, "grad_norm": 0.001258420990779996, "learning_rate": 6.483967844330901e-05, "loss": 0.0037, "num_input_tokens_seen": 225445472, "step": 104515 }, { "epoch": 17.050570962479608, "grad_norm": 0.017608510330319405, "learning_rate": 6.480462782164925e-05, "loss": 0.003, "num_input_tokens_seen": 225455296, "step": 104520 }, { "epoch": 17.051386623164763, "grad_norm": 0.013947105035185814, "learning_rate": 6.476958601977595e-05, "loss": 0.0017, "num_input_tokens_seen": 225466208, "step": 104525 }, { "epoch": 17.05220228384992, "grad_norm": 1.2629220485687256, "learning_rate": 6.473455303839909e-05, "loss": 0.1792, "num_input_tokens_seen": 225476480, "step": 104530 }, { "epoch": 17.053017944535075, "grad_norm": 0.015518652275204659, "learning_rate": 6.469952887822866e-05, "loss": 0.0026, "num_input_tokens_seen": 225486976, "step": 104535 }, { "epoch": 17.053833605220227, "grad_norm": 0.00446438230574131, "learning_rate": 6.466451353997455e-05, "loss": 0.0027, "num_input_tokens_seen": 225497280, "step": 104540 }, { "epoch": 17.054649265905383, "grad_norm": 0.000701532291714102, "learning_rate": 6.462950702434633e-05, "loss": 0.0011, "num_input_tokens_seen": 225508480, "step": 104545 }, { "epoch": 17.05546492659054, "grad_norm": 0.0009421190479770303, "learning_rate": 6.459450933205346e-05, "loss": 0.0127, "num_input_tokens_seen": 225519072, "step": 104550 }, { "epoch": 17.056280587275694, "grad_norm": 0.001118298969231546, "learning_rate": 6.455952046380514e-05, "loss": 0.0046, "num_input_tokens_seen": 225530976, "step": 104555 }, { "epoch": 17.05709624796085, "grad_norm": 0.5358046293258667, "learning_rate": 6.452454042031059e-05, "loss": 0.0122, "num_input_tokens_seen": 225540992, "step": 104560 }, { "epoch": 17.057911908646002, "grad_norm": 0.3185410797595978, "learning_rate": 6.448956920227867e-05, "loss": 0.0033, "num_input_tokens_seen": 225551904, "step": 104565 }, { "epoch": 17.058727569331158, "grad_norm": 0.005120089277625084, "learning_rate": 6.445460681041815e-05, "loss": 0.0078, "num_input_tokens_seen": 225562944, "step": 104570 }, { "epoch": 17.059543230016313, "grad_norm": 0.010832875035703182, "learning_rate": 6.441965324543737e-05, "loss": 0.0011, "num_input_tokens_seen": 225573920, "step": 104575 }, { "epoch": 17.06035889070147, "grad_norm": 0.04214286804199219, "learning_rate": 6.438470850804512e-05, "loss": 0.0034, "num_input_tokens_seen": 225584256, "step": 104580 }, { "epoch": 17.061174551386625, "grad_norm": 0.1565100997686386, "learning_rate": 6.43497725989492e-05, "loss": 0.0153, "num_input_tokens_seen": 225595392, "step": 104585 }, { "epoch": 17.061990212071777, "grad_norm": 0.013069977052509785, "learning_rate": 6.431484551885797e-05, "loss": 0.0046, "num_input_tokens_seen": 225606528, "step": 104590 }, { "epoch": 17.062805872756933, "grad_norm": 0.007414736319333315, "learning_rate": 6.427992726847892e-05, "loss": 0.0095, "num_input_tokens_seen": 225617760, "step": 104595 }, { "epoch": 17.063621533442088, "grad_norm": 0.011819995939731598, "learning_rate": 6.424501784852004e-05, "loss": 0.1192, "num_input_tokens_seen": 225627712, "step": 104600 }, { "epoch": 17.064437194127244, "grad_norm": 0.01895037107169628, "learning_rate": 6.421011725968856e-05, "loss": 0.0033, "num_input_tokens_seen": 225638496, "step": 104605 }, { "epoch": 17.0652528548124, "grad_norm": 0.005347767844796181, "learning_rate": 6.4175225502692e-05, "loss": 0.007, "num_input_tokens_seen": 225649600, "step": 104610 }, { "epoch": 17.06606851549755, "grad_norm": 0.03307915851473808, "learning_rate": 6.414034257823725e-05, "loss": 0.0026, "num_input_tokens_seen": 225659552, "step": 104615 }, { "epoch": 17.066884176182707, "grad_norm": 0.7239627838134766, "learning_rate": 6.410546848703153e-05, "loss": 0.0512, "num_input_tokens_seen": 225669792, "step": 104620 }, { "epoch": 17.067699836867863, "grad_norm": 0.0042619346641004086, "learning_rate": 6.407060322978131e-05, "loss": 0.0476, "num_input_tokens_seen": 225678848, "step": 104625 }, { "epoch": 17.06851549755302, "grad_norm": 0.05504421144723892, "learning_rate": 6.403574680719343e-05, "loss": 0.0048, "num_input_tokens_seen": 225690112, "step": 104630 }, { "epoch": 17.069331158238175, "grad_norm": 0.00629306398332119, "learning_rate": 6.400089921997415e-05, "loss": 0.0026, "num_input_tokens_seen": 225701312, "step": 104635 }, { "epoch": 17.070146818923327, "grad_norm": 0.002915940945968032, "learning_rate": 6.39660604688298e-05, "loss": 0.0009, "num_input_tokens_seen": 225712416, "step": 104640 }, { "epoch": 17.070962479608482, "grad_norm": 0.01589224301278591, "learning_rate": 6.393123055446637e-05, "loss": 0.0036, "num_input_tokens_seen": 225723104, "step": 104645 }, { "epoch": 17.071778140293638, "grad_norm": 0.007028897292912006, "learning_rate": 6.389640947758973e-05, "loss": 0.0031, "num_input_tokens_seen": 225734784, "step": 104650 }, { "epoch": 17.072593800978794, "grad_norm": 0.06384836137294769, "learning_rate": 6.38615972389056e-05, "loss": 0.0103, "num_input_tokens_seen": 225746016, "step": 104655 }, { "epoch": 17.07340946166395, "grad_norm": 0.006251305807381868, "learning_rate": 6.382679383911949e-05, "loss": 0.0027, "num_input_tokens_seen": 225757504, "step": 104660 }, { "epoch": 17.0742251223491, "grad_norm": 0.008043746463954449, "learning_rate": 6.37919992789367e-05, "loss": 0.026, "num_input_tokens_seen": 225768128, "step": 104665 }, { "epoch": 17.075040783034257, "grad_norm": 0.018748454749584198, "learning_rate": 6.375721355906245e-05, "loss": 0.0044, "num_input_tokens_seen": 225779872, "step": 104670 }, { "epoch": 17.075856443719413, "grad_norm": 0.009538413025438786, "learning_rate": 6.372243668020167e-05, "loss": 0.0027, "num_input_tokens_seen": 225789472, "step": 104675 }, { "epoch": 17.07667210440457, "grad_norm": 0.16040140390396118, "learning_rate": 6.368766864305914e-05, "loss": 0.0061, "num_input_tokens_seen": 225800512, "step": 104680 }, { "epoch": 17.07748776508972, "grad_norm": 0.0005987054901197553, "learning_rate": 6.365290944833952e-05, "loss": 0.0036, "num_input_tokens_seen": 225811616, "step": 104685 }, { "epoch": 17.078303425774877, "grad_norm": 0.01854178123176098, "learning_rate": 6.361815909674722e-05, "loss": 0.0032, "num_input_tokens_seen": 225823872, "step": 104690 }, { "epoch": 17.079119086460032, "grad_norm": 0.00650700693950057, "learning_rate": 6.358341758898656e-05, "loss": 0.0018, "num_input_tokens_seen": 225834080, "step": 104695 }, { "epoch": 17.079934747145188, "grad_norm": 0.003897515358403325, "learning_rate": 6.354868492576154e-05, "loss": 0.067, "num_input_tokens_seen": 225844800, "step": 104700 }, { "epoch": 17.080750407830344, "grad_norm": 0.8626391291618347, "learning_rate": 6.351396110777613e-05, "loss": 0.1648, "num_input_tokens_seen": 225855392, "step": 104705 }, { "epoch": 17.081566068515496, "grad_norm": 0.646980345249176, "learning_rate": 6.347924613573402e-05, "loss": 0.017, "num_input_tokens_seen": 225866400, "step": 104710 }, { "epoch": 17.08238172920065, "grad_norm": 0.0007816980360075831, "learning_rate": 6.344454001033873e-05, "loss": 0.0013, "num_input_tokens_seen": 225878048, "step": 104715 }, { "epoch": 17.083197389885807, "grad_norm": 0.05243643373250961, "learning_rate": 6.340984273229355e-05, "loss": 0.0058, "num_input_tokens_seen": 225889184, "step": 104720 }, { "epoch": 17.084013050570963, "grad_norm": 0.04723139852285385, "learning_rate": 6.337515430230196e-05, "loss": 0.0025, "num_input_tokens_seen": 225900544, "step": 104725 }, { "epoch": 17.08482871125612, "grad_norm": 0.004147608298808336, "learning_rate": 6.334047472106657e-05, "loss": 0.0036, "num_input_tokens_seen": 225911136, "step": 104730 }, { "epoch": 17.08564437194127, "grad_norm": 0.1590079814195633, "learning_rate": 6.330580398929047e-05, "loss": 0.0043, "num_input_tokens_seen": 225920032, "step": 104735 }, { "epoch": 17.086460032626427, "grad_norm": 0.002597876824438572, "learning_rate": 6.327114210767632e-05, "loss": 0.0009, "num_input_tokens_seen": 225931232, "step": 104740 }, { "epoch": 17.087275693311582, "grad_norm": 0.0009755231440067291, "learning_rate": 6.323648907692642e-05, "loss": 0.0092, "num_input_tokens_seen": 225942048, "step": 104745 }, { "epoch": 17.088091353996738, "grad_norm": 0.0024422931019216776, "learning_rate": 6.320184489774317e-05, "loss": 0.0037, "num_input_tokens_seen": 225952768, "step": 104750 }, { "epoch": 17.088907014681894, "grad_norm": 0.006987266708165407, "learning_rate": 6.316720957082867e-05, "loss": 0.0021, "num_input_tokens_seen": 225964032, "step": 104755 }, { "epoch": 17.089722675367046, "grad_norm": 0.00891895405948162, "learning_rate": 6.31325830968848e-05, "loss": 0.0025, "num_input_tokens_seen": 225977248, "step": 104760 }, { "epoch": 17.0905383360522, "grad_norm": 0.09654968231916428, "learning_rate": 6.30979654766134e-05, "loss": 0.0093, "num_input_tokens_seen": 225987296, "step": 104765 }, { "epoch": 17.091353996737357, "grad_norm": 0.030549118295311928, "learning_rate": 6.306335671071589e-05, "loss": 0.0088, "num_input_tokens_seen": 225996480, "step": 104770 }, { "epoch": 17.092169657422513, "grad_norm": 0.03250773623585701, "learning_rate": 6.302875679989384e-05, "loss": 0.0011, "num_input_tokens_seen": 226007072, "step": 104775 }, { "epoch": 17.09298531810767, "grad_norm": 0.06237514317035675, "learning_rate": 6.299416574484828e-05, "loss": 0.0025, "num_input_tokens_seen": 226018720, "step": 104780 }, { "epoch": 17.09380097879282, "grad_norm": 0.09156310558319092, "learning_rate": 6.29595835462804e-05, "loss": 0.0149, "num_input_tokens_seen": 226029856, "step": 104785 }, { "epoch": 17.094616639477977, "grad_norm": 0.04713364318013191, "learning_rate": 6.2925010204891e-05, "loss": 0.0835, "num_input_tokens_seen": 226040096, "step": 104790 }, { "epoch": 17.095432300163132, "grad_norm": 0.0020797650795429945, "learning_rate": 6.289044572138069e-05, "loss": 0.0034, "num_input_tokens_seen": 226051136, "step": 104795 }, { "epoch": 17.096247960848288, "grad_norm": 0.043308407068252563, "learning_rate": 6.285589009644999e-05, "loss": 0.0016, "num_input_tokens_seen": 226061472, "step": 104800 }, { "epoch": 17.097063621533444, "grad_norm": 0.0017314457800239325, "learning_rate": 6.282134333079926e-05, "loss": 0.0016, "num_input_tokens_seen": 226071744, "step": 104805 }, { "epoch": 17.097879282218596, "grad_norm": 0.021750034764409065, "learning_rate": 6.278680542512866e-05, "loss": 0.0027, "num_input_tokens_seen": 226082112, "step": 104810 }, { "epoch": 17.09869494290375, "grad_norm": 1.0848203897476196, "learning_rate": 6.275227638013803e-05, "loss": 0.0782, "num_input_tokens_seen": 226092480, "step": 104815 }, { "epoch": 17.099510603588907, "grad_norm": 0.028136372566223145, "learning_rate": 6.271775619652719e-05, "loss": 0.017, "num_input_tokens_seen": 226103968, "step": 104820 }, { "epoch": 17.100326264274063, "grad_norm": 0.007583794184029102, "learning_rate": 6.268324487499583e-05, "loss": 0.0012, "num_input_tokens_seen": 226115072, "step": 104825 }, { "epoch": 17.10114192495922, "grad_norm": 0.002195006003603339, "learning_rate": 6.264874241624324e-05, "loss": 0.0141, "num_input_tokens_seen": 226125600, "step": 104830 }, { "epoch": 17.10195758564437, "grad_norm": 0.0008435287745669484, "learning_rate": 6.261424882096866e-05, "loss": 0.0021, "num_input_tokens_seen": 226135232, "step": 104835 }, { "epoch": 17.102773246329527, "grad_norm": 0.010542824864387512, "learning_rate": 6.257976408987115e-05, "loss": 0.0038, "num_input_tokens_seen": 226145856, "step": 104840 }, { "epoch": 17.103588907014682, "grad_norm": 1.1479278802871704, "learning_rate": 6.254528822364985e-05, "loss": 0.1151, "num_input_tokens_seen": 226154816, "step": 104845 }, { "epoch": 17.104404567699838, "grad_norm": 0.08439220488071442, "learning_rate": 6.2510821223003e-05, "loss": 0.0031, "num_input_tokens_seen": 226164640, "step": 104850 }, { "epoch": 17.10522022838499, "grad_norm": 0.0009026865591295063, "learning_rate": 6.247636308862953e-05, "loss": 0.0013, "num_input_tokens_seen": 226175200, "step": 104855 }, { "epoch": 17.106035889070146, "grad_norm": 0.01947978138923645, "learning_rate": 6.244191382122744e-05, "loss": 0.0015, "num_input_tokens_seen": 226186688, "step": 104860 }, { "epoch": 17.1068515497553, "grad_norm": 0.0058452654629945755, "learning_rate": 6.240747342149511e-05, "loss": 0.0008, "num_input_tokens_seen": 226197632, "step": 104865 }, { "epoch": 17.107667210440457, "grad_norm": 0.03328147903084755, "learning_rate": 6.237304189013049e-05, "loss": 0.0193, "num_input_tokens_seen": 226208448, "step": 104870 }, { "epoch": 17.108482871125613, "grad_norm": 0.02567555569112301, "learning_rate": 6.233861922783135e-05, "loss": 0.039, "num_input_tokens_seen": 226218944, "step": 104875 }, { "epoch": 17.109298531810765, "grad_norm": 0.021892793476581573, "learning_rate": 6.230420543529525e-05, "loss": 0.0043, "num_input_tokens_seen": 226229984, "step": 104880 }, { "epoch": 17.11011419249592, "grad_norm": 0.0232851505279541, "learning_rate": 6.226980051321973e-05, "loss": 0.0049, "num_input_tokens_seen": 226240544, "step": 104885 }, { "epoch": 17.110929853181077, "grad_norm": 0.012895450927317142, "learning_rate": 6.223540446230202e-05, "loss": 0.0046, "num_input_tokens_seen": 226251424, "step": 104890 }, { "epoch": 17.111745513866232, "grad_norm": 0.2823787033557892, "learning_rate": 6.220101728323913e-05, "loss": 0.0044, "num_input_tokens_seen": 226263328, "step": 104895 }, { "epoch": 17.112561174551388, "grad_norm": 0.010158097371459007, "learning_rate": 6.216663897672803e-05, "loss": 0.0039, "num_input_tokens_seen": 226273984, "step": 104900 }, { "epoch": 17.11337683523654, "grad_norm": 0.10075705498456955, "learning_rate": 6.213226954346546e-05, "loss": 0.0097, "num_input_tokens_seen": 226285184, "step": 104905 }, { "epoch": 17.114192495921696, "grad_norm": 0.15536688268184662, "learning_rate": 6.209790898414785e-05, "loss": 0.0064, "num_input_tokens_seen": 226294912, "step": 104910 }, { "epoch": 17.11500815660685, "grad_norm": 0.001563726575113833, "learning_rate": 6.206355729947171e-05, "loss": 0.0052, "num_input_tokens_seen": 226304576, "step": 104915 }, { "epoch": 17.115823817292007, "grad_norm": 0.0008472060435451567, "learning_rate": 6.20292144901331e-05, "loss": 0.0007, "num_input_tokens_seen": 226315744, "step": 104920 }, { "epoch": 17.116639477977163, "grad_norm": 0.00992321502417326, "learning_rate": 6.199488055682806e-05, "loss": 0.0022, "num_input_tokens_seen": 226325120, "step": 104925 }, { "epoch": 17.117455138662315, "grad_norm": 0.005183232016861439, "learning_rate": 6.196055550025243e-05, "loss": 0.0024, "num_input_tokens_seen": 226336544, "step": 104930 }, { "epoch": 17.11827079934747, "grad_norm": 0.0864577442407608, "learning_rate": 6.192623932110187e-05, "loss": 0.0473, "num_input_tokens_seen": 226347488, "step": 104935 }, { "epoch": 17.119086460032626, "grad_norm": 0.0020709217060357332, "learning_rate": 6.189193202007176e-05, "loss": 0.0022, "num_input_tokens_seen": 226357824, "step": 104940 }, { "epoch": 17.119902120717782, "grad_norm": 0.025334632024168968, "learning_rate": 6.185763359785729e-05, "loss": 0.0029, "num_input_tokens_seen": 226368512, "step": 104945 }, { "epoch": 17.120717781402938, "grad_norm": 0.0030437589157372713, "learning_rate": 6.182334405515399e-05, "loss": 0.0917, "num_input_tokens_seen": 226379552, "step": 104950 }, { "epoch": 17.12153344208809, "grad_norm": 0.00733728613704443, "learning_rate": 6.178906339265622e-05, "loss": 0.0024, "num_input_tokens_seen": 226391072, "step": 104955 }, { "epoch": 17.122349102773246, "grad_norm": 0.008691351860761642, "learning_rate": 6.175479161105923e-05, "loss": 0.0027, "num_input_tokens_seen": 226402432, "step": 104960 }, { "epoch": 17.1231647634584, "grad_norm": 0.041552335023880005, "learning_rate": 6.17205287110571e-05, "loss": 0.0102, "num_input_tokens_seen": 226413280, "step": 104965 }, { "epoch": 17.123980424143557, "grad_norm": 0.009080358780920506, "learning_rate": 6.16862746933447e-05, "loss": 0.0021, "num_input_tokens_seen": 226423200, "step": 104970 }, { "epoch": 17.124796084828713, "grad_norm": 0.0018575722351670265, "learning_rate": 6.165202955861577e-05, "loss": 0.0026, "num_input_tokens_seen": 226434432, "step": 104975 }, { "epoch": 17.125611745513865, "grad_norm": 0.026986604556441307, "learning_rate": 6.161779330756473e-05, "loss": 0.0023, "num_input_tokens_seen": 226445440, "step": 104980 }, { "epoch": 17.12642740619902, "grad_norm": 0.5842498540878296, "learning_rate": 6.158356594088504e-05, "loss": 0.0199, "num_input_tokens_seen": 226457856, "step": 104985 }, { "epoch": 17.127243066884176, "grad_norm": 0.0189560204744339, "learning_rate": 6.154934745927076e-05, "loss": 0.0019, "num_input_tokens_seen": 226468768, "step": 104990 }, { "epoch": 17.128058727569332, "grad_norm": 0.07833965122699738, "learning_rate": 6.151513786341495e-05, "loss": 0.0112, "num_input_tokens_seen": 226480704, "step": 104995 }, { "epoch": 17.128874388254488, "grad_norm": 0.5421042442321777, "learning_rate": 6.148093715401138e-05, "loss": 0.0201, "num_input_tokens_seen": 226492160, "step": 105000 }, { "epoch": 17.12969004893964, "grad_norm": 0.21265332400798798, "learning_rate": 6.144674533175265e-05, "loss": 0.0064, "num_input_tokens_seen": 226502944, "step": 105005 }, { "epoch": 17.130505709624796, "grad_norm": 0.00845579244196415, "learning_rate": 6.141256239733212e-05, "loss": 0.0017, "num_input_tokens_seen": 226513824, "step": 105010 }, { "epoch": 17.13132137030995, "grad_norm": 0.0073780580423772335, "learning_rate": 6.137838835144239e-05, "loss": 0.0051, "num_input_tokens_seen": 226524224, "step": 105015 }, { "epoch": 17.132137030995107, "grad_norm": 0.6693096160888672, "learning_rate": 6.1344223194776e-05, "loss": 0.1268, "num_input_tokens_seen": 226535584, "step": 105020 }, { "epoch": 17.13295269168026, "grad_norm": 0.010491420514881611, "learning_rate": 6.13100669280255e-05, "loss": 0.0058, "num_input_tokens_seen": 226546208, "step": 105025 }, { "epoch": 17.133768352365415, "grad_norm": 0.003053294727578759, "learning_rate": 6.127591955188295e-05, "loss": 0.0026, "num_input_tokens_seen": 226556160, "step": 105030 }, { "epoch": 17.13458401305057, "grad_norm": 0.1412600874900818, "learning_rate": 6.124178106704042e-05, "loss": 0.0071, "num_input_tokens_seen": 226566368, "step": 105035 }, { "epoch": 17.135399673735726, "grad_norm": 0.006022570189088583, "learning_rate": 6.120765147418989e-05, "loss": 0.0016, "num_input_tokens_seen": 226578784, "step": 105040 }, { "epoch": 17.136215334420882, "grad_norm": 0.008763427846133709, "learning_rate": 6.117353077402288e-05, "loss": 0.0053, "num_input_tokens_seen": 226590240, "step": 105045 }, { "epoch": 17.137030995106034, "grad_norm": 0.03271832317113876, "learning_rate": 6.113941896723097e-05, "loss": 0.0033, "num_input_tokens_seen": 226600768, "step": 105050 }, { "epoch": 17.13784665579119, "grad_norm": 0.0021710838191211224, "learning_rate": 6.110531605450548e-05, "loss": 0.0011, "num_input_tokens_seen": 226612256, "step": 105055 }, { "epoch": 17.138662316476346, "grad_norm": 0.0025985618121922016, "learning_rate": 6.107122203653742e-05, "loss": 0.006, "num_input_tokens_seen": 226622944, "step": 105060 }, { "epoch": 17.1394779771615, "grad_norm": 0.0007510372088290751, "learning_rate": 6.103713691401813e-05, "loss": 0.0059, "num_input_tokens_seen": 226633728, "step": 105065 }, { "epoch": 17.140293637846657, "grad_norm": 0.003081483067944646, "learning_rate": 6.1003060687637836e-05, "loss": 0.0078, "num_input_tokens_seen": 226644448, "step": 105070 }, { "epoch": 17.14110929853181, "grad_norm": 0.06227454915642738, "learning_rate": 6.09689933580877e-05, "loss": 0.0059, "num_input_tokens_seen": 226654496, "step": 105075 }, { "epoch": 17.141924959216965, "grad_norm": 0.01723240502178669, "learning_rate": 6.0934934926057616e-05, "loss": 0.0469, "num_input_tokens_seen": 226665856, "step": 105080 }, { "epoch": 17.14274061990212, "grad_norm": 0.002034595236182213, "learning_rate": 6.0900885392238316e-05, "loss": 0.0023, "num_input_tokens_seen": 226675264, "step": 105085 }, { "epoch": 17.143556280587276, "grad_norm": 0.01371521782130003, "learning_rate": 6.086684475731935e-05, "loss": 0.0051, "num_input_tokens_seen": 226684960, "step": 105090 }, { "epoch": 17.144371941272432, "grad_norm": 0.04693332314491272, "learning_rate": 6.083281302199112e-05, "loss": 0.0022, "num_input_tokens_seen": 226695136, "step": 105095 }, { "epoch": 17.145187601957584, "grad_norm": 0.003651362145319581, "learning_rate": 6.0798790186942784e-05, "loss": 0.001, "num_input_tokens_seen": 226704544, "step": 105100 }, { "epoch": 17.14600326264274, "grad_norm": 0.5629998445510864, "learning_rate": 6.0764776252864365e-05, "loss": 0.0141, "num_input_tokens_seen": 226714720, "step": 105105 }, { "epoch": 17.146818923327896, "grad_norm": 0.002373956609517336, "learning_rate": 6.073077122044479e-05, "loss": 0.0025, "num_input_tokens_seen": 226724768, "step": 105110 }, { "epoch": 17.14763458401305, "grad_norm": 0.03845464438199997, "learning_rate": 6.069677509037358e-05, "loss": 0.0216, "num_input_tokens_seen": 226734848, "step": 105115 }, { "epoch": 17.148450244698207, "grad_norm": 0.003605737816542387, "learning_rate": 6.066278786333928e-05, "loss": 0.0069, "num_input_tokens_seen": 226745184, "step": 105120 }, { "epoch": 17.14926590538336, "grad_norm": 0.003846066305413842, "learning_rate": 6.062880954003114e-05, "loss": 0.0021, "num_input_tokens_seen": 226755488, "step": 105125 }, { "epoch": 17.150081566068515, "grad_norm": 0.6302109360694885, "learning_rate": 6.059484012113736e-05, "loss": 0.0889, "num_input_tokens_seen": 226764992, "step": 105130 }, { "epoch": 17.15089722675367, "grad_norm": 0.001371624879539013, "learning_rate": 6.0560879607346795e-05, "loss": 0.0019, "num_input_tokens_seen": 226777568, "step": 105135 }, { "epoch": 17.151712887438826, "grad_norm": 0.004751713015139103, "learning_rate": 6.0526927999347224e-05, "loss": 0.0013, "num_input_tokens_seen": 226788160, "step": 105140 }, { "epoch": 17.152528548123982, "grad_norm": 0.018031736835837364, "learning_rate": 6.049298529782721e-05, "loss": 0.0045, "num_input_tokens_seen": 226799712, "step": 105145 }, { "epoch": 17.153344208809134, "grad_norm": 0.04230910539627075, "learning_rate": 6.045905150347419e-05, "loss": 0.005, "num_input_tokens_seen": 226810656, "step": 105150 }, { "epoch": 17.15415986949429, "grad_norm": 0.0187738798558712, "learning_rate": 6.0425126616976186e-05, "loss": 0.0873, "num_input_tokens_seen": 226821536, "step": 105155 }, { "epoch": 17.154975530179446, "grad_norm": 0.016406631097197533, "learning_rate": 6.039121063902064e-05, "loss": 0.0122, "num_input_tokens_seen": 226832864, "step": 105160 }, { "epoch": 17.1557911908646, "grad_norm": 0.022522129118442535, "learning_rate": 6.03573035702949e-05, "loss": 0.0016, "num_input_tokens_seen": 226844448, "step": 105165 }, { "epoch": 17.156606851549757, "grad_norm": 0.0011380099458619952, "learning_rate": 6.032340541148612e-05, "loss": 0.0055, "num_input_tokens_seen": 226856512, "step": 105170 }, { "epoch": 17.15742251223491, "grad_norm": 0.0013981532538309693, "learning_rate": 6.0289516163281264e-05, "loss": 0.0017, "num_input_tokens_seen": 226866368, "step": 105175 }, { "epoch": 17.158238172920065, "grad_norm": 0.024873143061995506, "learning_rate": 6.025563582636723e-05, "loss": 0.0029, "num_input_tokens_seen": 226878176, "step": 105180 }, { "epoch": 17.15905383360522, "grad_norm": 0.022311434149742126, "learning_rate": 6.0221764401430565e-05, "loss": 0.0035, "num_input_tokens_seen": 226889184, "step": 105185 }, { "epoch": 17.159869494290376, "grad_norm": 0.007276357151567936, "learning_rate": 6.0187901889157735e-05, "loss": 0.1266, "num_input_tokens_seen": 226900192, "step": 105190 }, { "epoch": 17.160685154975532, "grad_norm": 0.08659173548221588, "learning_rate": 6.015404829023502e-05, "loss": 0.003, "num_input_tokens_seen": 226911168, "step": 105195 }, { "epoch": 17.161500815660684, "grad_norm": 0.03120732493698597, "learning_rate": 6.012020360534853e-05, "loss": 0.0055, "num_input_tokens_seen": 226921792, "step": 105200 }, { "epoch": 17.16231647634584, "grad_norm": 0.13158459961414337, "learning_rate": 6.008636783518401e-05, "loss": 0.1302, "num_input_tokens_seen": 226932032, "step": 105205 }, { "epoch": 17.163132137030995, "grad_norm": 0.008246788755059242, "learning_rate": 6.005254098042751e-05, "loss": 0.0014, "num_input_tokens_seen": 226943872, "step": 105210 }, { "epoch": 17.16394779771615, "grad_norm": 0.014018232002854347, "learning_rate": 6.00187230417642e-05, "loss": 0.0054, "num_input_tokens_seen": 226954304, "step": 105215 }, { "epoch": 17.164763458401303, "grad_norm": 0.3761582672595978, "learning_rate": 5.998491401987982e-05, "loss": 0.0168, "num_input_tokens_seen": 226965216, "step": 105220 }, { "epoch": 17.16557911908646, "grad_norm": 0.002861709799617529, "learning_rate": 5.9951113915459154e-05, "loss": 0.0035, "num_input_tokens_seen": 226974144, "step": 105225 }, { "epoch": 17.166394779771615, "grad_norm": 0.02729472704231739, "learning_rate": 5.9917322729187594e-05, "loss": 0.0025, "num_input_tokens_seen": 226984800, "step": 105230 }, { "epoch": 17.16721044045677, "grad_norm": 0.05355251580476761, "learning_rate": 5.9883540461749596e-05, "loss": 0.0039, "num_input_tokens_seen": 226996480, "step": 105235 }, { "epoch": 17.168026101141926, "grad_norm": 0.34371623396873474, "learning_rate": 5.984976711383017e-05, "loss": 0.0106, "num_input_tokens_seen": 227007040, "step": 105240 }, { "epoch": 17.16884176182708, "grad_norm": 0.015780890360474586, "learning_rate": 5.981600268611337e-05, "loss": 0.0038, "num_input_tokens_seen": 227018752, "step": 105245 }, { "epoch": 17.169657422512234, "grad_norm": 0.0009406707249581814, "learning_rate": 5.9782247179283875e-05, "loss": 0.0071, "num_input_tokens_seen": 227029856, "step": 105250 }, { "epoch": 17.17047308319739, "grad_norm": 0.005174366757273674, "learning_rate": 5.9748500594025425e-05, "loss": 0.0031, "num_input_tokens_seen": 227040800, "step": 105255 }, { "epoch": 17.171288743882545, "grad_norm": 0.0680832490324974, "learning_rate": 5.971476293102229e-05, "loss": 0.0083, "num_input_tokens_seen": 227052064, "step": 105260 }, { "epoch": 17.1721044045677, "grad_norm": 0.017716310918331146, "learning_rate": 5.9681034190957886e-05, "loss": 0.0039, "num_input_tokens_seen": 227063552, "step": 105265 }, { "epoch": 17.172920065252853, "grad_norm": 0.013128891587257385, "learning_rate": 5.964731437451593e-05, "loss": 0.0089, "num_input_tokens_seen": 227073824, "step": 105270 }, { "epoch": 17.17373572593801, "grad_norm": 0.003489695955067873, "learning_rate": 5.961360348237982e-05, "loss": 0.0416, "num_input_tokens_seen": 227084000, "step": 105275 }, { "epoch": 17.174551386623165, "grad_norm": 0.1361032873392105, "learning_rate": 5.9579901515232684e-05, "loss": 0.0099, "num_input_tokens_seen": 227093664, "step": 105280 }, { "epoch": 17.17536704730832, "grad_norm": 0.0019487797981128097, "learning_rate": 5.954620847375758e-05, "loss": 0.0012, "num_input_tokens_seen": 227104256, "step": 105285 }, { "epoch": 17.176182707993476, "grad_norm": 0.06878817826509476, "learning_rate": 5.9512524358637296e-05, "loss": 0.1204, "num_input_tokens_seen": 227115296, "step": 105290 }, { "epoch": 17.17699836867863, "grad_norm": 0.0015410167397931218, "learning_rate": 5.9478849170554513e-05, "loss": 0.0051, "num_input_tokens_seen": 227126464, "step": 105295 }, { "epoch": 17.177814029363784, "grad_norm": 0.0014562886208295822, "learning_rate": 5.944518291019168e-05, "loss": 0.0032, "num_input_tokens_seen": 227137088, "step": 105300 }, { "epoch": 17.17862969004894, "grad_norm": 0.007893229834735394, "learning_rate": 5.9411525578231094e-05, "loss": 0.0013, "num_input_tokens_seen": 227148672, "step": 105305 }, { "epoch": 17.179445350734095, "grad_norm": 0.0010714258532971144, "learning_rate": 5.9377877175354865e-05, "loss": 0.0879, "num_input_tokens_seen": 227159232, "step": 105310 }, { "epoch": 17.18026101141925, "grad_norm": 0.001984142232686281, "learning_rate": 5.934423770224495e-05, "loss": 0.0041, "num_input_tokens_seen": 227169792, "step": 105315 }, { "epoch": 17.181076672104403, "grad_norm": 0.055945903062820435, "learning_rate": 5.931060715958309e-05, "loss": 0.0037, "num_input_tokens_seen": 227181856, "step": 105320 }, { "epoch": 17.18189233278956, "grad_norm": 0.020281154662370682, "learning_rate": 5.9276985548050775e-05, "loss": 0.0053, "num_input_tokens_seen": 227191936, "step": 105325 }, { "epoch": 17.182707993474715, "grad_norm": 0.04612872749567032, "learning_rate": 5.924337286832948e-05, "loss": 0.0104, "num_input_tokens_seen": 227202112, "step": 105330 }, { "epoch": 17.18352365415987, "grad_norm": 0.009314208291471004, "learning_rate": 5.9209769121100374e-05, "loss": 0.0084, "num_input_tokens_seen": 227212096, "step": 105335 }, { "epoch": 17.184339314845026, "grad_norm": 0.0042664408683776855, "learning_rate": 5.917617430704447e-05, "loss": 0.0032, "num_input_tokens_seen": 227222272, "step": 105340 }, { "epoch": 17.18515497553018, "grad_norm": 0.014778603799641132, "learning_rate": 5.9142588426842615e-05, "loss": 0.0096, "num_input_tokens_seen": 227233504, "step": 105345 }, { "epoch": 17.185970636215334, "grad_norm": 0.009363583289086819, "learning_rate": 5.9109011481175364e-05, "loss": 0.0018, "num_input_tokens_seen": 227243904, "step": 105350 }, { "epoch": 17.18678629690049, "grad_norm": 0.03682023286819458, "learning_rate": 5.907544347072352e-05, "loss": 0.0069, "num_input_tokens_seen": 227253760, "step": 105355 }, { "epoch": 17.187601957585645, "grad_norm": 0.0011115940287709236, "learning_rate": 5.904188439616692e-05, "loss": 0.0024, "num_input_tokens_seen": 227264160, "step": 105360 }, { "epoch": 17.1884176182708, "grad_norm": 0.028771253302693367, "learning_rate": 5.9008334258186195e-05, "loss": 0.0067, "num_input_tokens_seen": 227274688, "step": 105365 }, { "epoch": 17.189233278955953, "grad_norm": 0.044789355248212814, "learning_rate": 5.897479305746079e-05, "loss": 0.0081, "num_input_tokens_seen": 227285440, "step": 105370 }, { "epoch": 17.19004893964111, "grad_norm": 0.08576209098100662, "learning_rate": 5.894126079467077e-05, "loss": 0.0035, "num_input_tokens_seen": 227296960, "step": 105375 }, { "epoch": 17.190864600326265, "grad_norm": 0.023311812430620193, "learning_rate": 5.890773747049566e-05, "loss": 0.0062, "num_input_tokens_seen": 227307040, "step": 105380 }, { "epoch": 17.19168026101142, "grad_norm": 0.02562984637916088, "learning_rate": 5.88742230856148e-05, "loss": 0.0035, "num_input_tokens_seen": 227318432, "step": 105385 }, { "epoch": 17.192495921696572, "grad_norm": 0.0018296631751582026, "learning_rate": 5.884071764070736e-05, "loss": 0.003, "num_input_tokens_seen": 227330144, "step": 105390 }, { "epoch": 17.193311582381728, "grad_norm": 0.01308351568877697, "learning_rate": 5.880722113645248e-05, "loss": 0.0043, "num_input_tokens_seen": 227340576, "step": 105395 }, { "epoch": 17.194127243066884, "grad_norm": 0.005630400963127613, "learning_rate": 5.877373357352894e-05, "loss": 0.0051, "num_input_tokens_seen": 227350720, "step": 105400 }, { "epoch": 17.19494290375204, "grad_norm": 0.0003944635100197047, "learning_rate": 5.874025495261548e-05, "loss": 0.0019, "num_input_tokens_seen": 227361696, "step": 105405 }, { "epoch": 17.195758564437195, "grad_norm": 0.004116757307201624, "learning_rate": 5.870678527439049e-05, "loss": 0.01, "num_input_tokens_seen": 227371616, "step": 105410 }, { "epoch": 17.196574225122347, "grad_norm": 0.03690781071782112, "learning_rate": 5.867332453953228e-05, "loss": 0.0017, "num_input_tokens_seen": 227382496, "step": 105415 }, { "epoch": 17.197389885807503, "grad_norm": 0.351229190826416, "learning_rate": 5.863987274871907e-05, "loss": 0.0112, "num_input_tokens_seen": 227393408, "step": 105420 }, { "epoch": 17.19820554649266, "grad_norm": 0.001840342185460031, "learning_rate": 5.860642990262871e-05, "loss": 0.0039, "num_input_tokens_seen": 227404384, "step": 105425 }, { "epoch": 17.199021207177815, "grad_norm": 0.0754779651761055, "learning_rate": 5.857299600193899e-05, "loss": 0.0037, "num_input_tokens_seen": 227415168, "step": 105430 }, { "epoch": 17.19983686786297, "grad_norm": 0.8277974724769592, "learning_rate": 5.853957104732749e-05, "loss": 0.0548, "num_input_tokens_seen": 227426080, "step": 105435 }, { "epoch": 17.200652528548122, "grad_norm": 0.041319165378808975, "learning_rate": 5.850615503947166e-05, "loss": 0.0054, "num_input_tokens_seen": 227437216, "step": 105440 }, { "epoch": 17.201468189233278, "grad_norm": 0.06847307831048965, "learning_rate": 5.8472747979048665e-05, "loss": 0.0397, "num_input_tokens_seen": 227446592, "step": 105445 }, { "epoch": 17.202283849918434, "grad_norm": 0.017976053059101105, "learning_rate": 5.843934986673549e-05, "loss": 0.004, "num_input_tokens_seen": 227457024, "step": 105450 }, { "epoch": 17.20309951060359, "grad_norm": 0.0022900656331330538, "learning_rate": 5.840596070320914e-05, "loss": 0.002, "num_input_tokens_seen": 227467328, "step": 105455 }, { "epoch": 17.203915171288745, "grad_norm": 0.004650783259421587, "learning_rate": 5.837258048914612e-05, "loss": 0.0008, "num_input_tokens_seen": 227478528, "step": 105460 }, { "epoch": 17.204730831973897, "grad_norm": 0.08967630565166473, "learning_rate": 5.833920922522301e-05, "loss": 0.0047, "num_input_tokens_seen": 227490272, "step": 105465 }, { "epoch": 17.205546492659053, "grad_norm": 0.25043097138404846, "learning_rate": 5.830584691211615e-05, "loss": 0.0039, "num_input_tokens_seen": 227499808, "step": 105470 }, { "epoch": 17.20636215334421, "grad_norm": 0.004303548950701952, "learning_rate": 5.827249355050163e-05, "loss": 0.0018, "num_input_tokens_seen": 227509792, "step": 105475 }, { "epoch": 17.207177814029365, "grad_norm": 0.13997763395309448, "learning_rate": 5.823914914105527e-05, "loss": 0.0069, "num_input_tokens_seen": 227520224, "step": 105480 }, { "epoch": 17.20799347471452, "grad_norm": 0.02868352085351944, "learning_rate": 5.820581368445316e-05, "loss": 0.0075, "num_input_tokens_seen": 227530240, "step": 105485 }, { "epoch": 17.208809135399672, "grad_norm": 0.0017134350491687655, "learning_rate": 5.817248718137053e-05, "loss": 0.0109, "num_input_tokens_seen": 227540864, "step": 105490 }, { "epoch": 17.209624796084828, "grad_norm": 0.25311079621315, "learning_rate": 5.8139169632483e-05, "loss": 0.0072, "num_input_tokens_seen": 227552320, "step": 105495 }, { "epoch": 17.210440456769984, "grad_norm": 0.003870647866278887, "learning_rate": 5.810586103846577e-05, "loss": 0.0083, "num_input_tokens_seen": 227564448, "step": 105500 }, { "epoch": 17.21125611745514, "grad_norm": 0.0007329988875426352, "learning_rate": 5.807256139999384e-05, "loss": 0.001, "num_input_tokens_seen": 227576096, "step": 105505 }, { "epoch": 17.212071778140295, "grad_norm": 0.1571163386106491, "learning_rate": 5.8039270717742065e-05, "loss": 0.0047, "num_input_tokens_seen": 227585920, "step": 105510 }, { "epoch": 17.212887438825447, "grad_norm": 0.005261322949081659, "learning_rate": 5.8005988992385184e-05, "loss": 0.0008, "num_input_tokens_seen": 227596160, "step": 105515 }, { "epoch": 17.213703099510603, "grad_norm": 0.052407603710889816, "learning_rate": 5.79727162245976e-05, "loss": 0.0027, "num_input_tokens_seen": 227606656, "step": 105520 }, { "epoch": 17.21451876019576, "grad_norm": 0.04082410782575607, "learning_rate": 5.7939452415053664e-05, "loss": 0.0043, "num_input_tokens_seen": 227617184, "step": 105525 }, { "epoch": 17.215334420880914, "grad_norm": 0.0019169568549841642, "learning_rate": 5.7906197564427557e-05, "loss": 0.0765, "num_input_tokens_seen": 227628000, "step": 105530 }, { "epoch": 17.21615008156607, "grad_norm": 0.004847490228712559, "learning_rate": 5.7872951673393184e-05, "loss": 0.0024, "num_input_tokens_seen": 227639456, "step": 105535 }, { "epoch": 17.216965742251222, "grad_norm": 0.05944418907165527, "learning_rate": 5.7839714742624284e-05, "loss": 0.0018, "num_input_tokens_seen": 227650944, "step": 105540 }, { "epoch": 17.217781402936378, "grad_norm": 0.3292612135410309, "learning_rate": 5.780648677279454e-05, "loss": 0.0088, "num_input_tokens_seen": 227662528, "step": 105545 }, { "epoch": 17.218597063621534, "grad_norm": 0.04396677017211914, "learning_rate": 5.777326776457725e-05, "loss": 0.0184, "num_input_tokens_seen": 227673984, "step": 105550 }, { "epoch": 17.21941272430669, "grad_norm": 0.0025333331432193518, "learning_rate": 5.774005771864571e-05, "loss": 0.0017, "num_input_tokens_seen": 227685248, "step": 105555 }, { "epoch": 17.22022838499184, "grad_norm": 0.0027680862694978714, "learning_rate": 5.7706856635672986e-05, "loss": 0.0009, "num_input_tokens_seen": 227695296, "step": 105560 }, { "epoch": 17.221044045676997, "grad_norm": 0.009302477352321148, "learning_rate": 5.767366451633188e-05, "loss": 0.0017, "num_input_tokens_seen": 227705344, "step": 105565 }, { "epoch": 17.221859706362153, "grad_norm": 0.005426329560577869, "learning_rate": 5.764048136129507e-05, "loss": 0.0011, "num_input_tokens_seen": 227716672, "step": 105570 }, { "epoch": 17.22267536704731, "grad_norm": 0.0008895626524463296, "learning_rate": 5.760730717123508e-05, "loss": 0.0033, "num_input_tokens_seen": 227728416, "step": 105575 }, { "epoch": 17.223491027732464, "grad_norm": 0.002320733852684498, "learning_rate": 5.757414194682426e-05, "loss": 0.0103, "num_input_tokens_seen": 227739744, "step": 105580 }, { "epoch": 17.224306688417617, "grad_norm": 0.006926517467945814, "learning_rate": 5.754098568873456e-05, "loss": 0.0015, "num_input_tokens_seen": 227749344, "step": 105585 }, { "epoch": 17.225122349102772, "grad_norm": 0.0013561609666794538, "learning_rate": 5.7507838397638346e-05, "loss": 0.002, "num_input_tokens_seen": 227760384, "step": 105590 }, { "epoch": 17.225938009787928, "grad_norm": 0.047089818865060806, "learning_rate": 5.7474700074206856e-05, "loss": 0.0051, "num_input_tokens_seen": 227771712, "step": 105595 }, { "epoch": 17.226753670473084, "grad_norm": 0.08136722445487976, "learning_rate": 5.7441570719112216e-05, "loss": 0.0762, "num_input_tokens_seen": 227781856, "step": 105600 }, { "epoch": 17.22756933115824, "grad_norm": 0.003687071381136775, "learning_rate": 5.740845033302533e-05, "loss": 0.001, "num_input_tokens_seen": 227792992, "step": 105605 }, { "epoch": 17.22838499184339, "grad_norm": 0.019604817032814026, "learning_rate": 5.737533891661789e-05, "loss": 0.0048, "num_input_tokens_seen": 227804224, "step": 105610 }, { "epoch": 17.229200652528547, "grad_norm": 0.0010500960052013397, "learning_rate": 5.734223647056053e-05, "loss": 0.0033, "num_input_tokens_seen": 227815712, "step": 105615 }, { "epoch": 17.230016313213703, "grad_norm": 1.2889610528945923, "learning_rate": 5.7309142995524475e-05, "loss": 0.1174, "num_input_tokens_seen": 227825344, "step": 105620 }, { "epoch": 17.23083197389886, "grad_norm": 0.2292858064174652, "learning_rate": 5.7276058492179984e-05, "loss": 0.0067, "num_input_tokens_seen": 227836704, "step": 105625 }, { "epoch": 17.231647634584014, "grad_norm": 0.0010410818504169583, "learning_rate": 5.724298296119796e-05, "loss": 0.0014, "num_input_tokens_seen": 227847424, "step": 105630 }, { "epoch": 17.232463295269167, "grad_norm": 0.0006612360011786222, "learning_rate": 5.7209916403248574e-05, "loss": 0.0137, "num_input_tokens_seen": 227858880, "step": 105635 }, { "epoch": 17.233278955954322, "grad_norm": 0.0024365591816604137, "learning_rate": 5.717685881900192e-05, "loss": 0.0021, "num_input_tokens_seen": 227870176, "step": 105640 }, { "epoch": 17.234094616639478, "grad_norm": 0.021003657951951027, "learning_rate": 5.714381020912801e-05, "loss": 0.0057, "num_input_tokens_seen": 227880032, "step": 105645 }, { "epoch": 17.234910277324634, "grad_norm": 0.015517549589276314, "learning_rate": 5.711077057429659e-05, "loss": 0.1271, "num_input_tokens_seen": 227891072, "step": 105650 }, { "epoch": 17.23572593800979, "grad_norm": 0.017476201057434082, "learning_rate": 5.7077739915177226e-05, "loss": 0.0071, "num_input_tokens_seen": 227901600, "step": 105655 }, { "epoch": 17.23654159869494, "grad_norm": 0.03993105888366699, "learning_rate": 5.704471823243934e-05, "loss": 0.0015, "num_input_tokens_seen": 227911712, "step": 105660 }, { "epoch": 17.237357259380097, "grad_norm": 0.17961212992668152, "learning_rate": 5.701170552675217e-05, "loss": 0.0078, "num_input_tokens_seen": 227923264, "step": 105665 }, { "epoch": 17.238172920065253, "grad_norm": 0.013602195307612419, "learning_rate": 5.6978701798784785e-05, "loss": 0.0023, "num_input_tokens_seen": 227933504, "step": 105670 }, { "epoch": 17.23898858075041, "grad_norm": 0.04965252801775932, "learning_rate": 5.6945707049205985e-05, "loss": 0.0104, "num_input_tokens_seen": 227943648, "step": 105675 }, { "epoch": 17.239804241435564, "grad_norm": 0.21386080980300903, "learning_rate": 5.691272127868452e-05, "loss": 0.0033, "num_input_tokens_seen": 227953600, "step": 105680 }, { "epoch": 17.240619902120716, "grad_norm": 0.003448060480877757, "learning_rate": 5.6879744487888854e-05, "loss": 0.0066, "num_input_tokens_seen": 227964832, "step": 105685 }, { "epoch": 17.241435562805872, "grad_norm": 0.060092322528362274, "learning_rate": 5.684677667748717e-05, "loss": 0.1289, "num_input_tokens_seen": 227975712, "step": 105690 }, { "epoch": 17.242251223491028, "grad_norm": 0.0034384140744805336, "learning_rate": 5.681381784814799e-05, "loss": 0.0178, "num_input_tokens_seen": 227987520, "step": 105695 }, { "epoch": 17.243066884176184, "grad_norm": 0.05299306660890579, "learning_rate": 5.678086800053878e-05, "loss": 0.0079, "num_input_tokens_seen": 227997728, "step": 105700 }, { "epoch": 17.24388254486134, "grad_norm": 0.16944345831871033, "learning_rate": 5.674792713532772e-05, "loss": 0.0087, "num_input_tokens_seen": 228007968, "step": 105705 }, { "epoch": 17.24469820554649, "grad_norm": 0.002090180991217494, "learning_rate": 5.671499525318208e-05, "loss": 0.0027, "num_input_tokens_seen": 228019936, "step": 105710 }, { "epoch": 17.245513866231647, "grad_norm": 0.014519709162414074, "learning_rate": 5.668207235476957e-05, "loss": 0.0016, "num_input_tokens_seen": 228030912, "step": 105715 }, { "epoch": 17.246329526916803, "grad_norm": 0.04587464779615402, "learning_rate": 5.664915844075702e-05, "loss": 0.0029, "num_input_tokens_seen": 228041984, "step": 105720 }, { "epoch": 17.24714518760196, "grad_norm": 0.009980043396353722, "learning_rate": 5.6616253511811934e-05, "loss": 0.004, "num_input_tokens_seen": 228052000, "step": 105725 }, { "epoch": 17.247960848287114, "grad_norm": 0.0008637777646072209, "learning_rate": 5.6583357568600776e-05, "loss": 0.1139, "num_input_tokens_seen": 228063328, "step": 105730 }, { "epoch": 17.248776508972266, "grad_norm": 0.0023552237544208765, "learning_rate": 5.6550470611790584e-05, "loss": 0.0269, "num_input_tokens_seen": 228074848, "step": 105735 }, { "epoch": 17.249592169657422, "grad_norm": 0.05823243036866188, "learning_rate": 5.6517592642047424e-05, "loss": 0.005, "num_input_tokens_seen": 228084064, "step": 105740 }, { "epoch": 17.250407830342578, "grad_norm": 0.028320234268903732, "learning_rate": 5.648472366003804e-05, "loss": 0.0061, "num_input_tokens_seen": 228094304, "step": 105745 }, { "epoch": 17.251223491027734, "grad_norm": 0.542858362197876, "learning_rate": 5.6451863666428236e-05, "loss": 0.0113, "num_input_tokens_seen": 228105600, "step": 105750 }, { "epoch": 17.252039151712886, "grad_norm": 0.013722716830670834, "learning_rate": 5.6419012661884206e-05, "loss": 0.0034, "num_input_tokens_seen": 228116352, "step": 105755 }, { "epoch": 17.25285481239804, "grad_norm": 0.004122633021324873, "learning_rate": 5.6386170647071464e-05, "loss": 0.0009, "num_input_tokens_seen": 228127616, "step": 105760 }, { "epoch": 17.253670473083197, "grad_norm": 0.10480719059705734, "learning_rate": 5.6353337622655935e-05, "loss": 0.0048, "num_input_tokens_seen": 228137920, "step": 105765 }, { "epoch": 17.254486133768353, "grad_norm": 0.009474368765950203, "learning_rate": 5.632051358930263e-05, "loss": 0.0008, "num_input_tokens_seen": 228148832, "step": 105770 }, { "epoch": 17.25530179445351, "grad_norm": 0.2846315801143646, "learning_rate": 5.628769854767707e-05, "loss": 0.0151, "num_input_tokens_seen": 228159296, "step": 105775 }, { "epoch": 17.25611745513866, "grad_norm": 0.007287974469363689, "learning_rate": 5.6254892498444175e-05, "loss": 0.006, "num_input_tokens_seen": 228171584, "step": 105780 }, { "epoch": 17.256933115823816, "grad_norm": 0.0024010909255594015, "learning_rate": 5.6222095442268805e-05, "loss": 0.0015, "num_input_tokens_seen": 228182176, "step": 105785 }, { "epoch": 17.257748776508972, "grad_norm": 0.023974020034074783, "learning_rate": 5.6189307379815645e-05, "loss": 0.0019, "num_input_tokens_seen": 228192928, "step": 105790 }, { "epoch": 17.258564437194128, "grad_norm": 0.07873609662055969, "learning_rate": 5.615652831174917e-05, "loss": 0.0214, "num_input_tokens_seen": 228204608, "step": 105795 }, { "epoch": 17.259380097879284, "grad_norm": 0.034890975803136826, "learning_rate": 5.612375823873373e-05, "loss": 0.0017, "num_input_tokens_seen": 228216096, "step": 105800 }, { "epoch": 17.260195758564436, "grad_norm": 0.00665041571483016, "learning_rate": 5.60909971614334e-05, "loss": 0.0033, "num_input_tokens_seen": 228226848, "step": 105805 }, { "epoch": 17.26101141924959, "grad_norm": 0.1947702169418335, "learning_rate": 5.605824508051216e-05, "loss": 0.0062, "num_input_tokens_seen": 228238080, "step": 105810 }, { "epoch": 17.261827079934747, "grad_norm": 0.007277274038642645, "learning_rate": 5.602550199663381e-05, "loss": 0.023, "num_input_tokens_seen": 228248544, "step": 105815 }, { "epoch": 17.262642740619903, "grad_norm": 0.0820997878909111, "learning_rate": 5.599276791046182e-05, "loss": 0.005, "num_input_tokens_seen": 228259104, "step": 105820 }, { "epoch": 17.26345840130506, "grad_norm": 0.03490905463695526, "learning_rate": 5.5960042822659596e-05, "loss": 0.0048, "num_input_tokens_seen": 228270048, "step": 105825 }, { "epoch": 17.26427406199021, "grad_norm": 0.0021072705276310444, "learning_rate": 5.592732673389056e-05, "loss": 0.0018, "num_input_tokens_seen": 228279840, "step": 105830 }, { "epoch": 17.265089722675366, "grad_norm": 0.006472090259194374, "learning_rate": 5.5894619644817455e-05, "loss": 0.004, "num_input_tokens_seen": 228290336, "step": 105835 }, { "epoch": 17.265905383360522, "grad_norm": 0.012815264984965324, "learning_rate": 5.586192155610342e-05, "loss": 0.0043, "num_input_tokens_seen": 228300864, "step": 105840 }, { "epoch": 17.266721044045678, "grad_norm": 0.002005665097385645, "learning_rate": 5.582923246841082e-05, "loss": 0.0116, "num_input_tokens_seen": 228311008, "step": 105845 }, { "epoch": 17.267536704730833, "grad_norm": 0.026167651638388634, "learning_rate": 5.5796552382402446e-05, "loss": 0.0014, "num_input_tokens_seen": 228321952, "step": 105850 }, { "epoch": 17.268352365415986, "grad_norm": 0.001797463628463447, "learning_rate": 5.576388129874027e-05, "loss": 0.0924, "num_input_tokens_seen": 228332000, "step": 105855 }, { "epoch": 17.26916802610114, "grad_norm": 0.005714161787182093, "learning_rate": 5.5731219218086824e-05, "loss": 0.0089, "num_input_tokens_seen": 228342752, "step": 105860 }, { "epoch": 17.269983686786297, "grad_norm": 0.003983082715421915, "learning_rate": 5.569856614110358e-05, "loss": 0.0081, "num_input_tokens_seen": 228352384, "step": 105865 }, { "epoch": 17.270799347471453, "grad_norm": 0.0019210937898606062, "learning_rate": 5.566592206845272e-05, "loss": 0.0024, "num_input_tokens_seen": 228364320, "step": 105870 }, { "epoch": 17.27161500815661, "grad_norm": 0.0031292270869016647, "learning_rate": 5.563328700079545e-05, "loss": 0.0221, "num_input_tokens_seen": 228375008, "step": 105875 }, { "epoch": 17.27243066884176, "grad_norm": 0.010120287537574768, "learning_rate": 5.560066093879351e-05, "loss": 0.003, "num_input_tokens_seen": 228385024, "step": 105880 }, { "epoch": 17.273246329526916, "grad_norm": 0.0011655694106593728, "learning_rate": 5.556804388310777e-05, "loss": 0.001, "num_input_tokens_seen": 228395776, "step": 105885 }, { "epoch": 17.274061990212072, "grad_norm": 0.009180642664432526, "learning_rate": 5.5535435834399626e-05, "loss": 0.0218, "num_input_tokens_seen": 228406336, "step": 105890 }, { "epoch": 17.274877650897228, "grad_norm": 0.002919831546023488, "learning_rate": 5.550283679332951e-05, "loss": 0.0065, "num_input_tokens_seen": 228417056, "step": 105895 }, { "epoch": 17.275693311582383, "grad_norm": 0.08930733799934387, "learning_rate": 5.5470246760558455e-05, "loss": 0.1007, "num_input_tokens_seen": 228428480, "step": 105900 }, { "epoch": 17.276508972267536, "grad_norm": 0.05358482524752617, "learning_rate": 5.543766573674663e-05, "loss": 0.016, "num_input_tokens_seen": 228438752, "step": 105905 }, { "epoch": 17.27732463295269, "grad_norm": 0.13469183444976807, "learning_rate": 5.5405093722554534e-05, "loss": 0.0034, "num_input_tokens_seen": 228449216, "step": 105910 }, { "epoch": 17.278140293637847, "grad_norm": 0.6311333775520325, "learning_rate": 5.5372530718642235e-05, "loss": 0.0308, "num_input_tokens_seen": 228460832, "step": 105915 }, { "epoch": 17.278955954323003, "grad_norm": 0.0051232511177659035, "learning_rate": 5.533997672566965e-05, "loss": 0.0045, "num_input_tokens_seen": 228473504, "step": 105920 }, { "epoch": 17.27977161500816, "grad_norm": 0.20474274456501007, "learning_rate": 5.5307431744296534e-05, "loss": 0.0163, "num_input_tokens_seen": 228484192, "step": 105925 }, { "epoch": 17.28058727569331, "grad_norm": 0.0005584656610153615, "learning_rate": 5.5274895775182464e-05, "loss": 0.0058, "num_input_tokens_seen": 228494624, "step": 105930 }, { "epoch": 17.281402936378466, "grad_norm": 0.0011485079303383827, "learning_rate": 5.524236881898681e-05, "loss": 0.0013, "num_input_tokens_seen": 228505728, "step": 105935 }, { "epoch": 17.282218597063622, "grad_norm": 0.07669688016176224, "learning_rate": 5.5209850876368705e-05, "loss": 0.0062, "num_input_tokens_seen": 228516608, "step": 105940 }, { "epoch": 17.283034257748778, "grad_norm": 0.01749027706682682, "learning_rate": 5.517734194798729e-05, "loss": 0.0023, "num_input_tokens_seen": 228527104, "step": 105945 }, { "epoch": 17.28384991843393, "grad_norm": 0.012399944476783276, "learning_rate": 5.514484203450132e-05, "loss": 0.002, "num_input_tokens_seen": 228537216, "step": 105950 }, { "epoch": 17.284665579119086, "grad_norm": 0.004999093245714903, "learning_rate": 5.511235113656943e-05, "loss": 0.0052, "num_input_tokens_seen": 228547680, "step": 105955 }, { "epoch": 17.28548123980424, "grad_norm": 1.6486831903457642, "learning_rate": 5.50798692548502e-05, "loss": 0.0416, "num_input_tokens_seen": 228557792, "step": 105960 }, { "epoch": 17.286296900489397, "grad_norm": 0.09911487996578217, "learning_rate": 5.504739639000178e-05, "loss": 0.0026, "num_input_tokens_seen": 228568096, "step": 105965 }, { "epoch": 17.287112561174553, "grad_norm": 0.26965439319610596, "learning_rate": 5.501493254268225e-05, "loss": 0.0046, "num_input_tokens_seen": 228578112, "step": 105970 }, { "epoch": 17.287928221859705, "grad_norm": 0.0054838634096086025, "learning_rate": 5.4982477713549806e-05, "loss": 0.0008, "num_input_tokens_seen": 228588992, "step": 105975 }, { "epoch": 17.28874388254486, "grad_norm": 0.020531220361590385, "learning_rate": 5.495003190326181e-05, "loss": 0.0015, "num_input_tokens_seen": 228599808, "step": 105980 }, { "epoch": 17.289559543230016, "grad_norm": 0.008615223690867424, "learning_rate": 5.491759511247618e-05, "loss": 0.0038, "num_input_tokens_seen": 228610848, "step": 105985 }, { "epoch": 17.290375203915172, "grad_norm": 0.0009625949314795434, "learning_rate": 5.488516734184995e-05, "loss": 0.0046, "num_input_tokens_seen": 228621504, "step": 105990 }, { "epoch": 17.291190864600328, "grad_norm": 0.015420560725033283, "learning_rate": 5.485274859204065e-05, "loss": 0.0016, "num_input_tokens_seen": 228633120, "step": 105995 }, { "epoch": 17.29200652528548, "grad_norm": 0.0031076062005013227, "learning_rate": 5.482033886370491e-05, "loss": 0.0026, "num_input_tokens_seen": 228644000, "step": 106000 }, { "epoch": 17.292822185970635, "grad_norm": 0.00955930259078741, "learning_rate": 5.478793815749994e-05, "loss": 0.0028, "num_input_tokens_seen": 228655936, "step": 106005 }, { "epoch": 17.29363784665579, "grad_norm": 0.017561521381139755, "learning_rate": 5.4755546474082044e-05, "loss": 0.0037, "num_input_tokens_seen": 228667840, "step": 106010 }, { "epoch": 17.294453507340947, "grad_norm": 0.003938904497772455, "learning_rate": 5.472316381410786e-05, "loss": 0.007, "num_input_tokens_seen": 228679904, "step": 106015 }, { "epoch": 17.295269168026103, "grad_norm": 0.0020593940280377865, "learning_rate": 5.46907901782337e-05, "loss": 0.0062, "num_input_tokens_seen": 228690400, "step": 106020 }, { "epoch": 17.296084828711255, "grad_norm": 0.0014919234672561288, "learning_rate": 5.4658425567115535e-05, "loss": 0.0088, "num_input_tokens_seen": 228701376, "step": 106025 }, { "epoch": 17.29690048939641, "grad_norm": 0.0035499483346939087, "learning_rate": 5.4626069981409395e-05, "loss": 0.0011, "num_input_tokens_seen": 228711008, "step": 106030 }, { "epoch": 17.297716150081566, "grad_norm": 0.06085728481411934, "learning_rate": 5.459372342177088e-05, "loss": 0.0087, "num_input_tokens_seen": 228722976, "step": 106035 }, { "epoch": 17.298531810766722, "grad_norm": 0.03571614995598793, "learning_rate": 5.456138588885562e-05, "loss": 0.0017, "num_input_tokens_seen": 228732832, "step": 106040 }, { "epoch": 17.299347471451878, "grad_norm": 0.0007503023953177035, "learning_rate": 5.452905738331898e-05, "loss": 0.0013, "num_input_tokens_seen": 228743520, "step": 106045 }, { "epoch": 17.30016313213703, "grad_norm": 0.009129318408668041, "learning_rate": 5.449673790581611e-05, "loss": 0.0027, "num_input_tokens_seen": 228753952, "step": 106050 }, { "epoch": 17.300978792822185, "grad_norm": 0.002559749176725745, "learning_rate": 5.446442745700198e-05, "loss": 0.0339, "num_input_tokens_seen": 228766368, "step": 106055 }, { "epoch": 17.30179445350734, "grad_norm": 0.00529769342392683, "learning_rate": 5.443212603753145e-05, "loss": 0.0017, "num_input_tokens_seen": 228777216, "step": 106060 }, { "epoch": 17.302610114192497, "grad_norm": 0.0008816124172881246, "learning_rate": 5.439983364805912e-05, "loss": 0.0008, "num_input_tokens_seen": 228789216, "step": 106065 }, { "epoch": 17.303425774877653, "grad_norm": 0.00813740212470293, "learning_rate": 5.436755028923945e-05, "loss": 0.0278, "num_input_tokens_seen": 228800736, "step": 106070 }, { "epoch": 17.304241435562805, "grad_norm": 0.03043738752603531, "learning_rate": 5.433527596172666e-05, "loss": 0.0013, "num_input_tokens_seen": 228811200, "step": 106075 }, { "epoch": 17.30505709624796, "grad_norm": 0.003167427610605955, "learning_rate": 5.430301066617493e-05, "loss": 0.0023, "num_input_tokens_seen": 228822304, "step": 106080 }, { "epoch": 17.305872756933116, "grad_norm": 0.00413166917860508, "learning_rate": 5.4270754403238034e-05, "loss": 0.0184, "num_input_tokens_seen": 228833184, "step": 106085 }, { "epoch": 17.306688417618272, "grad_norm": 0.02545577473938465, "learning_rate": 5.4238507173569816e-05, "loss": 0.0235, "num_input_tokens_seen": 228843008, "step": 106090 }, { "epoch": 17.307504078303428, "grad_norm": 0.004918523132801056, "learning_rate": 5.420626897782366e-05, "loss": 0.007, "num_input_tokens_seen": 228854624, "step": 106095 }, { "epoch": 17.30831973898858, "grad_norm": 0.01486718188971281, "learning_rate": 5.417403981665309e-05, "loss": 0.0046, "num_input_tokens_seen": 228864288, "step": 106100 }, { "epoch": 17.309135399673735, "grad_norm": 0.015068331733345985, "learning_rate": 5.414181969071108e-05, "loss": 0.0024, "num_input_tokens_seen": 228874048, "step": 106105 }, { "epoch": 17.30995106035889, "grad_norm": 0.07643243670463562, "learning_rate": 5.410960860065073e-05, "loss": 0.0066, "num_input_tokens_seen": 228886944, "step": 106110 }, { "epoch": 17.310766721044047, "grad_norm": 0.02430008165538311, "learning_rate": 5.407740654712473e-05, "loss": 0.0233, "num_input_tokens_seen": 228899392, "step": 106115 }, { "epoch": 17.3115823817292, "grad_norm": 0.005446766968816519, "learning_rate": 5.4045213530785896e-05, "loss": 0.0013, "num_input_tokens_seen": 228910944, "step": 106120 }, { "epoch": 17.312398042414355, "grad_norm": 0.06368857622146606, "learning_rate": 5.401302955228654e-05, "loss": 0.0076, "num_input_tokens_seen": 228922368, "step": 106125 }, { "epoch": 17.31321370309951, "grad_norm": 0.0059592826291918755, "learning_rate": 5.398085461227886e-05, "loss": 0.0046, "num_input_tokens_seen": 228934464, "step": 106130 }, { "epoch": 17.314029363784666, "grad_norm": 0.008231204934418201, "learning_rate": 5.394868871141506e-05, "loss": 0.0848, "num_input_tokens_seen": 228943712, "step": 106135 }, { "epoch": 17.31484502446982, "grad_norm": 0.002472488908097148, "learning_rate": 5.3916531850346895e-05, "loss": 0.003, "num_input_tokens_seen": 228952832, "step": 106140 }, { "epoch": 17.315660685154974, "grad_norm": 0.004790904000401497, "learning_rate": 5.388438402972612e-05, "loss": 0.0038, "num_input_tokens_seen": 228962368, "step": 106145 }, { "epoch": 17.31647634584013, "grad_norm": 0.04169195517897606, "learning_rate": 5.385224525020421e-05, "loss": 0.0157, "num_input_tokens_seen": 228974336, "step": 106150 }, { "epoch": 17.317292006525285, "grad_norm": 0.005036138463765383, "learning_rate": 5.382011551243254e-05, "loss": 0.0104, "num_input_tokens_seen": 228985440, "step": 106155 }, { "epoch": 17.31810766721044, "grad_norm": 0.0012995150173082948, "learning_rate": 5.3787994817062256e-05, "loss": 0.0027, "num_input_tokens_seen": 228995840, "step": 106160 }, { "epoch": 17.318923327895597, "grad_norm": 0.15581193566322327, "learning_rate": 5.3755883164744335e-05, "loss": 0.0127, "num_input_tokens_seen": 229006432, "step": 106165 }, { "epoch": 17.31973898858075, "grad_norm": 0.00904909148812294, "learning_rate": 5.372378055612953e-05, "loss": 0.0046, "num_input_tokens_seen": 229017792, "step": 106170 }, { "epoch": 17.320554649265905, "grad_norm": 0.002591232070699334, "learning_rate": 5.369168699186844e-05, "loss": 0.0008, "num_input_tokens_seen": 229027040, "step": 106175 }, { "epoch": 17.32137030995106, "grad_norm": 0.010148869827389717, "learning_rate": 5.365960247261148e-05, "loss": 0.0017, "num_input_tokens_seen": 229038240, "step": 106180 }, { "epoch": 17.322185970636216, "grad_norm": 0.003630705177783966, "learning_rate": 5.3627526999008966e-05, "loss": 0.0006, "num_input_tokens_seen": 229047872, "step": 106185 }, { "epoch": 17.32300163132137, "grad_norm": 0.0005996466497890651, "learning_rate": 5.359546057171083e-05, "loss": 0.0062, "num_input_tokens_seen": 229058336, "step": 106190 }, { "epoch": 17.323817292006524, "grad_norm": 0.007094386965036392, "learning_rate": 5.356340319136699e-05, "loss": 0.002, "num_input_tokens_seen": 229069184, "step": 106195 }, { "epoch": 17.32463295269168, "grad_norm": 0.012251464650034904, "learning_rate": 5.353135485862715e-05, "loss": 0.0007, "num_input_tokens_seen": 229080416, "step": 106200 }, { "epoch": 17.325448613376835, "grad_norm": 0.0034311923664063215, "learning_rate": 5.3499315574140784e-05, "loss": 0.0015, "num_input_tokens_seen": 229091424, "step": 106205 }, { "epoch": 17.32626427406199, "grad_norm": 0.0066384305246174335, "learning_rate": 5.3467285338557213e-05, "loss": 0.002, "num_input_tokens_seen": 229104064, "step": 106210 }, { "epoch": 17.327079934747147, "grad_norm": 0.029814772307872772, "learning_rate": 5.343526415252553e-05, "loss": 0.0011, "num_input_tokens_seen": 229115680, "step": 106215 }, { "epoch": 17.3278955954323, "grad_norm": 0.009663302451372147, "learning_rate": 5.340325201669477e-05, "loss": 0.0061, "num_input_tokens_seen": 229126848, "step": 106220 }, { "epoch": 17.328711256117455, "grad_norm": 0.008774216286838055, "learning_rate": 5.337124893171358e-05, "loss": 0.0042, "num_input_tokens_seen": 229138560, "step": 106225 }, { "epoch": 17.32952691680261, "grad_norm": 0.00978968758136034, "learning_rate": 5.333925489823077e-05, "loss": 0.0046, "num_input_tokens_seen": 229149024, "step": 106230 }, { "epoch": 17.330342577487766, "grad_norm": 0.002513617742806673, "learning_rate": 5.330726991689439e-05, "loss": 0.0042, "num_input_tokens_seen": 229160416, "step": 106235 }, { "epoch": 17.33115823817292, "grad_norm": 0.06425296515226364, "learning_rate": 5.327529398835307e-05, "loss": 0.005, "num_input_tokens_seen": 229171296, "step": 106240 }, { "epoch": 17.331973898858074, "grad_norm": 0.0010995703050866723, "learning_rate": 5.324332711325447e-05, "loss": 0.0012, "num_input_tokens_seen": 229182144, "step": 106245 }, { "epoch": 17.33278955954323, "grad_norm": 0.12019895017147064, "learning_rate": 5.3211369292246735e-05, "loss": 0.0047, "num_input_tokens_seen": 229193728, "step": 106250 }, { "epoch": 17.333605220228385, "grad_norm": 0.021015681326389313, "learning_rate": 5.317942052597724e-05, "loss": 0.0472, "num_input_tokens_seen": 229205248, "step": 106255 }, { "epoch": 17.33442088091354, "grad_norm": 0.0013597443467006087, "learning_rate": 5.3147480815093684e-05, "loss": 0.0005, "num_input_tokens_seen": 229215904, "step": 106260 }, { "epoch": 17.335236541598697, "grad_norm": 0.0022907054517418146, "learning_rate": 5.311555016024328e-05, "loss": 0.0011, "num_input_tokens_seen": 229226496, "step": 106265 }, { "epoch": 17.33605220228385, "grad_norm": 0.002462542150169611, "learning_rate": 5.308362856207322e-05, "loss": 0.0011, "num_input_tokens_seen": 229237248, "step": 106270 }, { "epoch": 17.336867862969005, "grad_norm": 0.17879919707775116, "learning_rate": 5.3051716021230375e-05, "loss": 0.0053, "num_input_tokens_seen": 229248064, "step": 106275 }, { "epoch": 17.33768352365416, "grad_norm": 0.07100942730903625, "learning_rate": 5.3019812538361466e-05, "loss": 0.0027, "num_input_tokens_seen": 229259616, "step": 106280 }, { "epoch": 17.338499184339316, "grad_norm": 0.005897930823266506, "learning_rate": 5.298791811411313e-05, "loss": 0.0078, "num_input_tokens_seen": 229271200, "step": 106285 }, { "epoch": 17.339314845024468, "grad_norm": 0.0012617027387022972, "learning_rate": 5.295603274913169e-05, "loss": 0.0009, "num_input_tokens_seen": 229283616, "step": 106290 }, { "epoch": 17.340130505709624, "grad_norm": 0.0013515963219106197, "learning_rate": 5.292415644406334e-05, "loss": 0.0047, "num_input_tokens_seen": 229294848, "step": 106295 }, { "epoch": 17.34094616639478, "grad_norm": 0.015960700809955597, "learning_rate": 5.289228919955413e-05, "loss": 0.0346, "num_input_tokens_seen": 229305472, "step": 106300 }, { "epoch": 17.341761827079935, "grad_norm": 0.00039093420491553843, "learning_rate": 5.286043101624988e-05, "loss": 0.0135, "num_input_tokens_seen": 229314560, "step": 106305 }, { "epoch": 17.34257748776509, "grad_norm": 0.033756744116544724, "learning_rate": 5.2828581894796226e-05, "loss": 0.0021, "num_input_tokens_seen": 229324640, "step": 106310 }, { "epoch": 17.343393148450243, "grad_norm": 0.0035440288484096527, "learning_rate": 5.2796741835838656e-05, "loss": 0.0316, "num_input_tokens_seen": 229334624, "step": 106315 }, { "epoch": 17.3442088091354, "grad_norm": 0.003925473894923925, "learning_rate": 5.276491084002238e-05, "loss": 0.0084, "num_input_tokens_seen": 229344288, "step": 106320 }, { "epoch": 17.345024469820554, "grad_norm": 0.0060007162392139435, "learning_rate": 5.273308890799261e-05, "loss": 0.0018, "num_input_tokens_seen": 229354336, "step": 106325 }, { "epoch": 17.34584013050571, "grad_norm": 0.02019144594669342, "learning_rate": 5.270127604039404e-05, "loss": 0.0012, "num_input_tokens_seen": 229365440, "step": 106330 }, { "epoch": 17.346655791190866, "grad_norm": 0.014805261045694351, "learning_rate": 5.266947223787177e-05, "loss": 0.0111, "num_input_tokens_seen": 229376352, "step": 106335 }, { "epoch": 17.347471451876018, "grad_norm": 0.009570176713168621, "learning_rate": 5.263767750106996e-05, "loss": 0.0021, "num_input_tokens_seen": 229387392, "step": 106340 }, { "epoch": 17.348287112561174, "grad_norm": 0.41117486357688904, "learning_rate": 5.2605891830633304e-05, "loss": 0.0067, "num_input_tokens_seen": 229397824, "step": 106345 }, { "epoch": 17.34910277324633, "grad_norm": 0.632027268409729, "learning_rate": 5.257411522720562e-05, "loss": 0.0486, "num_input_tokens_seen": 229408960, "step": 106350 }, { "epoch": 17.349918433931485, "grad_norm": 0.0007944665849208832, "learning_rate": 5.2542347691431235e-05, "loss": 0.0056, "num_input_tokens_seen": 229420224, "step": 106355 }, { "epoch": 17.35073409461664, "grad_norm": 0.004876964725553989, "learning_rate": 5.251058922395368e-05, "loss": 0.0005, "num_input_tokens_seen": 229430720, "step": 106360 }, { "epoch": 17.351549755301793, "grad_norm": 0.009294813498854637, "learning_rate": 5.24788398254169e-05, "loss": 0.0027, "num_input_tokens_seen": 229441152, "step": 106365 }, { "epoch": 17.35236541598695, "grad_norm": 0.013689043931663036, "learning_rate": 5.2447099496463925e-05, "loss": 0.0411, "num_input_tokens_seen": 229451488, "step": 106370 }, { "epoch": 17.353181076672104, "grad_norm": 0.027652563527226448, "learning_rate": 5.241536823773846e-05, "loss": 0.0018, "num_input_tokens_seen": 229462656, "step": 106375 }, { "epoch": 17.35399673735726, "grad_norm": 0.005764303263276815, "learning_rate": 5.238364604988316e-05, "loss": 0.001, "num_input_tokens_seen": 229472320, "step": 106380 }, { "epoch": 17.354812398042416, "grad_norm": 0.023024912923574448, "learning_rate": 5.235193293354129e-05, "loss": 0.0014, "num_input_tokens_seen": 229482560, "step": 106385 }, { "epoch": 17.355628058727568, "grad_norm": 0.07529404759407043, "learning_rate": 5.2320228889355224e-05, "loss": 0.0035, "num_input_tokens_seen": 229493152, "step": 106390 }, { "epoch": 17.356443719412724, "grad_norm": 0.0009557876619510353, "learning_rate": 5.228853391796784e-05, "loss": 0.0211, "num_input_tokens_seen": 229504352, "step": 106395 }, { "epoch": 17.35725938009788, "grad_norm": 0.03170386701822281, "learning_rate": 5.225684802002106e-05, "loss": 0.0046, "num_input_tokens_seen": 229515616, "step": 106400 }, { "epoch": 17.358075040783035, "grad_norm": 0.0012530352687463164, "learning_rate": 5.222517119615733e-05, "loss": 0.0038, "num_input_tokens_seen": 229527136, "step": 106405 }, { "epoch": 17.35889070146819, "grad_norm": 0.003827820997685194, "learning_rate": 5.2193503447018564e-05, "loss": 0.0775, "num_input_tokens_seen": 229536160, "step": 106410 }, { "epoch": 17.359706362153343, "grad_norm": 0.017615502700209618, "learning_rate": 5.216184477324659e-05, "loss": 0.0098, "num_input_tokens_seen": 229547136, "step": 106415 }, { "epoch": 17.3605220228385, "grad_norm": 0.004534272942692041, "learning_rate": 5.2130195175482896e-05, "loss": 0.0031, "num_input_tokens_seen": 229557792, "step": 106420 }, { "epoch": 17.361337683523654, "grad_norm": 0.009054245427250862, "learning_rate": 5.209855465436897e-05, "loss": 0.0031, "num_input_tokens_seen": 229568928, "step": 106425 }, { "epoch": 17.36215334420881, "grad_norm": 1.9013805389404297, "learning_rate": 5.2066923210546015e-05, "loss": 0.0236, "num_input_tokens_seen": 229578976, "step": 106430 }, { "epoch": 17.362969004893966, "grad_norm": 0.03139745071530342, "learning_rate": 5.203530084465513e-05, "loss": 0.0022, "num_input_tokens_seen": 229590880, "step": 106435 }, { "epoch": 17.363784665579118, "grad_norm": 0.001451101852580905, "learning_rate": 5.20036875573372e-05, "loss": 0.0621, "num_input_tokens_seen": 229602368, "step": 106440 }, { "epoch": 17.364600326264274, "grad_norm": 0.015167618170380592, "learning_rate": 5.197208334923281e-05, "loss": 0.0043, "num_input_tokens_seen": 229612736, "step": 106445 }, { "epoch": 17.36541598694943, "grad_norm": 4.268157482147217, "learning_rate": 5.1940488220982516e-05, "loss": 0.1037, "num_input_tokens_seen": 229623456, "step": 106450 }, { "epoch": 17.366231647634585, "grad_norm": 0.0036006104201078415, "learning_rate": 5.1908902173226524e-05, "loss": 0.0008, "num_input_tokens_seen": 229633792, "step": 106455 }, { "epoch": 17.36704730831974, "grad_norm": 0.2377825677394867, "learning_rate": 5.1877325206605316e-05, "loss": 0.0075, "num_input_tokens_seen": 229644512, "step": 106460 }, { "epoch": 17.367862969004893, "grad_norm": 2.158886432647705, "learning_rate": 5.1845757321758394e-05, "loss": 0.0483, "num_input_tokens_seen": 229654144, "step": 106465 }, { "epoch": 17.36867862969005, "grad_norm": 0.0010756613919511437, "learning_rate": 5.181419851932589e-05, "loss": 0.0051, "num_input_tokens_seen": 229665984, "step": 106470 }, { "epoch": 17.369494290375204, "grad_norm": 1.3532488346099854, "learning_rate": 5.178264879994704e-05, "loss": 0.1931, "num_input_tokens_seen": 229675744, "step": 106475 }, { "epoch": 17.37030995106036, "grad_norm": 0.007468590512871742, "learning_rate": 5.17511081642616e-05, "loss": 0.0032, "num_input_tokens_seen": 229687008, "step": 106480 }, { "epoch": 17.371125611745512, "grad_norm": 0.011480139568448067, "learning_rate": 5.171957661290838e-05, "loss": 0.0033, "num_input_tokens_seen": 229697248, "step": 106485 }, { "epoch": 17.371941272430668, "grad_norm": 0.24517092108726501, "learning_rate": 5.1688054146526886e-05, "loss": 0.0554, "num_input_tokens_seen": 229708416, "step": 106490 }, { "epoch": 17.372756933115824, "grad_norm": 0.005046484060585499, "learning_rate": 5.165654076575543e-05, "loss": 0.0012, "num_input_tokens_seen": 229719360, "step": 106495 }, { "epoch": 17.37357259380098, "grad_norm": 0.0076622189953923225, "learning_rate": 5.162503647123318e-05, "loss": 0.004, "num_input_tokens_seen": 229729024, "step": 106500 }, { "epoch": 17.374388254486135, "grad_norm": 0.8807948231697083, "learning_rate": 5.159354126359816e-05, "loss": 0.1016, "num_input_tokens_seen": 229739360, "step": 106505 }, { "epoch": 17.375203915171287, "grad_norm": 0.7221991419792175, "learning_rate": 5.156205514348905e-05, "loss": 0.0142, "num_input_tokens_seen": 229749120, "step": 106510 }, { "epoch": 17.376019575856443, "grad_norm": 0.008473489433526993, "learning_rate": 5.1530578111543605e-05, "loss": 0.0022, "num_input_tokens_seen": 229759744, "step": 106515 }, { "epoch": 17.3768352365416, "grad_norm": 0.3015041947364807, "learning_rate": 5.149911016840009e-05, "loss": 0.0109, "num_input_tokens_seen": 229771552, "step": 106520 }, { "epoch": 17.377650897226754, "grad_norm": 0.0025806480553001165, "learning_rate": 5.146765131469594e-05, "loss": 0.0042, "num_input_tokens_seen": 229782144, "step": 106525 }, { "epoch": 17.37846655791191, "grad_norm": 0.003956929314881563, "learning_rate": 5.1436201551068987e-05, "loss": 0.0135, "num_input_tokens_seen": 229792672, "step": 106530 }, { "epoch": 17.379282218597062, "grad_norm": 0.0256084855645895, "learning_rate": 5.140476087815621e-05, "loss": 0.0106, "num_input_tokens_seen": 229802496, "step": 106535 }, { "epoch": 17.380097879282218, "grad_norm": 0.0009416280663572252, "learning_rate": 5.137332929659522e-05, "loss": 0.0365, "num_input_tokens_seen": 229813536, "step": 106540 }, { "epoch": 17.380913539967374, "grad_norm": 1.3515641689300537, "learning_rate": 5.134190680702278e-05, "loss": 0.0825, "num_input_tokens_seen": 229825536, "step": 106545 }, { "epoch": 17.38172920065253, "grad_norm": 0.037548672407865524, "learning_rate": 5.1310493410075765e-05, "loss": 0.0028, "num_input_tokens_seen": 229836160, "step": 106550 }, { "epoch": 17.382544861337685, "grad_norm": 0.04716819152235985, "learning_rate": 5.127908910639084e-05, "loss": 0.0039, "num_input_tokens_seen": 229848128, "step": 106555 }, { "epoch": 17.383360522022837, "grad_norm": 0.059123218059539795, "learning_rate": 5.1247693896604386e-05, "loss": 0.0031, "num_input_tokens_seen": 229858912, "step": 106560 }, { "epoch": 17.384176182707993, "grad_norm": 0.0005176172126084566, "learning_rate": 5.1216307781352724e-05, "loss": 0.0014, "num_input_tokens_seen": 229869856, "step": 106565 }, { "epoch": 17.38499184339315, "grad_norm": 0.0006276738713495433, "learning_rate": 5.11849307612719e-05, "loss": 0.0022, "num_input_tokens_seen": 229880672, "step": 106570 }, { "epoch": 17.385807504078304, "grad_norm": 0.009963922202587128, "learning_rate": 5.115356283699779e-05, "loss": 0.0015, "num_input_tokens_seen": 229891264, "step": 106575 }, { "epoch": 17.38662316476346, "grad_norm": 0.00705756526440382, "learning_rate": 5.112220400916617e-05, "loss": 0.0023, "num_input_tokens_seen": 229903360, "step": 106580 }, { "epoch": 17.387438825448612, "grad_norm": 0.022189928218722343, "learning_rate": 5.109085427841248e-05, "loss": 0.0012, "num_input_tokens_seen": 229911872, "step": 106585 }, { "epoch": 17.388254486133768, "grad_norm": 0.0018340954557061195, "learning_rate": 5.1059513645372146e-05, "loss": 0.0013, "num_input_tokens_seen": 229922624, "step": 106590 }, { "epoch": 17.389070146818923, "grad_norm": 0.005596390459686518, "learning_rate": 5.1028182110680275e-05, "loss": 0.0015, "num_input_tokens_seen": 229933280, "step": 106595 }, { "epoch": 17.38988580750408, "grad_norm": 0.0026430157013237476, "learning_rate": 5.0996859674971805e-05, "loss": 0.0019, "num_input_tokens_seen": 229943392, "step": 106600 }, { "epoch": 17.390701468189235, "grad_norm": 0.07464700192213058, "learning_rate": 5.096554633888173e-05, "loss": 0.0035, "num_input_tokens_seen": 229954112, "step": 106605 }, { "epoch": 17.391517128874387, "grad_norm": 0.04634150490164757, "learning_rate": 5.093424210304426e-05, "loss": 0.0011, "num_input_tokens_seen": 229964608, "step": 106610 }, { "epoch": 17.392332789559543, "grad_norm": 0.006397622637450695, "learning_rate": 5.090294696809428e-05, "loss": 0.0013, "num_input_tokens_seen": 229976736, "step": 106615 }, { "epoch": 17.3931484502447, "grad_norm": 0.4231289029121399, "learning_rate": 5.087166093466566e-05, "loss": 0.0243, "num_input_tokens_seen": 229988032, "step": 106620 }, { "epoch": 17.393964110929854, "grad_norm": 0.02839449793100357, "learning_rate": 5.0840384003392745e-05, "loss": 0.0015, "num_input_tokens_seen": 229997440, "step": 106625 }, { "epoch": 17.39477977161501, "grad_norm": 0.015790916979312897, "learning_rate": 5.080911617490902e-05, "loss": 0.0118, "num_input_tokens_seen": 230007200, "step": 106630 }, { "epoch": 17.395595432300162, "grad_norm": 0.007269262336194515, "learning_rate": 5.0777857449848644e-05, "loss": 0.0041, "num_input_tokens_seen": 230017536, "step": 106635 }, { "epoch": 17.396411092985318, "grad_norm": 0.005332783795893192, "learning_rate": 5.074660782884461e-05, "loss": 0.002, "num_input_tokens_seen": 230028224, "step": 106640 }, { "epoch": 17.397226753670473, "grad_norm": 0.0687684565782547, "learning_rate": 5.071536731253074e-05, "loss": 0.0024, "num_input_tokens_seen": 230038944, "step": 106645 }, { "epoch": 17.39804241435563, "grad_norm": 0.0068834088742733, "learning_rate": 5.0684135901539694e-05, "loss": 0.0021, "num_input_tokens_seen": 230049824, "step": 106650 }, { "epoch": 17.39885807504078, "grad_norm": 1.4761101007461548, "learning_rate": 5.0652913596504704e-05, "loss": 0.0916, "num_input_tokens_seen": 230059488, "step": 106655 }, { "epoch": 17.399673735725937, "grad_norm": 0.09000371396541595, "learning_rate": 5.062170039805847e-05, "loss": 0.1182, "num_input_tokens_seen": 230070464, "step": 106660 }, { "epoch": 17.400489396411093, "grad_norm": 0.029932547360658646, "learning_rate": 5.05904963068336e-05, "loss": 0.0063, "num_input_tokens_seen": 230081536, "step": 106665 }, { "epoch": 17.40130505709625, "grad_norm": 0.0021879500709474087, "learning_rate": 5.055930132346237e-05, "loss": 0.0247, "num_input_tokens_seen": 230090720, "step": 106670 }, { "epoch": 17.402120717781404, "grad_norm": 0.012944883666932583, "learning_rate": 5.0528115448577105e-05, "loss": 0.006, "num_input_tokens_seen": 230102688, "step": 106675 }, { "epoch": 17.402936378466556, "grad_norm": 0.009593087248504162, "learning_rate": 5.0496938682809744e-05, "loss": 0.0043, "num_input_tokens_seen": 230114112, "step": 106680 }, { "epoch": 17.403752039151712, "grad_norm": 0.04760242998600006, "learning_rate": 5.0465771026792175e-05, "loss": 0.0019, "num_input_tokens_seen": 230124096, "step": 106685 }, { "epoch": 17.404567699836868, "grad_norm": 0.00063190987566486, "learning_rate": 5.043461248115605e-05, "loss": 0.0062, "num_input_tokens_seen": 230134656, "step": 106690 }, { "epoch": 17.405383360522023, "grad_norm": 0.0009684142423793674, "learning_rate": 5.040346304653276e-05, "loss": 0.0041, "num_input_tokens_seen": 230143968, "step": 106695 }, { "epoch": 17.40619902120718, "grad_norm": 0.0076236664317548275, "learning_rate": 5.037232272355369e-05, "loss": 0.0079, "num_input_tokens_seen": 230154176, "step": 106700 }, { "epoch": 17.40701468189233, "grad_norm": 0.14355719089508057, "learning_rate": 5.034119151284988e-05, "loss": 0.0069, "num_input_tokens_seen": 230164160, "step": 106705 }, { "epoch": 17.407830342577487, "grad_norm": 0.3410855233669281, "learning_rate": 5.031006941505228e-05, "loss": 0.0058, "num_input_tokens_seen": 230175168, "step": 106710 }, { "epoch": 17.408646003262643, "grad_norm": 0.0056464835070073605, "learning_rate": 5.0278956430791555e-05, "loss": 0.0024, "num_input_tokens_seen": 230186240, "step": 106715 }, { "epoch": 17.4094616639478, "grad_norm": 0.009609702043235302, "learning_rate": 5.0247852560698304e-05, "loss": 0.0111, "num_input_tokens_seen": 230197184, "step": 106720 }, { "epoch": 17.410277324632954, "grad_norm": 0.004299835301935673, "learning_rate": 5.0216757805402856e-05, "loss": 0.0407, "num_input_tokens_seen": 230208288, "step": 106725 }, { "epoch": 17.411092985318106, "grad_norm": 1.7392078638076782, "learning_rate": 5.018567216553543e-05, "loss": 0.0341, "num_input_tokens_seen": 230219264, "step": 106730 }, { "epoch": 17.411908646003262, "grad_norm": 0.05234310403466225, "learning_rate": 5.015459564172597e-05, "loss": 0.0042, "num_input_tokens_seen": 230229376, "step": 106735 }, { "epoch": 17.412724306688418, "grad_norm": 0.004588207229971886, "learning_rate": 5.0123528234604307e-05, "loss": 0.0013, "num_input_tokens_seen": 230239424, "step": 106740 }, { "epoch": 17.413539967373573, "grad_norm": 0.26369786262512207, "learning_rate": 5.009246994479999e-05, "loss": 0.0128, "num_input_tokens_seen": 230249408, "step": 106745 }, { "epoch": 17.41435562805873, "grad_norm": 0.00462182704359293, "learning_rate": 5.006142077294268e-05, "loss": 0.0722, "num_input_tokens_seen": 230260416, "step": 106750 }, { "epoch": 17.41517128874388, "grad_norm": 0.010390338487923145, "learning_rate": 5.003038071966126e-05, "loss": 0.0021, "num_input_tokens_seen": 230271072, "step": 106755 }, { "epoch": 17.415986949429037, "grad_norm": 0.0009901653975248337, "learning_rate": 4.999934978558513e-05, "loss": 0.0019, "num_input_tokens_seen": 230281952, "step": 106760 }, { "epoch": 17.416802610114193, "grad_norm": 0.006678654812276363, "learning_rate": 4.996832797134299e-05, "loss": 0.0042, "num_input_tokens_seen": 230292448, "step": 106765 }, { "epoch": 17.41761827079935, "grad_norm": 0.009243758395314217, "learning_rate": 4.9937315277563625e-05, "loss": 0.0152, "num_input_tokens_seen": 230303264, "step": 106770 }, { "epoch": 17.418433931484504, "grad_norm": 0.02125442400574684, "learning_rate": 4.990631170487553e-05, "loss": 0.054, "num_input_tokens_seen": 230315040, "step": 106775 }, { "epoch": 17.419249592169656, "grad_norm": 0.028468549251556396, "learning_rate": 4.987531725390698e-05, "loss": 0.0163, "num_input_tokens_seen": 230325888, "step": 106780 }, { "epoch": 17.420065252854812, "grad_norm": 0.014066056348383427, "learning_rate": 4.9844331925286145e-05, "loss": 0.0027, "num_input_tokens_seen": 230337760, "step": 106785 }, { "epoch": 17.420880913539968, "grad_norm": 0.006012423895299435, "learning_rate": 4.981335571964102e-05, "loss": 0.0126, "num_input_tokens_seen": 230348544, "step": 106790 }, { "epoch": 17.421696574225123, "grad_norm": 0.011282017454504967, "learning_rate": 4.978238863759932e-05, "loss": 0.0053, "num_input_tokens_seen": 230358432, "step": 106795 }, { "epoch": 17.42251223491028, "grad_norm": 0.04208167642354965, "learning_rate": 4.975143067978866e-05, "loss": 0.002, "num_input_tokens_seen": 230367616, "step": 106800 }, { "epoch": 17.42332789559543, "grad_norm": 0.054864853620529175, "learning_rate": 4.9720481846836416e-05, "loss": 0.0086, "num_input_tokens_seen": 230378176, "step": 106805 }, { "epoch": 17.424143556280587, "grad_norm": 0.0029723099432885647, "learning_rate": 4.968954213936988e-05, "loss": 0.0018, "num_input_tokens_seen": 230389056, "step": 106810 }, { "epoch": 17.424959216965743, "grad_norm": 0.5465546250343323, "learning_rate": 4.9658611558015984e-05, "loss": 0.0083, "num_input_tokens_seen": 230399616, "step": 106815 }, { "epoch": 17.4257748776509, "grad_norm": 0.003604365512728691, "learning_rate": 4.962769010340163e-05, "loss": 0.0053, "num_input_tokens_seen": 230411264, "step": 106820 }, { "epoch": 17.42659053833605, "grad_norm": 1.0798932313919067, "learning_rate": 4.959677777615351e-05, "loss": 0.0585, "num_input_tokens_seen": 230422368, "step": 106825 }, { "epoch": 17.427406199021206, "grad_norm": 0.005631966982036829, "learning_rate": 4.956587457689804e-05, "loss": 0.0033, "num_input_tokens_seen": 230433216, "step": 106830 }, { "epoch": 17.428221859706362, "grad_norm": 0.11051293462514877, "learning_rate": 4.953498050626154e-05, "loss": 0.0034, "num_input_tokens_seen": 230442848, "step": 106835 }, { "epoch": 17.429037520391518, "grad_norm": 0.0022961171343922615, "learning_rate": 4.9504095564870124e-05, "loss": 0.0008, "num_input_tokens_seen": 230453920, "step": 106840 }, { "epoch": 17.429853181076673, "grad_norm": 0.008645901456475258, "learning_rate": 4.947321975334967e-05, "loss": 0.0009, "num_input_tokens_seen": 230464896, "step": 106845 }, { "epoch": 17.430668841761825, "grad_norm": 0.0025480305776000023, "learning_rate": 4.944235307232597e-05, "loss": 0.0025, "num_input_tokens_seen": 230475328, "step": 106850 }, { "epoch": 17.43148450244698, "grad_norm": 0.005160070024430752, "learning_rate": 4.941149552242458e-05, "loss": 0.0445, "num_input_tokens_seen": 230485824, "step": 106855 }, { "epoch": 17.432300163132137, "grad_norm": 0.004534872714430094, "learning_rate": 4.9380647104270814e-05, "loss": 0.003, "num_input_tokens_seen": 230496064, "step": 106860 }, { "epoch": 17.433115823817293, "grad_norm": 0.0004967268323525786, "learning_rate": 4.93498078184898e-05, "loss": 0.0023, "num_input_tokens_seen": 230508288, "step": 106865 }, { "epoch": 17.43393148450245, "grad_norm": 0.0038472090382128954, "learning_rate": 4.9318977665706866e-05, "loss": 0.0008, "num_input_tokens_seen": 230519840, "step": 106870 }, { "epoch": 17.4347471451876, "grad_norm": 0.00324858701787889, "learning_rate": 4.928815664654635e-05, "loss": 0.0031, "num_input_tokens_seen": 230530304, "step": 106875 }, { "epoch": 17.435562805872756, "grad_norm": 0.0015411574859172106, "learning_rate": 4.9257344761633236e-05, "loss": 0.0006, "num_input_tokens_seen": 230542752, "step": 106880 }, { "epoch": 17.436378466557912, "grad_norm": 0.011189618147909641, "learning_rate": 4.9226542011591716e-05, "loss": 0.0011, "num_input_tokens_seen": 230554272, "step": 106885 }, { "epoch": 17.437194127243067, "grad_norm": 0.01155557855963707, "learning_rate": 4.919574839704627e-05, "loss": 0.0025, "num_input_tokens_seen": 230564928, "step": 106890 }, { "epoch": 17.438009787928223, "grad_norm": 0.006138679105788469, "learning_rate": 4.916496391862085e-05, "loss": 0.0013, "num_input_tokens_seen": 230574752, "step": 106895 }, { "epoch": 17.438825448613375, "grad_norm": 0.004851847887039185, "learning_rate": 4.913418857693936e-05, "loss": 0.0009, "num_input_tokens_seen": 230586272, "step": 106900 }, { "epoch": 17.43964110929853, "grad_norm": 0.03208262473344803, "learning_rate": 4.9103422372625496e-05, "loss": 0.004, "num_input_tokens_seen": 230598080, "step": 106905 }, { "epoch": 17.440456769983687, "grad_norm": 0.007536340970546007, "learning_rate": 4.907266530630278e-05, "loss": 0.0043, "num_input_tokens_seen": 230608704, "step": 106910 }, { "epoch": 17.441272430668842, "grad_norm": 0.013015173375606537, "learning_rate": 4.904191737859454e-05, "loss": 0.0007, "num_input_tokens_seen": 230619616, "step": 106915 }, { "epoch": 17.442088091353998, "grad_norm": 0.12278652936220169, "learning_rate": 4.901117859012394e-05, "loss": 0.003, "num_input_tokens_seen": 230631008, "step": 106920 }, { "epoch": 17.44290375203915, "grad_norm": 0.029165683314204216, "learning_rate": 4.898044894151393e-05, "loss": 0.0017, "num_input_tokens_seen": 230641248, "step": 106925 }, { "epoch": 17.443719412724306, "grad_norm": 0.05257963389158249, "learning_rate": 4.894972843338724e-05, "loss": 0.0022, "num_input_tokens_seen": 230652288, "step": 106930 }, { "epoch": 17.44453507340946, "grad_norm": 0.004478447139263153, "learning_rate": 4.891901706636653e-05, "loss": 0.0018, "num_input_tokens_seen": 230663200, "step": 106935 }, { "epoch": 17.445350734094617, "grad_norm": 0.0034136767499148846, "learning_rate": 4.88883148410742e-05, "loss": 0.0092, "num_input_tokens_seen": 230674336, "step": 106940 }, { "epoch": 17.446166394779773, "grad_norm": 0.027896808460354805, "learning_rate": 4.885762175813241e-05, "loss": 0.0046, "num_input_tokens_seen": 230684896, "step": 106945 }, { "epoch": 17.446982055464925, "grad_norm": 0.021405057981610298, "learning_rate": 4.882693781816327e-05, "loss": 0.0041, "num_input_tokens_seen": 230694816, "step": 106950 }, { "epoch": 17.44779771615008, "grad_norm": 0.05595509335398674, "learning_rate": 4.8796263021788524e-05, "loss": 0.002, "num_input_tokens_seen": 230706432, "step": 106955 }, { "epoch": 17.448613376835237, "grad_norm": 0.028521167114377022, "learning_rate": 4.876559736962999e-05, "loss": 0.0497, "num_input_tokens_seen": 230717472, "step": 106960 }, { "epoch": 17.449429037520392, "grad_norm": 0.0019382149912416935, "learning_rate": 4.8734940862309006e-05, "loss": 0.0014, "num_input_tokens_seen": 230729600, "step": 106965 }, { "epoch": 17.450244698205548, "grad_norm": 0.011388166807591915, "learning_rate": 4.8704293500446806e-05, "loss": 0.0013, "num_input_tokens_seen": 230739712, "step": 106970 }, { "epoch": 17.4510603588907, "grad_norm": 0.0506131611764431, "learning_rate": 4.867365528466477e-05, "loss": 0.1331, "num_input_tokens_seen": 230750720, "step": 106975 }, { "epoch": 17.451876019575856, "grad_norm": 0.038329076021909714, "learning_rate": 4.864302621558353e-05, "loss": 0.0601, "num_input_tokens_seen": 230761792, "step": 106980 }, { "epoch": 17.45269168026101, "grad_norm": 0.012507863342761993, "learning_rate": 4.861240629382413e-05, "loss": 0.0043, "num_input_tokens_seen": 230773504, "step": 106985 }, { "epoch": 17.453507340946167, "grad_norm": 0.3313423991203308, "learning_rate": 4.858179552000674e-05, "loss": 0.0065, "num_input_tokens_seen": 230783520, "step": 106990 }, { "epoch": 17.454323001631323, "grad_norm": 0.8557566404342651, "learning_rate": 4.85511938947521e-05, "loss": 0.0927, "num_input_tokens_seen": 230794496, "step": 106995 }, { "epoch": 17.455138662316475, "grad_norm": 0.0049517350271344185, "learning_rate": 4.8520601418680085e-05, "loss": 0.003, "num_input_tokens_seen": 230805440, "step": 107000 }, { "epoch": 17.45595432300163, "grad_norm": 0.15968742966651917, "learning_rate": 4.849001809241099e-05, "loss": 0.0075, "num_input_tokens_seen": 230816032, "step": 107005 }, { "epoch": 17.456769983686787, "grad_norm": 0.00218332395888865, "learning_rate": 4.845944391656426e-05, "loss": 0.0015, "num_input_tokens_seen": 230826432, "step": 107010 }, { "epoch": 17.457585644371942, "grad_norm": 0.08275929093360901, "learning_rate": 4.84288788917599e-05, "loss": 0.0462, "num_input_tokens_seen": 230837088, "step": 107015 }, { "epoch": 17.458401305057095, "grad_norm": 0.24268391728401184, "learning_rate": 4.839832301861696e-05, "loss": 0.1385, "num_input_tokens_seen": 230847136, "step": 107020 }, { "epoch": 17.45921696574225, "grad_norm": 0.001240850891917944, "learning_rate": 4.836777629775513e-05, "loss": 0.008, "num_input_tokens_seen": 230859072, "step": 107025 }, { "epoch": 17.460032626427406, "grad_norm": 0.0007838698220439255, "learning_rate": 4.833723872979306e-05, "loss": 0.0014, "num_input_tokens_seen": 230870432, "step": 107030 }, { "epoch": 17.46084828711256, "grad_norm": 0.4521684944629669, "learning_rate": 4.830671031534989e-05, "loss": 0.0234, "num_input_tokens_seen": 230881664, "step": 107035 }, { "epoch": 17.461663947797717, "grad_norm": 0.0036173707339912653, "learning_rate": 4.827619105504427e-05, "loss": 0.001, "num_input_tokens_seen": 230892672, "step": 107040 }, { "epoch": 17.46247960848287, "grad_norm": 0.0067673432640731335, "learning_rate": 4.8245680949494664e-05, "loss": 0.0018, "num_input_tokens_seen": 230901280, "step": 107045 }, { "epoch": 17.463295269168025, "grad_norm": 0.03379007801413536, "learning_rate": 4.821517999931946e-05, "loss": 0.0021, "num_input_tokens_seen": 230910784, "step": 107050 }, { "epoch": 17.46411092985318, "grad_norm": 0.01144998986274004, "learning_rate": 4.8184688205136716e-05, "loss": 0.0015, "num_input_tokens_seen": 230922080, "step": 107055 }, { "epoch": 17.464926590538337, "grad_norm": 0.058468565344810486, "learning_rate": 4.8154205567564503e-05, "loss": 0.0033, "num_input_tokens_seen": 230932576, "step": 107060 }, { "epoch": 17.465742251223492, "grad_norm": 0.012282819487154484, "learning_rate": 4.812373208722048e-05, "loss": 0.009, "num_input_tokens_seen": 230943776, "step": 107065 }, { "epoch": 17.466557911908644, "grad_norm": 0.008485949598252773, "learning_rate": 4.809326776472228e-05, "loss": 0.0016, "num_input_tokens_seen": 230955136, "step": 107070 }, { "epoch": 17.4673735725938, "grad_norm": 0.004723215941339731, "learning_rate": 4.806281260068729e-05, "loss": 0.0062, "num_input_tokens_seen": 230965536, "step": 107075 }, { "epoch": 17.468189233278956, "grad_norm": 0.004339437931776047, "learning_rate": 4.803236659573274e-05, "loss": 0.0007, "num_input_tokens_seen": 230976864, "step": 107080 }, { "epoch": 17.46900489396411, "grad_norm": 0.41068845987319946, "learning_rate": 4.800192975047551e-05, "loss": 0.0223, "num_input_tokens_seen": 230986816, "step": 107085 }, { "epoch": 17.469820554649267, "grad_norm": 0.9826202988624573, "learning_rate": 4.79715020655328e-05, "loss": 0.1007, "num_input_tokens_seen": 230998496, "step": 107090 }, { "epoch": 17.47063621533442, "grad_norm": 0.0007015647133812308, "learning_rate": 4.794108354152082e-05, "loss": 0.0061, "num_input_tokens_seen": 231009216, "step": 107095 }, { "epoch": 17.471451876019575, "grad_norm": 0.22791485488414764, "learning_rate": 4.791067417905648e-05, "loss": 0.0343, "num_input_tokens_seen": 231020224, "step": 107100 }, { "epoch": 17.47226753670473, "grad_norm": 0.047676581889390945, "learning_rate": 4.7880273978755606e-05, "loss": 0.0021, "num_input_tokens_seen": 231031744, "step": 107105 }, { "epoch": 17.473083197389887, "grad_norm": 0.0004758232971653342, "learning_rate": 4.784988294123477e-05, "loss": 0.0221, "num_input_tokens_seen": 231043392, "step": 107110 }, { "epoch": 17.473898858075042, "grad_norm": 0.07350235432386398, "learning_rate": 4.781950106710942e-05, "loss": 0.0035, "num_input_tokens_seen": 231055360, "step": 107115 }, { "epoch": 17.474714518760194, "grad_norm": 0.0018216270254924893, "learning_rate": 4.7789128356995727e-05, "loss": 0.0063, "num_input_tokens_seen": 231064800, "step": 107120 }, { "epoch": 17.47553017944535, "grad_norm": 0.039561089128255844, "learning_rate": 4.775876481150887e-05, "loss": 0.0043, "num_input_tokens_seen": 231075616, "step": 107125 }, { "epoch": 17.476345840130506, "grad_norm": 0.20550936460494995, "learning_rate": 4.772841043126447e-05, "loss": 0.0098, "num_input_tokens_seen": 231085824, "step": 107130 }, { "epoch": 17.47716150081566, "grad_norm": 0.0037590847350656986, "learning_rate": 4.769806521687742e-05, "loss": 0.0029, "num_input_tokens_seen": 231096736, "step": 107135 }, { "epoch": 17.477977161500817, "grad_norm": 0.004327923990786076, "learning_rate": 4.766772916896306e-05, "loss": 0.1045, "num_input_tokens_seen": 231108032, "step": 107140 }, { "epoch": 17.47879282218597, "grad_norm": 0.2986736297607422, "learning_rate": 4.763740228813579e-05, "loss": 0.1743, "num_input_tokens_seen": 231119392, "step": 107145 }, { "epoch": 17.479608482871125, "grad_norm": 0.22231744229793549, "learning_rate": 4.760708457501062e-05, "loss": 0.0159, "num_input_tokens_seen": 231129728, "step": 107150 }, { "epoch": 17.48042414355628, "grad_norm": 0.0037460243329405785, "learning_rate": 4.7576776030201606e-05, "loss": 0.1378, "num_input_tokens_seen": 231139648, "step": 107155 }, { "epoch": 17.481239804241437, "grad_norm": 0.003907213918864727, "learning_rate": 4.754647665432338e-05, "loss": 0.0023, "num_input_tokens_seen": 231150688, "step": 107160 }, { "epoch": 17.482055464926592, "grad_norm": 0.03762059286236763, "learning_rate": 4.751618644798955e-05, "loss": 0.0661, "num_input_tokens_seen": 231160640, "step": 107165 }, { "epoch": 17.482871125611744, "grad_norm": 0.02256060764193535, "learning_rate": 4.7485905411814414e-05, "loss": 0.1393, "num_input_tokens_seen": 231171680, "step": 107170 }, { "epoch": 17.4836867862969, "grad_norm": 0.020156804472208023, "learning_rate": 4.745563354641125e-05, "loss": 0.0009, "num_input_tokens_seen": 231181568, "step": 107175 }, { "epoch": 17.484502446982056, "grad_norm": 0.016182219609618187, "learning_rate": 4.74253708523939e-05, "loss": 0.0021, "num_input_tokens_seen": 231193728, "step": 107180 }, { "epoch": 17.48531810766721, "grad_norm": 0.001260829041711986, "learning_rate": 4.7395117330375494e-05, "loss": 0.0036, "num_input_tokens_seen": 231204800, "step": 107185 }, { "epoch": 17.486133768352367, "grad_norm": 0.004725717473775148, "learning_rate": 4.7364872980969254e-05, "loss": 0.0633, "num_input_tokens_seen": 231214976, "step": 107190 }, { "epoch": 17.48694942903752, "grad_norm": 0.015284993685781956, "learning_rate": 4.733463780478808e-05, "loss": 0.0012, "num_input_tokens_seen": 231224576, "step": 107195 }, { "epoch": 17.487765089722675, "grad_norm": 0.04753398522734642, "learning_rate": 4.7304411802444656e-05, "loss": 0.0127, "num_input_tokens_seen": 231235904, "step": 107200 }, { "epoch": 17.48858075040783, "grad_norm": 0.0006615896127186716, "learning_rate": 4.7274194974551656e-05, "loss": 0.0035, "num_input_tokens_seen": 231246912, "step": 107205 }, { "epoch": 17.489396411092986, "grad_norm": 0.01627221144735813, "learning_rate": 4.724398732172142e-05, "loss": 0.0288, "num_input_tokens_seen": 231259264, "step": 107210 }, { "epoch": 17.49021207177814, "grad_norm": 0.00037789461202919483, "learning_rate": 4.721378884456612e-05, "loss": 0.0451, "num_input_tokens_seen": 231270976, "step": 107215 }, { "epoch": 17.491027732463294, "grad_norm": 0.043850019574165344, "learning_rate": 4.718359954369783e-05, "loss": 0.0094, "num_input_tokens_seen": 231282176, "step": 107220 }, { "epoch": 17.49184339314845, "grad_norm": 0.017143243923783302, "learning_rate": 4.7153419419728285e-05, "loss": 0.0061, "num_input_tokens_seen": 231293024, "step": 107225 }, { "epoch": 17.492659053833606, "grad_norm": 0.16217546164989471, "learning_rate": 4.7123248473269096e-05, "loss": 0.0286, "num_input_tokens_seen": 231303616, "step": 107230 }, { "epoch": 17.49347471451876, "grad_norm": 0.13963352143764496, "learning_rate": 4.7093086704931955e-05, "loss": 0.1055, "num_input_tokens_seen": 231314496, "step": 107235 }, { "epoch": 17.494290375203914, "grad_norm": 0.0030651872511953115, "learning_rate": 4.7062934115327804e-05, "loss": 0.0154, "num_input_tokens_seen": 231325344, "step": 107240 }, { "epoch": 17.49510603588907, "grad_norm": 0.0015858528204262257, "learning_rate": 4.7032790705068105e-05, "loss": 0.2343, "num_input_tokens_seen": 231335104, "step": 107245 }, { "epoch": 17.495921696574225, "grad_norm": 0.019279591739177704, "learning_rate": 4.700265647476332e-05, "loss": 0.0012, "num_input_tokens_seen": 231345376, "step": 107250 }, { "epoch": 17.49673735725938, "grad_norm": 0.013692829757928848, "learning_rate": 4.69725314250245e-05, "loss": 0.0394, "num_input_tokens_seen": 231356480, "step": 107255 }, { "epoch": 17.497553017944536, "grad_norm": 0.020630525425076485, "learning_rate": 4.6942415556461894e-05, "loss": 0.0207, "num_input_tokens_seen": 231367232, "step": 107260 }, { "epoch": 17.49836867862969, "grad_norm": 0.0348367877304554, "learning_rate": 4.691230886968617e-05, "loss": 0.076, "num_input_tokens_seen": 231378944, "step": 107265 }, { "epoch": 17.499184339314844, "grad_norm": 0.028547020629048347, "learning_rate": 4.688221136530712e-05, "loss": 0.0099, "num_input_tokens_seen": 231389792, "step": 107270 }, { "epoch": 17.5, "grad_norm": 0.019565775990486145, "learning_rate": 4.6852123043935044e-05, "loss": 0.0255, "num_input_tokens_seen": 231400800, "step": 107275 }, { "epoch": 17.500815660685156, "grad_norm": 0.0030005560256540775, "learning_rate": 4.682204390617939e-05, "loss": 0.0586, "num_input_tokens_seen": 231412448, "step": 107280 }, { "epoch": 17.50163132137031, "grad_norm": 0.0025025021750479937, "learning_rate": 4.6791973952650056e-05, "loss": 0.0066, "num_input_tokens_seen": 231423168, "step": 107285 }, { "epoch": 17.502446982055464, "grad_norm": 0.7462862133979797, "learning_rate": 4.6761913183956175e-05, "loss": 0.2433, "num_input_tokens_seen": 231434112, "step": 107290 }, { "epoch": 17.50326264274062, "grad_norm": 0.0013868464156985283, "learning_rate": 4.673186160070714e-05, "loss": 0.0006, "num_input_tokens_seen": 231445152, "step": 107295 }, { "epoch": 17.504078303425775, "grad_norm": 0.08477751165628433, "learning_rate": 4.6701819203511964e-05, "loss": 0.0013, "num_input_tokens_seen": 231456224, "step": 107300 }, { "epoch": 17.50489396411093, "grad_norm": 0.03686311095952988, "learning_rate": 4.667178599297944e-05, "loss": 0.0085, "num_input_tokens_seen": 231466368, "step": 107305 }, { "epoch": 17.505709624796086, "grad_norm": 0.01052112691104412, "learning_rate": 4.664176196971831e-05, "loss": 0.0033, "num_input_tokens_seen": 231476704, "step": 107310 }, { "epoch": 17.50652528548124, "grad_norm": 0.0735478550195694, "learning_rate": 4.661174713433697e-05, "loss": 0.0526, "num_input_tokens_seen": 231486176, "step": 107315 }, { "epoch": 17.507340946166394, "grad_norm": 0.22430603206157684, "learning_rate": 4.6581741487443765e-05, "loss": 0.0052, "num_input_tokens_seen": 231497312, "step": 107320 }, { "epoch": 17.50815660685155, "grad_norm": 0.004482749383896589, "learning_rate": 4.655174502964676e-05, "loss": 0.0026, "num_input_tokens_seen": 231508288, "step": 107325 }, { "epoch": 17.508972267536706, "grad_norm": 0.006286046002060175, "learning_rate": 4.6521757761553873e-05, "loss": 0.0013, "num_input_tokens_seen": 231519360, "step": 107330 }, { "epoch": 17.50978792822186, "grad_norm": 0.8812110424041748, "learning_rate": 4.6491779683772825e-05, "loss": 0.052, "num_input_tokens_seen": 231530560, "step": 107335 }, { "epoch": 17.510603588907014, "grad_norm": 0.03438449278473854, "learning_rate": 4.64618107969112e-05, "loss": 0.0107, "num_input_tokens_seen": 231540896, "step": 107340 }, { "epoch": 17.51141924959217, "grad_norm": 0.003100682282820344, "learning_rate": 4.643185110157633e-05, "loss": 0.0034, "num_input_tokens_seen": 231551840, "step": 107345 }, { "epoch": 17.512234910277325, "grad_norm": 0.21627454459667206, "learning_rate": 4.640190059837535e-05, "loss": 0.0084, "num_input_tokens_seen": 231562112, "step": 107350 }, { "epoch": 17.51305057096248, "grad_norm": 0.007444999646395445, "learning_rate": 4.637195928791532e-05, "loss": 0.0057, "num_input_tokens_seen": 231572160, "step": 107355 }, { "epoch": 17.513866231647633, "grad_norm": 0.09545465558767319, "learning_rate": 4.634202717080305e-05, "loss": 0.0887, "num_input_tokens_seen": 231581248, "step": 107360 }, { "epoch": 17.51468189233279, "grad_norm": 0.07918854057788849, "learning_rate": 4.6312104247645035e-05, "loss": 0.0018, "num_input_tokens_seen": 231591040, "step": 107365 }, { "epoch": 17.515497553017944, "grad_norm": 0.004878185223788023, "learning_rate": 4.6282190519047805e-05, "loss": 0.0026, "num_input_tokens_seen": 231602336, "step": 107370 }, { "epoch": 17.5163132137031, "grad_norm": 0.004869256634265184, "learning_rate": 4.625228598561748e-05, "loss": 0.0034, "num_input_tokens_seen": 231614048, "step": 107375 }, { "epoch": 17.517128874388256, "grad_norm": 0.019652556627988815, "learning_rate": 4.6222390647960356e-05, "loss": 0.0015, "num_input_tokens_seen": 231624448, "step": 107380 }, { "epoch": 17.517944535073408, "grad_norm": 0.15309220552444458, "learning_rate": 4.619250450668194e-05, "loss": 0.0056, "num_input_tokens_seen": 231634816, "step": 107385 }, { "epoch": 17.518760195758563, "grad_norm": 0.06802511215209961, "learning_rate": 4.616262756238837e-05, "loss": 0.0036, "num_input_tokens_seen": 231646176, "step": 107390 }, { "epoch": 17.51957585644372, "grad_norm": 0.014270437881350517, "learning_rate": 4.613275981568465e-05, "loss": 0.0013, "num_input_tokens_seen": 231656768, "step": 107395 }, { "epoch": 17.520391517128875, "grad_norm": 0.0004909934941679239, "learning_rate": 4.610290126717642e-05, "loss": 0.0021, "num_input_tokens_seen": 231668128, "step": 107400 }, { "epoch": 17.52120717781403, "grad_norm": 0.015690559521317482, "learning_rate": 4.607305191746874e-05, "loss": 0.0078, "num_input_tokens_seen": 231679584, "step": 107405 }, { "epoch": 17.522022838499183, "grad_norm": 0.044342610985040665, "learning_rate": 4.604321176716647e-05, "loss": 0.0026, "num_input_tokens_seen": 231690848, "step": 107410 }, { "epoch": 17.52283849918434, "grad_norm": 0.005933872424066067, "learning_rate": 4.6013380816874394e-05, "loss": 0.0018, "num_input_tokens_seen": 231701440, "step": 107415 }, { "epoch": 17.523654159869494, "grad_norm": 0.008841227740049362, "learning_rate": 4.598355906719709e-05, "loss": 0.0023, "num_input_tokens_seen": 231712928, "step": 107420 }, { "epoch": 17.52446982055465, "grad_norm": 0.02178630232810974, "learning_rate": 4.595374651873896e-05, "loss": 0.0093, "num_input_tokens_seen": 231723360, "step": 107425 }, { "epoch": 17.525285481239806, "grad_norm": 0.01319460105150938, "learning_rate": 4.592394317210413e-05, "loss": 0.0054, "num_input_tokens_seen": 231733664, "step": 107430 }, { "epoch": 17.526101141924958, "grad_norm": 0.011630572378635406, "learning_rate": 4.589414902789662e-05, "loss": 0.002, "num_input_tokens_seen": 231743872, "step": 107435 }, { "epoch": 17.526916802610113, "grad_norm": 0.11428508907556534, "learning_rate": 4.586436408672023e-05, "loss": 0.0062, "num_input_tokens_seen": 231754144, "step": 107440 }, { "epoch": 17.52773246329527, "grad_norm": 0.109157033264637, "learning_rate": 4.583458834917864e-05, "loss": 0.007, "num_input_tokens_seen": 231764672, "step": 107445 }, { "epoch": 17.528548123980425, "grad_norm": 0.11251997202634811, "learning_rate": 4.580482181587531e-05, "loss": 0.046, "num_input_tokens_seen": 231776224, "step": 107450 }, { "epoch": 17.52936378466558, "grad_norm": 0.0019085211679339409, "learning_rate": 4.5775064487413424e-05, "loss": 0.0031, "num_input_tokens_seen": 231786944, "step": 107455 }, { "epoch": 17.530179445350733, "grad_norm": 0.0021932257805019617, "learning_rate": 4.574531636439605e-05, "loss": 0.0018, "num_input_tokens_seen": 231797760, "step": 107460 }, { "epoch": 17.53099510603589, "grad_norm": 0.10764362663030624, "learning_rate": 4.57155774474261e-05, "loss": 0.0029, "num_input_tokens_seen": 231807840, "step": 107465 }, { "epoch": 17.531810766721044, "grad_norm": 0.008451038971543312, "learning_rate": 4.568584773710632e-05, "loss": 0.0033, "num_input_tokens_seen": 231818304, "step": 107470 }, { "epoch": 17.5326264274062, "grad_norm": 0.0032383943907916546, "learning_rate": 4.565612723403911e-05, "loss": 0.0049, "num_input_tokens_seen": 231828832, "step": 107475 }, { "epoch": 17.533442088091356, "grad_norm": 0.11814907193183899, "learning_rate": 4.562641593882694e-05, "loss": 0.0087, "num_input_tokens_seen": 231839712, "step": 107480 }, { "epoch": 17.534257748776508, "grad_norm": 0.12445204704999924, "learning_rate": 4.5596713852071816e-05, "loss": 0.0039, "num_input_tokens_seen": 231850976, "step": 107485 }, { "epoch": 17.535073409461663, "grad_norm": 0.00476786307990551, "learning_rate": 4.556702097437576e-05, "loss": 0.0097, "num_input_tokens_seen": 231861536, "step": 107490 }, { "epoch": 17.53588907014682, "grad_norm": 0.010264194570481777, "learning_rate": 4.5537337306340466e-05, "loss": 0.0034, "num_input_tokens_seen": 231873216, "step": 107495 }, { "epoch": 17.536704730831975, "grad_norm": 0.4237960875034332, "learning_rate": 4.550766284856761e-05, "loss": 0.0221, "num_input_tokens_seen": 231884384, "step": 107500 }, { "epoch": 17.53752039151713, "grad_norm": 0.011056013405323029, "learning_rate": 4.5477997601658384e-05, "loss": 0.0612, "num_input_tokens_seen": 231894848, "step": 107505 }, { "epoch": 17.538336052202283, "grad_norm": 0.009564247913658619, "learning_rate": 4.5448341566214354e-05, "loss": 0.0048, "num_input_tokens_seen": 231906048, "step": 107510 }, { "epoch": 17.53915171288744, "grad_norm": 0.0013423444470390677, "learning_rate": 4.541869474283616e-05, "loss": 0.0007, "num_input_tokens_seen": 231915584, "step": 107515 }, { "epoch": 17.539967373572594, "grad_norm": 0.0017248950898647308, "learning_rate": 4.538905713212488e-05, "loss": 0.004, "num_input_tokens_seen": 231926080, "step": 107520 }, { "epoch": 17.54078303425775, "grad_norm": 0.006918685510754585, "learning_rate": 4.535942873468102e-05, "loss": 0.0037, "num_input_tokens_seen": 231937088, "step": 107525 }, { "epoch": 17.541598694942905, "grad_norm": 0.009078907780349255, "learning_rate": 4.532980955110516e-05, "loss": 0.0095, "num_input_tokens_seen": 231947840, "step": 107530 }, { "epoch": 17.542414355628058, "grad_norm": 0.0044654798693954945, "learning_rate": 4.530019958199744e-05, "loss": 0.0116, "num_input_tokens_seen": 231958080, "step": 107535 }, { "epoch": 17.543230016313213, "grad_norm": 0.0485394150018692, "learning_rate": 4.527059882795803e-05, "loss": 0.0023, "num_input_tokens_seen": 231968096, "step": 107540 }, { "epoch": 17.54404567699837, "grad_norm": 0.14111091196537018, "learning_rate": 4.52410072895868e-05, "loss": 0.0053, "num_input_tokens_seen": 231978208, "step": 107545 }, { "epoch": 17.544861337683525, "grad_norm": 0.14804194867610931, "learning_rate": 4.521142496748348e-05, "loss": 0.0061, "num_input_tokens_seen": 231988480, "step": 107550 }, { "epoch": 17.545676998368677, "grad_norm": 0.01205470785498619, "learning_rate": 4.5181851862247544e-05, "loss": 0.0072, "num_input_tokens_seen": 231998912, "step": 107555 }, { "epoch": 17.546492659053833, "grad_norm": 0.04184176027774811, "learning_rate": 4.51522879744784e-05, "loss": 0.0174, "num_input_tokens_seen": 232009152, "step": 107560 }, { "epoch": 17.54730831973899, "grad_norm": 0.0044145104475319386, "learning_rate": 4.5122733304775124e-05, "loss": 0.0008, "num_input_tokens_seen": 232020736, "step": 107565 }, { "epoch": 17.548123980424144, "grad_norm": 0.16912396252155304, "learning_rate": 4.509318785373667e-05, "loss": 0.0039, "num_input_tokens_seen": 232032736, "step": 107570 }, { "epoch": 17.5489396411093, "grad_norm": 0.009338653646409512, "learning_rate": 4.506365162196191e-05, "loss": 0.0019, "num_input_tokens_seen": 232043712, "step": 107575 }, { "epoch": 17.549755301794452, "grad_norm": 0.04649984464049339, "learning_rate": 4.503412461004935e-05, "loss": 0.0885, "num_input_tokens_seen": 232052576, "step": 107580 }, { "epoch": 17.550570962479608, "grad_norm": 0.9109947085380554, "learning_rate": 4.500460681859742e-05, "loss": 0.0228, "num_input_tokens_seen": 232063104, "step": 107585 }, { "epoch": 17.551386623164763, "grad_norm": 0.017164727672934532, "learning_rate": 4.4975098248204394e-05, "loss": 0.0074, "num_input_tokens_seen": 232073728, "step": 107590 }, { "epoch": 17.55220228384992, "grad_norm": 0.017494168132543564, "learning_rate": 4.494559889946814e-05, "loss": 0.0013, "num_input_tokens_seen": 232084992, "step": 107595 }, { "epoch": 17.553017944535075, "grad_norm": 0.010589837096631527, "learning_rate": 4.4916108772986686e-05, "loss": 0.0168, "num_input_tokens_seen": 232096032, "step": 107600 }, { "epoch": 17.553833605220227, "grad_norm": 0.06345407664775848, "learning_rate": 4.48866278693576e-05, "loss": 0.0049, "num_input_tokens_seen": 232106944, "step": 107605 }, { "epoch": 17.554649265905383, "grad_norm": 0.003284136764705181, "learning_rate": 4.485715618917818e-05, "loss": 0.0013, "num_input_tokens_seen": 232117536, "step": 107610 }, { "epoch": 17.55546492659054, "grad_norm": 0.0009325508144684136, "learning_rate": 4.482769373304613e-05, "loss": 0.0236, "num_input_tokens_seen": 232127616, "step": 107615 }, { "epoch": 17.556280587275694, "grad_norm": 0.005316039081662893, "learning_rate": 4.4798240501558115e-05, "loss": 0.0045, "num_input_tokens_seen": 232138656, "step": 107620 }, { "epoch": 17.55709624796085, "grad_norm": 0.001330060651525855, "learning_rate": 4.4768796495311406e-05, "loss": 0.0021, "num_input_tokens_seen": 232149600, "step": 107625 }, { "epoch": 17.557911908646002, "grad_norm": 0.003580335760489106, "learning_rate": 4.473936171490228e-05, "loss": 0.0174, "num_input_tokens_seen": 232159776, "step": 107630 }, { "epoch": 17.558727569331158, "grad_norm": 0.006064064800739288, "learning_rate": 4.470993616092778e-05, "loss": 0.0083, "num_input_tokens_seen": 232169792, "step": 107635 }, { "epoch": 17.559543230016313, "grad_norm": 0.019970590248703957, "learning_rate": 4.46805198339838e-05, "loss": 0.0022, "num_input_tokens_seen": 232180800, "step": 107640 }, { "epoch": 17.56035889070147, "grad_norm": 0.004178846720606089, "learning_rate": 4.4651112734666874e-05, "loss": 0.0026, "num_input_tokens_seen": 232190752, "step": 107645 }, { "epoch": 17.561174551386625, "grad_norm": 0.013814548030495644, "learning_rate": 4.462171486357264e-05, "loss": 0.0012, "num_input_tokens_seen": 232201856, "step": 107650 }, { "epoch": 17.561990212071777, "grad_norm": 0.016698498278856277, "learning_rate": 4.459232622129722e-05, "loss": 0.0114, "num_input_tokens_seen": 232213280, "step": 107655 }, { "epoch": 17.562805872756933, "grad_norm": 0.03660855069756508, "learning_rate": 4.4562946808435864e-05, "loss": 0.0025, "num_input_tokens_seen": 232225056, "step": 107660 }, { "epoch": 17.563621533442088, "grad_norm": 0.04763437807559967, "learning_rate": 4.453357662558422e-05, "loss": 0.0052, "num_input_tokens_seen": 232234944, "step": 107665 }, { "epoch": 17.564437194127244, "grad_norm": 0.008197019807994366, "learning_rate": 4.450421567333746e-05, "loss": 0.0011, "num_input_tokens_seen": 232244608, "step": 107670 }, { "epoch": 17.5652528548124, "grad_norm": 0.002325588371604681, "learning_rate": 4.447486395229061e-05, "loss": 0.004, "num_input_tokens_seen": 232255264, "step": 107675 }, { "epoch": 17.56606851549755, "grad_norm": 0.006914843339473009, "learning_rate": 4.4445521463038486e-05, "loss": 0.005, "num_input_tokens_seen": 232266688, "step": 107680 }, { "epoch": 17.566884176182707, "grad_norm": 0.004482678137719631, "learning_rate": 4.441618820617582e-05, "loss": 0.0075, "num_input_tokens_seen": 232275968, "step": 107685 }, { "epoch": 17.567699836867863, "grad_norm": 0.01785637065768242, "learning_rate": 4.438686418229698e-05, "loss": 0.0053, "num_input_tokens_seen": 232286752, "step": 107690 }, { "epoch": 17.56851549755302, "grad_norm": 0.11471552401781082, "learning_rate": 4.4357549391996376e-05, "loss": 0.0105, "num_input_tokens_seen": 232296768, "step": 107695 }, { "epoch": 17.569331158238175, "grad_norm": 0.10488258302211761, "learning_rate": 4.432824383586809e-05, "loss": 0.0393, "num_input_tokens_seen": 232307776, "step": 107700 }, { "epoch": 17.570146818923327, "grad_norm": 0.017758633941411972, "learning_rate": 4.429894751450597e-05, "loss": 0.0019, "num_input_tokens_seen": 232318336, "step": 107705 }, { "epoch": 17.570962479608482, "grad_norm": 0.0127429673448205, "learning_rate": 4.4269660428503774e-05, "loss": 0.028, "num_input_tokens_seen": 232329920, "step": 107710 }, { "epoch": 17.571778140293638, "grad_norm": 0.0012403883738443255, "learning_rate": 4.4240382578454915e-05, "loss": 0.0062, "num_input_tokens_seen": 232341312, "step": 107715 }, { "epoch": 17.572593800978794, "grad_norm": 0.002174537628889084, "learning_rate": 4.4211113964953144e-05, "loss": 0.001, "num_input_tokens_seen": 232352416, "step": 107720 }, { "epoch": 17.57340946166395, "grad_norm": 0.041134439408779144, "learning_rate": 4.4181854588591085e-05, "loss": 0.0039, "num_input_tokens_seen": 232363168, "step": 107725 }, { "epoch": 17.5742251223491, "grad_norm": 0.03331729397177696, "learning_rate": 4.415260444996222e-05, "loss": 0.0044, "num_input_tokens_seen": 232373376, "step": 107730 }, { "epoch": 17.575040783034257, "grad_norm": 0.003109544515609741, "learning_rate": 4.4123363549658955e-05, "loss": 0.162, "num_input_tokens_seen": 232384064, "step": 107735 }, { "epoch": 17.575856443719413, "grad_norm": 0.013674991205334663, "learning_rate": 4.409413188827416e-05, "loss": 0.0013, "num_input_tokens_seen": 232392960, "step": 107740 }, { "epoch": 17.57667210440457, "grad_norm": 0.007668232545256615, "learning_rate": 4.4064909466400014e-05, "loss": 0.0024, "num_input_tokens_seen": 232404288, "step": 107745 }, { "epoch": 17.57748776508972, "grad_norm": 0.0671762153506279, "learning_rate": 4.4035696284629e-05, "loss": 0.0026, "num_input_tokens_seen": 232415168, "step": 107750 }, { "epoch": 17.578303425774877, "grad_norm": 0.21214818954467773, "learning_rate": 4.4006492343552915e-05, "loss": 0.0048, "num_input_tokens_seen": 232426304, "step": 107755 }, { "epoch": 17.579119086460032, "grad_norm": 0.0016184755368158221, "learning_rate": 4.39772976437639e-05, "loss": 0.0073, "num_input_tokens_seen": 232436224, "step": 107760 }, { "epoch": 17.579934747145188, "grad_norm": 0.2004365473985672, "learning_rate": 4.394811218585326e-05, "loss": 0.0034, "num_input_tokens_seen": 232447744, "step": 107765 }, { "epoch": 17.580750407830344, "grad_norm": 0.014041485264897346, "learning_rate": 4.3918935970412796e-05, "loss": 0.0024, "num_input_tokens_seen": 232457664, "step": 107770 }, { "epoch": 17.581566068515496, "grad_norm": 0.08400784432888031, "learning_rate": 4.38897689980336e-05, "loss": 0.0044, "num_input_tokens_seen": 232467360, "step": 107775 }, { "epoch": 17.58238172920065, "grad_norm": 0.0028867083601653576, "learning_rate": 4.386061126930696e-05, "loss": 0.0018, "num_input_tokens_seen": 232478592, "step": 107780 }, { "epoch": 17.583197389885807, "grad_norm": 0.002563504036515951, "learning_rate": 4.3831462784823525e-05, "loss": 0.0325, "num_input_tokens_seen": 232489824, "step": 107785 }, { "epoch": 17.584013050570963, "grad_norm": 0.0021558150183409452, "learning_rate": 4.380232354517433e-05, "loss": 0.0117, "num_input_tokens_seen": 232501696, "step": 107790 }, { "epoch": 17.58482871125612, "grad_norm": 0.13048721849918365, "learning_rate": 4.3773193550949664e-05, "loss": 0.0547, "num_input_tokens_seen": 232513472, "step": 107795 }, { "epoch": 17.58564437194127, "grad_norm": 0.008377422578632832, "learning_rate": 4.374407280274007e-05, "loss": 0.001, "num_input_tokens_seen": 232524256, "step": 107800 }, { "epoch": 17.586460032626427, "grad_norm": 0.021969666704535484, "learning_rate": 4.371496130113561e-05, "loss": 0.0018, "num_input_tokens_seen": 232532704, "step": 107805 }, { "epoch": 17.587275693311582, "grad_norm": 0.00664404034614563, "learning_rate": 4.3685859046726284e-05, "loss": 0.1098, "num_input_tokens_seen": 232543200, "step": 107810 }, { "epoch": 17.588091353996738, "grad_norm": 0.005486016161739826, "learning_rate": 4.3656766040101933e-05, "loss": 0.1005, "num_input_tokens_seen": 232552704, "step": 107815 }, { "epoch": 17.588907014681894, "grad_norm": 0.05723780766129494, "learning_rate": 4.362768228185216e-05, "loss": 0.005, "num_input_tokens_seen": 232562176, "step": 107820 }, { "epoch": 17.589722675367046, "grad_norm": 0.02484714426100254, "learning_rate": 4.35986077725663e-05, "loss": 0.0035, "num_input_tokens_seen": 232572576, "step": 107825 }, { "epoch": 17.5905383360522, "grad_norm": 0.015293368138372898, "learning_rate": 4.3569542512833684e-05, "loss": 0.0011, "num_input_tokens_seen": 232584288, "step": 107830 }, { "epoch": 17.591353996737357, "grad_norm": 0.3780241012573242, "learning_rate": 4.354048650324327e-05, "loss": 0.0895, "num_input_tokens_seen": 232595520, "step": 107835 }, { "epoch": 17.592169657422513, "grad_norm": 0.0022786709014326334, "learning_rate": 4.3511439744383984e-05, "loss": 0.0023, "num_input_tokens_seen": 232607168, "step": 107840 }, { "epoch": 17.59298531810767, "grad_norm": 0.011645214632153511, "learning_rate": 4.348240223684447e-05, "loss": 0.0217, "num_input_tokens_seen": 232617568, "step": 107845 }, { "epoch": 17.59380097879282, "grad_norm": 0.0034358096309006214, "learning_rate": 4.3453373981213184e-05, "loss": 0.0523, "num_input_tokens_seen": 232628160, "step": 107850 }, { "epoch": 17.594616639477977, "grad_norm": 0.19933156669139862, "learning_rate": 4.342435497807845e-05, "loss": 0.0446, "num_input_tokens_seen": 232638784, "step": 107855 }, { "epoch": 17.595432300163132, "grad_norm": 0.03359149396419525, "learning_rate": 4.3395345228028294e-05, "loss": 0.0057, "num_input_tokens_seen": 232649248, "step": 107860 }, { "epoch": 17.596247960848288, "grad_norm": 0.18799568712711334, "learning_rate": 4.336634473165091e-05, "loss": 0.1197, "num_input_tokens_seen": 232659424, "step": 107865 }, { "epoch": 17.597063621533444, "grad_norm": 0.020560981705784798, "learning_rate": 4.3337353489533606e-05, "loss": 0.003, "num_input_tokens_seen": 232670048, "step": 107870 }, { "epoch": 17.597879282218596, "grad_norm": 0.002444885903969407, "learning_rate": 4.3308371502264355e-05, "loss": 0.0018, "num_input_tokens_seen": 232680192, "step": 107875 }, { "epoch": 17.59869494290375, "grad_norm": 0.008794990368187428, "learning_rate": 4.327939877043013e-05, "loss": 0.0019, "num_input_tokens_seen": 232689792, "step": 107880 }, { "epoch": 17.599510603588907, "grad_norm": 0.35468122363090515, "learning_rate": 4.3250435294618473e-05, "loss": 0.0145, "num_input_tokens_seen": 232700320, "step": 107885 }, { "epoch": 17.600326264274063, "grad_norm": 0.0019192816689610481, "learning_rate": 4.322148107541596e-05, "loss": 0.0033, "num_input_tokens_seen": 232710624, "step": 107890 }, { "epoch": 17.601141924959215, "grad_norm": 0.005131114274263382, "learning_rate": 4.3192536113409785e-05, "loss": 0.0179, "num_input_tokens_seen": 232722304, "step": 107895 }, { "epoch": 17.60195758564437, "grad_norm": 0.0021040362771600485, "learning_rate": 4.316360040918621e-05, "loss": 0.0047, "num_input_tokens_seen": 232731136, "step": 107900 }, { "epoch": 17.602773246329527, "grad_norm": 0.07023309171199799, "learning_rate": 4.3134673963331985e-05, "loss": 0.0081, "num_input_tokens_seen": 232742912, "step": 107905 }, { "epoch": 17.603588907014682, "grad_norm": 0.0013207800220698118, "learning_rate": 4.310575677643297e-05, "loss": 0.0011, "num_input_tokens_seen": 232753984, "step": 107910 }, { "epoch": 17.604404567699838, "grad_norm": 0.00213421112857759, "learning_rate": 4.307684884907559e-05, "loss": 0.0023, "num_input_tokens_seen": 232764352, "step": 107915 }, { "epoch": 17.605220228384994, "grad_norm": 0.028272783383727074, "learning_rate": 4.304795018184537e-05, "loss": 0.0014, "num_input_tokens_seen": 232774112, "step": 107920 }, { "epoch": 17.606035889070146, "grad_norm": 0.004300239495933056, "learning_rate": 4.3019060775328186e-05, "loss": 0.0052, "num_input_tokens_seen": 232784736, "step": 107925 }, { "epoch": 17.6068515497553, "grad_norm": 0.010114967823028564, "learning_rate": 4.2990180630109455e-05, "loss": 0.0076, "num_input_tokens_seen": 232795392, "step": 107930 }, { "epoch": 17.607667210440457, "grad_norm": 0.0015734832268208265, "learning_rate": 4.296130974677448e-05, "loss": 0.0036, "num_input_tokens_seen": 232805472, "step": 107935 }, { "epoch": 17.608482871125613, "grad_norm": 0.11391749233007431, "learning_rate": 4.293244812590835e-05, "loss": 0.0038, "num_input_tokens_seen": 232817280, "step": 107940 }, { "epoch": 17.609298531810765, "grad_norm": 0.004720916040241718, "learning_rate": 4.2903595768095995e-05, "loss": 0.0108, "num_input_tokens_seen": 232827008, "step": 107945 }, { "epoch": 17.61011419249592, "grad_norm": 0.09931259602308273, "learning_rate": 4.28747526739221e-05, "loss": 0.0029, "num_input_tokens_seen": 232838048, "step": 107950 }, { "epoch": 17.610929853181077, "grad_norm": 0.007249794434756041, "learning_rate": 4.284591884397132e-05, "loss": 0.0035, "num_input_tokens_seen": 232848640, "step": 107955 }, { "epoch": 17.611745513866232, "grad_norm": 0.004599269013851881, "learning_rate": 4.281709427882791e-05, "loss": 0.0009, "num_input_tokens_seen": 232860640, "step": 107960 }, { "epoch": 17.612561174551388, "grad_norm": 0.0233658105134964, "learning_rate": 4.2788278979076003e-05, "loss": 0.0073, "num_input_tokens_seen": 232871040, "step": 107965 }, { "epoch": 17.61337683523654, "grad_norm": 0.02126176841557026, "learning_rate": 4.275947294529969e-05, "loss": 0.0117, "num_input_tokens_seen": 232883520, "step": 107970 }, { "epoch": 17.614192495921696, "grad_norm": 0.018893824890255928, "learning_rate": 4.2730676178082736e-05, "loss": 0.0127, "num_input_tokens_seen": 232894496, "step": 107975 }, { "epoch": 17.61500815660685, "grad_norm": 0.782471776008606, "learning_rate": 4.2701888678008674e-05, "loss": 0.041, "num_input_tokens_seen": 232906368, "step": 107980 }, { "epoch": 17.615823817292007, "grad_norm": 0.0056444075889885426, "learning_rate": 4.267311044566097e-05, "loss": 0.002, "num_input_tokens_seen": 232917120, "step": 107985 }, { "epoch": 17.616639477977163, "grad_norm": 0.15986652672290802, "learning_rate": 4.2644341481622825e-05, "loss": 0.0059, "num_input_tokens_seen": 232929024, "step": 107990 }, { "epoch": 17.617455138662315, "grad_norm": 0.007332351058721542, "learning_rate": 4.2615581786477234e-05, "loss": 0.0015, "num_input_tokens_seen": 232940608, "step": 107995 }, { "epoch": 17.61827079934747, "grad_norm": 0.03334016352891922, "learning_rate": 4.2586831360807265e-05, "loss": 0.0232, "num_input_tokens_seen": 232952448, "step": 108000 }, { "epoch": 17.619086460032626, "grad_norm": 0.04762066528201103, "learning_rate": 4.25580902051953e-05, "loss": 0.0043, "num_input_tokens_seen": 232962848, "step": 108005 }, { "epoch": 17.619902120717782, "grad_norm": 1.0481981039047241, "learning_rate": 4.252935832022409e-05, "loss": 0.1222, "num_input_tokens_seen": 232973792, "step": 108010 }, { "epoch": 17.620717781402938, "grad_norm": 0.006444391794502735, "learning_rate": 4.250063570647561e-05, "loss": 0.0029, "num_input_tokens_seen": 232984480, "step": 108015 }, { "epoch": 17.62153344208809, "grad_norm": 1.3807339668273926, "learning_rate": 4.247192236453229e-05, "loss": 0.103, "num_input_tokens_seen": 232995744, "step": 108020 }, { "epoch": 17.622349102773246, "grad_norm": 0.02395247109234333, "learning_rate": 4.244321829497566e-05, "loss": 0.0037, "num_input_tokens_seen": 233006976, "step": 108025 }, { "epoch": 17.6231647634584, "grad_norm": 0.0057980334386229515, "learning_rate": 4.2414523498387926e-05, "loss": 0.0733, "num_input_tokens_seen": 233017536, "step": 108030 }, { "epoch": 17.623980424143557, "grad_norm": 0.0037633534520864487, "learning_rate": 4.2385837975350115e-05, "loss": 0.0026, "num_input_tokens_seen": 233028704, "step": 108035 }, { "epoch": 17.624796084828713, "grad_norm": 0.3537445664405823, "learning_rate": 4.235716172644394e-05, "loss": 0.1277, "num_input_tokens_seen": 233040736, "step": 108040 }, { "epoch": 17.625611745513865, "grad_norm": 0.0019887310918420553, "learning_rate": 4.232849475225048e-05, "loss": 0.0101, "num_input_tokens_seen": 233050688, "step": 108045 }, { "epoch": 17.62642740619902, "grad_norm": 0.14474238455295563, "learning_rate": 4.2299837053350606e-05, "loss": 0.0099, "num_input_tokens_seen": 233062176, "step": 108050 }, { "epoch": 17.627243066884176, "grad_norm": 0.037946078926324844, "learning_rate": 4.2271188630325195e-05, "loss": 0.0023, "num_input_tokens_seen": 233073888, "step": 108055 }, { "epoch": 17.628058727569332, "grad_norm": 0.021258994936943054, "learning_rate": 4.2242549483754836e-05, "loss": 0.0032, "num_input_tokens_seen": 233085088, "step": 108060 }, { "epoch": 17.628874388254488, "grad_norm": 0.006693769246339798, "learning_rate": 4.221391961421989e-05, "loss": 0.0006, "num_input_tokens_seen": 233096480, "step": 108065 }, { "epoch": 17.62969004893964, "grad_norm": 0.0034352289512753487, "learning_rate": 4.218529902230062e-05, "loss": 0.0375, "num_input_tokens_seen": 233107904, "step": 108070 }, { "epoch": 17.630505709624796, "grad_norm": 0.017755651846528053, "learning_rate": 4.2156687708577e-05, "loss": 0.0142, "num_input_tokens_seen": 233119200, "step": 108075 }, { "epoch": 17.63132137030995, "grad_norm": 0.002996844472363591, "learning_rate": 4.212808567362897e-05, "loss": 0.0053, "num_input_tokens_seen": 233129536, "step": 108080 }, { "epoch": 17.632137030995107, "grad_norm": 0.026234645396471024, "learning_rate": 4.209949291803611e-05, "loss": 0.0024, "num_input_tokens_seen": 233139232, "step": 108085 }, { "epoch": 17.63295269168026, "grad_norm": 0.0008986800676211715, "learning_rate": 4.207090944237796e-05, "loss": 0.0063, "num_input_tokens_seen": 233149504, "step": 108090 }, { "epoch": 17.633768352365415, "grad_norm": 0.17140436172485352, "learning_rate": 4.204233524723372e-05, "loss": 0.0039, "num_input_tokens_seen": 233160160, "step": 108095 }, { "epoch": 17.63458401305057, "grad_norm": 0.0014749376568943262, "learning_rate": 4.201377033318249e-05, "loss": 0.008, "num_input_tokens_seen": 233171360, "step": 108100 }, { "epoch": 17.635399673735726, "grad_norm": 0.009469899348914623, "learning_rate": 4.198521470080324e-05, "loss": 0.0021, "num_input_tokens_seen": 233181312, "step": 108105 }, { "epoch": 17.636215334420882, "grad_norm": 0.040909960865974426, "learning_rate": 4.195666835067463e-05, "loss": 0.0107, "num_input_tokens_seen": 233192160, "step": 108110 }, { "epoch": 17.637030995106034, "grad_norm": 0.03652292117476463, "learning_rate": 4.1928131283375246e-05, "loss": 0.0024, "num_input_tokens_seen": 233204000, "step": 108115 }, { "epoch": 17.63784665579119, "grad_norm": 0.2114836573600769, "learning_rate": 4.189960349948335e-05, "loss": 0.003, "num_input_tokens_seen": 233214176, "step": 108120 }, { "epoch": 17.638662316476346, "grad_norm": 0.23834684491157532, "learning_rate": 4.1871084999577146e-05, "loss": 0.0116, "num_input_tokens_seen": 233225216, "step": 108125 }, { "epoch": 17.6394779771615, "grad_norm": 0.001918346737511456, "learning_rate": 4.184257578423456e-05, "loss": 0.0058, "num_input_tokens_seen": 233237184, "step": 108130 }, { "epoch": 17.640293637846657, "grad_norm": 0.01167698111385107, "learning_rate": 4.1814075854033405e-05, "loss": 0.0833, "num_input_tokens_seen": 233248960, "step": 108135 }, { "epoch": 17.64110929853181, "grad_norm": 0.04548150673508644, "learning_rate": 4.178558520955117e-05, "loss": 0.0056, "num_input_tokens_seen": 233259552, "step": 108140 }, { "epoch": 17.641924959216965, "grad_norm": 0.020379438996315002, "learning_rate": 4.175710385136539e-05, "loss": 0.0019, "num_input_tokens_seen": 233271584, "step": 108145 }, { "epoch": 17.64274061990212, "grad_norm": 1.2918727397918701, "learning_rate": 4.172863178005326e-05, "loss": 0.0507, "num_input_tokens_seen": 233281632, "step": 108150 }, { "epoch": 17.643556280587276, "grad_norm": 0.0013788256328552961, "learning_rate": 4.1700168996191726e-05, "loss": 0.0354, "num_input_tokens_seen": 233293216, "step": 108155 }, { "epoch": 17.644371941272432, "grad_norm": 0.02446756139397621, "learning_rate": 4.16717155003577e-05, "loss": 0.1021, "num_input_tokens_seen": 233304128, "step": 108160 }, { "epoch": 17.645187601957584, "grad_norm": 0.0017420410877093673, "learning_rate": 4.164327129312778e-05, "loss": 0.0138, "num_input_tokens_seen": 233314912, "step": 108165 }, { "epoch": 17.64600326264274, "grad_norm": 0.006676455494016409, "learning_rate": 4.161483637507846e-05, "loss": 0.003, "num_input_tokens_seen": 233325856, "step": 108170 }, { "epoch": 17.646818923327896, "grad_norm": 0.015717096626758575, "learning_rate": 4.1586410746785927e-05, "loss": 0.0037, "num_input_tokens_seen": 233337344, "step": 108175 }, { "epoch": 17.64763458401305, "grad_norm": 0.002789268270134926, "learning_rate": 4.155799440882635e-05, "loss": 0.0009, "num_input_tokens_seen": 233349312, "step": 108180 }, { "epoch": 17.648450244698207, "grad_norm": 0.0010609611636027694, "learning_rate": 4.152958736177559e-05, "loss": 0.0185, "num_input_tokens_seen": 233359104, "step": 108185 }, { "epoch": 17.64926590538336, "grad_norm": 0.0467817448079586, "learning_rate": 4.1501189606209356e-05, "loss": 0.0357, "num_input_tokens_seen": 233369632, "step": 108190 }, { "epoch": 17.650081566068515, "grad_norm": 0.0026449784636497498, "learning_rate": 4.147280114270319e-05, "loss": 0.0146, "num_input_tokens_seen": 233380960, "step": 108195 }, { "epoch": 17.65089722675367, "grad_norm": 0.0010525686666369438, "learning_rate": 4.1444421971832346e-05, "loss": 0.002, "num_input_tokens_seen": 233391392, "step": 108200 }, { "epoch": 17.651712887438826, "grad_norm": 1.3117533922195435, "learning_rate": 4.1416052094171985e-05, "loss": 0.0583, "num_input_tokens_seen": 233400608, "step": 108205 }, { "epoch": 17.652528548123982, "grad_norm": 0.0152825266122818, "learning_rate": 4.1387691510297146e-05, "loss": 0.0033, "num_input_tokens_seen": 233411072, "step": 108210 }, { "epoch": 17.653344208809134, "grad_norm": 0.004172016400843859, "learning_rate": 4.1359340220782524e-05, "loss": 0.0211, "num_input_tokens_seen": 233422080, "step": 108215 }, { "epoch": 17.65415986949429, "grad_norm": 0.003522388869896531, "learning_rate": 4.133099822620268e-05, "loss": 0.009, "num_input_tokens_seen": 233431808, "step": 108220 }, { "epoch": 17.654975530179446, "grad_norm": 0.005500887054949999, "learning_rate": 4.130266552713202e-05, "loss": 0.0024, "num_input_tokens_seen": 233443040, "step": 108225 }, { "epoch": 17.6557911908646, "grad_norm": 0.00622909190133214, "learning_rate": 4.1274342124144713e-05, "loss": 0.004, "num_input_tokens_seen": 233453184, "step": 108230 }, { "epoch": 17.656606851549757, "grad_norm": 0.0038555441424250603, "learning_rate": 4.124602801781485e-05, "loss": 0.0038, "num_input_tokens_seen": 233464288, "step": 108235 }, { "epoch": 17.65742251223491, "grad_norm": 0.2888663411140442, "learning_rate": 4.1217723208716196e-05, "loss": 0.013, "num_input_tokens_seen": 233475616, "step": 108240 }, { "epoch": 17.658238172920065, "grad_norm": 0.8293136954307556, "learning_rate": 4.118942769742234e-05, "loss": 0.0785, "num_input_tokens_seen": 233486048, "step": 108245 }, { "epoch": 17.65905383360522, "grad_norm": 0.0008844927069731057, "learning_rate": 4.116114148450673e-05, "loss": 0.0026, "num_input_tokens_seen": 233496128, "step": 108250 }, { "epoch": 17.659869494290376, "grad_norm": 0.024397972971200943, "learning_rate": 4.113286457054283e-05, "loss": 0.0044, "num_input_tokens_seen": 233507200, "step": 108255 }, { "epoch": 17.660685154975532, "grad_norm": 0.01897307112812996, "learning_rate": 4.1104596956103356e-05, "loss": 0.0388, "num_input_tokens_seen": 233517536, "step": 108260 }, { "epoch": 17.661500815660684, "grad_norm": 0.002732275053858757, "learning_rate": 4.107633864176158e-05, "loss": 0.0012, "num_input_tokens_seen": 233527808, "step": 108265 }, { "epoch": 17.66231647634584, "grad_norm": 0.0006151073612272739, "learning_rate": 4.104808962808976e-05, "loss": 0.0011, "num_input_tokens_seen": 233538272, "step": 108270 }, { "epoch": 17.663132137030995, "grad_norm": 0.58116614818573, "learning_rate": 4.101984991566082e-05, "loss": 0.0144, "num_input_tokens_seen": 233550528, "step": 108275 }, { "epoch": 17.66394779771615, "grad_norm": 0.20577213168144226, "learning_rate": 4.0991619505046764e-05, "loss": 0.0063, "num_input_tokens_seen": 233560256, "step": 108280 }, { "epoch": 17.664763458401303, "grad_norm": 0.0013743528397753835, "learning_rate": 4.096339839681984e-05, "loss": 0.1343, "num_input_tokens_seen": 233571968, "step": 108285 }, { "epoch": 17.66557911908646, "grad_norm": 0.007647587452083826, "learning_rate": 4.0935186591552044e-05, "loss": 0.0034, "num_input_tokens_seen": 233582496, "step": 108290 }, { "epoch": 17.666394779771615, "grad_norm": 0.013646963983774185, "learning_rate": 4.0906984089815026e-05, "loss": 0.003, "num_input_tokens_seen": 233595040, "step": 108295 }, { "epoch": 17.66721044045677, "grad_norm": 0.027100933715701103, "learning_rate": 4.087879089218033e-05, "loss": 0.0012, "num_input_tokens_seen": 233606752, "step": 108300 }, { "epoch": 17.668026101141926, "grad_norm": 0.009104099124670029, "learning_rate": 4.085060699921944e-05, "loss": 0.0025, "num_input_tokens_seen": 233615776, "step": 108305 }, { "epoch": 17.66884176182708, "grad_norm": 0.01836259476840496, "learning_rate": 4.0822432411503464e-05, "loss": 0.1086, "num_input_tokens_seen": 233626944, "step": 108310 }, { "epoch": 17.669657422512234, "grad_norm": 0.008539282716810703, "learning_rate": 4.079426712960338e-05, "loss": 0.1216, "num_input_tokens_seen": 233635616, "step": 108315 }, { "epoch": 17.67047308319739, "grad_norm": 0.0016384175978600979, "learning_rate": 4.076611115409001e-05, "loss": 0.0097, "num_input_tokens_seen": 233646688, "step": 108320 }, { "epoch": 17.671288743882545, "grad_norm": 1.3884669542312622, "learning_rate": 4.073796448553402e-05, "loss": 0.0527, "num_input_tokens_seen": 233656608, "step": 108325 }, { "epoch": 17.6721044045677, "grad_norm": 0.11342182755470276, "learning_rate": 4.070982712450571e-05, "loss": 0.0056, "num_input_tokens_seen": 233667776, "step": 108330 }, { "epoch": 17.672920065252853, "grad_norm": 0.23211225867271423, "learning_rate": 4.068169907157548e-05, "loss": 0.0078, "num_input_tokens_seen": 233679168, "step": 108335 }, { "epoch": 17.67373572593801, "grad_norm": 0.023611871525645256, "learning_rate": 4.065358032731331e-05, "loss": 0.0019, "num_input_tokens_seen": 233690592, "step": 108340 }, { "epoch": 17.674551386623165, "grad_norm": 0.33445098996162415, "learning_rate": 4.062547089228902e-05, "loss": 0.0066, "num_input_tokens_seen": 233702592, "step": 108345 }, { "epoch": 17.67536704730832, "grad_norm": 0.004384627100080252, "learning_rate": 4.0597370767072315e-05, "loss": 0.0019, "num_input_tokens_seen": 233713632, "step": 108350 }, { "epoch": 17.676182707993476, "grad_norm": 0.01881973072886467, "learning_rate": 4.056927995223264e-05, "loss": 0.0013, "num_input_tokens_seen": 233725216, "step": 108355 }, { "epoch": 17.67699836867863, "grad_norm": 0.03080703131854534, "learning_rate": 4.054119844833948e-05, "loss": 0.0104, "num_input_tokens_seen": 233734272, "step": 108360 }, { "epoch": 17.677814029363784, "grad_norm": 0.002784198150038719, "learning_rate": 4.0513126255961594e-05, "loss": 0.0019, "num_input_tokens_seen": 233744768, "step": 108365 }, { "epoch": 17.67862969004894, "grad_norm": 0.02892421931028366, "learning_rate": 4.0485063375668316e-05, "loss": 0.0018, "num_input_tokens_seen": 233756384, "step": 108370 }, { "epoch": 17.679445350734095, "grad_norm": 0.02853807806968689, "learning_rate": 4.045700980802802e-05, "loss": 0.0015, "num_input_tokens_seen": 233767392, "step": 108375 }, { "epoch": 17.68026101141925, "grad_norm": 0.006824324373155832, "learning_rate": 4.042896555360953e-05, "loss": 0.0028, "num_input_tokens_seen": 233778432, "step": 108380 }, { "epoch": 17.681076672104403, "grad_norm": 0.0076616196893155575, "learning_rate": 4.040093061298089e-05, "loss": 0.0022, "num_input_tokens_seen": 233788768, "step": 108385 }, { "epoch": 17.68189233278956, "grad_norm": 0.004570065066218376, "learning_rate": 4.037290498671059e-05, "loss": 0.0594, "num_input_tokens_seen": 233799392, "step": 108390 }, { "epoch": 17.682707993474715, "grad_norm": 0.011569446884095669, "learning_rate": 4.0344888675366285e-05, "loss": 0.0043, "num_input_tokens_seen": 233810272, "step": 108395 }, { "epoch": 17.68352365415987, "grad_norm": 0.017932655289769173, "learning_rate": 4.031688167951614e-05, "loss": 0.0022, "num_input_tokens_seen": 233820384, "step": 108400 }, { "epoch": 17.684339314845026, "grad_norm": 0.023999329656362534, "learning_rate": 4.02888839997273e-05, "loss": 0.0013, "num_input_tokens_seen": 233831168, "step": 108405 }, { "epoch": 17.68515497553018, "grad_norm": 0.013973155058920383, "learning_rate": 4.0260895636567654e-05, "loss": 0.0019, "num_input_tokens_seen": 233842240, "step": 108410 }, { "epoch": 17.685970636215334, "grad_norm": 1.1886049509048462, "learning_rate": 4.0232916590603964e-05, "loss": 0.1157, "num_input_tokens_seen": 233852128, "step": 108415 }, { "epoch": 17.68678629690049, "grad_norm": 0.010488208383321762, "learning_rate": 4.020494686240361e-05, "loss": 0.0841, "num_input_tokens_seen": 233862144, "step": 108420 }, { "epoch": 17.687601957585645, "grad_norm": 0.031189044937491417, "learning_rate": 4.017698645253321e-05, "loss": 0.0018, "num_input_tokens_seen": 233872864, "step": 108425 }, { "epoch": 17.6884176182708, "grad_norm": 0.0026821321807801723, "learning_rate": 4.0149035361559504e-05, "loss": 0.0075, "num_input_tokens_seen": 233882688, "step": 108430 }, { "epoch": 17.689233278955953, "grad_norm": 0.3228112757205963, "learning_rate": 4.0121093590049004e-05, "loss": 0.0082, "num_input_tokens_seen": 233894912, "step": 108435 }, { "epoch": 17.69004893964111, "grad_norm": 0.0861503928899765, "learning_rate": 4.009316113856798e-05, "loss": 0.0035, "num_input_tokens_seen": 233905280, "step": 108440 }, { "epoch": 17.690864600326265, "grad_norm": 0.02075924165546894, "learning_rate": 4.0065238007682414e-05, "loss": 0.0078, "num_input_tokens_seen": 233915904, "step": 108445 }, { "epoch": 17.69168026101142, "grad_norm": 0.0005341132637113333, "learning_rate": 4.0037324197958304e-05, "loss": 0.002, "num_input_tokens_seen": 233926720, "step": 108450 }, { "epoch": 17.692495921696576, "grad_norm": 0.0012997210724279284, "learning_rate": 4.00094197099613e-05, "loss": 0.0076, "num_input_tokens_seen": 233936640, "step": 108455 }, { "epoch": 17.693311582381728, "grad_norm": 0.16954372823238373, "learning_rate": 3.9981524544256964e-05, "loss": 0.0065, "num_input_tokens_seen": 233948128, "step": 108460 }, { "epoch": 17.694127243066884, "grad_norm": 0.005473308265209198, "learning_rate": 3.995363870141061e-05, "loss": 0.0026, "num_input_tokens_seen": 233958912, "step": 108465 }, { "epoch": 17.69494290375204, "grad_norm": 0.404483824968338, "learning_rate": 3.9925762181987345e-05, "loss": 0.0127, "num_input_tokens_seen": 233970336, "step": 108470 }, { "epoch": 17.695758564437195, "grad_norm": 0.07925943285226822, "learning_rate": 3.9897894986552216e-05, "loss": 0.0035, "num_input_tokens_seen": 233981216, "step": 108475 }, { "epoch": 17.696574225122347, "grad_norm": 0.34699341654777527, "learning_rate": 3.987003711566978e-05, "loss": 0.0316, "num_input_tokens_seen": 233991392, "step": 108480 }, { "epoch": 17.697389885807503, "grad_norm": 0.08811841905117035, "learning_rate": 3.984218856990496e-05, "loss": 0.0038, "num_input_tokens_seen": 234003424, "step": 108485 }, { "epoch": 17.69820554649266, "grad_norm": 0.00797662790864706, "learning_rate": 3.981434934982176e-05, "loss": 0.0019, "num_input_tokens_seen": 234013440, "step": 108490 }, { "epoch": 17.699021207177815, "grad_norm": 0.010728854686021805, "learning_rate": 3.978651945598472e-05, "loss": 0.0017, "num_input_tokens_seen": 234022560, "step": 108495 }, { "epoch": 17.69983686786297, "grad_norm": 0.0025496194139122963, "learning_rate": 3.975869888895756e-05, "loss": 0.0017, "num_input_tokens_seen": 234032896, "step": 108500 }, { "epoch": 17.700652528548122, "grad_norm": 0.6793642640113831, "learning_rate": 3.973088764930433e-05, "loss": 0.0399, "num_input_tokens_seen": 234043232, "step": 108505 }, { "epoch": 17.701468189233278, "grad_norm": 0.20491771399974823, "learning_rate": 3.9703085737588405e-05, "loss": 0.157, "num_input_tokens_seen": 234054112, "step": 108510 }, { "epoch": 17.702283849918434, "grad_norm": 0.023487458005547523, "learning_rate": 3.967529315437357e-05, "loss": 0.0076, "num_input_tokens_seen": 234066208, "step": 108515 }, { "epoch": 17.70309951060359, "grad_norm": 0.022221649065613747, "learning_rate": 3.96475099002227e-05, "loss": 0.0525, "num_input_tokens_seen": 234077280, "step": 108520 }, { "epoch": 17.703915171288745, "grad_norm": 0.003615758614614606, "learning_rate": 3.9619735975699236e-05, "loss": 0.0031, "num_input_tokens_seen": 234087584, "step": 108525 }, { "epoch": 17.704730831973897, "grad_norm": 0.0038794665597379208, "learning_rate": 3.9591971381365665e-05, "loss": 0.0013, "num_input_tokens_seen": 234098688, "step": 108530 }, { "epoch": 17.705546492659053, "grad_norm": 0.017730658873915672, "learning_rate": 3.956421611778499e-05, "loss": 0.0022, "num_input_tokens_seen": 234109120, "step": 108535 }, { "epoch": 17.70636215334421, "grad_norm": 0.09462428092956543, "learning_rate": 3.953647018551948e-05, "loss": 0.0051, "num_input_tokens_seen": 234120224, "step": 108540 }, { "epoch": 17.707177814029365, "grad_norm": 0.4580334722995758, "learning_rate": 3.950873358513168e-05, "loss": 0.0177, "num_input_tokens_seen": 234131552, "step": 108545 }, { "epoch": 17.70799347471452, "grad_norm": 0.0474005788564682, "learning_rate": 3.948100631718338e-05, "loss": 0.0048, "num_input_tokens_seen": 234143520, "step": 108550 }, { "epoch": 17.708809135399672, "grad_norm": 0.0067507429048419, "learning_rate": 3.945328838223688e-05, "loss": 0.006, "num_input_tokens_seen": 234154560, "step": 108555 }, { "epoch": 17.709624796084828, "grad_norm": 0.003643287578597665, "learning_rate": 3.942557978085354e-05, "loss": 0.003, "num_input_tokens_seen": 234164640, "step": 108560 }, { "epoch": 17.710440456769984, "grad_norm": 0.1314956396818161, "learning_rate": 3.939788051359522e-05, "loss": 0.0067, "num_input_tokens_seen": 234174752, "step": 108565 }, { "epoch": 17.71125611745514, "grad_norm": 0.006114289630204439, "learning_rate": 3.93701905810232e-05, "loss": 0.0015, "num_input_tokens_seen": 234185536, "step": 108570 }, { "epoch": 17.712071778140295, "grad_norm": 0.0005731603014282882, "learning_rate": 3.934250998369859e-05, "loss": 0.0047, "num_input_tokens_seen": 234195968, "step": 108575 }, { "epoch": 17.712887438825447, "grad_norm": 0.0033004481811076403, "learning_rate": 3.931483872218239e-05, "loss": 0.0042, "num_input_tokens_seen": 234207392, "step": 108580 }, { "epoch": 17.713703099510603, "grad_norm": 0.004841564688831568, "learning_rate": 3.928717679703542e-05, "loss": 0.0019, "num_input_tokens_seen": 234218432, "step": 108585 }, { "epoch": 17.71451876019576, "grad_norm": 0.2867191731929779, "learning_rate": 3.925952420881823e-05, "loss": 0.0076, "num_input_tokens_seen": 234230016, "step": 108590 }, { "epoch": 17.715334420880914, "grad_norm": 0.00544817466288805, "learning_rate": 3.9231880958091325e-05, "loss": 0.0015, "num_input_tokens_seen": 234240992, "step": 108595 }, { "epoch": 17.71615008156607, "grad_norm": 0.08561509847640991, "learning_rate": 3.920424704541481e-05, "loss": 0.0038, "num_input_tokens_seen": 234251904, "step": 108600 }, { "epoch": 17.716965742251222, "grad_norm": 1.6015598773956299, "learning_rate": 3.9176622471348845e-05, "loss": 0.0801, "num_input_tokens_seen": 234263424, "step": 108605 }, { "epoch": 17.717781402936378, "grad_norm": 0.002208326244726777, "learning_rate": 3.9149007236453204e-05, "loss": 0.0161, "num_input_tokens_seen": 234274272, "step": 108610 }, { "epoch": 17.718597063621534, "grad_norm": 0.1251727193593979, "learning_rate": 3.912140134128761e-05, "loss": 0.0046, "num_input_tokens_seen": 234284352, "step": 108615 }, { "epoch": 17.71941272430669, "grad_norm": 0.027344530448317528, "learning_rate": 3.909380478641139e-05, "loss": 0.0493, "num_input_tokens_seen": 234295072, "step": 108620 }, { "epoch": 17.72022838499184, "grad_norm": 0.039213795214891434, "learning_rate": 3.906621757238393e-05, "loss": 0.0102, "num_input_tokens_seen": 234306592, "step": 108625 }, { "epoch": 17.721044045676997, "grad_norm": 0.012384423986077309, "learning_rate": 3.90386396997644e-05, "loss": 0.0044, "num_input_tokens_seen": 234315808, "step": 108630 }, { "epoch": 17.721859706362153, "grad_norm": 0.002503765281289816, "learning_rate": 3.901107116911145e-05, "loss": 0.0014, "num_input_tokens_seen": 234327104, "step": 108635 }, { "epoch": 17.72267536704731, "grad_norm": 0.11969221383333206, "learning_rate": 3.8983511980984154e-05, "loss": 0.005, "num_input_tokens_seen": 234337696, "step": 108640 }, { "epoch": 17.723491027732464, "grad_norm": 0.005011583678424358, "learning_rate": 3.895596213594066e-05, "loss": 0.0021, "num_input_tokens_seen": 234347840, "step": 108645 }, { "epoch": 17.724306688417617, "grad_norm": 0.0013527945848181844, "learning_rate": 3.892842163453964e-05, "loss": 0.1167, "num_input_tokens_seen": 234357920, "step": 108650 }, { "epoch": 17.725122349102772, "grad_norm": 0.016214122995734215, "learning_rate": 3.8900890477338856e-05, "loss": 0.0039, "num_input_tokens_seen": 234369568, "step": 108655 }, { "epoch": 17.725938009787928, "grad_norm": 1.5241947174072266, "learning_rate": 3.887336866489666e-05, "loss": 0.0453, "num_input_tokens_seen": 234381120, "step": 108660 }, { "epoch": 17.726753670473084, "grad_norm": 0.005558548495173454, "learning_rate": 3.884585619777048e-05, "loss": 0.0035, "num_input_tokens_seen": 234392512, "step": 108665 }, { "epoch": 17.72756933115824, "grad_norm": 0.002146009588614106, "learning_rate": 3.881835307651816e-05, "loss": 0.0048, "num_input_tokens_seen": 234401408, "step": 108670 }, { "epoch": 17.72838499184339, "grad_norm": 0.03569883480668068, "learning_rate": 3.879085930169685e-05, "loss": 0.0026, "num_input_tokens_seen": 234411904, "step": 108675 }, { "epoch": 17.729200652528547, "grad_norm": 0.09351170808076859, "learning_rate": 3.8763374873863886e-05, "loss": 0.0041, "num_input_tokens_seen": 234423552, "step": 108680 }, { "epoch": 17.730016313213703, "grad_norm": 0.024770360440015793, "learning_rate": 3.873589979357633e-05, "loss": 0.0057, "num_input_tokens_seen": 234433760, "step": 108685 }, { "epoch": 17.73083197389886, "grad_norm": 0.0017064602579921484, "learning_rate": 3.870843406139085e-05, "loss": 0.043, "num_input_tokens_seen": 234444096, "step": 108690 }, { "epoch": 17.731647634584014, "grad_norm": 0.00616328464820981, "learning_rate": 3.868097767786416e-05, "loss": 0.0246, "num_input_tokens_seen": 234455360, "step": 108695 }, { "epoch": 17.732463295269167, "grad_norm": 0.0037460988387465477, "learning_rate": 3.86535306435527e-05, "loss": 0.004, "num_input_tokens_seen": 234466016, "step": 108700 }, { "epoch": 17.733278955954322, "grad_norm": 0.008089281618595123, "learning_rate": 3.8626092959012706e-05, "loss": 0.0017, "num_input_tokens_seen": 234477536, "step": 108705 }, { "epoch": 17.734094616639478, "grad_norm": 0.09864860773086548, "learning_rate": 3.8598664624800215e-05, "loss": 0.0167, "num_input_tokens_seen": 234489024, "step": 108710 }, { "epoch": 17.734910277324634, "grad_norm": 0.08230987191200256, "learning_rate": 3.857124564147113e-05, "loss": 0.0038, "num_input_tokens_seen": 234499488, "step": 108715 }, { "epoch": 17.73572593800979, "grad_norm": 0.013991584070026875, "learning_rate": 3.8543836009581115e-05, "loss": 0.0039, "num_input_tokens_seen": 234510784, "step": 108720 }, { "epoch": 17.73654159869494, "grad_norm": 0.11554239690303802, "learning_rate": 3.851643572968566e-05, "loss": 0.0047, "num_input_tokens_seen": 234521696, "step": 108725 }, { "epoch": 17.737357259380097, "grad_norm": 0.016300391405820847, "learning_rate": 3.848904480234006e-05, "loss": 0.0099, "num_input_tokens_seen": 234532896, "step": 108730 }, { "epoch": 17.738172920065253, "grad_norm": 1.1818451881408691, "learning_rate": 3.846166322809941e-05, "loss": 0.0865, "num_input_tokens_seen": 234543040, "step": 108735 }, { "epoch": 17.73898858075041, "grad_norm": 0.030155852437019348, "learning_rate": 3.8434291007518665e-05, "loss": 0.004, "num_input_tokens_seen": 234554496, "step": 108740 }, { "epoch": 17.739804241435564, "grad_norm": 0.0019499200861901045, "learning_rate": 3.8406928141152596e-05, "loss": 0.0011, "num_input_tokens_seen": 234565024, "step": 108745 }, { "epoch": 17.740619902120716, "grad_norm": 0.06138243526220322, "learning_rate": 3.8379574629555656e-05, "loss": 0.0345, "num_input_tokens_seen": 234576800, "step": 108750 }, { "epoch": 17.741435562805872, "grad_norm": 0.005087955389171839, "learning_rate": 3.835223047328229e-05, "loss": 0.0019, "num_input_tokens_seen": 234587584, "step": 108755 }, { "epoch": 17.742251223491028, "grad_norm": 0.005172235891222954, "learning_rate": 3.8324895672886554e-05, "loss": 0.0058, "num_input_tokens_seen": 234599520, "step": 108760 }, { "epoch": 17.743066884176184, "grad_norm": 0.7284666299819946, "learning_rate": 3.829757022892255e-05, "loss": 0.0214, "num_input_tokens_seen": 234610752, "step": 108765 }, { "epoch": 17.74388254486134, "grad_norm": 0.2929638922214508, "learning_rate": 3.827025414194385e-05, "loss": 0.0312, "num_input_tokens_seen": 234621056, "step": 108770 }, { "epoch": 17.74469820554649, "grad_norm": 0.0215049646794796, "learning_rate": 3.824294741250439e-05, "loss": 0.0034, "num_input_tokens_seen": 234632448, "step": 108775 }, { "epoch": 17.745513866231647, "grad_norm": 0.7225651741027832, "learning_rate": 3.821565004115723e-05, "loss": 0.0202, "num_input_tokens_seen": 234644192, "step": 108780 }, { "epoch": 17.746329526916803, "grad_norm": 0.07116445153951645, "learning_rate": 3.8188362028455826e-05, "loss": 0.0288, "num_input_tokens_seen": 234655424, "step": 108785 }, { "epoch": 17.74714518760196, "grad_norm": 0.14058519899845123, "learning_rate": 3.8161083374953056e-05, "loss": 0.0136, "num_input_tokens_seen": 234665664, "step": 108790 }, { "epoch": 17.747960848287114, "grad_norm": 0.001963726244866848, "learning_rate": 3.8133814081201866e-05, "loss": 0.0022, "num_input_tokens_seen": 234677152, "step": 108795 }, { "epoch": 17.748776508972266, "grad_norm": 0.049774639308452606, "learning_rate": 3.810655414775482e-05, "loss": 0.1212, "num_input_tokens_seen": 234688672, "step": 108800 }, { "epoch": 17.749592169657422, "grad_norm": 0.007642972283065319, "learning_rate": 3.807930357516448e-05, "loss": 0.0021, "num_input_tokens_seen": 234700192, "step": 108805 }, { "epoch": 17.750407830342578, "grad_norm": 0.04321343079209328, "learning_rate": 3.8052062363982957e-05, "loss": 0.0029, "num_input_tokens_seen": 234710304, "step": 108810 }, { "epoch": 17.751223491027734, "grad_norm": 0.008718160912394524, "learning_rate": 3.8024830514762465e-05, "loss": 0.0035, "num_input_tokens_seen": 234721024, "step": 108815 }, { "epoch": 17.752039151712886, "grad_norm": 0.020636728033423424, "learning_rate": 3.79976080280548e-05, "loss": 0.0122, "num_input_tokens_seen": 234731936, "step": 108820 }, { "epoch": 17.75285481239804, "grad_norm": 0.06016651913523674, "learning_rate": 3.7970394904411733e-05, "loss": 0.0072, "num_input_tokens_seen": 234742400, "step": 108825 }, { "epoch": 17.753670473083197, "grad_norm": 0.04393483325839043, "learning_rate": 3.7943191144384716e-05, "loss": 0.0068, "num_input_tokens_seen": 234753312, "step": 108830 }, { "epoch": 17.754486133768353, "grad_norm": 0.04480397701263428, "learning_rate": 3.7915996748525086e-05, "loss": 0.0019, "num_input_tokens_seen": 234763296, "step": 108835 }, { "epoch": 17.75530179445351, "grad_norm": 0.0012751177418977022, "learning_rate": 3.788881171738401e-05, "loss": 0.0046, "num_input_tokens_seen": 234774144, "step": 108840 }, { "epoch": 17.75611745513866, "grad_norm": 0.3250577747821808, "learning_rate": 3.7861636051512385e-05, "loss": 0.0064, "num_input_tokens_seen": 234785504, "step": 108845 }, { "epoch": 17.756933115823816, "grad_norm": 0.0019082811195403337, "learning_rate": 3.783446975146099e-05, "loss": 0.0028, "num_input_tokens_seen": 234796736, "step": 108850 }, { "epoch": 17.757748776508972, "grad_norm": 1.1901103258132935, "learning_rate": 3.7807312817780325e-05, "loss": 0.0862, "num_input_tokens_seen": 234807616, "step": 108855 }, { "epoch": 17.758564437194128, "grad_norm": 0.00977563951164484, "learning_rate": 3.7780165251020794e-05, "loss": 0.0019, "num_input_tokens_seen": 234817664, "step": 108860 }, { "epoch": 17.759380097879284, "grad_norm": 0.01310741901397705, "learning_rate": 3.7753027051732615e-05, "loss": 0.0035, "num_input_tokens_seen": 234827904, "step": 108865 }, { "epoch": 17.760195758564436, "grad_norm": 0.0022614533081650734, "learning_rate": 3.772589822046568e-05, "loss": 0.0025, "num_input_tokens_seen": 234838496, "step": 108870 }, { "epoch": 17.76101141924959, "grad_norm": 0.7558068633079529, "learning_rate": 3.7698778757769944e-05, "loss": 0.1636, "num_input_tokens_seen": 234848416, "step": 108875 }, { "epoch": 17.761827079934747, "grad_norm": 0.012398308143019676, "learning_rate": 3.767166866419486e-05, "loss": 0.0032, "num_input_tokens_seen": 234860032, "step": 108880 }, { "epoch": 17.762642740619903, "grad_norm": 0.0747433677315712, "learning_rate": 3.764456794028992e-05, "loss": 0.0174, "num_input_tokens_seen": 234871520, "step": 108885 }, { "epoch": 17.76345840130506, "grad_norm": 0.004475100431591272, "learning_rate": 3.7617476586604304e-05, "loss": 0.0018, "num_input_tokens_seen": 234882528, "step": 108890 }, { "epoch": 17.76427406199021, "grad_norm": 0.04939369857311249, "learning_rate": 3.759039460368724e-05, "loss": 0.0027, "num_input_tokens_seen": 234892000, "step": 108895 }, { "epoch": 17.765089722675366, "grad_norm": 0.003317739814519882, "learning_rate": 3.756332199208728e-05, "loss": 0.0027, "num_input_tokens_seen": 234903264, "step": 108900 }, { "epoch": 17.765905383360522, "grad_norm": 0.000704900361597538, "learning_rate": 3.753625875235345e-05, "loss": 0.0007, "num_input_tokens_seen": 234914208, "step": 108905 }, { "epoch": 17.766721044045678, "grad_norm": 1.6700718402862549, "learning_rate": 3.750920488503379e-05, "loss": 0.0552, "num_input_tokens_seen": 234925536, "step": 108910 }, { "epoch": 17.767536704730833, "grad_norm": 0.023585356771945953, "learning_rate": 3.7482160390676866e-05, "loss": 0.0053, "num_input_tokens_seen": 234934624, "step": 108915 }, { "epoch": 17.768352365415986, "grad_norm": 0.6479132771492004, "learning_rate": 3.745512526983075e-05, "loss": 0.0231, "num_input_tokens_seen": 234946464, "step": 108920 }, { "epoch": 17.76916802610114, "grad_norm": 0.013141671195626259, "learning_rate": 3.7428099523043325e-05, "loss": 0.0043, "num_input_tokens_seen": 234957408, "step": 108925 }, { "epoch": 17.769983686786297, "grad_norm": 0.009158209897577763, "learning_rate": 3.7401083150862216e-05, "loss": 0.0055, "num_input_tokens_seen": 234968256, "step": 108930 }, { "epoch": 17.770799347471453, "grad_norm": 0.017409566789865494, "learning_rate": 3.7374076153835033e-05, "loss": 0.0013, "num_input_tokens_seen": 234979680, "step": 108935 }, { "epoch": 17.77161500815661, "grad_norm": 0.24853363633155823, "learning_rate": 3.734707853250907e-05, "loss": 0.093, "num_input_tokens_seen": 234990080, "step": 108940 }, { "epoch": 17.77243066884176, "grad_norm": 0.04368171840906143, "learning_rate": 3.73200902874315e-05, "loss": 0.0017, "num_input_tokens_seen": 235001600, "step": 108945 }, { "epoch": 17.773246329526916, "grad_norm": 0.0033492459915578365, "learning_rate": 3.729311141914926e-05, "loss": 0.0874, "num_input_tokens_seen": 235011488, "step": 108950 }, { "epoch": 17.774061990212072, "grad_norm": 0.015057655982673168, "learning_rate": 3.72661419282091e-05, "loss": 0.0014, "num_input_tokens_seen": 235022688, "step": 108955 }, { "epoch": 17.774877650897228, "grad_norm": 0.0033204418141394854, "learning_rate": 3.723918181515756e-05, "loss": 0.0339, "num_input_tokens_seen": 235033056, "step": 108960 }, { "epoch": 17.775693311582383, "grad_norm": 0.014483749866485596, "learning_rate": 3.721223108054106e-05, "loss": 0.0017, "num_input_tokens_seen": 235042432, "step": 108965 }, { "epoch": 17.776508972267536, "grad_norm": 0.11528478562831879, "learning_rate": 3.7185289724905814e-05, "loss": 0.0176, "num_input_tokens_seen": 235052896, "step": 108970 }, { "epoch": 17.77732463295269, "grad_norm": 0.06556189060211182, "learning_rate": 3.7158357748797775e-05, "loss": 0.0019, "num_input_tokens_seen": 235063328, "step": 108975 }, { "epoch": 17.778140293637847, "grad_norm": 0.00664970139041543, "learning_rate": 3.7131435152762735e-05, "loss": 0.0501, "num_input_tokens_seen": 235073216, "step": 108980 }, { "epoch": 17.778955954323003, "grad_norm": 0.006126494612544775, "learning_rate": 3.710452193734643e-05, "loss": 0.0124, "num_input_tokens_seen": 235084800, "step": 108985 }, { "epoch": 17.77977161500816, "grad_norm": 0.0034734883811324835, "learning_rate": 3.707761810309418e-05, "loss": 0.0015, "num_input_tokens_seen": 235095872, "step": 108990 }, { "epoch": 17.78058727569331, "grad_norm": 0.07951512932777405, "learning_rate": 3.705072365055112e-05, "loss": 0.0062, "num_input_tokens_seen": 235106336, "step": 108995 }, { "epoch": 17.781402936378466, "grad_norm": 0.009807456284761429, "learning_rate": 3.7023838580262706e-05, "loss": 0.0235, "num_input_tokens_seen": 235117408, "step": 109000 }, { "epoch": 17.782218597063622, "grad_norm": 0.004841372836381197, "learning_rate": 3.699696289277327e-05, "loss": 0.0069, "num_input_tokens_seen": 235127904, "step": 109005 }, { "epoch": 17.783034257748778, "grad_norm": 0.011876796372234821, "learning_rate": 3.697009658862793e-05, "loss": 0.0029, "num_input_tokens_seen": 235138592, "step": 109010 }, { "epoch": 17.78384991843393, "grad_norm": 0.003520722035318613, "learning_rate": 3.694323966837088e-05, "loss": 0.0323, "num_input_tokens_seen": 235147968, "step": 109015 }, { "epoch": 17.784665579119086, "grad_norm": 0.05808113515377045, "learning_rate": 3.6916392132546605e-05, "loss": 0.01, "num_input_tokens_seen": 235158720, "step": 109020 }, { "epoch": 17.78548123980424, "grad_norm": 0.29209601879119873, "learning_rate": 3.6889553981698966e-05, "loss": 0.0226, "num_input_tokens_seen": 235169088, "step": 109025 }, { "epoch": 17.786296900489397, "grad_norm": 0.009934332221746445, "learning_rate": 3.6862725216372185e-05, "loss": 0.0013, "num_input_tokens_seen": 235179328, "step": 109030 }, { "epoch": 17.787112561174553, "grad_norm": 0.06909696012735367, "learning_rate": 3.683590583710961e-05, "loss": 0.003, "num_input_tokens_seen": 235190592, "step": 109035 }, { "epoch": 17.787928221859705, "grad_norm": 0.02774885855615139, "learning_rate": 3.6809095844455134e-05, "loss": 0.0027, "num_input_tokens_seen": 235201216, "step": 109040 }, { "epoch": 17.78874388254486, "grad_norm": 0.0023587734904140234, "learning_rate": 3.678229523895177e-05, "loss": 0.0301, "num_input_tokens_seen": 235212640, "step": 109045 }, { "epoch": 17.789559543230016, "grad_norm": 0.11882767826318741, "learning_rate": 3.675550402114303e-05, "loss": 0.0077, "num_input_tokens_seen": 235223328, "step": 109050 }, { "epoch": 17.790375203915172, "grad_norm": 0.03807162493467331, "learning_rate": 3.6728722191571476e-05, "loss": 0.0016, "num_input_tokens_seen": 235234464, "step": 109055 }, { "epoch": 17.791190864600328, "grad_norm": 0.0052057658322155476, "learning_rate": 3.670194975078017e-05, "loss": 0.0295, "num_input_tokens_seen": 235245664, "step": 109060 }, { "epoch": 17.79200652528548, "grad_norm": 0.0411880686879158, "learning_rate": 3.667518669931158e-05, "loss": 0.0036, "num_input_tokens_seen": 235256416, "step": 109065 }, { "epoch": 17.792822185970635, "grad_norm": 0.0010741720907390118, "learning_rate": 3.6648433037708094e-05, "loss": 0.0047, "num_input_tokens_seen": 235267808, "step": 109070 }, { "epoch": 17.79363784665579, "grad_norm": 0.047295864671468735, "learning_rate": 3.66216887665119e-05, "loss": 0.0043, "num_input_tokens_seen": 235278496, "step": 109075 }, { "epoch": 17.794453507340947, "grad_norm": 0.014782195910811424, "learning_rate": 3.659495388626505e-05, "loss": 0.0011, "num_input_tokens_seen": 235288128, "step": 109080 }, { "epoch": 17.795269168026103, "grad_norm": 0.016459807753562927, "learning_rate": 3.6568228397509286e-05, "loss": 0.0018, "num_input_tokens_seen": 235299904, "step": 109085 }, { "epoch": 17.796084828711255, "grad_norm": 0.0623759850859642, "learning_rate": 3.654151230078628e-05, "loss": 0.0137, "num_input_tokens_seen": 235311040, "step": 109090 }, { "epoch": 17.79690048939641, "grad_norm": 0.005386735778301954, "learning_rate": 3.6514805596637504e-05, "loss": 0.0011, "num_input_tokens_seen": 235322752, "step": 109095 }, { "epoch": 17.797716150081566, "grad_norm": 0.004079962614923716, "learning_rate": 3.648810828560417e-05, "loss": 0.0037, "num_input_tokens_seen": 235334848, "step": 109100 }, { "epoch": 17.798531810766722, "grad_norm": 0.03594367206096649, "learning_rate": 3.6461420368227304e-05, "loss": 0.0026, "num_input_tokens_seen": 235345824, "step": 109105 }, { "epoch": 17.799347471451878, "grad_norm": 0.0010984732070937753, "learning_rate": 3.643474184504775e-05, "loss": 0.0051, "num_input_tokens_seen": 235355712, "step": 109110 }, { "epoch": 17.80016313213703, "grad_norm": 0.008450896479189396, "learning_rate": 3.6408072716606344e-05, "loss": 0.0097, "num_input_tokens_seen": 235366560, "step": 109115 }, { "epoch": 17.800978792822185, "grad_norm": 0.04090797156095505, "learning_rate": 3.6381412983443277e-05, "loss": 0.0021, "num_input_tokens_seen": 235377664, "step": 109120 }, { "epoch": 17.80179445350734, "grad_norm": 0.052297260612249374, "learning_rate": 3.635476264609922e-05, "loss": 0.0029, "num_input_tokens_seen": 235388544, "step": 109125 }, { "epoch": 17.802610114192497, "grad_norm": 0.0020869241561740637, "learning_rate": 3.6328121705113905e-05, "loss": 0.0745, "num_input_tokens_seen": 235399648, "step": 109130 }, { "epoch": 17.803425774877653, "grad_norm": 0.0068589928559958935, "learning_rate": 3.6301490161027574e-05, "loss": 0.018, "num_input_tokens_seen": 235411520, "step": 109135 }, { "epoch": 17.804241435562805, "grad_norm": 0.03771299123764038, "learning_rate": 3.6274868014379624e-05, "loss": 0.0279, "num_input_tokens_seen": 235421728, "step": 109140 }, { "epoch": 17.80505709624796, "grad_norm": 0.09148657321929932, "learning_rate": 3.6248255265709906e-05, "loss": 0.0029, "num_input_tokens_seen": 235431552, "step": 109145 }, { "epoch": 17.805872756933116, "grad_norm": 0.09772548824548721, "learning_rate": 3.6221651915557484e-05, "loss": 0.0075, "num_input_tokens_seen": 235441920, "step": 109150 }, { "epoch": 17.806688417618272, "grad_norm": 0.07137858122587204, "learning_rate": 3.6195057964461764e-05, "loss": 0.0717, "num_input_tokens_seen": 235451104, "step": 109155 }, { "epoch": 17.807504078303424, "grad_norm": 0.16208145022392273, "learning_rate": 3.616847341296137e-05, "loss": 0.0106, "num_input_tokens_seen": 235461952, "step": 109160 }, { "epoch": 17.80831973898858, "grad_norm": 0.011016537435352802, "learning_rate": 3.6141898261595475e-05, "loss": 0.0064, "num_input_tokens_seen": 235472736, "step": 109165 }, { "epoch": 17.809135399673735, "grad_norm": 0.010072106495499611, "learning_rate": 3.611533251090232e-05, "loss": 0.0029, "num_input_tokens_seen": 235483808, "step": 109170 }, { "epoch": 17.80995106035889, "grad_norm": 0.0052125295624136925, "learning_rate": 3.608877616142053e-05, "loss": 0.0021, "num_input_tokens_seen": 235494720, "step": 109175 }, { "epoch": 17.810766721044047, "grad_norm": 0.02806362882256508, "learning_rate": 3.606222921368807e-05, "loss": 0.0027, "num_input_tokens_seen": 235505280, "step": 109180 }, { "epoch": 17.8115823817292, "grad_norm": 0.6100532412528992, "learning_rate": 3.603569166824327e-05, "loss": 0.0084, "num_input_tokens_seen": 235515328, "step": 109185 }, { "epoch": 17.812398042414355, "grad_norm": 0.014112958684563637, "learning_rate": 3.600916352562356e-05, "loss": 0.0011, "num_input_tokens_seen": 235527104, "step": 109190 }, { "epoch": 17.81321370309951, "grad_norm": 0.4220636487007141, "learning_rate": 3.598264478636698e-05, "loss": 0.0113, "num_input_tokens_seen": 235537376, "step": 109195 }, { "epoch": 17.814029363784666, "grad_norm": 0.0014272704720497131, "learning_rate": 3.595613545101056e-05, "loss": 0.0088, "num_input_tokens_seen": 235548896, "step": 109200 }, { "epoch": 17.81484502446982, "grad_norm": 0.0028466274961829185, "learning_rate": 3.592963552009182e-05, "loss": 0.0037, "num_input_tokens_seen": 235561312, "step": 109205 }, { "epoch": 17.815660685154974, "grad_norm": 0.00741557078436017, "learning_rate": 3.590314499414771e-05, "loss": 0.072, "num_input_tokens_seen": 235573600, "step": 109210 }, { "epoch": 17.81647634584013, "grad_norm": 0.04468819499015808, "learning_rate": 3.587666387371513e-05, "loss": 0.0192, "num_input_tokens_seen": 235584320, "step": 109215 }, { "epoch": 17.817292006525285, "grad_norm": 0.1483396738767624, "learning_rate": 3.585019215933072e-05, "loss": 0.0029, "num_input_tokens_seen": 235594464, "step": 109220 }, { "epoch": 17.81810766721044, "grad_norm": 1.1628373861312866, "learning_rate": 3.5823729851530983e-05, "loss": 0.0873, "num_input_tokens_seen": 235605184, "step": 109225 }, { "epoch": 17.818923327895597, "grad_norm": 0.03035479597747326, "learning_rate": 3.5797276950852276e-05, "loss": 0.0067, "num_input_tokens_seen": 235616480, "step": 109230 }, { "epoch": 17.81973898858075, "grad_norm": 0.01161948498338461, "learning_rate": 3.5770833457830554e-05, "loss": 0.0025, "num_input_tokens_seen": 235626816, "step": 109235 }, { "epoch": 17.820554649265905, "grad_norm": 0.002286621369421482, "learning_rate": 3.5744399373001834e-05, "loss": 0.0026, "num_input_tokens_seen": 235637536, "step": 109240 }, { "epoch": 17.82137030995106, "grad_norm": 0.019046874716877937, "learning_rate": 3.57179746969018e-05, "loss": 0.0017, "num_input_tokens_seen": 235647424, "step": 109245 }, { "epoch": 17.822185970636216, "grad_norm": 0.006165751721709967, "learning_rate": 3.569155943006602e-05, "loss": 0.0061, "num_input_tokens_seen": 235659488, "step": 109250 }, { "epoch": 17.82300163132137, "grad_norm": 0.011524243280291557, "learning_rate": 3.566515357302974e-05, "loss": 0.002, "num_input_tokens_seen": 235670880, "step": 109255 }, { "epoch": 17.823817292006524, "grad_norm": 0.9374936819076538, "learning_rate": 3.56387571263283e-05, "loss": 0.0531, "num_input_tokens_seen": 235681824, "step": 109260 }, { "epoch": 17.82463295269168, "grad_norm": 0.0014485190622508526, "learning_rate": 3.561237009049639e-05, "loss": 0.0063, "num_input_tokens_seen": 235692928, "step": 109265 }, { "epoch": 17.825448613376835, "grad_norm": 0.2933654487133026, "learning_rate": 3.558599246606903e-05, "loss": 0.0121, "num_input_tokens_seen": 235704544, "step": 109270 }, { "epoch": 17.82626427406199, "grad_norm": 0.0005558645352721214, "learning_rate": 3.555962425358056e-05, "loss": 0.0015, "num_input_tokens_seen": 235716064, "step": 109275 }, { "epoch": 17.827079934747147, "grad_norm": 0.0026684331241995096, "learning_rate": 3.5533265453565664e-05, "loss": 0.0062, "num_input_tokens_seen": 235727456, "step": 109280 }, { "epoch": 17.8278955954323, "grad_norm": 0.05255614221096039, "learning_rate": 3.55069160665582e-05, "loss": 0.0157, "num_input_tokens_seen": 235737856, "step": 109285 }, { "epoch": 17.828711256117455, "grad_norm": 0.0020313458517193794, "learning_rate": 3.5480576093092466e-05, "loss": 0.0096, "num_input_tokens_seen": 235748096, "step": 109290 }, { "epoch": 17.82952691680261, "grad_norm": 0.03496650978922844, "learning_rate": 3.545424553370202e-05, "loss": 0.0074, "num_input_tokens_seen": 235757888, "step": 109295 }, { "epoch": 17.830342577487766, "grad_norm": 0.0004604421847034246, "learning_rate": 3.5427924388920727e-05, "loss": 0.0015, "num_input_tokens_seen": 235767616, "step": 109300 }, { "epoch": 17.83115823817292, "grad_norm": 0.006558060180395842, "learning_rate": 3.540161265928177e-05, "loss": 0.0944, "num_input_tokens_seen": 235778976, "step": 109305 }, { "epoch": 17.831973898858074, "grad_norm": 0.05562964826822281, "learning_rate": 3.537531034531855e-05, "loss": 0.0019, "num_input_tokens_seen": 235789824, "step": 109310 }, { "epoch": 17.83278955954323, "grad_norm": 0.06375233829021454, "learning_rate": 3.5349017447564135e-05, "loss": 0.0025, "num_input_tokens_seen": 235800864, "step": 109315 }, { "epoch": 17.833605220228385, "grad_norm": 0.002500336617231369, "learning_rate": 3.532273396655128e-05, "loss": 0.0091, "num_input_tokens_seen": 235812416, "step": 109320 }, { "epoch": 17.83442088091354, "grad_norm": 0.009180664084851742, "learning_rate": 3.5296459902812775e-05, "loss": 0.0061, "num_input_tokens_seen": 235824800, "step": 109325 }, { "epoch": 17.835236541598697, "grad_norm": 0.03370778262615204, "learning_rate": 3.527019525688097e-05, "loss": 0.0052, "num_input_tokens_seen": 235835712, "step": 109330 }, { "epoch": 17.83605220228385, "grad_norm": 0.016822507604956627, "learning_rate": 3.524394002928821e-05, "loss": 0.0104, "num_input_tokens_seen": 235846720, "step": 109335 }, { "epoch": 17.836867862969005, "grad_norm": 0.06928674876689911, "learning_rate": 3.5217694220566644e-05, "loss": 0.0095, "num_input_tokens_seen": 235857568, "step": 109340 }, { "epoch": 17.83768352365416, "grad_norm": 0.0071517666801810265, "learning_rate": 3.5191457831248054e-05, "loss": 0.0009, "num_input_tokens_seen": 235867552, "step": 109345 }, { "epoch": 17.838499184339316, "grad_norm": 0.007304340600967407, "learning_rate": 3.516523086186429e-05, "loss": 0.0011, "num_input_tokens_seen": 235878016, "step": 109350 }, { "epoch": 17.839314845024468, "grad_norm": 0.041549425572156906, "learning_rate": 3.513901331294678e-05, "loss": 0.3145, "num_input_tokens_seen": 235888192, "step": 109355 }, { "epoch": 17.840130505709624, "grad_norm": 0.10152288526296616, "learning_rate": 3.5112805185026853e-05, "loss": 0.0108, "num_input_tokens_seen": 235899520, "step": 109360 }, { "epoch": 17.84094616639478, "grad_norm": 0.9411654472351074, "learning_rate": 3.5086606478635706e-05, "loss": 0.1348, "num_input_tokens_seen": 235911712, "step": 109365 }, { "epoch": 17.841761827079935, "grad_norm": 0.02658175677061081, "learning_rate": 3.506041719430425e-05, "loss": 0.0022, "num_input_tokens_seen": 235922464, "step": 109370 }, { "epoch": 17.84257748776509, "grad_norm": 0.000612710602581501, "learning_rate": 3.503423733256328e-05, "loss": 0.0025, "num_input_tokens_seen": 235932352, "step": 109375 }, { "epoch": 17.843393148450243, "grad_norm": 0.0045662992633879185, "learning_rate": 3.500806689394337e-05, "loss": 0.0024, "num_input_tokens_seen": 235943552, "step": 109380 }, { "epoch": 17.8442088091354, "grad_norm": 0.011363174766302109, "learning_rate": 3.4981905878974815e-05, "loss": 0.0011, "num_input_tokens_seen": 235953088, "step": 109385 }, { "epoch": 17.845024469820554, "grad_norm": 0.005420438479632139, "learning_rate": 3.495575428818787e-05, "loss": 0.0203, "num_input_tokens_seen": 235962880, "step": 109390 }, { "epoch": 17.84584013050571, "grad_norm": 0.0020749683026224375, "learning_rate": 3.492961212211249e-05, "loss": 0.0015, "num_input_tokens_seen": 235974848, "step": 109395 }, { "epoch": 17.846655791190866, "grad_norm": 0.13750065863132477, "learning_rate": 3.490347938127847e-05, "loss": 0.005, "num_input_tokens_seen": 235985824, "step": 109400 }, { "epoch": 17.847471451876018, "grad_norm": 0.005063600372523069, "learning_rate": 3.4877356066215614e-05, "loss": 0.0338, "num_input_tokens_seen": 235995808, "step": 109405 }, { "epoch": 17.848287112561174, "grad_norm": 0.050522349774837494, "learning_rate": 3.4851242177453e-05, "loss": 0.0119, "num_input_tokens_seen": 236006464, "step": 109410 }, { "epoch": 17.84910277324633, "grad_norm": 0.12437298893928528, "learning_rate": 3.482513771552021e-05, "loss": 0.0662, "num_input_tokens_seen": 236017664, "step": 109415 }, { "epoch": 17.849918433931485, "grad_norm": 0.006333586294203997, "learning_rate": 3.4799042680945966e-05, "loss": 0.0026, "num_input_tokens_seen": 236028544, "step": 109420 }, { "epoch": 17.85073409461664, "grad_norm": 0.09271486103534698, "learning_rate": 3.477295707425937e-05, "loss": 0.0018, "num_input_tokens_seen": 236039456, "step": 109425 }, { "epoch": 17.851549755301793, "grad_norm": 0.3711385130882263, "learning_rate": 3.474688089598893e-05, "loss": 0.0789, "num_input_tokens_seen": 236049408, "step": 109430 }, { "epoch": 17.85236541598695, "grad_norm": 0.0031212426256388426, "learning_rate": 3.4720814146663226e-05, "loss": 0.0263, "num_input_tokens_seen": 236060736, "step": 109435 }, { "epoch": 17.853181076672104, "grad_norm": 0.011895189061760902, "learning_rate": 3.469475682681045e-05, "loss": 0.0022, "num_input_tokens_seen": 236071648, "step": 109440 }, { "epoch": 17.85399673735726, "grad_norm": 0.055187080055475235, "learning_rate": 3.466870893695867e-05, "loss": 0.0068, "num_input_tokens_seen": 236083360, "step": 109445 }, { "epoch": 17.854812398042416, "grad_norm": 0.026093844324350357, "learning_rate": 3.4642670477635866e-05, "loss": 0.0016, "num_input_tokens_seen": 236092096, "step": 109450 }, { "epoch": 17.855628058727568, "grad_norm": 0.035862281918525696, "learning_rate": 3.4616641449369656e-05, "loss": 0.0026, "num_input_tokens_seen": 236103616, "step": 109455 }, { "epoch": 17.856443719412724, "grad_norm": 0.020404426380991936, "learning_rate": 3.459062185268763e-05, "loss": 0.0125, "num_input_tokens_seen": 236116256, "step": 109460 }, { "epoch": 17.85725938009788, "grad_norm": 0.005481383763253689, "learning_rate": 3.456461168811703e-05, "loss": 0.0034, "num_input_tokens_seen": 236125120, "step": 109465 }, { "epoch": 17.858075040783035, "grad_norm": 0.013153529725968838, "learning_rate": 3.4538610956185044e-05, "loss": 0.0034, "num_input_tokens_seen": 236137024, "step": 109470 }, { "epoch": 17.85889070146819, "grad_norm": 0.005547014065086842, "learning_rate": 3.451261965741859e-05, "loss": 0.0089, "num_input_tokens_seen": 236148192, "step": 109475 }, { "epoch": 17.859706362153343, "grad_norm": 0.002272387035191059, "learning_rate": 3.44866377923444e-05, "loss": 0.0066, "num_input_tokens_seen": 236158304, "step": 109480 }, { "epoch": 17.8605220228385, "grad_norm": 0.0006552781560458243, "learning_rate": 3.446066536148901e-05, "loss": 0.0026, "num_input_tokens_seen": 236168416, "step": 109485 }, { "epoch": 17.861337683523654, "grad_norm": 0.020940493792295456, "learning_rate": 3.4434702365378825e-05, "loss": 0.0023, "num_input_tokens_seen": 236179360, "step": 109490 }, { "epoch": 17.86215334420881, "grad_norm": 0.03716748580336571, "learning_rate": 3.4408748804540034e-05, "loss": 0.0017, "num_input_tokens_seen": 236190080, "step": 109495 }, { "epoch": 17.862969004893966, "grad_norm": 0.12921665608882904, "learning_rate": 3.4382804679498616e-05, "loss": 0.0034, "num_input_tokens_seen": 236200704, "step": 109500 }, { "epoch": 17.863784665579118, "grad_norm": 0.000620523001998663, "learning_rate": 3.4356869990780305e-05, "loss": 0.0012, "num_input_tokens_seen": 236211136, "step": 109505 }, { "epoch": 17.864600326264274, "grad_norm": 0.10015097260475159, "learning_rate": 3.4330944738910744e-05, "loss": 0.0034, "num_input_tokens_seen": 236221920, "step": 109510 }, { "epoch": 17.86541598694943, "grad_norm": 0.0024320646189153194, "learning_rate": 3.430502892441528e-05, "loss": 0.0015, "num_input_tokens_seen": 236232448, "step": 109515 }, { "epoch": 17.866231647634585, "grad_norm": 0.010310580022633076, "learning_rate": 3.427912254781923e-05, "loss": 0.0055, "num_input_tokens_seen": 236243616, "step": 109520 }, { "epoch": 17.86704730831974, "grad_norm": 0.0052977572195231915, "learning_rate": 3.425322560964761e-05, "loss": 0.066, "num_input_tokens_seen": 236254624, "step": 109525 }, { "epoch": 17.867862969004893, "grad_norm": 0.030509650707244873, "learning_rate": 3.422733811042506e-05, "loss": 0.0039, "num_input_tokens_seen": 236265984, "step": 109530 }, { "epoch": 17.86867862969005, "grad_norm": 0.0016161062521860003, "learning_rate": 3.420146005067659e-05, "loss": 0.0342, "num_input_tokens_seen": 236275744, "step": 109535 }, { "epoch": 17.869494290375204, "grad_norm": 0.04856735095381737, "learning_rate": 3.4175591430926244e-05, "loss": 0.0021, "num_input_tokens_seen": 236286752, "step": 109540 }, { "epoch": 17.87030995106036, "grad_norm": 0.007528121117502451, "learning_rate": 3.414973225169854e-05, "loss": 0.0043, "num_input_tokens_seen": 236297920, "step": 109545 }, { "epoch": 17.871125611745512, "grad_norm": 0.003953055012971163, "learning_rate": 3.412388251351756e-05, "loss": 0.0052, "num_input_tokens_seen": 236308224, "step": 109550 }, { "epoch": 17.871941272430668, "grad_norm": 0.025258127599954605, "learning_rate": 3.4098042216907045e-05, "loss": 0.0066, "num_input_tokens_seen": 236318528, "step": 109555 }, { "epoch": 17.872756933115824, "grad_norm": 0.050536926835775375, "learning_rate": 3.4072211362390746e-05, "loss": 0.0044, "num_input_tokens_seen": 236327936, "step": 109560 }, { "epoch": 17.87357259380098, "grad_norm": 0.005220276769250631, "learning_rate": 3.40463899504922e-05, "loss": 0.0029, "num_input_tokens_seen": 236337568, "step": 109565 }, { "epoch": 17.874388254486135, "grad_norm": 0.01073362771421671, "learning_rate": 3.402057798173463e-05, "loss": 0.0029, "num_input_tokens_seen": 236350336, "step": 109570 }, { "epoch": 17.875203915171287, "grad_norm": 0.0012042643502354622, "learning_rate": 3.39947754566412e-05, "loss": 0.014, "num_input_tokens_seen": 236362176, "step": 109575 }, { "epoch": 17.876019575856443, "grad_norm": 0.007591853849589825, "learning_rate": 3.3968982375734813e-05, "loss": 0.0027, "num_input_tokens_seen": 236372928, "step": 109580 }, { "epoch": 17.8768352365416, "grad_norm": 0.9416438937187195, "learning_rate": 3.394319873953816e-05, "loss": 0.1174, "num_input_tokens_seen": 236383904, "step": 109585 }, { "epoch": 17.877650897226754, "grad_norm": 0.06702055037021637, "learning_rate": 3.391742454857388e-05, "loss": 0.0021, "num_input_tokens_seen": 236395584, "step": 109590 }, { "epoch": 17.87846655791191, "grad_norm": 2.8062806129455566, "learning_rate": 3.3891659803364225e-05, "loss": 0.1472, "num_input_tokens_seen": 236405824, "step": 109595 }, { "epoch": 17.879282218597062, "grad_norm": 0.14425620436668396, "learning_rate": 3.386590450443139e-05, "loss": 0.0059, "num_input_tokens_seen": 236417024, "step": 109600 }, { "epoch": 17.880097879282218, "grad_norm": 0.0031849080696702003, "learning_rate": 3.3840158652297335e-05, "loss": 0.0063, "num_input_tokens_seen": 236427136, "step": 109605 }, { "epoch": 17.880913539967374, "grad_norm": 0.57267826795578, "learning_rate": 3.381442224748382e-05, "loss": 0.01, "num_input_tokens_seen": 236438816, "step": 109610 }, { "epoch": 17.88172920065253, "grad_norm": 0.006588062737137079, "learning_rate": 3.378869529051243e-05, "loss": 0.0023, "num_input_tokens_seen": 236450336, "step": 109615 }, { "epoch": 17.882544861337685, "grad_norm": 0.0033268635161221027, "learning_rate": 3.376297778190457e-05, "loss": 0.0033, "num_input_tokens_seen": 236460352, "step": 109620 }, { "epoch": 17.883360522022837, "grad_norm": 0.007259331177920103, "learning_rate": 3.373726972218144e-05, "loss": 0.0233, "num_input_tokens_seen": 236471264, "step": 109625 }, { "epoch": 17.884176182707993, "grad_norm": 0.006705754436552525, "learning_rate": 3.3711571111864014e-05, "loss": 0.0042, "num_input_tokens_seen": 236481248, "step": 109630 }, { "epoch": 17.88499184339315, "grad_norm": 0.026630578562617302, "learning_rate": 3.3685881951473096e-05, "loss": 0.0063, "num_input_tokens_seen": 236491648, "step": 109635 }, { "epoch": 17.885807504078304, "grad_norm": 0.364725798368454, "learning_rate": 3.366020224152949e-05, "loss": 0.0502, "num_input_tokens_seen": 236501248, "step": 109640 }, { "epoch": 17.88662316476346, "grad_norm": 0.003389750374481082, "learning_rate": 3.363453198255328e-05, "loss": 0.0024, "num_input_tokens_seen": 236512064, "step": 109645 }, { "epoch": 17.887438825448612, "grad_norm": 2.0405433177948, "learning_rate": 3.360887117506506e-05, "loss": 0.0599, "num_input_tokens_seen": 236523424, "step": 109650 }, { "epoch": 17.888254486133768, "grad_norm": 0.03257044404745102, "learning_rate": 3.358321981958462e-05, "loss": 0.0042, "num_input_tokens_seen": 236533888, "step": 109655 }, { "epoch": 17.889070146818923, "grad_norm": 0.005299916956573725, "learning_rate": 3.3557577916632055e-05, "loss": 0.0033, "num_input_tokens_seen": 236544960, "step": 109660 }, { "epoch": 17.88988580750408, "grad_norm": 0.017927639186382294, "learning_rate": 3.353194546672672e-05, "loss": 0.005, "num_input_tokens_seen": 236556640, "step": 109665 }, { "epoch": 17.890701468189235, "grad_norm": 0.009881078265607357, "learning_rate": 3.3506322470388426e-05, "loss": 0.0075, "num_input_tokens_seen": 236566592, "step": 109670 }, { "epoch": 17.891517128874387, "grad_norm": 0.32996535301208496, "learning_rate": 3.3480708928136204e-05, "loss": 0.0081, "num_input_tokens_seen": 236577696, "step": 109675 }, { "epoch": 17.892332789559543, "grad_norm": 0.007434056140482426, "learning_rate": 3.34551048404893e-05, "loss": 0.0014, "num_input_tokens_seen": 236587968, "step": 109680 }, { "epoch": 17.8931484502447, "grad_norm": 0.08623666316270828, "learning_rate": 3.342951020796647e-05, "loss": 0.0059, "num_input_tokens_seen": 236598272, "step": 109685 }, { "epoch": 17.893964110929854, "grad_norm": 0.022791855037212372, "learning_rate": 3.3403925031086525e-05, "loss": 0.0018, "num_input_tokens_seen": 236608192, "step": 109690 }, { "epoch": 17.894779771615006, "grad_norm": 0.004132063128054142, "learning_rate": 3.337834931036798e-05, "loss": 0.0023, "num_input_tokens_seen": 236619264, "step": 109695 }, { "epoch": 17.895595432300162, "grad_norm": 0.05206795036792755, "learning_rate": 3.335278304632916e-05, "loss": 0.1477, "num_input_tokens_seen": 236630912, "step": 109700 }, { "epoch": 17.896411092985318, "grad_norm": 0.011668484658002853, "learning_rate": 3.332722623948814e-05, "loss": 0.0062, "num_input_tokens_seen": 236641344, "step": 109705 }, { "epoch": 17.897226753670473, "grad_norm": 0.0005960998823866248, "learning_rate": 3.330167889036295e-05, "loss": 0.0011, "num_input_tokens_seen": 236652032, "step": 109710 }, { "epoch": 17.89804241435563, "grad_norm": 0.0061317929066717625, "learning_rate": 3.327614099947124e-05, "loss": 0.0034, "num_input_tokens_seen": 236662816, "step": 109715 }, { "epoch": 17.898858075040785, "grad_norm": 0.03300924226641655, "learning_rate": 3.325061256733058e-05, "loss": 0.0124, "num_input_tokens_seen": 236674912, "step": 109720 }, { "epoch": 17.899673735725937, "grad_norm": 0.02139178290963173, "learning_rate": 3.3225093594458465e-05, "loss": 0.0199, "num_input_tokens_seen": 236684448, "step": 109725 }, { "epoch": 17.900489396411093, "grad_norm": 1.0528470277786255, "learning_rate": 3.319958408137192e-05, "loss": 0.0438, "num_input_tokens_seen": 236695328, "step": 109730 }, { "epoch": 17.90130505709625, "grad_norm": 0.025451093912124634, "learning_rate": 3.317408402858796e-05, "loss": 0.0105, "num_input_tokens_seen": 236706144, "step": 109735 }, { "epoch": 17.902120717781404, "grad_norm": 0.0055743977427482605, "learning_rate": 3.314859343662335e-05, "loss": 0.0018, "num_input_tokens_seen": 236716640, "step": 109740 }, { "epoch": 17.902936378466556, "grad_norm": 0.3203646242618561, "learning_rate": 3.312311230599491e-05, "loss": 0.0162, "num_input_tokens_seen": 236728224, "step": 109745 }, { "epoch": 17.903752039151712, "grad_norm": 0.4513990581035614, "learning_rate": 3.3097640637218654e-05, "loss": 0.0321, "num_input_tokens_seen": 236738784, "step": 109750 }, { "epoch": 17.904567699836868, "grad_norm": 0.19070300459861755, "learning_rate": 3.307217843081123e-05, "loss": 0.0039, "num_input_tokens_seen": 236749344, "step": 109755 }, { "epoch": 17.905383360522023, "grad_norm": 0.029198521748185158, "learning_rate": 3.3046725687288285e-05, "loss": 0.0041, "num_input_tokens_seen": 236760224, "step": 109760 }, { "epoch": 17.90619902120718, "grad_norm": 0.01763581484556198, "learning_rate": 3.302128240716595e-05, "loss": 0.005, "num_input_tokens_seen": 236771008, "step": 109765 }, { "epoch": 17.90701468189233, "grad_norm": 0.013902156613767147, "learning_rate": 3.299584859095961e-05, "loss": 0.002, "num_input_tokens_seen": 236781824, "step": 109770 }, { "epoch": 17.907830342577487, "grad_norm": 0.0525086410343647, "learning_rate": 3.297042423918495e-05, "loss": 0.0077, "num_input_tokens_seen": 236792320, "step": 109775 }, { "epoch": 17.908646003262643, "grad_norm": 0.06992597877979279, "learning_rate": 3.2945009352357e-05, "loss": 0.0048, "num_input_tokens_seen": 236803328, "step": 109780 }, { "epoch": 17.9094616639478, "grad_norm": 0.014887413010001183, "learning_rate": 3.291960393099108e-05, "loss": 0.0096, "num_input_tokens_seen": 236813952, "step": 109785 }, { "epoch": 17.910277324632954, "grad_norm": 0.009227737784385681, "learning_rate": 3.289420797560172e-05, "loss": 0.0031, "num_input_tokens_seen": 236825824, "step": 109790 }, { "epoch": 17.911092985318106, "grad_norm": 0.1385674774646759, "learning_rate": 3.2868821486704003e-05, "loss": 0.1071, "num_input_tokens_seen": 236836576, "step": 109795 }, { "epoch": 17.911908646003262, "grad_norm": 0.0404207669198513, "learning_rate": 3.284344446481208e-05, "loss": 0.0096, "num_input_tokens_seen": 236847840, "step": 109800 }, { "epoch": 17.912724306688418, "grad_norm": 0.008892850950360298, "learning_rate": 3.2818076910440476e-05, "loss": 0.0036, "num_input_tokens_seen": 236858144, "step": 109805 }, { "epoch": 17.913539967373573, "grad_norm": 0.0018868370680138469, "learning_rate": 3.279271882410312e-05, "loss": 0.0019, "num_input_tokens_seen": 236870016, "step": 109810 }, { "epoch": 17.91435562805873, "grad_norm": 0.0038989060558378696, "learning_rate": 3.27673702063141e-05, "loss": 0.0056, "num_input_tokens_seen": 236880384, "step": 109815 }, { "epoch": 17.91517128874388, "grad_norm": 1.554358720779419, "learning_rate": 3.274203105758694e-05, "loss": 0.0717, "num_input_tokens_seen": 236889248, "step": 109820 }, { "epoch": 17.915986949429037, "grad_norm": 0.04720429703593254, "learning_rate": 3.2716701378435355e-05, "loss": 0.0476, "num_input_tokens_seen": 236900160, "step": 109825 }, { "epoch": 17.916802610114193, "grad_norm": 0.0034177436027675867, "learning_rate": 3.269138116937259e-05, "loss": 0.003, "num_input_tokens_seen": 236909184, "step": 109830 }, { "epoch": 17.91761827079935, "grad_norm": 0.35917583107948303, "learning_rate": 3.2666070430911796e-05, "loss": 0.0104, "num_input_tokens_seen": 236919456, "step": 109835 }, { "epoch": 17.918433931484504, "grad_norm": 0.008511784486472607, "learning_rate": 3.264076916356601e-05, "loss": 0.0012, "num_input_tokens_seen": 236929920, "step": 109840 }, { "epoch": 17.919249592169656, "grad_norm": 0.024363014847040176, "learning_rate": 3.2615477367847866e-05, "loss": 0.0022, "num_input_tokens_seen": 236940960, "step": 109845 }, { "epoch": 17.920065252854812, "grad_norm": 0.037325985729694366, "learning_rate": 3.2590195044269965e-05, "loss": 0.006, "num_input_tokens_seen": 236951328, "step": 109850 }, { "epoch": 17.920880913539968, "grad_norm": 0.0685247927904129, "learning_rate": 3.256492219334478e-05, "loss": 0.0082, "num_input_tokens_seen": 236962176, "step": 109855 }, { "epoch": 17.921696574225123, "grad_norm": 0.001942574162967503, "learning_rate": 3.2539658815584404e-05, "loss": 0.0058, "num_input_tokens_seen": 236973152, "step": 109860 }, { "epoch": 17.92251223491028, "grad_norm": 0.2310718446969986, "learning_rate": 3.2514404911500814e-05, "loss": 0.0064, "num_input_tokens_seen": 236984448, "step": 109865 }, { "epoch": 17.92332789559543, "grad_norm": 0.00533346738666296, "learning_rate": 3.248916048160588e-05, "loss": 0.0011, "num_input_tokens_seen": 236995296, "step": 109870 }, { "epoch": 17.924143556280587, "grad_norm": 0.029340386390686035, "learning_rate": 3.246392552641125e-05, "loss": 0.005, "num_input_tokens_seen": 237006656, "step": 109875 }, { "epoch": 17.924959216965743, "grad_norm": 0.0018949677469208837, "learning_rate": 3.2438700046428185e-05, "loss": 0.0025, "num_input_tokens_seen": 237018304, "step": 109880 }, { "epoch": 17.9257748776509, "grad_norm": 0.5329261422157288, "learning_rate": 3.2413484042167984e-05, "loss": 0.0191, "num_input_tokens_seen": 237028192, "step": 109885 }, { "epoch": 17.92659053833605, "grad_norm": 0.02960762195289135, "learning_rate": 3.2388277514141864e-05, "loss": 0.0028, "num_input_tokens_seen": 237039008, "step": 109890 }, { "epoch": 17.927406199021206, "grad_norm": 0.2280931919813156, "learning_rate": 3.236308046286035e-05, "loss": 0.0088, "num_input_tokens_seen": 237050912, "step": 109895 }, { "epoch": 17.928221859706362, "grad_norm": 0.012176332995295525, "learning_rate": 3.2337892888834375e-05, "loss": 0.0112, "num_input_tokens_seen": 237061664, "step": 109900 }, { "epoch": 17.929037520391518, "grad_norm": 0.03758019208908081, "learning_rate": 3.231271479257414e-05, "loss": 0.002, "num_input_tokens_seen": 237072928, "step": 109905 }, { "epoch": 17.929853181076673, "grad_norm": 0.004258170258253813, "learning_rate": 3.228754617459023e-05, "loss": 0.0028, "num_input_tokens_seen": 237082720, "step": 109910 }, { "epoch": 17.930668841761825, "grad_norm": 0.262134313583374, "learning_rate": 3.2262387035392305e-05, "loss": 0.0092, "num_input_tokens_seen": 237093376, "step": 109915 }, { "epoch": 17.93148450244698, "grad_norm": 0.10040028393268585, "learning_rate": 3.2237237375490666e-05, "loss": 0.0017, "num_input_tokens_seen": 237103264, "step": 109920 }, { "epoch": 17.932300163132137, "grad_norm": 0.0031387123744934797, "learning_rate": 3.221209719539469e-05, "loss": 0.0014, "num_input_tokens_seen": 237113632, "step": 109925 }, { "epoch": 17.933115823817293, "grad_norm": 0.0022007671650499105, "learning_rate": 3.218696649561409e-05, "loss": 0.0126, "num_input_tokens_seen": 237124192, "step": 109930 }, { "epoch": 17.93393148450245, "grad_norm": 0.002424369566142559, "learning_rate": 3.2161845276658e-05, "loss": 0.0206, "num_input_tokens_seen": 237134880, "step": 109935 }, { "epoch": 17.9347471451876, "grad_norm": 0.0174933522939682, "learning_rate": 3.213673353903568e-05, "loss": 0.01, "num_input_tokens_seen": 237145248, "step": 109940 }, { "epoch": 17.935562805872756, "grad_norm": 0.00102341512683779, "learning_rate": 3.211163128325589e-05, "loss": 0.0127, "num_input_tokens_seen": 237155712, "step": 109945 }, { "epoch": 17.936378466557912, "grad_norm": 0.0010239763651043177, "learning_rate": 3.208653850982746e-05, "loss": 0.0604, "num_input_tokens_seen": 237165120, "step": 109950 }, { "epoch": 17.937194127243067, "grad_norm": 0.011651289649307728, "learning_rate": 3.206145521925896e-05, "loss": 0.0016, "num_input_tokens_seen": 237175712, "step": 109955 }, { "epoch": 17.938009787928223, "grad_norm": 0.007782896514981985, "learning_rate": 3.2036381412058725e-05, "loss": 0.0024, "num_input_tokens_seen": 237186208, "step": 109960 }, { "epoch": 17.938825448613375, "grad_norm": 1.0816344022750854, "learning_rate": 3.2011317088734836e-05, "loss": 0.0186, "num_input_tokens_seen": 237197824, "step": 109965 }, { "epoch": 17.93964110929853, "grad_norm": 0.011112635023891926, "learning_rate": 3.1986262249795286e-05, "loss": 0.001, "num_input_tokens_seen": 237209344, "step": 109970 }, { "epoch": 17.940456769983687, "grad_norm": 0.007119552697986364, "learning_rate": 3.196121689574782e-05, "loss": 0.0011, "num_input_tokens_seen": 237219680, "step": 109975 }, { "epoch": 17.941272430668842, "grad_norm": 0.014736584387719631, "learning_rate": 3.193618102710011e-05, "loss": 0.0033, "num_input_tokens_seen": 237230720, "step": 109980 }, { "epoch": 17.942088091353998, "grad_norm": 0.000350876827724278, "learning_rate": 3.191115464435945e-05, "loss": 0.0056, "num_input_tokens_seen": 237241312, "step": 109985 }, { "epoch": 17.94290375203915, "grad_norm": 0.0012466544285416603, "learning_rate": 3.188613774803306e-05, "loss": 0.0042, "num_input_tokens_seen": 237250880, "step": 109990 }, { "epoch": 17.943719412724306, "grad_norm": 0.010409148409962654, "learning_rate": 3.186113033862792e-05, "loss": 0.0016, "num_input_tokens_seen": 237260384, "step": 109995 }, { "epoch": 17.94453507340946, "grad_norm": 0.050100281834602356, "learning_rate": 3.1836132416650844e-05, "loss": 0.0031, "num_input_tokens_seen": 237270528, "step": 110000 }, { "epoch": 17.945350734094617, "grad_norm": 0.02285275235772133, "learning_rate": 3.1811143982608426e-05, "loss": 0.0356, "num_input_tokens_seen": 237281632, "step": 110005 }, { "epoch": 17.946166394779773, "grad_norm": 0.021413106471300125, "learning_rate": 3.1786165037007156e-05, "loss": 0.101, "num_input_tokens_seen": 237292960, "step": 110010 }, { "epoch": 17.946982055464925, "grad_norm": 0.003660618094727397, "learning_rate": 3.176119558035323e-05, "loss": 0.0104, "num_input_tokens_seen": 237302720, "step": 110015 }, { "epoch": 17.94779771615008, "grad_norm": 0.08859164267778397, "learning_rate": 3.173623561315259e-05, "loss": 0.0031, "num_input_tokens_seen": 237313952, "step": 110020 }, { "epoch": 17.948613376835237, "grad_norm": 0.0033154471311718225, "learning_rate": 3.171128513591132e-05, "loss": 0.003, "num_input_tokens_seen": 237323584, "step": 110025 }, { "epoch": 17.949429037520392, "grad_norm": 0.0026501587126404047, "learning_rate": 3.1686344149134735e-05, "loss": 0.1346, "num_input_tokens_seen": 237334048, "step": 110030 }, { "epoch": 17.950244698205548, "grad_norm": 0.002132370602339506, "learning_rate": 3.1661412653328724e-05, "loss": 0.0343, "num_input_tokens_seen": 237344416, "step": 110035 }, { "epoch": 17.9510603588907, "grad_norm": 0.05342055857181549, "learning_rate": 3.1636490648998095e-05, "loss": 0.0027, "num_input_tokens_seen": 237354656, "step": 110040 }, { "epoch": 17.951876019575856, "grad_norm": 0.027702845633029938, "learning_rate": 3.1611578136648336e-05, "loss": 0.0031, "num_input_tokens_seen": 237365728, "step": 110045 }, { "epoch": 17.95269168026101, "grad_norm": 0.006339787505567074, "learning_rate": 3.158667511678393e-05, "loss": 0.0042, "num_input_tokens_seen": 237375264, "step": 110050 }, { "epoch": 17.953507340946167, "grad_norm": 0.029385803267359734, "learning_rate": 3.156178158990991e-05, "loss": 0.0021, "num_input_tokens_seen": 237387264, "step": 110055 }, { "epoch": 17.954323001631323, "grad_norm": 0.009832211770117283, "learning_rate": 3.153689755653061e-05, "loss": 0.0027, "num_input_tokens_seen": 237398784, "step": 110060 }, { "epoch": 17.955138662316475, "grad_norm": 0.005969129968434572, "learning_rate": 3.151202301715034e-05, "loss": 0.0036, "num_input_tokens_seen": 237410656, "step": 110065 }, { "epoch": 17.95595432300163, "grad_norm": 0.18711057305335999, "learning_rate": 3.148715797227331e-05, "loss": 0.0252, "num_input_tokens_seen": 237421088, "step": 110070 }, { "epoch": 17.956769983686787, "grad_norm": 0.06399157643318176, "learning_rate": 3.1462302422403334e-05, "loss": 0.0034, "num_input_tokens_seen": 237430208, "step": 110075 }, { "epoch": 17.957585644371942, "grad_norm": 0.00300413160584867, "learning_rate": 3.143745636804418e-05, "loss": 0.002, "num_input_tokens_seen": 237441696, "step": 110080 }, { "epoch": 17.958401305057095, "grad_norm": 0.0013683438301086426, "learning_rate": 3.14126198096994e-05, "loss": 0.0007, "num_input_tokens_seen": 237452288, "step": 110085 }, { "epoch": 17.95921696574225, "grad_norm": 0.002750659128651023, "learning_rate": 3.138779274787235e-05, "loss": 0.002, "num_input_tokens_seen": 237462656, "step": 110090 }, { "epoch": 17.960032626427406, "grad_norm": 0.018564768135547638, "learning_rate": 3.136297518306614e-05, "loss": 0.0614, "num_input_tokens_seen": 237474112, "step": 110095 }, { "epoch": 17.96084828711256, "grad_norm": 0.007910870015621185, "learning_rate": 3.133816711578369e-05, "loss": 0.0039, "num_input_tokens_seen": 237485632, "step": 110100 }, { "epoch": 17.961663947797717, "grad_norm": 0.008938297629356384, "learning_rate": 3.131336854652789e-05, "loss": 0.0321, "num_input_tokens_seen": 237496032, "step": 110105 }, { "epoch": 17.96247960848287, "grad_norm": 0.015826964750885963, "learning_rate": 3.1288579475801215e-05, "loss": 0.0029, "num_input_tokens_seen": 237506816, "step": 110110 }, { "epoch": 17.963295269168025, "grad_norm": 0.004122958052903414, "learning_rate": 3.12637999041061e-05, "loss": 0.0048, "num_input_tokens_seen": 237516896, "step": 110115 }, { "epoch": 17.96411092985318, "grad_norm": 0.008585760369896889, "learning_rate": 3.123902983194471e-05, "loss": 0.0089, "num_input_tokens_seen": 237528416, "step": 110120 }, { "epoch": 17.964926590538337, "grad_norm": 0.0040806797333061695, "learning_rate": 3.1214269259819014e-05, "loss": 0.008, "num_input_tokens_seen": 237540096, "step": 110125 }, { "epoch": 17.965742251223492, "grad_norm": 0.06702255457639694, "learning_rate": 3.11895181882309e-05, "loss": 0.013, "num_input_tokens_seen": 237551552, "step": 110130 }, { "epoch": 17.966557911908644, "grad_norm": 0.0009204765083268285, "learning_rate": 3.116477661768191e-05, "loss": 0.0032, "num_input_tokens_seen": 237563584, "step": 110135 }, { "epoch": 17.9673735725938, "grad_norm": 0.0035411592107266188, "learning_rate": 3.1140044548673476e-05, "loss": 0.0097, "num_input_tokens_seen": 237575168, "step": 110140 }, { "epoch": 17.968189233278956, "grad_norm": 0.8468047380447388, "learning_rate": 3.11153219817068e-05, "loss": 0.0214, "num_input_tokens_seen": 237586400, "step": 110145 }, { "epoch": 17.96900489396411, "grad_norm": 0.004821680020540953, "learning_rate": 3.109060891728299e-05, "loss": 0.0007, "num_input_tokens_seen": 237596992, "step": 110150 }, { "epoch": 17.969820554649267, "grad_norm": 0.032332733273506165, "learning_rate": 3.1065905355902865e-05, "loss": 0.0017, "num_input_tokens_seen": 237606592, "step": 110155 }, { "epoch": 17.97063621533442, "grad_norm": 0.008430082350969315, "learning_rate": 3.104121129806697e-05, "loss": 0.0013, "num_input_tokens_seen": 237617696, "step": 110160 }, { "epoch": 17.971451876019575, "grad_norm": 0.0006375116645358503, "learning_rate": 3.101652674427585e-05, "loss": 0.0008, "num_input_tokens_seen": 237629376, "step": 110165 }, { "epoch": 17.97226753670473, "grad_norm": 0.0010647986782714725, "learning_rate": 3.0991851695029825e-05, "loss": 0.0014, "num_input_tokens_seen": 237639136, "step": 110170 }, { "epoch": 17.973083197389887, "grad_norm": 0.009568249806761742, "learning_rate": 3.0967186150828886e-05, "loss": 0.0023, "num_input_tokens_seen": 237651232, "step": 110175 }, { "epoch": 17.973898858075042, "grad_norm": 0.017908114939928055, "learning_rate": 3.0942530112172905e-05, "loss": 0.0053, "num_input_tokens_seen": 237660992, "step": 110180 }, { "epoch": 17.974714518760194, "grad_norm": 0.002710700035095215, "learning_rate": 3.0917883579561604e-05, "loss": 0.0056, "num_input_tokens_seen": 237670816, "step": 110185 }, { "epoch": 17.97553017944535, "grad_norm": 0.0035616292152553797, "learning_rate": 3.0893246553494516e-05, "loss": 0.1807, "num_input_tokens_seen": 237680992, "step": 110190 }, { "epoch": 17.976345840130506, "grad_norm": 0.0023239515721797943, "learning_rate": 3.08686190344708e-05, "loss": 0.0029, "num_input_tokens_seen": 237690144, "step": 110195 }, { "epoch": 17.97716150081566, "grad_norm": 0.026209762319922447, "learning_rate": 3.084400102298973e-05, "loss": 0.0014, "num_input_tokens_seen": 237700960, "step": 110200 }, { "epoch": 17.977977161500817, "grad_norm": 0.23370394110679626, "learning_rate": 3.0819392519550125e-05, "loss": 0.0117, "num_input_tokens_seen": 237710528, "step": 110205 }, { "epoch": 17.97879282218597, "grad_norm": 0.023154260590672493, "learning_rate": 3.079479352465076e-05, "loss": 0.0064, "num_input_tokens_seen": 237720736, "step": 110210 }, { "epoch": 17.979608482871125, "grad_norm": 0.013101979158818722, "learning_rate": 3.077020403879005e-05, "loss": 0.0123, "num_input_tokens_seen": 237731552, "step": 110215 }, { "epoch": 17.98042414355628, "grad_norm": 0.021233070641756058, "learning_rate": 3.07456240624665e-05, "loss": 0.0015, "num_input_tokens_seen": 237741760, "step": 110220 }, { "epoch": 17.981239804241437, "grad_norm": 1.0014257431030273, "learning_rate": 3.072105359617811e-05, "loss": 0.0421, "num_input_tokens_seen": 237753248, "step": 110225 }, { "epoch": 17.982055464926592, "grad_norm": 0.047984082251787186, "learning_rate": 3.0696492640422954e-05, "loss": 0.004, "num_input_tokens_seen": 237763392, "step": 110230 }, { "epoch": 17.982871125611744, "grad_norm": 0.015628721565008163, "learning_rate": 3.067194119569866e-05, "loss": 0.006, "num_input_tokens_seen": 237773248, "step": 110235 }, { "epoch": 17.9836867862969, "grad_norm": 0.04452935978770256, "learning_rate": 3.064739926250293e-05, "loss": 0.0036, "num_input_tokens_seen": 237784416, "step": 110240 }, { "epoch": 17.984502446982056, "grad_norm": 0.0025733227375894785, "learning_rate": 3.062286684133303e-05, "loss": 0.0042, "num_input_tokens_seen": 237795936, "step": 110245 }, { "epoch": 17.98531810766721, "grad_norm": 0.0020124067086726427, "learning_rate": 3.059834393268618e-05, "loss": 0.0017, "num_input_tokens_seen": 237807488, "step": 110250 }, { "epoch": 17.986133768352367, "grad_norm": 0.0032147427555173635, "learning_rate": 3.057383053705937e-05, "loss": 0.0029, "num_input_tokens_seen": 237818176, "step": 110255 }, { "epoch": 17.98694942903752, "grad_norm": 0.0008061889675445855, "learning_rate": 3.054932665494936e-05, "loss": 0.0098, "num_input_tokens_seen": 237829376, "step": 110260 }, { "epoch": 17.987765089722675, "grad_norm": 0.002898773644119501, "learning_rate": 3.052483228685282e-05, "loss": 0.0024, "num_input_tokens_seen": 237839648, "step": 110265 }, { "epoch": 17.98858075040783, "grad_norm": 0.001121152308769524, "learning_rate": 3.050034743326613e-05, "loss": 0.0039, "num_input_tokens_seen": 237849760, "step": 110270 }, { "epoch": 17.989396411092986, "grad_norm": 0.00148231559433043, "learning_rate": 3.0475872094685443e-05, "loss": 0.0806, "num_input_tokens_seen": 237860864, "step": 110275 }, { "epoch": 17.99021207177814, "grad_norm": 0.020031476393342018, "learning_rate": 3.0451406271606974e-05, "loss": 0.0053, "num_input_tokens_seen": 237871328, "step": 110280 }, { "epoch": 17.991027732463294, "grad_norm": 0.00787524227052927, "learning_rate": 3.0426949964526272e-05, "loss": 0.003, "num_input_tokens_seen": 237883392, "step": 110285 }, { "epoch": 17.99184339314845, "grad_norm": 0.00380660779774189, "learning_rate": 3.0402503173939277e-05, "loss": 0.0011, "num_input_tokens_seen": 237895648, "step": 110290 }, { "epoch": 17.992659053833606, "grad_norm": 0.0005485599976964295, "learning_rate": 3.0378065900341146e-05, "loss": 0.0013, "num_input_tokens_seen": 237906112, "step": 110295 }, { "epoch": 17.99347471451876, "grad_norm": 0.010188670828938484, "learning_rate": 3.035363814422737e-05, "loss": 0.0011, "num_input_tokens_seen": 237917856, "step": 110300 }, { "epoch": 17.994290375203914, "grad_norm": 0.009910130873322487, "learning_rate": 3.0329219906092776e-05, "loss": 0.0009, "num_input_tokens_seen": 237928416, "step": 110305 }, { "epoch": 17.99510603588907, "grad_norm": 0.0014891490573063493, "learning_rate": 3.030481118643247e-05, "loss": 0.0196, "num_input_tokens_seen": 237940032, "step": 110310 }, { "epoch": 17.995921696574225, "grad_norm": 0.0022545901592820883, "learning_rate": 3.0280411985740995e-05, "loss": 0.0014, "num_input_tokens_seen": 237950944, "step": 110315 }, { "epoch": 17.99673735725938, "grad_norm": 0.0061975023709237576, "learning_rate": 3.0256022304512854e-05, "loss": 0.0008, "num_input_tokens_seen": 237961408, "step": 110320 }, { "epoch": 17.997553017944536, "grad_norm": 0.44082239270210266, "learning_rate": 3.023164214324231e-05, "loss": 0.0089, "num_input_tokens_seen": 237971712, "step": 110325 }, { "epoch": 17.99836867862969, "grad_norm": 0.0010443934006616473, "learning_rate": 3.0207271502423527e-05, "loss": 0.0022, "num_input_tokens_seen": 237982880, "step": 110330 }, { "epoch": 17.999184339314844, "grad_norm": 0.003938247915357351, "learning_rate": 3.018291038255033e-05, "loss": 0.0016, "num_input_tokens_seen": 237993952, "step": 110335 }, { "epoch": 18.0, "grad_norm": 0.030845345929265022, "learning_rate": 3.0158558784116442e-05, "loss": 0.0057, "num_input_tokens_seen": 238003072, "step": 110340 }, { "epoch": 18.0, "eval_loss": 0.25989124178886414, "eval_runtime": 104.7806, "eval_samples_per_second": 26.007, "eval_steps_per_second": 6.509, "num_input_tokens_seen": 238003072, "step": 110340 }, { "epoch": 18.000815660685156, "grad_norm": 0.0032420025672763586, "learning_rate": 3.0134216707615404e-05, "loss": 0.0118, "num_input_tokens_seen": 238013216, "step": 110345 }, { "epoch": 18.00163132137031, "grad_norm": 0.02002076990902424, "learning_rate": 3.0109884153540545e-05, "loss": 0.0033, "num_input_tokens_seen": 238023488, "step": 110350 }, { "epoch": 18.002446982055464, "grad_norm": 0.003790483344346285, "learning_rate": 3.0085561122384974e-05, "loss": 0.0006, "num_input_tokens_seen": 238034368, "step": 110355 }, { "epoch": 18.00326264274062, "grad_norm": 0.024364322423934937, "learning_rate": 3.0061247614641684e-05, "loss": 0.0026, "num_input_tokens_seen": 238046080, "step": 110360 }, { "epoch": 18.004078303425775, "grad_norm": 0.024060510098934174, "learning_rate": 3.0036943630803282e-05, "loss": 0.0062, "num_input_tokens_seen": 238056064, "step": 110365 }, { "epoch": 18.00489396411093, "grad_norm": 0.0031494831200689077, "learning_rate": 3.0012649171362482e-05, "loss": 0.0008, "num_input_tokens_seen": 238067552, "step": 110370 }, { "epoch": 18.005709624796086, "grad_norm": 0.034977883100509644, "learning_rate": 2.998836423681156e-05, "loss": 0.0079, "num_input_tokens_seen": 238078336, "step": 110375 }, { "epoch": 18.00652528548124, "grad_norm": 0.004964748863130808, "learning_rate": 2.9964088827642564e-05, "loss": 0.0012, "num_input_tokens_seen": 238089408, "step": 110380 }, { "epoch": 18.007340946166394, "grad_norm": 0.0015604979125782847, "learning_rate": 2.993982294434777e-05, "loss": 0.0081, "num_input_tokens_seen": 238100608, "step": 110385 }, { "epoch": 18.00815660685155, "grad_norm": 0.14705047011375427, "learning_rate": 2.991556658741862e-05, "loss": 0.0052, "num_input_tokens_seen": 238110496, "step": 110390 }, { "epoch": 18.008972267536706, "grad_norm": 0.0069566452875733376, "learning_rate": 2.9891319757347047e-05, "loss": 0.0266, "num_input_tokens_seen": 238120960, "step": 110395 }, { "epoch": 18.00978792822186, "grad_norm": 0.018964657559990883, "learning_rate": 2.986708245462405e-05, "loss": 0.001, "num_input_tokens_seen": 238131840, "step": 110400 }, { "epoch": 18.010603588907014, "grad_norm": 0.02770310640335083, "learning_rate": 2.984285467974124e-05, "loss": 0.0032, "num_input_tokens_seen": 238142272, "step": 110405 }, { "epoch": 18.01141924959217, "grad_norm": 0.25226134061813354, "learning_rate": 2.981863643318922e-05, "loss": 0.0078, "num_input_tokens_seen": 238153376, "step": 110410 }, { "epoch": 18.012234910277325, "grad_norm": 0.0031739319674670696, "learning_rate": 2.979442771545915e-05, "loss": 0.003, "num_input_tokens_seen": 238163744, "step": 110415 }, { "epoch": 18.01305057096248, "grad_norm": 0.016581352800130844, "learning_rate": 2.9770228527041364e-05, "loss": 0.0015, "num_input_tokens_seen": 238175232, "step": 110420 }, { "epoch": 18.013866231647636, "grad_norm": 0.006048796698451042, "learning_rate": 2.9746038868426584e-05, "loss": 0.0013, "num_input_tokens_seen": 238187040, "step": 110425 }, { "epoch": 18.01468189233279, "grad_norm": 0.0018911100924015045, "learning_rate": 2.9721858740104747e-05, "loss": 0.001, "num_input_tokens_seen": 238199744, "step": 110430 }, { "epoch": 18.015497553017944, "grad_norm": 0.05036916956305504, "learning_rate": 2.9697688142566127e-05, "loss": 0.0089, "num_input_tokens_seen": 238210976, "step": 110435 }, { "epoch": 18.0163132137031, "grad_norm": 0.002081788145005703, "learning_rate": 2.967352707630039e-05, "loss": 0.0015, "num_input_tokens_seen": 238222304, "step": 110440 }, { "epoch": 18.017128874388256, "grad_norm": 0.013876624405384064, "learning_rate": 2.9649375541797418e-05, "loss": 0.0069, "num_input_tokens_seen": 238232192, "step": 110445 }, { "epoch": 18.017944535073408, "grad_norm": 0.3170938789844513, "learning_rate": 2.9625233539546326e-05, "loss": 0.0058, "num_input_tokens_seen": 238242944, "step": 110450 }, { "epoch": 18.018760195758563, "grad_norm": 0.03798019513487816, "learning_rate": 2.960110107003672e-05, "loss": 0.0038, "num_input_tokens_seen": 238254176, "step": 110455 }, { "epoch": 18.01957585644372, "grad_norm": 0.0033752650488168, "learning_rate": 2.9576978133757536e-05, "loss": 0.002, "num_input_tokens_seen": 238266144, "step": 110460 }, { "epoch": 18.020391517128875, "grad_norm": 0.03110847808420658, "learning_rate": 2.955286473119767e-05, "loss": 0.0051, "num_input_tokens_seen": 238277280, "step": 110465 }, { "epoch": 18.02120717781403, "grad_norm": 0.032985590398311615, "learning_rate": 2.9528760862845783e-05, "loss": 0.0021, "num_input_tokens_seen": 238288000, "step": 110470 }, { "epoch": 18.022022838499183, "grad_norm": 0.009545426815748215, "learning_rate": 2.9504666529190426e-05, "loss": 0.0022, "num_input_tokens_seen": 238298720, "step": 110475 }, { "epoch": 18.02283849918434, "grad_norm": 0.005889372434467077, "learning_rate": 2.9480581730719825e-05, "loss": 0.0147, "num_input_tokens_seen": 238309952, "step": 110480 }, { "epoch": 18.023654159869494, "grad_norm": 0.023687105625867844, "learning_rate": 2.945650646792214e-05, "loss": 0.018, "num_input_tokens_seen": 238321152, "step": 110485 }, { "epoch": 18.02446982055465, "grad_norm": 0.12146256864070892, "learning_rate": 2.9432440741285314e-05, "loss": 0.0063, "num_input_tokens_seen": 238331392, "step": 110490 }, { "epoch": 18.025285481239806, "grad_norm": 0.07189668715000153, "learning_rate": 2.940838455129696e-05, "loss": 0.1654, "num_input_tokens_seen": 238341600, "step": 110495 }, { "epoch": 18.026101141924958, "grad_norm": 0.12367760390043259, "learning_rate": 2.9384337898444747e-05, "loss": 0.0034, "num_input_tokens_seen": 238350912, "step": 110500 }, { "epoch": 18.026916802610113, "grad_norm": 0.01857893355190754, "learning_rate": 2.9360300783215832e-05, "loss": 0.0664, "num_input_tokens_seen": 238362240, "step": 110505 }, { "epoch": 18.02773246329527, "grad_norm": 0.3206346333026886, "learning_rate": 2.9336273206097663e-05, "loss": 0.0081, "num_input_tokens_seen": 238371936, "step": 110510 }, { "epoch": 18.028548123980425, "grad_norm": 0.0030566102359443903, "learning_rate": 2.931225516757685e-05, "loss": 0.006, "num_input_tokens_seen": 238384352, "step": 110515 }, { "epoch": 18.02936378466558, "grad_norm": 0.0024228261318057775, "learning_rate": 2.9288246668140396e-05, "loss": 0.0019, "num_input_tokens_seen": 238394272, "step": 110520 }, { "epoch": 18.030179445350733, "grad_norm": 0.009001361206173897, "learning_rate": 2.9264247708274628e-05, "loss": 0.0018, "num_input_tokens_seen": 238405632, "step": 110525 }, { "epoch": 18.03099510603589, "grad_norm": 0.0012214462039992213, "learning_rate": 2.9240258288466215e-05, "loss": 0.0018, "num_input_tokens_seen": 238415968, "step": 110530 }, { "epoch": 18.031810766721044, "grad_norm": 0.005284599494189024, "learning_rate": 2.921627840920099e-05, "loss": 0.0491, "num_input_tokens_seen": 238426464, "step": 110535 }, { "epoch": 18.0326264274062, "grad_norm": 0.00877090822905302, "learning_rate": 2.919230807096529e-05, "loss": 0.0022, "num_input_tokens_seen": 238437824, "step": 110540 }, { "epoch": 18.033442088091356, "grad_norm": 0.00559457391500473, "learning_rate": 2.916834727424461e-05, "loss": 0.0006, "num_input_tokens_seen": 238448128, "step": 110545 }, { "epoch": 18.034257748776508, "grad_norm": 0.2803525924682617, "learning_rate": 2.9144396019524788e-05, "loss": 0.0041, "num_input_tokens_seen": 238458944, "step": 110550 }, { "epoch": 18.035073409461663, "grad_norm": 0.0057829516008496284, "learning_rate": 2.9120454307290933e-05, "loss": 0.0029, "num_input_tokens_seen": 238469824, "step": 110555 }, { "epoch": 18.03588907014682, "grad_norm": 0.01205528900027275, "learning_rate": 2.90965221380286e-05, "loss": 0.0042, "num_input_tokens_seen": 238480288, "step": 110560 }, { "epoch": 18.036704730831975, "grad_norm": 0.004738457966595888, "learning_rate": 2.9072599512222464e-05, "loss": 0.0094, "num_input_tokens_seen": 238491808, "step": 110565 }, { "epoch": 18.03752039151713, "grad_norm": 0.020472265779972076, "learning_rate": 2.9048686430357685e-05, "loss": 0.0018, "num_input_tokens_seen": 238501984, "step": 110570 }, { "epoch": 18.038336052202283, "grad_norm": 0.02266506850719452, "learning_rate": 2.9024782892918543e-05, "loss": 0.0013, "num_input_tokens_seen": 238512576, "step": 110575 }, { "epoch": 18.03915171288744, "grad_norm": 0.1214066594839096, "learning_rate": 2.9000888900389764e-05, "loss": 0.0049, "num_input_tokens_seen": 238523424, "step": 110580 }, { "epoch": 18.039967373572594, "grad_norm": 0.018037667497992516, "learning_rate": 2.8977004453255406e-05, "loss": 0.003, "num_input_tokens_seen": 238533984, "step": 110585 }, { "epoch": 18.04078303425775, "grad_norm": 0.003038729541003704, "learning_rate": 2.8953129551999634e-05, "loss": 0.002, "num_input_tokens_seen": 238546144, "step": 110590 }, { "epoch": 18.041598694942905, "grad_norm": 0.0028753632213920355, "learning_rate": 2.892926419710623e-05, "loss": 0.0102, "num_input_tokens_seen": 238558112, "step": 110595 }, { "epoch": 18.042414355628058, "grad_norm": 0.004927318077534437, "learning_rate": 2.8905408389058917e-05, "loss": 0.0015, "num_input_tokens_seen": 238568288, "step": 110600 }, { "epoch": 18.043230016313213, "grad_norm": 0.10549242049455643, "learning_rate": 2.8881562128341088e-05, "loss": 0.0028, "num_input_tokens_seen": 238579008, "step": 110605 }, { "epoch": 18.04404567699837, "grad_norm": 0.004573399666696787, "learning_rate": 2.885772541543613e-05, "loss": 0.0035, "num_input_tokens_seen": 238590112, "step": 110610 }, { "epoch": 18.044861337683525, "grad_norm": 0.23879069089889526, "learning_rate": 2.8833898250826994e-05, "loss": 0.0077, "num_input_tokens_seen": 238601984, "step": 110615 }, { "epoch": 18.045676998368677, "grad_norm": 0.001544365775771439, "learning_rate": 2.881008063499663e-05, "loss": 0.0128, "num_input_tokens_seen": 238612896, "step": 110620 }, { "epoch": 18.046492659053833, "grad_norm": 0.21908503770828247, "learning_rate": 2.878627256842775e-05, "loss": 0.0045, "num_input_tokens_seen": 238623936, "step": 110625 }, { "epoch": 18.04730831973899, "grad_norm": 0.0007382305338978767, "learning_rate": 2.8762474051602816e-05, "loss": 0.0041, "num_input_tokens_seen": 238633824, "step": 110630 }, { "epoch": 18.048123980424144, "grad_norm": 0.02994553931057453, "learning_rate": 2.8738685085004156e-05, "loss": 0.0017, "num_input_tokens_seen": 238645152, "step": 110635 }, { "epoch": 18.0489396411093, "grad_norm": 0.0038313083350658417, "learning_rate": 2.871490566911389e-05, "loss": 0.0017, "num_input_tokens_seen": 238656160, "step": 110640 }, { "epoch": 18.049755301794452, "grad_norm": 0.018869640305638313, "learning_rate": 2.8691135804413905e-05, "loss": 0.0054, "num_input_tokens_seen": 238666720, "step": 110645 }, { "epoch": 18.050570962479608, "grad_norm": 0.005525832995772362, "learning_rate": 2.8667375491385928e-05, "loss": 0.0019, "num_input_tokens_seen": 238677152, "step": 110650 }, { "epoch": 18.051386623164763, "grad_norm": 0.047520287334918976, "learning_rate": 2.864362473051163e-05, "loss": 0.004, "num_input_tokens_seen": 238689024, "step": 110655 }, { "epoch": 18.05220228384992, "grad_norm": 0.02587849088013172, "learning_rate": 2.8619883522272072e-05, "loss": 0.0036, "num_input_tokens_seen": 238700000, "step": 110660 }, { "epoch": 18.053017944535075, "grad_norm": 0.007071511819958687, "learning_rate": 2.85961518671487e-05, "loss": 0.0036, "num_input_tokens_seen": 238711136, "step": 110665 }, { "epoch": 18.053833605220227, "grad_norm": 0.03864255174994469, "learning_rate": 2.8572429765622243e-05, "loss": 0.0065, "num_input_tokens_seen": 238721888, "step": 110670 }, { "epoch": 18.054649265905383, "grad_norm": 0.028970861807465553, "learning_rate": 2.8548717218173647e-05, "loss": 0.0019, "num_input_tokens_seen": 238733024, "step": 110675 }, { "epoch": 18.05546492659054, "grad_norm": 0.0006666274857707322, "learning_rate": 2.8525014225283195e-05, "loss": 0.0005, "num_input_tokens_seen": 238744704, "step": 110680 }, { "epoch": 18.056280587275694, "grad_norm": 0.24778135120868683, "learning_rate": 2.8501320787431673e-05, "loss": 0.0189, "num_input_tokens_seen": 238754656, "step": 110685 }, { "epoch": 18.05709624796085, "grad_norm": 0.020206930115818977, "learning_rate": 2.8477636905098802e-05, "loss": 0.0143, "num_input_tokens_seen": 238764672, "step": 110690 }, { "epoch": 18.057911908646002, "grad_norm": 0.04875219985842705, "learning_rate": 2.845396257876487e-05, "loss": 0.0025, "num_input_tokens_seen": 238774976, "step": 110695 }, { "epoch": 18.058727569331158, "grad_norm": 0.012949072755873203, "learning_rate": 2.84302978089096e-05, "loss": 0.0022, "num_input_tokens_seen": 238786240, "step": 110700 }, { "epoch": 18.059543230016313, "grad_norm": 0.030359946191310883, "learning_rate": 2.840664259601261e-05, "loss": 0.0243, "num_input_tokens_seen": 238796384, "step": 110705 }, { "epoch": 18.06035889070147, "grad_norm": 0.002239334164187312, "learning_rate": 2.838299694055324e-05, "loss": 0.0012, "num_input_tokens_seen": 238806016, "step": 110710 }, { "epoch": 18.061174551386625, "grad_norm": 0.0010983234969899058, "learning_rate": 2.835936084301072e-05, "loss": 0.0061, "num_input_tokens_seen": 238816576, "step": 110715 }, { "epoch": 18.061990212071777, "grad_norm": 0.031010150909423828, "learning_rate": 2.8335734303864047e-05, "loss": 0.0032, "num_input_tokens_seen": 238826272, "step": 110720 }, { "epoch": 18.062805872756933, "grad_norm": 0.061946142464876175, "learning_rate": 2.8312117323592125e-05, "loss": 0.0023, "num_input_tokens_seen": 238837760, "step": 110725 }, { "epoch": 18.063621533442088, "grad_norm": 0.0005517059471458197, "learning_rate": 2.8288509902673454e-05, "loss": 0.0025, "num_input_tokens_seen": 238848960, "step": 110730 }, { "epoch": 18.064437194127244, "grad_norm": 0.0025749073829501867, "learning_rate": 2.8264912041586598e-05, "loss": 0.002, "num_input_tokens_seen": 238858912, "step": 110735 }, { "epoch": 18.0652528548124, "grad_norm": 0.0054060714319348335, "learning_rate": 2.8241323740809676e-05, "loss": 0.0007, "num_input_tokens_seen": 238869440, "step": 110740 }, { "epoch": 18.06606851549755, "grad_norm": 0.017807895317673683, "learning_rate": 2.821774500082086e-05, "loss": 0.0015, "num_input_tokens_seen": 238880160, "step": 110745 }, { "epoch": 18.066884176182707, "grad_norm": 0.01684551313519478, "learning_rate": 2.819417582209788e-05, "loss": 0.0055, "num_input_tokens_seen": 238891264, "step": 110750 }, { "epoch": 18.067699836867863, "grad_norm": 0.004358191974461079, "learning_rate": 2.8170616205118516e-05, "loss": 0.0012, "num_input_tokens_seen": 238901568, "step": 110755 }, { "epoch": 18.06851549755302, "grad_norm": 0.007040275260806084, "learning_rate": 2.8147066150360167e-05, "loss": 0.0049, "num_input_tokens_seen": 238912416, "step": 110760 }, { "epoch": 18.069331158238175, "grad_norm": 0.005362845957279205, "learning_rate": 2.8123525658300066e-05, "loss": 0.0019, "num_input_tokens_seen": 238922528, "step": 110765 }, { "epoch": 18.070146818923327, "grad_norm": 0.00669235922396183, "learning_rate": 2.8099994729415377e-05, "loss": 0.0012, "num_input_tokens_seen": 238933408, "step": 110770 }, { "epoch": 18.070962479608482, "grad_norm": 0.0018705344991758466, "learning_rate": 2.8076473364182897e-05, "loss": 0.0025, "num_input_tokens_seen": 238944032, "step": 110775 }, { "epoch": 18.071778140293638, "grad_norm": 0.018955344334244728, "learning_rate": 2.8052961563079403e-05, "loss": 0.0017, "num_input_tokens_seen": 238954944, "step": 110780 }, { "epoch": 18.072593800978794, "grad_norm": 1.0042839050292969, "learning_rate": 2.8029459326581353e-05, "loss": 0.0733, "num_input_tokens_seen": 238966528, "step": 110785 }, { "epoch": 18.07340946166395, "grad_norm": 0.05079049989581108, "learning_rate": 2.8005966655165026e-05, "loss": 0.0028, "num_input_tokens_seen": 238977920, "step": 110790 }, { "epoch": 18.0742251223491, "grad_norm": 0.014313722960650921, "learning_rate": 2.7982483549306435e-05, "loss": 0.0948, "num_input_tokens_seen": 238987264, "step": 110795 }, { "epoch": 18.075040783034257, "grad_norm": 0.012376434169709682, "learning_rate": 2.795901000948181e-05, "loss": 0.012, "num_input_tokens_seen": 238997504, "step": 110800 }, { "epoch": 18.075856443719413, "grad_norm": 0.37495285272598267, "learning_rate": 2.7935546036166548e-05, "loss": 0.0169, "num_input_tokens_seen": 239008448, "step": 110805 }, { "epoch": 18.07667210440457, "grad_norm": 0.007738239597529173, "learning_rate": 2.7912091629836324e-05, "loss": 0.0058, "num_input_tokens_seen": 239019616, "step": 110810 }, { "epoch": 18.07748776508972, "grad_norm": 0.0139017878100276, "learning_rate": 2.7888646790966476e-05, "loss": 0.0009, "num_input_tokens_seen": 239030944, "step": 110815 }, { "epoch": 18.078303425774877, "grad_norm": 0.004696589894592762, "learning_rate": 2.786521152003213e-05, "loss": 0.0016, "num_input_tokens_seen": 239041280, "step": 110820 }, { "epoch": 18.079119086460032, "grad_norm": 0.007817994803190231, "learning_rate": 2.784178581750818e-05, "loss": 0.0016, "num_input_tokens_seen": 239053280, "step": 110825 }, { "epoch": 18.079934747145188, "grad_norm": 0.055292125791311264, "learning_rate": 2.781836968386947e-05, "loss": 0.0015, "num_input_tokens_seen": 239063040, "step": 110830 }, { "epoch": 18.080750407830344, "grad_norm": 0.03732554242014885, "learning_rate": 2.7794963119590454e-05, "loss": 0.0088, "num_input_tokens_seen": 239072352, "step": 110835 }, { "epoch": 18.081566068515496, "grad_norm": 0.4371916949748993, "learning_rate": 2.7771566125145588e-05, "loss": 0.0167, "num_input_tokens_seen": 239082752, "step": 110840 }, { "epoch": 18.08238172920065, "grad_norm": 0.046880487352609634, "learning_rate": 2.774817870100893e-05, "loss": 0.0034, "num_input_tokens_seen": 239091872, "step": 110845 }, { "epoch": 18.083197389885807, "grad_norm": 0.02115476131439209, "learning_rate": 2.7724800847654608e-05, "loss": 0.0008, "num_input_tokens_seen": 239102464, "step": 110850 }, { "epoch": 18.084013050570963, "grad_norm": 0.003253078320994973, "learning_rate": 2.7701432565556296e-05, "loss": 0.0058, "num_input_tokens_seen": 239112288, "step": 110855 }, { "epoch": 18.08482871125612, "grad_norm": 0.013155912049114704, "learning_rate": 2.767807385518756e-05, "loss": 0.0011, "num_input_tokens_seen": 239122464, "step": 110860 }, { "epoch": 18.08564437194127, "grad_norm": 0.0035945619456470013, "learning_rate": 2.765472471702185e-05, "loss": 0.0031, "num_input_tokens_seen": 239132448, "step": 110865 }, { "epoch": 18.086460032626427, "grad_norm": 0.00699431961402297, "learning_rate": 2.7631385151532405e-05, "loss": 0.0028, "num_input_tokens_seen": 239143520, "step": 110870 }, { "epoch": 18.087275693311582, "grad_norm": 0.025897471234202385, "learning_rate": 2.7608055159192125e-05, "loss": 0.0047, "num_input_tokens_seen": 239155392, "step": 110875 }, { "epoch": 18.088091353996738, "grad_norm": 0.0018394642975181341, "learning_rate": 2.7584734740473905e-05, "loss": 0.0008, "num_input_tokens_seen": 239166080, "step": 110880 }, { "epoch": 18.088907014681894, "grad_norm": 0.011876602657139301, "learning_rate": 2.756142389585037e-05, "loss": 0.0023, "num_input_tokens_seen": 239178176, "step": 110885 }, { "epoch": 18.089722675367046, "grad_norm": 0.0019783477764576674, "learning_rate": 2.753812262579386e-05, "loss": 0.1267, "num_input_tokens_seen": 239188096, "step": 110890 }, { "epoch": 18.0905383360522, "grad_norm": 0.000996107584796846, "learning_rate": 2.7514830930776667e-05, "loss": 0.0019, "num_input_tokens_seen": 239198816, "step": 110895 }, { "epoch": 18.091353996737357, "grad_norm": 0.013970524072647095, "learning_rate": 2.749154881127086e-05, "loss": 0.0027, "num_input_tokens_seen": 239208896, "step": 110900 }, { "epoch": 18.092169657422513, "grad_norm": 0.04456076771020889, "learning_rate": 2.7468276267748172e-05, "loss": 0.0056, "num_input_tokens_seen": 239219072, "step": 110905 }, { "epoch": 18.09298531810767, "grad_norm": 0.06121499463915825, "learning_rate": 2.7445013300680333e-05, "loss": 0.0081, "num_input_tokens_seen": 239228544, "step": 110910 }, { "epoch": 18.09380097879282, "grad_norm": 0.0019346483750268817, "learning_rate": 2.7421759910538745e-05, "loss": 0.0005, "num_input_tokens_seen": 239239264, "step": 110915 }, { "epoch": 18.094616639477977, "grad_norm": 0.0024536096025258303, "learning_rate": 2.739851609779481e-05, "loss": 0.0432, "num_input_tokens_seen": 239251040, "step": 110920 }, { "epoch": 18.095432300163132, "grad_norm": 0.06327703595161438, "learning_rate": 2.737528186291932e-05, "loss": 0.111, "num_input_tokens_seen": 239261472, "step": 110925 }, { "epoch": 18.096247960848288, "grad_norm": 0.002084777457639575, "learning_rate": 2.735205720638351e-05, "loss": 0.0417, "num_input_tokens_seen": 239272576, "step": 110930 }, { "epoch": 18.097063621533444, "grad_norm": 0.011925769969820976, "learning_rate": 2.732884212865766e-05, "loss": 0.0059, "num_input_tokens_seen": 239282688, "step": 110935 }, { "epoch": 18.097879282218596, "grad_norm": 0.007653436157852411, "learning_rate": 2.730563663021257e-05, "loss": 0.0017, "num_input_tokens_seen": 239293568, "step": 110940 }, { "epoch": 18.09869494290375, "grad_norm": 0.0030614316929131746, "learning_rate": 2.7282440711518363e-05, "loss": 0.0098, "num_input_tokens_seen": 239303584, "step": 110945 }, { "epoch": 18.099510603588907, "grad_norm": 0.020306801423430443, "learning_rate": 2.725925437304522e-05, "loss": 0.0018, "num_input_tokens_seen": 239313600, "step": 110950 }, { "epoch": 18.100326264274063, "grad_norm": 1.0667283535003662, "learning_rate": 2.7236077615262976e-05, "loss": 0.1147, "num_input_tokens_seen": 239323488, "step": 110955 }, { "epoch": 18.10114192495922, "grad_norm": 0.001774086500518024, "learning_rate": 2.721291043864138e-05, "loss": 0.0247, "num_input_tokens_seen": 239334112, "step": 110960 }, { "epoch": 18.10195758564437, "grad_norm": 0.8720222115516663, "learning_rate": 2.7189752843649885e-05, "loss": 0.1425, "num_input_tokens_seen": 239344736, "step": 110965 }, { "epoch": 18.102773246329527, "grad_norm": 0.0074751973152160645, "learning_rate": 2.716660483075789e-05, "loss": 0.0047, "num_input_tokens_seen": 239355872, "step": 110970 }, { "epoch": 18.103588907014682, "grad_norm": 0.1653270125389099, "learning_rate": 2.714346640043447e-05, "loss": 0.0062, "num_input_tokens_seen": 239364960, "step": 110975 }, { "epoch": 18.104404567699838, "grad_norm": 0.0005325135425664485, "learning_rate": 2.7120337553148578e-05, "loss": 0.0005, "num_input_tokens_seen": 239375936, "step": 110980 }, { "epoch": 18.10522022838499, "grad_norm": 0.00400956068187952, "learning_rate": 2.7097218289368896e-05, "loss": 0.0019, "num_input_tokens_seen": 239386144, "step": 110985 }, { "epoch": 18.106035889070146, "grad_norm": 0.05589064210653305, "learning_rate": 2.7074108609564053e-05, "loss": 0.004, "num_input_tokens_seen": 239396288, "step": 110990 }, { "epoch": 18.1068515497553, "grad_norm": 0.003153751138597727, "learning_rate": 2.7051008514202336e-05, "loss": 0.0036, "num_input_tokens_seen": 239406752, "step": 110995 }, { "epoch": 18.107667210440457, "grad_norm": 0.0011209991062059999, "learning_rate": 2.7027918003751873e-05, "loss": 0.0766, "num_input_tokens_seen": 239417984, "step": 111000 }, { "epoch": 18.108482871125613, "grad_norm": 0.004855471663177013, "learning_rate": 2.7004837078680678e-05, "loss": 0.0089, "num_input_tokens_seen": 239429248, "step": 111005 }, { "epoch": 18.109298531810765, "grad_norm": 0.0037382803857326508, "learning_rate": 2.698176573945654e-05, "loss": 0.0059, "num_input_tokens_seen": 239438464, "step": 111010 }, { "epoch": 18.11011419249592, "grad_norm": 0.0015625183004885912, "learning_rate": 2.695870398654693e-05, "loss": 0.0009, "num_input_tokens_seen": 239449472, "step": 111015 }, { "epoch": 18.110929853181077, "grad_norm": 0.7098719477653503, "learning_rate": 2.693565182041924e-05, "loss": 0.0324, "num_input_tokens_seen": 239460864, "step": 111020 }, { "epoch": 18.111745513866232, "grad_norm": 0.0011978426482528448, "learning_rate": 2.6912609241540818e-05, "loss": 0.0009, "num_input_tokens_seen": 239471232, "step": 111025 }, { "epoch": 18.112561174551388, "grad_norm": 0.003983037546277046, "learning_rate": 2.688957625037841e-05, "loss": 0.0038, "num_input_tokens_seen": 239481568, "step": 111030 }, { "epoch": 18.11337683523654, "grad_norm": 0.0013602040708065033, "learning_rate": 2.6866552847399028e-05, "loss": 0.0011, "num_input_tokens_seen": 239493472, "step": 111035 }, { "epoch": 18.114192495921696, "grad_norm": 0.0347524918615818, "learning_rate": 2.684353903306902e-05, "loss": 0.0059, "num_input_tokens_seen": 239504672, "step": 111040 }, { "epoch": 18.11500815660685, "grad_norm": 0.014805582351982594, "learning_rate": 2.6820534807855124e-05, "loss": 0.0023, "num_input_tokens_seen": 239515456, "step": 111045 }, { "epoch": 18.115823817292007, "grad_norm": 0.05199724808335304, "learning_rate": 2.679754017222319e-05, "loss": 0.0015, "num_input_tokens_seen": 239526784, "step": 111050 }, { "epoch": 18.116639477977163, "grad_norm": 0.0026068249717354774, "learning_rate": 2.677455512663951e-05, "loss": 0.0013, "num_input_tokens_seen": 239538752, "step": 111055 }, { "epoch": 18.117455138662315, "grad_norm": 0.026650821790099144, "learning_rate": 2.6751579671569715e-05, "loss": 0.0035, "num_input_tokens_seen": 239550176, "step": 111060 }, { "epoch": 18.11827079934747, "grad_norm": 0.00625242106616497, "learning_rate": 2.6728613807479594e-05, "loss": 0.0015, "num_input_tokens_seen": 239561056, "step": 111065 }, { "epoch": 18.119086460032626, "grad_norm": 0.000535019498784095, "learning_rate": 2.6705657534834394e-05, "loss": 0.0182, "num_input_tokens_seen": 239572352, "step": 111070 }, { "epoch": 18.119902120717782, "grad_norm": 0.010871322825551033, "learning_rate": 2.6682710854099623e-05, "loss": 0.0043, "num_input_tokens_seen": 239583584, "step": 111075 }, { "epoch": 18.120717781402938, "grad_norm": 0.11744468659162521, "learning_rate": 2.6659773765740025e-05, "loss": 0.0041, "num_input_tokens_seen": 239595264, "step": 111080 }, { "epoch": 18.12153344208809, "grad_norm": 0.09282297641038895, "learning_rate": 2.6636846270220615e-05, "loss": 0.0266, "num_input_tokens_seen": 239606528, "step": 111085 }, { "epoch": 18.122349102773246, "grad_norm": 0.00990519393235445, "learning_rate": 2.661392836800608e-05, "loss": 0.0075, "num_input_tokens_seen": 239616992, "step": 111090 }, { "epoch": 18.1231647634584, "grad_norm": 0.001589664607308805, "learning_rate": 2.6591020059560766e-05, "loss": 0.0181, "num_input_tokens_seen": 239627808, "step": 111095 }, { "epoch": 18.123980424143557, "grad_norm": 0.02519725076854229, "learning_rate": 2.656812134534897e-05, "loss": 0.0053, "num_input_tokens_seen": 239638144, "step": 111100 }, { "epoch": 18.124796084828713, "grad_norm": 0.7706707715988159, "learning_rate": 2.6545232225834825e-05, "loss": 0.0469, "num_input_tokens_seen": 239648992, "step": 111105 }, { "epoch": 18.125611745513865, "grad_norm": 0.006206317339092493, "learning_rate": 2.6522352701482178e-05, "loss": 0.0023, "num_input_tokens_seen": 239659968, "step": 111110 }, { "epoch": 18.12642740619902, "grad_norm": 0.005388461984694004, "learning_rate": 2.6499482772754714e-05, "loss": 0.0042, "num_input_tokens_seen": 239672256, "step": 111115 }, { "epoch": 18.127243066884176, "grad_norm": 0.016153821721673012, "learning_rate": 2.6476622440115894e-05, "loss": 0.0014, "num_input_tokens_seen": 239682112, "step": 111120 }, { "epoch": 18.128058727569332, "grad_norm": 0.0004956317716278136, "learning_rate": 2.6453771704029017e-05, "loss": 0.0026, "num_input_tokens_seen": 239692608, "step": 111125 }, { "epoch": 18.128874388254488, "grad_norm": 0.004708434455096722, "learning_rate": 2.6430930564957213e-05, "loss": 0.0028, "num_input_tokens_seen": 239702144, "step": 111130 }, { "epoch": 18.12969004893964, "grad_norm": 0.003930427134037018, "learning_rate": 2.6408099023363275e-05, "loss": 0.0028, "num_input_tokens_seen": 239712992, "step": 111135 }, { "epoch": 18.130505709624796, "grad_norm": 0.004929612390697002, "learning_rate": 2.6385277079710113e-05, "loss": 0.0056, "num_input_tokens_seen": 239723776, "step": 111140 }, { "epoch": 18.13132137030995, "grad_norm": 0.011383865028619766, "learning_rate": 2.6362464734460024e-05, "loss": 0.0016, "num_input_tokens_seen": 239734336, "step": 111145 }, { "epoch": 18.132137030995107, "grad_norm": 1.540089726448059, "learning_rate": 2.633966198807558e-05, "loss": 0.0319, "num_input_tokens_seen": 239744576, "step": 111150 }, { "epoch": 18.13295269168026, "grad_norm": 0.05189463868737221, "learning_rate": 2.631686884101864e-05, "loss": 0.0027, "num_input_tokens_seen": 239754112, "step": 111155 }, { "epoch": 18.133768352365415, "grad_norm": 0.006966361775994301, "learning_rate": 2.6294085293751435e-05, "loss": 0.0018, "num_input_tokens_seen": 239764000, "step": 111160 }, { "epoch": 18.13458401305057, "grad_norm": 0.07090909034013748, "learning_rate": 2.6271311346735326e-05, "loss": 0.0022, "num_input_tokens_seen": 239775680, "step": 111165 }, { "epoch": 18.135399673735726, "grad_norm": 0.05070669203996658, "learning_rate": 2.624854700043222e-05, "loss": 0.0054, "num_input_tokens_seen": 239787456, "step": 111170 }, { "epoch": 18.136215334420882, "grad_norm": 0.015738025307655334, "learning_rate": 2.6225792255303195e-05, "loss": 0.0031, "num_input_tokens_seen": 239798016, "step": 111175 }, { "epoch": 18.137030995106034, "grad_norm": 0.03503786772489548, "learning_rate": 2.6203047111809597e-05, "loss": 0.0058, "num_input_tokens_seen": 239809152, "step": 111180 }, { "epoch": 18.13784665579119, "grad_norm": 0.005223119631409645, "learning_rate": 2.6180311570412174e-05, "loss": 0.0093, "num_input_tokens_seen": 239819520, "step": 111185 }, { "epoch": 18.138662316476346, "grad_norm": 0.08614151179790497, "learning_rate": 2.6157585631572e-05, "loss": 0.0029, "num_input_tokens_seen": 239831936, "step": 111190 }, { "epoch": 18.1394779771615, "grad_norm": 0.02004201151430607, "learning_rate": 2.613486929574932e-05, "loss": 0.0014, "num_input_tokens_seen": 239843136, "step": 111195 }, { "epoch": 18.140293637846657, "grad_norm": 0.14382705092430115, "learning_rate": 2.611216256340476e-05, "loss": 0.0043, "num_input_tokens_seen": 239854336, "step": 111200 }, { "epoch": 18.14110929853181, "grad_norm": 0.0007909032283350825, "learning_rate": 2.6089465434998296e-05, "loss": 0.0027, "num_input_tokens_seen": 239866368, "step": 111205 }, { "epoch": 18.141924959216965, "grad_norm": 0.056989386677742004, "learning_rate": 2.6066777910990104e-05, "loss": 0.0094, "num_input_tokens_seen": 239876224, "step": 111210 }, { "epoch": 18.14274061990212, "grad_norm": 0.12282928079366684, "learning_rate": 2.6044099991839766e-05, "loss": 0.0023, "num_input_tokens_seen": 239887520, "step": 111215 }, { "epoch": 18.143556280587276, "grad_norm": 0.0009930587839335203, "learning_rate": 2.602143167800719e-05, "loss": 0.0034, "num_input_tokens_seen": 239899552, "step": 111220 }, { "epoch": 18.144371941272432, "grad_norm": 0.06086418777704239, "learning_rate": 2.59987729699514e-05, "loss": 0.004, "num_input_tokens_seen": 239911456, "step": 111225 }, { "epoch": 18.145187601957584, "grad_norm": 0.008784052915871143, "learning_rate": 2.5976123868131864e-05, "loss": 0.0081, "num_input_tokens_seen": 239922368, "step": 111230 }, { "epoch": 18.14600326264274, "grad_norm": 0.004765613004565239, "learning_rate": 2.5953484373007487e-05, "loss": 0.0093, "num_input_tokens_seen": 239932864, "step": 111235 }, { "epoch": 18.146818923327896, "grad_norm": 0.00959201343357563, "learning_rate": 2.5930854485037124e-05, "loss": 0.001, "num_input_tokens_seen": 239943392, "step": 111240 }, { "epoch": 18.14763458401305, "grad_norm": 0.31539952754974365, "learning_rate": 2.590823420467947e-05, "loss": 0.0075, "num_input_tokens_seen": 239953344, "step": 111245 }, { "epoch": 18.148450244698207, "grad_norm": 0.00975149404257536, "learning_rate": 2.5885623532392823e-05, "loss": 0.0005, "num_input_tokens_seen": 239964160, "step": 111250 }, { "epoch": 18.14926590538336, "grad_norm": 0.07698607444763184, "learning_rate": 2.586302246863548e-05, "loss": 0.0019, "num_input_tokens_seen": 239974560, "step": 111255 }, { "epoch": 18.150081566068515, "grad_norm": 0.1376171112060547, "learning_rate": 2.584043101386546e-05, "loss": 0.0029, "num_input_tokens_seen": 239986176, "step": 111260 }, { "epoch": 18.15089722675367, "grad_norm": 0.0026094571221619844, "learning_rate": 2.5817849168540576e-05, "loss": 0.0012, "num_input_tokens_seen": 239997024, "step": 111265 }, { "epoch": 18.151712887438826, "grad_norm": 0.00037045072531327605, "learning_rate": 2.5795276933118618e-05, "loss": 0.0044, "num_input_tokens_seen": 240006080, "step": 111270 }, { "epoch": 18.152528548123982, "grad_norm": 0.06298432499170303, "learning_rate": 2.5772714308056887e-05, "loss": 0.02, "num_input_tokens_seen": 240017664, "step": 111275 }, { "epoch": 18.153344208809134, "grad_norm": 0.0011239241575822234, "learning_rate": 2.5750161293812635e-05, "loss": 0.0044, "num_input_tokens_seen": 240028864, "step": 111280 }, { "epoch": 18.15415986949429, "grad_norm": 0.032165538519620895, "learning_rate": 2.572761789084316e-05, "loss": 0.0016, "num_input_tokens_seen": 240040224, "step": 111285 }, { "epoch": 18.154975530179446, "grad_norm": 0.18463104963302612, "learning_rate": 2.570508409960498e-05, "loss": 0.1699, "num_input_tokens_seen": 240050720, "step": 111290 }, { "epoch": 18.1557911908646, "grad_norm": 0.004164436366409063, "learning_rate": 2.5682559920555127e-05, "loss": 0.0022, "num_input_tokens_seen": 240062464, "step": 111295 }, { "epoch": 18.156606851549757, "grad_norm": 1.0949454307556152, "learning_rate": 2.5660045354149786e-05, "loss": 0.1002, "num_input_tokens_seen": 240073888, "step": 111300 }, { "epoch": 18.15742251223491, "grad_norm": 0.21131518483161926, "learning_rate": 2.5637540400845483e-05, "loss": 0.0391, "num_input_tokens_seen": 240084672, "step": 111305 }, { "epoch": 18.158238172920065, "grad_norm": 0.02788284420967102, "learning_rate": 2.561504506109802e-05, "loss": 0.0026, "num_input_tokens_seen": 240095648, "step": 111310 }, { "epoch": 18.15905383360522, "grad_norm": 0.0065851821564137936, "learning_rate": 2.5592559335363696e-05, "loss": 0.0271, "num_input_tokens_seen": 240107424, "step": 111315 }, { "epoch": 18.159869494290376, "grad_norm": 0.011040419340133667, "learning_rate": 2.5570083224097763e-05, "loss": 0.0677, "num_input_tokens_seen": 240117824, "step": 111320 }, { "epoch": 18.160685154975532, "grad_norm": 0.015364638529717922, "learning_rate": 2.554761672775613e-05, "loss": 0.001, "num_input_tokens_seen": 240128992, "step": 111325 }, { "epoch": 18.161500815660684, "grad_norm": 0.02553625963628292, "learning_rate": 2.5525159846793822e-05, "loss": 0.03, "num_input_tokens_seen": 240138528, "step": 111330 }, { "epoch": 18.16231647634584, "grad_norm": 0.001807875232771039, "learning_rate": 2.550271258166609e-05, "loss": 0.0014, "num_input_tokens_seen": 240149472, "step": 111335 }, { "epoch": 18.163132137030995, "grad_norm": 0.006684791296720505, "learning_rate": 2.548027493282784e-05, "loss": 0.0018, "num_input_tokens_seen": 240161504, "step": 111340 }, { "epoch": 18.16394779771615, "grad_norm": 0.004705562721937895, "learning_rate": 2.5457846900733774e-05, "loss": 0.001, "num_input_tokens_seen": 240171424, "step": 111345 }, { "epoch": 18.164763458401303, "grad_norm": 0.0018168031238019466, "learning_rate": 2.5435428485838465e-05, "loss": 0.0204, "num_input_tokens_seen": 240183232, "step": 111350 }, { "epoch": 18.16557911908646, "grad_norm": 0.0034394520334899426, "learning_rate": 2.5413019688596218e-05, "loss": 0.008, "num_input_tokens_seen": 240193344, "step": 111355 }, { "epoch": 18.166394779771615, "grad_norm": 0.006621944718062878, "learning_rate": 2.539062050946117e-05, "loss": 0.0094, "num_input_tokens_seen": 240202720, "step": 111360 }, { "epoch": 18.16721044045677, "grad_norm": 0.0019619478844106197, "learning_rate": 2.5368230948887295e-05, "loss": 0.0013, "num_input_tokens_seen": 240213376, "step": 111365 }, { "epoch": 18.168026101141926, "grad_norm": 0.07544968277215958, "learning_rate": 2.5345851007328336e-05, "loss": 0.0851, "num_input_tokens_seen": 240224992, "step": 111370 }, { "epoch": 18.16884176182708, "grad_norm": 0.07755742967128754, "learning_rate": 2.532348068523782e-05, "loss": 0.0028, "num_input_tokens_seen": 240234368, "step": 111375 }, { "epoch": 18.169657422512234, "grad_norm": 0.005401346366852522, "learning_rate": 2.5301119983069165e-05, "loss": 0.0051, "num_input_tokens_seen": 240245216, "step": 111380 }, { "epoch": 18.17047308319739, "grad_norm": 0.15238258242607117, "learning_rate": 2.5278768901275506e-05, "loss": 0.0057, "num_input_tokens_seen": 240256192, "step": 111385 }, { "epoch": 18.171288743882545, "grad_norm": 0.022075453773140907, "learning_rate": 2.5256427440309815e-05, "loss": 0.0054, "num_input_tokens_seen": 240266880, "step": 111390 }, { "epoch": 18.1721044045677, "grad_norm": 0.04228004068136215, "learning_rate": 2.5234095600624896e-05, "loss": 0.0121, "num_input_tokens_seen": 240277088, "step": 111395 }, { "epoch": 18.172920065252853, "grad_norm": 0.0018854702357202768, "learning_rate": 2.5211773382673274e-05, "loss": 0.0011, "num_input_tokens_seen": 240288512, "step": 111400 }, { "epoch": 18.17373572593801, "grad_norm": 0.007811045739799738, "learning_rate": 2.5189460786907425e-05, "loss": 0.0012, "num_input_tokens_seen": 240299200, "step": 111405 }, { "epoch": 18.174551386623165, "grad_norm": 0.12818017601966858, "learning_rate": 2.5167157813779485e-05, "loss": 0.2091, "num_input_tokens_seen": 240310112, "step": 111410 }, { "epoch": 18.17536704730832, "grad_norm": 0.09420022368431091, "learning_rate": 2.5144864463741423e-05, "loss": 0.0046, "num_input_tokens_seen": 240320480, "step": 111415 }, { "epoch": 18.176182707993476, "grad_norm": 0.00842718780040741, "learning_rate": 2.5122580737245105e-05, "loss": 0.0269, "num_input_tokens_seen": 240330976, "step": 111420 }, { "epoch": 18.17699836867863, "grad_norm": 0.014596380293369293, "learning_rate": 2.5100306634742053e-05, "loss": 0.003, "num_input_tokens_seen": 240341952, "step": 111425 }, { "epoch": 18.177814029363784, "grad_norm": 0.03125973045825958, "learning_rate": 2.5078042156683854e-05, "loss": 0.0014, "num_input_tokens_seen": 240353472, "step": 111430 }, { "epoch": 18.17862969004894, "grad_norm": 0.0022874604910612106, "learning_rate": 2.5055787303521483e-05, "loss": 0.0029, "num_input_tokens_seen": 240365472, "step": 111435 }, { "epoch": 18.179445350734095, "grad_norm": 0.0036635762080550194, "learning_rate": 2.5033542075706184e-05, "loss": 0.0068, "num_input_tokens_seen": 240375872, "step": 111440 }, { "epoch": 18.18026101141925, "grad_norm": 0.005582126788794994, "learning_rate": 2.5011306473688656e-05, "loss": 0.0013, "num_input_tokens_seen": 240386176, "step": 111445 }, { "epoch": 18.181076672104403, "grad_norm": 0.5982539653778076, "learning_rate": 2.4989080497919593e-05, "loss": 0.1245, "num_input_tokens_seen": 240395584, "step": 111450 }, { "epoch": 18.18189233278956, "grad_norm": 0.5354536771774292, "learning_rate": 2.496686414884941e-05, "loss": 0.0144, "num_input_tokens_seen": 240405472, "step": 111455 }, { "epoch": 18.182707993474715, "grad_norm": 0.001990189542993903, "learning_rate": 2.4944657426928306e-05, "loss": 0.0024, "num_input_tokens_seen": 240415616, "step": 111460 }, { "epoch": 18.18352365415987, "grad_norm": 0.004446466453373432, "learning_rate": 2.492246033260642e-05, "loss": 0.0025, "num_input_tokens_seen": 240427744, "step": 111465 }, { "epoch": 18.184339314845026, "grad_norm": 1.7101188898086548, "learning_rate": 2.490027286633356e-05, "loss": 0.1051, "num_input_tokens_seen": 240438464, "step": 111470 }, { "epoch": 18.18515497553018, "grad_norm": 0.0011157828848809004, "learning_rate": 2.487809502855931e-05, "loss": 0.0013, "num_input_tokens_seen": 240450016, "step": 111475 }, { "epoch": 18.185970636215334, "grad_norm": 0.0033376403152942657, "learning_rate": 2.4855926819733253e-05, "loss": 0.0021, "num_input_tokens_seen": 240461120, "step": 111480 }, { "epoch": 18.18678629690049, "grad_norm": 0.0017718520248308778, "learning_rate": 2.4833768240304587e-05, "loss": 0.0021, "num_input_tokens_seen": 240471104, "step": 111485 }, { "epoch": 18.187601957585645, "grad_norm": 0.001832994050346315, "learning_rate": 2.48116192907224e-05, "loss": 0.0275, "num_input_tokens_seen": 240483264, "step": 111490 }, { "epoch": 18.1884176182708, "grad_norm": 0.0016577000496909022, "learning_rate": 2.4789479971435602e-05, "loss": 0.001, "num_input_tokens_seen": 240495200, "step": 111495 }, { "epoch": 18.189233278955953, "grad_norm": 2.1191773414611816, "learning_rate": 2.4767350282892788e-05, "loss": 0.0769, "num_input_tokens_seen": 240507104, "step": 111500 }, { "epoch": 18.19004893964111, "grad_norm": 0.005944345612078905, "learning_rate": 2.4745230225542536e-05, "loss": 0.001, "num_input_tokens_seen": 240517408, "step": 111505 }, { "epoch": 18.190864600326265, "grad_norm": 0.004903233144432306, "learning_rate": 2.472311979983305e-05, "loss": 0.0033, "num_input_tokens_seen": 240527136, "step": 111510 }, { "epoch": 18.19168026101142, "grad_norm": 0.0562250055372715, "learning_rate": 2.470101900621252e-05, "loss": 0.0026, "num_input_tokens_seen": 240537344, "step": 111515 }, { "epoch": 18.192495921696572, "grad_norm": 0.017636625096201897, "learning_rate": 2.4678927845128762e-05, "loss": 0.0027, "num_input_tokens_seen": 240547584, "step": 111520 }, { "epoch": 18.193311582381728, "grad_norm": 0.01052794698625803, "learning_rate": 2.4656846317029524e-05, "loss": 0.0012, "num_input_tokens_seen": 240559072, "step": 111525 }, { "epoch": 18.194127243066884, "grad_norm": 0.005003239959478378, "learning_rate": 2.463477442236234e-05, "loss": 0.0038, "num_input_tokens_seen": 240569824, "step": 111530 }, { "epoch": 18.19494290375204, "grad_norm": 0.25396376848220825, "learning_rate": 2.4612712161574457e-05, "loss": 0.006, "num_input_tokens_seen": 240580896, "step": 111535 }, { "epoch": 18.195758564437195, "grad_norm": 0.0011220743181183934, "learning_rate": 2.459065953511308e-05, "loss": 0.0033, "num_input_tokens_seen": 240591520, "step": 111540 }, { "epoch": 18.196574225122347, "grad_norm": 0.0068071819841861725, "learning_rate": 2.456861654342507e-05, "loss": 0.001, "num_input_tokens_seen": 240601440, "step": 111545 }, { "epoch": 18.197389885807503, "grad_norm": 0.0025223486591130495, "learning_rate": 2.454658318695713e-05, "loss": 0.0255, "num_input_tokens_seen": 240611936, "step": 111550 }, { "epoch": 18.19820554649266, "grad_norm": 0.004089963156729937, "learning_rate": 2.4524559466155838e-05, "loss": 0.0047, "num_input_tokens_seen": 240622816, "step": 111555 }, { "epoch": 18.199021207177815, "grad_norm": 0.0020937640219926834, "learning_rate": 2.450254538146762e-05, "loss": 0.0026, "num_input_tokens_seen": 240634240, "step": 111560 }, { "epoch": 18.19983686786297, "grad_norm": 0.04516677185893059, "learning_rate": 2.44805409333384e-05, "loss": 0.0025, "num_input_tokens_seen": 240644896, "step": 111565 }, { "epoch": 18.200652528548122, "grad_norm": 0.013825511559844017, "learning_rate": 2.445854612221432e-05, "loss": 0.0024, "num_input_tokens_seen": 240655808, "step": 111570 }, { "epoch": 18.201468189233278, "grad_norm": 0.001076919841580093, "learning_rate": 2.443656094854113e-05, "loss": 0.001, "num_input_tokens_seen": 240667200, "step": 111575 }, { "epoch": 18.202283849918434, "grad_norm": 0.00052706862334162, "learning_rate": 2.4414585412764255e-05, "loss": 0.0026, "num_input_tokens_seen": 240679488, "step": 111580 }, { "epoch": 18.20309951060359, "grad_norm": 0.0012779412791132927, "learning_rate": 2.4392619515329173e-05, "loss": 0.0009, "num_input_tokens_seen": 240690368, "step": 111585 }, { "epoch": 18.203915171288745, "grad_norm": 0.021107377484440804, "learning_rate": 2.437066325668097e-05, "loss": 0.0017, "num_input_tokens_seen": 240701888, "step": 111590 }, { "epoch": 18.204730831973897, "grad_norm": 0.6604924201965332, "learning_rate": 2.434871663726468e-05, "loss": 0.1335, "num_input_tokens_seen": 240712864, "step": 111595 }, { "epoch": 18.205546492659053, "grad_norm": 0.008355256170034409, "learning_rate": 2.4326779657525055e-05, "loss": 0.0254, "num_input_tokens_seen": 240723936, "step": 111600 }, { "epoch": 18.20636215334421, "grad_norm": 0.004155251197516918, "learning_rate": 2.430485231790669e-05, "loss": 0.0036, "num_input_tokens_seen": 240735424, "step": 111605 }, { "epoch": 18.207177814029365, "grad_norm": 0.028274651616811752, "learning_rate": 2.428293461885389e-05, "loss": 0.0036, "num_input_tokens_seen": 240746400, "step": 111610 }, { "epoch": 18.20799347471452, "grad_norm": 0.021356062963604927, "learning_rate": 2.426102656081097e-05, "loss": 0.0056, "num_input_tokens_seen": 240757856, "step": 111615 }, { "epoch": 18.208809135399672, "grad_norm": 0.0013132854364812374, "learning_rate": 2.4239128144221857e-05, "loss": 0.0016, "num_input_tokens_seen": 240768544, "step": 111620 }, { "epoch": 18.209624796084828, "grad_norm": 0.022219570353627205, "learning_rate": 2.4217239369530354e-05, "loss": 0.0089, "num_input_tokens_seen": 240779712, "step": 111625 }, { "epoch": 18.210440456769984, "grad_norm": 0.09361851215362549, "learning_rate": 2.4195360237180053e-05, "loss": 0.0026, "num_input_tokens_seen": 240791552, "step": 111630 }, { "epoch": 18.21125611745514, "grad_norm": 0.0010377283906564116, "learning_rate": 2.417349074761438e-05, "loss": 0.0101, "num_input_tokens_seen": 240802912, "step": 111635 }, { "epoch": 18.212071778140295, "grad_norm": 0.0108187859877944, "learning_rate": 2.4151630901276534e-05, "loss": 0.0197, "num_input_tokens_seen": 240814144, "step": 111640 }, { "epoch": 18.212887438825447, "grad_norm": 0.00474878866225481, "learning_rate": 2.4129780698609606e-05, "loss": 0.0015, "num_input_tokens_seen": 240825408, "step": 111645 }, { "epoch": 18.213703099510603, "grad_norm": 0.31870952248573303, "learning_rate": 2.4107940140056294e-05, "loss": 0.0047, "num_input_tokens_seen": 240835680, "step": 111650 }, { "epoch": 18.21451876019576, "grad_norm": 0.06353544443845749, "learning_rate": 2.4086109226059305e-05, "loss": 0.002, "num_input_tokens_seen": 240845280, "step": 111655 }, { "epoch": 18.215334420880914, "grad_norm": 0.04116976261138916, "learning_rate": 2.4064287957061003e-05, "loss": 0.0022, "num_input_tokens_seen": 240855648, "step": 111660 }, { "epoch": 18.21615008156607, "grad_norm": 0.005722154397517443, "learning_rate": 2.404247633350376e-05, "loss": 0.0016, "num_input_tokens_seen": 240865984, "step": 111665 }, { "epoch": 18.216965742251222, "grad_norm": 0.2929523289203644, "learning_rate": 2.402067435582944e-05, "loss": 0.0253, "num_input_tokens_seen": 240876672, "step": 111670 }, { "epoch": 18.217781402936378, "grad_norm": 0.01436008419841528, "learning_rate": 2.3998882024480085e-05, "loss": 0.0062, "num_input_tokens_seen": 240887776, "step": 111675 }, { "epoch": 18.218597063621534, "grad_norm": 0.2845918536186218, "learning_rate": 2.3977099339897112e-05, "loss": 0.0102, "num_input_tokens_seen": 240897600, "step": 111680 }, { "epoch": 18.21941272430669, "grad_norm": 0.005448872223496437, "learning_rate": 2.395532630252223e-05, "loss": 0.0021, "num_input_tokens_seen": 240909696, "step": 111685 }, { "epoch": 18.22022838499184, "grad_norm": 0.07055465877056122, "learning_rate": 2.393356291279647e-05, "loss": 0.012, "num_input_tokens_seen": 240920992, "step": 111690 }, { "epoch": 18.221044045676997, "grad_norm": 0.2125808596611023, "learning_rate": 2.391180917116109e-05, "loss": 0.0055, "num_input_tokens_seen": 240932320, "step": 111695 }, { "epoch": 18.221859706362153, "grad_norm": 0.04135651886463165, "learning_rate": 2.389006507805669e-05, "loss": 0.0022, "num_input_tokens_seen": 240943936, "step": 111700 }, { "epoch": 18.22267536704731, "grad_norm": 0.005673881154507399, "learning_rate": 2.3868330633924295e-05, "loss": 0.0038, "num_input_tokens_seen": 240954976, "step": 111705 }, { "epoch": 18.223491027732464, "grad_norm": 0.007143644616007805, "learning_rate": 2.3846605839204062e-05, "loss": 0.0017, "num_input_tokens_seen": 240964832, "step": 111710 }, { "epoch": 18.224306688417617, "grad_norm": 0.13095195591449738, "learning_rate": 2.3824890694336467e-05, "loss": 0.0051, "num_input_tokens_seen": 240975904, "step": 111715 }, { "epoch": 18.225122349102772, "grad_norm": 1.1789814233779907, "learning_rate": 2.380318519976149e-05, "loss": 0.044, "num_input_tokens_seen": 240985376, "step": 111720 }, { "epoch": 18.225938009787928, "grad_norm": 0.04211236163973808, "learning_rate": 2.3781489355919117e-05, "loss": 0.0303, "num_input_tokens_seen": 240996672, "step": 111725 }, { "epoch": 18.226753670473084, "grad_norm": 0.02973569557070732, "learning_rate": 2.375980316324894e-05, "loss": 0.0018, "num_input_tokens_seen": 241007328, "step": 111730 }, { "epoch": 18.22756933115824, "grad_norm": 0.006436047609895468, "learning_rate": 2.373812662219055e-05, "loss": 0.0014, "num_input_tokens_seen": 241018208, "step": 111735 }, { "epoch": 18.22838499184339, "grad_norm": 0.020444504916667938, "learning_rate": 2.3716459733183205e-05, "loss": 0.0026, "num_input_tokens_seen": 241029120, "step": 111740 }, { "epoch": 18.229200652528547, "grad_norm": 0.0016047678655013442, "learning_rate": 2.3694802496665945e-05, "loss": 0.002, "num_input_tokens_seen": 241040288, "step": 111745 }, { "epoch": 18.230016313213703, "grad_norm": 0.0015256740152835846, "learning_rate": 2.367315491307781e-05, "loss": 0.0012, "num_input_tokens_seen": 241050400, "step": 111750 }, { "epoch": 18.23083197389886, "grad_norm": 0.0043203942477703094, "learning_rate": 2.3651516982857448e-05, "loss": 0.0064, "num_input_tokens_seen": 241061344, "step": 111755 }, { "epoch": 18.231647634584014, "grad_norm": 0.0023988226894289255, "learning_rate": 2.362988870644339e-05, "loss": 0.0029, "num_input_tokens_seen": 241071872, "step": 111760 }, { "epoch": 18.232463295269167, "grad_norm": 0.0009392412612214684, "learning_rate": 2.3608270084273853e-05, "loss": 0.0009, "num_input_tokens_seen": 241083040, "step": 111765 }, { "epoch": 18.233278955954322, "grad_norm": 0.09521272778511047, "learning_rate": 2.3586661116787255e-05, "loss": 0.0081, "num_input_tokens_seen": 241092800, "step": 111770 }, { "epoch": 18.234094616639478, "grad_norm": 0.002297584665939212, "learning_rate": 2.3565061804421195e-05, "loss": 0.0027, "num_input_tokens_seen": 241104064, "step": 111775 }, { "epoch": 18.234910277324634, "grad_norm": 0.027292216196656227, "learning_rate": 2.3543472147613654e-05, "loss": 0.0023, "num_input_tokens_seen": 241114432, "step": 111780 }, { "epoch": 18.23572593800979, "grad_norm": 0.025290237739682198, "learning_rate": 2.3521892146801947e-05, "loss": 0.0036, "num_input_tokens_seen": 241124672, "step": 111785 }, { "epoch": 18.23654159869494, "grad_norm": 0.5383252501487732, "learning_rate": 2.350032180242373e-05, "loss": 0.017, "num_input_tokens_seen": 241135712, "step": 111790 }, { "epoch": 18.237357259380097, "grad_norm": 0.02291577123105526, "learning_rate": 2.3478761114915814e-05, "loss": 0.0029, "num_input_tokens_seen": 241145664, "step": 111795 }, { "epoch": 18.238172920065253, "grad_norm": 0.012970684096217155, "learning_rate": 2.3457210084715462e-05, "loss": 0.053, "num_input_tokens_seen": 241155680, "step": 111800 }, { "epoch": 18.23898858075041, "grad_norm": 0.012559064663946629, "learning_rate": 2.3435668712259105e-05, "loss": 0.0033, "num_input_tokens_seen": 241165504, "step": 111805 }, { "epoch": 18.239804241435564, "grad_norm": 0.0024680495262145996, "learning_rate": 2.341413699798367e-05, "loss": 0.0027, "num_input_tokens_seen": 241176352, "step": 111810 }, { "epoch": 18.240619902120716, "grad_norm": 0.16315732896327972, "learning_rate": 2.3392614942325196e-05, "loss": 0.1174, "num_input_tokens_seen": 241186496, "step": 111815 }, { "epoch": 18.241435562805872, "grad_norm": 0.24155724048614502, "learning_rate": 2.3371102545720112e-05, "loss": 0.0095, "num_input_tokens_seen": 241197952, "step": 111820 }, { "epoch": 18.242251223491028, "grad_norm": 0.0029029150027781725, "learning_rate": 2.3349599808604182e-05, "loss": 0.0216, "num_input_tokens_seen": 241207904, "step": 111825 }, { "epoch": 18.243066884176184, "grad_norm": 0.031905531883239746, "learning_rate": 2.332810673141339e-05, "loss": 0.0042, "num_input_tokens_seen": 241219264, "step": 111830 }, { "epoch": 18.24388254486134, "grad_norm": 0.004641303792595863, "learning_rate": 2.3306623314583108e-05, "loss": 0.0469, "num_input_tokens_seen": 241231104, "step": 111835 }, { "epoch": 18.24469820554649, "grad_norm": 0.01572561077773571, "learning_rate": 2.3285149558548934e-05, "loss": 0.0019, "num_input_tokens_seen": 241241920, "step": 111840 }, { "epoch": 18.245513866231647, "grad_norm": 0.0011973329819738865, "learning_rate": 2.3263685463745854e-05, "loss": 0.0022, "num_input_tokens_seen": 241251584, "step": 111845 }, { "epoch": 18.246329526916803, "grad_norm": 0.0013132892781868577, "learning_rate": 2.324223103060913e-05, "loss": 0.0027, "num_input_tokens_seen": 241262400, "step": 111850 }, { "epoch": 18.24714518760196, "grad_norm": 0.00993996299803257, "learning_rate": 2.322078625957319e-05, "loss": 0.0019, "num_input_tokens_seen": 241273088, "step": 111855 }, { "epoch": 18.247960848287114, "grad_norm": 0.006397727411240339, "learning_rate": 2.319935115107302e-05, "loss": 0.0106, "num_input_tokens_seen": 241283104, "step": 111860 }, { "epoch": 18.248776508972266, "grad_norm": 0.22306303679943085, "learning_rate": 2.317792570554278e-05, "loss": 0.005, "num_input_tokens_seen": 241293696, "step": 111865 }, { "epoch": 18.249592169657422, "grad_norm": 0.20095105469226837, "learning_rate": 2.3156509923416778e-05, "loss": 0.0056, "num_input_tokens_seen": 241304288, "step": 111870 }, { "epoch": 18.250407830342578, "grad_norm": 0.005691381171345711, "learning_rate": 2.3135103805129065e-05, "loss": 0.0107, "num_input_tokens_seen": 241314816, "step": 111875 }, { "epoch": 18.251223491027734, "grad_norm": 0.009066348895430565, "learning_rate": 2.31137073511134e-05, "loss": 0.1037, "num_input_tokens_seen": 241325632, "step": 111880 }, { "epoch": 18.252039151712886, "grad_norm": 0.026368988677859306, "learning_rate": 2.3092320561803436e-05, "loss": 0.0745, "num_input_tokens_seen": 241336448, "step": 111885 }, { "epoch": 18.25285481239804, "grad_norm": 0.004802832845598459, "learning_rate": 2.3070943437632553e-05, "loss": 0.0009, "num_input_tokens_seen": 241346528, "step": 111890 }, { "epoch": 18.253670473083197, "grad_norm": 0.0012719426304101944, "learning_rate": 2.3049575979034066e-05, "loss": 0.0008, "num_input_tokens_seen": 241358048, "step": 111895 }, { "epoch": 18.254486133768353, "grad_norm": 0.007002538535743952, "learning_rate": 2.3028218186440964e-05, "loss": 0.0028, "num_input_tokens_seen": 241369184, "step": 111900 }, { "epoch": 18.25530179445351, "grad_norm": 0.001879014540463686, "learning_rate": 2.3006870060286123e-05, "loss": 0.0063, "num_input_tokens_seen": 241381024, "step": 111905 }, { "epoch": 18.25611745513866, "grad_norm": 0.0816144123673439, "learning_rate": 2.2985531601002084e-05, "loss": 0.0084, "num_input_tokens_seen": 241392544, "step": 111910 }, { "epoch": 18.256933115823816, "grad_norm": 0.0033564334735274315, "learning_rate": 2.2964202809021563e-05, "loss": 0.0031, "num_input_tokens_seen": 241404032, "step": 111915 }, { "epoch": 18.257748776508972, "grad_norm": 0.006035107187926769, "learning_rate": 2.2942883684776428e-05, "loss": 0.0033, "num_input_tokens_seen": 241413792, "step": 111920 }, { "epoch": 18.258564437194128, "grad_norm": 0.008599616587162018, "learning_rate": 2.2921574228699116e-05, "loss": 0.0015, "num_input_tokens_seen": 241425856, "step": 111925 }, { "epoch": 18.259380097879284, "grad_norm": 0.014032349921762943, "learning_rate": 2.290027444122117e-05, "loss": 0.0038, "num_input_tokens_seen": 241436224, "step": 111930 }, { "epoch": 18.260195758564436, "grad_norm": 0.022291334345936775, "learning_rate": 2.2878984322774578e-05, "loss": 0.002, "num_input_tokens_seen": 241447840, "step": 111935 }, { "epoch": 18.26101141924959, "grad_norm": 0.015724286437034607, "learning_rate": 2.2857703873790435e-05, "loss": 0.0245, "num_input_tokens_seen": 241459200, "step": 111940 }, { "epoch": 18.261827079934747, "grad_norm": 0.009958015754818916, "learning_rate": 2.2836433094700405e-05, "loss": 0.0988, "num_input_tokens_seen": 241469536, "step": 111945 }, { "epoch": 18.262642740619903, "grad_norm": 0.0026117325760424137, "learning_rate": 2.2815171985935246e-05, "loss": 0.011, "num_input_tokens_seen": 241480416, "step": 111950 }, { "epoch": 18.26345840130506, "grad_norm": 0.010779080912470818, "learning_rate": 2.279392054792612e-05, "loss": 0.0025, "num_input_tokens_seen": 241491040, "step": 111955 }, { "epoch": 18.26427406199021, "grad_norm": 0.003968062810599804, "learning_rate": 2.277267878110345e-05, "loss": 0.0927, "num_input_tokens_seen": 241501056, "step": 111960 }, { "epoch": 18.265089722675366, "grad_norm": 0.009467178955674171, "learning_rate": 2.275144668589796e-05, "loss": 0.0012, "num_input_tokens_seen": 241512736, "step": 111965 }, { "epoch": 18.265905383360522, "grad_norm": 0.0005257199518382549, "learning_rate": 2.2730224262739687e-05, "loss": 0.0074, "num_input_tokens_seen": 241523584, "step": 111970 }, { "epoch": 18.266721044045678, "grad_norm": 0.037835754454135895, "learning_rate": 2.270901151205895e-05, "loss": 0.0026, "num_input_tokens_seen": 241534336, "step": 111975 }, { "epoch": 18.267536704730833, "grad_norm": 0.0016134449979290366, "learning_rate": 2.2687808434285585e-05, "loss": 0.0038, "num_input_tokens_seen": 241545248, "step": 111980 }, { "epoch": 18.268352365415986, "grad_norm": 0.0035498873330652714, "learning_rate": 2.266661502984929e-05, "loss": 0.0013, "num_input_tokens_seen": 241555424, "step": 111985 }, { "epoch": 18.26916802610114, "grad_norm": 0.5064801573753357, "learning_rate": 2.264543129917962e-05, "loss": 0.01, "num_input_tokens_seen": 241566752, "step": 111990 }, { "epoch": 18.269983686786297, "grad_norm": 0.0009146176162175834, "learning_rate": 2.2624257242705838e-05, "loss": 0.0021, "num_input_tokens_seen": 241575520, "step": 111995 }, { "epoch": 18.270799347471453, "grad_norm": 0.01116583775728941, "learning_rate": 2.2603092860857045e-05, "loss": 0.0058, "num_input_tokens_seen": 241585152, "step": 112000 }, { "epoch": 18.27161500815661, "grad_norm": 0.8390815258026123, "learning_rate": 2.258193815406223e-05, "loss": 0.0425, "num_input_tokens_seen": 241596384, "step": 112005 }, { "epoch": 18.27243066884176, "grad_norm": 0.9471369981765747, "learning_rate": 2.2560793122750056e-05, "loss": 0.1206, "num_input_tokens_seen": 241607744, "step": 112010 }, { "epoch": 18.273246329526916, "grad_norm": 0.11717867851257324, "learning_rate": 2.253965776734912e-05, "loss": 0.0041, "num_input_tokens_seen": 241618208, "step": 112015 }, { "epoch": 18.274061990212072, "grad_norm": 0.0025332942605018616, "learning_rate": 2.251853208828769e-05, "loss": 0.0007, "num_input_tokens_seen": 241628576, "step": 112020 }, { "epoch": 18.274877650897228, "grad_norm": 0.00236615096218884, "learning_rate": 2.2497416085993983e-05, "loss": 0.0047, "num_input_tokens_seen": 241638144, "step": 112025 }, { "epoch": 18.275693311582383, "grad_norm": 0.00347616127692163, "learning_rate": 2.247630976089582e-05, "loss": 0.0044, "num_input_tokens_seen": 241649344, "step": 112030 }, { "epoch": 18.276508972267536, "grad_norm": 0.3252258002758026, "learning_rate": 2.245521311342108e-05, "loss": 0.0091, "num_input_tokens_seen": 241659808, "step": 112035 }, { "epoch": 18.27732463295269, "grad_norm": 0.04584804177284241, "learning_rate": 2.2434126143997258e-05, "loss": 0.0013, "num_input_tokens_seen": 241671232, "step": 112040 }, { "epoch": 18.278140293637847, "grad_norm": 0.005874639376997948, "learning_rate": 2.241304885305162e-05, "loss": 0.0062, "num_input_tokens_seen": 241682048, "step": 112045 }, { "epoch": 18.278955954323003, "grad_norm": 0.00046285297139547765, "learning_rate": 2.2391981241011495e-05, "loss": 0.0019, "num_input_tokens_seen": 241692832, "step": 112050 }, { "epoch": 18.27977161500816, "grad_norm": 0.06830260157585144, "learning_rate": 2.2370923308303702e-05, "loss": 0.0014, "num_input_tokens_seen": 241702016, "step": 112055 }, { "epoch": 18.28058727569331, "grad_norm": 0.0020050317980349064, "learning_rate": 2.234987505535513e-05, "loss": 0.0024, "num_input_tokens_seen": 241712736, "step": 112060 }, { "epoch": 18.281402936378466, "grad_norm": 0.03383228927850723, "learning_rate": 2.2328836482592208e-05, "loss": 0.0044, "num_input_tokens_seen": 241725024, "step": 112065 }, { "epoch": 18.282218597063622, "grad_norm": 0.0029741546604782343, "learning_rate": 2.2307807590441486e-05, "loss": 0.0008, "num_input_tokens_seen": 241735072, "step": 112070 }, { "epoch": 18.283034257748778, "grad_norm": 0.0019839233718812466, "learning_rate": 2.2286788379328905e-05, "loss": 0.0021, "num_input_tokens_seen": 241744672, "step": 112075 }, { "epoch": 18.28384991843393, "grad_norm": 0.10218685865402222, "learning_rate": 2.2265778849680673e-05, "loss": 0.0037, "num_input_tokens_seen": 241755488, "step": 112080 }, { "epoch": 18.284665579119086, "grad_norm": 0.026236621662974358, "learning_rate": 2.2244779001922457e-05, "loss": 0.0039, "num_input_tokens_seen": 241766656, "step": 112085 }, { "epoch": 18.28548123980424, "grad_norm": 0.04731789976358414, "learning_rate": 2.222378883647985e-05, "loss": 0.0017, "num_input_tokens_seen": 241776992, "step": 112090 }, { "epoch": 18.286296900489397, "grad_norm": 0.003429105505347252, "learning_rate": 2.2202808353778302e-05, "loss": 0.0015, "num_input_tokens_seen": 241787584, "step": 112095 }, { "epoch": 18.287112561174553, "grad_norm": 0.04918624833226204, "learning_rate": 2.2181837554242968e-05, "loss": 0.0014, "num_input_tokens_seen": 241798560, "step": 112100 }, { "epoch": 18.287928221859705, "grad_norm": 0.026064038276672363, "learning_rate": 2.216087643829884e-05, "loss": 0.0023, "num_input_tokens_seen": 241810496, "step": 112105 }, { "epoch": 18.28874388254486, "grad_norm": 0.013998693786561489, "learning_rate": 2.213992500637074e-05, "loss": 0.0013, "num_input_tokens_seen": 241820800, "step": 112110 }, { "epoch": 18.289559543230016, "grad_norm": 0.06476452201604843, "learning_rate": 2.211898325888323e-05, "loss": 0.0025, "num_input_tokens_seen": 241832224, "step": 112115 }, { "epoch": 18.290375203915172, "grad_norm": 0.0007917836774140596, "learning_rate": 2.2098051196260794e-05, "loss": 0.0034, "num_input_tokens_seen": 241843680, "step": 112120 }, { "epoch": 18.291190864600328, "grad_norm": 0.004940042272210121, "learning_rate": 2.207712881892765e-05, "loss": 0.0018, "num_input_tokens_seen": 241853312, "step": 112125 }, { "epoch": 18.29200652528548, "grad_norm": 0.02889937348663807, "learning_rate": 2.205621612730774e-05, "loss": 0.0043, "num_input_tokens_seen": 241864640, "step": 112130 }, { "epoch": 18.292822185970635, "grad_norm": 0.022351887077093124, "learning_rate": 2.2035313121824884e-05, "loss": 0.0059, "num_input_tokens_seen": 241875808, "step": 112135 }, { "epoch": 18.29363784665579, "grad_norm": 0.0031957582104951143, "learning_rate": 2.2014419802902808e-05, "loss": 0.0025, "num_input_tokens_seen": 241886944, "step": 112140 }, { "epoch": 18.294453507340947, "grad_norm": 0.0005873734480701387, "learning_rate": 2.1993536170964832e-05, "loss": 0.001, "num_input_tokens_seen": 241897664, "step": 112145 }, { "epoch": 18.295269168026103, "grad_norm": 0.23319636285305023, "learning_rate": 2.1972662226434292e-05, "loss": 0.0054, "num_input_tokens_seen": 241909568, "step": 112150 }, { "epoch": 18.296084828711255, "grad_norm": 0.002525811782106757, "learning_rate": 2.1951797969734178e-05, "loss": 0.0056, "num_input_tokens_seen": 241920768, "step": 112155 }, { "epoch": 18.29690048939641, "grad_norm": 0.023335514590144157, "learning_rate": 2.193094340128726e-05, "loss": 0.0035, "num_input_tokens_seen": 241932832, "step": 112160 }, { "epoch": 18.297716150081566, "grad_norm": 1.0479681491851807, "learning_rate": 2.191009852151632e-05, "loss": 0.0199, "num_input_tokens_seen": 241943360, "step": 112165 }, { "epoch": 18.298531810766722, "grad_norm": 0.014527312479913235, "learning_rate": 2.188926333084368e-05, "loss": 0.0016, "num_input_tokens_seen": 241954144, "step": 112170 }, { "epoch": 18.299347471451878, "grad_norm": 0.010166442021727562, "learning_rate": 2.186843782969167e-05, "loss": 0.0047, "num_input_tokens_seen": 241964672, "step": 112175 }, { "epoch": 18.30016313213703, "grad_norm": 0.022418471053242683, "learning_rate": 2.1847622018482283e-05, "loss": 0.0017, "num_input_tokens_seen": 241974368, "step": 112180 }, { "epoch": 18.300978792822185, "grad_norm": 0.0035566140431910753, "learning_rate": 2.182681589763741e-05, "loss": 0.0021, "num_input_tokens_seen": 241984256, "step": 112185 }, { "epoch": 18.30179445350734, "grad_norm": 0.0013084502425044775, "learning_rate": 2.1806019467578765e-05, "loss": 0.0059, "num_input_tokens_seen": 241994944, "step": 112190 }, { "epoch": 18.302610114192497, "grad_norm": 0.04697943478822708, "learning_rate": 2.1785232728727734e-05, "loss": 0.0071, "num_input_tokens_seen": 242006304, "step": 112195 }, { "epoch": 18.303425774877653, "grad_norm": 0.5012332201004028, "learning_rate": 2.1764455681505645e-05, "loss": 0.0085, "num_input_tokens_seen": 242017024, "step": 112200 }, { "epoch": 18.304241435562805, "grad_norm": 0.02387244440615177, "learning_rate": 2.1743688326333555e-05, "loss": 0.0041, "num_input_tokens_seen": 242027904, "step": 112205 }, { "epoch": 18.30505709624796, "grad_norm": 0.001273478614166379, "learning_rate": 2.1722930663632344e-05, "loss": 0.0038, "num_input_tokens_seen": 242039168, "step": 112210 }, { "epoch": 18.305872756933116, "grad_norm": 0.03708365932106972, "learning_rate": 2.1702182693822625e-05, "loss": 0.0132, "num_input_tokens_seen": 242050656, "step": 112215 }, { "epoch": 18.306688417618272, "grad_norm": 1.4374446868896484, "learning_rate": 2.1681444417325004e-05, "loss": 0.0493, "num_input_tokens_seen": 242061632, "step": 112220 }, { "epoch": 18.307504078303428, "grad_norm": 0.012807702645659447, "learning_rate": 2.166071583455964e-05, "loss": 0.064, "num_input_tokens_seen": 242072544, "step": 112225 }, { "epoch": 18.30831973898858, "grad_norm": 0.0016089219134300947, "learning_rate": 2.1639996945946706e-05, "loss": 0.0106, "num_input_tokens_seen": 242083616, "step": 112230 }, { "epoch": 18.309135399673735, "grad_norm": 0.8118360638618469, "learning_rate": 2.1619287751906135e-05, "loss": 0.0207, "num_input_tokens_seen": 242093728, "step": 112235 }, { "epoch": 18.30995106035889, "grad_norm": 0.3129132390022278, "learning_rate": 2.1598588252857486e-05, "loss": 0.0096, "num_input_tokens_seen": 242104864, "step": 112240 }, { "epoch": 18.310766721044047, "grad_norm": 1.2427152395248413, "learning_rate": 2.157789844922037e-05, "loss": 0.0277, "num_input_tokens_seen": 242116672, "step": 112245 }, { "epoch": 18.3115823817292, "grad_norm": 0.01889273338019848, "learning_rate": 2.1557218341414055e-05, "loss": 0.0014, "num_input_tokens_seen": 242127488, "step": 112250 }, { "epoch": 18.312398042414355, "grad_norm": 0.05895964428782463, "learning_rate": 2.1536547929857707e-05, "loss": 0.0054, "num_input_tokens_seen": 242137760, "step": 112255 }, { "epoch": 18.31321370309951, "grad_norm": 0.0005601291777566075, "learning_rate": 2.1515887214970165e-05, "loss": 0.0005, "num_input_tokens_seen": 242149472, "step": 112260 }, { "epoch": 18.314029363784666, "grad_norm": 0.0013113823952153325, "learning_rate": 2.1495236197170143e-05, "loss": 0.0012, "num_input_tokens_seen": 242159712, "step": 112265 }, { "epoch": 18.31484502446982, "grad_norm": 0.0020294557325541973, "learning_rate": 2.1474594876876198e-05, "loss": 0.0027, "num_input_tokens_seen": 242170976, "step": 112270 }, { "epoch": 18.315660685154974, "grad_norm": 0.003882236545905471, "learning_rate": 2.1453963254506604e-05, "loss": 0.0016, "num_input_tokens_seen": 242183648, "step": 112275 }, { "epoch": 18.31647634584013, "grad_norm": 0.0029808462131768465, "learning_rate": 2.1433341330479583e-05, "loss": 0.0068, "num_input_tokens_seen": 242193472, "step": 112280 }, { "epoch": 18.317292006525285, "grad_norm": 0.03055729530751705, "learning_rate": 2.141272910521297e-05, "loss": 0.0345, "num_input_tokens_seen": 242204768, "step": 112285 }, { "epoch": 18.31810766721044, "grad_norm": 0.11781969666481018, "learning_rate": 2.1392126579124536e-05, "loss": 0.0042, "num_input_tokens_seen": 242214848, "step": 112290 }, { "epoch": 18.318923327895597, "grad_norm": 0.004369907081127167, "learning_rate": 2.1371533752631844e-05, "loss": 0.0171, "num_input_tokens_seen": 242225696, "step": 112295 }, { "epoch": 18.31973898858075, "grad_norm": 0.03471706062555313, "learning_rate": 2.135095062615211e-05, "loss": 0.0053, "num_input_tokens_seen": 242235168, "step": 112300 }, { "epoch": 18.320554649265905, "grad_norm": 0.001102107809856534, "learning_rate": 2.1330377200102723e-05, "loss": 0.0019, "num_input_tokens_seen": 242244288, "step": 112305 }, { "epoch": 18.32137030995106, "grad_norm": 0.006317996885627508, "learning_rate": 2.130981347490035e-05, "loss": 0.0097, "num_input_tokens_seen": 242255360, "step": 112310 }, { "epoch": 18.322185970636216, "grad_norm": 0.006401197053492069, "learning_rate": 2.1289259450961995e-05, "loss": 0.0015, "num_input_tokens_seen": 242264896, "step": 112315 }, { "epoch": 18.32300163132137, "grad_norm": 0.03259045258164406, "learning_rate": 2.1268715128703932e-05, "loss": 0.0014, "num_input_tokens_seen": 242275200, "step": 112320 }, { "epoch": 18.323817292006524, "grad_norm": 0.027511024847626686, "learning_rate": 2.124818050854277e-05, "loss": 0.0749, "num_input_tokens_seen": 242285952, "step": 112325 }, { "epoch": 18.32463295269168, "grad_norm": 0.00551788043230772, "learning_rate": 2.122765559089451e-05, "loss": 0.0007, "num_input_tokens_seen": 242297248, "step": 112330 }, { "epoch": 18.325448613376835, "grad_norm": 0.004453037865459919, "learning_rate": 2.1207140376175214e-05, "loss": 0.0012, "num_input_tokens_seen": 242307072, "step": 112335 }, { "epoch": 18.32626427406199, "grad_norm": 0.013340902514755726, "learning_rate": 2.1186634864800603e-05, "loss": 0.0015, "num_input_tokens_seen": 242316800, "step": 112340 }, { "epoch": 18.327079934747147, "grad_norm": 0.009347260929644108, "learning_rate": 2.116613905718623e-05, "loss": 0.0007, "num_input_tokens_seen": 242327584, "step": 112345 }, { "epoch": 18.3278955954323, "grad_norm": 1.4588762521743774, "learning_rate": 2.114565295374754e-05, "loss": 0.068, "num_input_tokens_seen": 242338752, "step": 112350 }, { "epoch": 18.328711256117455, "grad_norm": 0.004609760362654924, "learning_rate": 2.112517655489965e-05, "loss": 0.0037, "num_input_tokens_seen": 242349312, "step": 112355 }, { "epoch": 18.32952691680261, "grad_norm": 0.002330854767933488, "learning_rate": 2.110470986105756e-05, "loss": 0.0014, "num_input_tokens_seen": 242360512, "step": 112360 }, { "epoch": 18.330342577487766, "grad_norm": 0.024924756959080696, "learning_rate": 2.1084252872636046e-05, "loss": 0.0093, "num_input_tokens_seen": 242371840, "step": 112365 }, { "epoch": 18.33115823817292, "grad_norm": 0.0630999505519867, "learning_rate": 2.1063805590049667e-05, "loss": 0.0039, "num_input_tokens_seen": 242383296, "step": 112370 }, { "epoch": 18.331973898858074, "grad_norm": 0.0041314177215099335, "learning_rate": 2.1043368013712872e-05, "loss": 0.0009, "num_input_tokens_seen": 242394464, "step": 112375 }, { "epoch": 18.33278955954323, "grad_norm": 0.004315406084060669, "learning_rate": 2.102294014403977e-05, "loss": 0.003, "num_input_tokens_seen": 242406336, "step": 112380 }, { "epoch": 18.333605220228385, "grad_norm": 0.005620104726403952, "learning_rate": 2.1002521981444477e-05, "loss": 0.0045, "num_input_tokens_seen": 242416864, "step": 112385 }, { "epoch": 18.33442088091354, "grad_norm": 0.02904062718153, "learning_rate": 2.0982113526340662e-05, "loss": 0.0699, "num_input_tokens_seen": 242429152, "step": 112390 }, { "epoch": 18.335236541598697, "grad_norm": 0.017469625920057297, "learning_rate": 2.0961714779142048e-05, "loss": 0.0806, "num_input_tokens_seen": 242439712, "step": 112395 }, { "epoch": 18.33605220228385, "grad_norm": 0.06052832305431366, "learning_rate": 2.0941325740261975e-05, "loss": 0.0035, "num_input_tokens_seen": 242451392, "step": 112400 }, { "epoch": 18.336867862969005, "grad_norm": 0.006190297193825245, "learning_rate": 2.0920946410113604e-05, "loss": 0.002, "num_input_tokens_seen": 242461728, "step": 112405 }, { "epoch": 18.33768352365416, "grad_norm": 0.0022313501685857773, "learning_rate": 2.0900576789110116e-05, "loss": 0.0183, "num_input_tokens_seen": 242472928, "step": 112410 }, { "epoch": 18.338499184339316, "grad_norm": 0.0007250035414472222, "learning_rate": 2.0880216877664116e-05, "loss": 0.0048, "num_input_tokens_seen": 242484064, "step": 112415 }, { "epoch": 18.339314845024468, "grad_norm": 0.13846053183078766, "learning_rate": 2.0859866676188445e-05, "loss": 0.0081, "num_input_tokens_seen": 242494144, "step": 112420 }, { "epoch": 18.340130505709624, "grad_norm": 0.04607554152607918, "learning_rate": 2.083952618509527e-05, "loss": 0.0023, "num_input_tokens_seen": 242503648, "step": 112425 }, { "epoch": 18.34094616639478, "grad_norm": 0.0019294536905363202, "learning_rate": 2.0819195404797098e-05, "loss": 0.0007, "num_input_tokens_seen": 242514528, "step": 112430 }, { "epoch": 18.341761827079935, "grad_norm": 0.00926291011273861, "learning_rate": 2.0798874335705707e-05, "loss": 0.014, "num_input_tokens_seen": 242524992, "step": 112435 }, { "epoch": 18.34257748776509, "grad_norm": 0.001229145796969533, "learning_rate": 2.077856297823316e-05, "loss": 0.0036, "num_input_tokens_seen": 242535200, "step": 112440 }, { "epoch": 18.343393148450243, "grad_norm": 0.019815078005194664, "learning_rate": 2.0758261332790796e-05, "loss": 0.001, "num_input_tokens_seen": 242546208, "step": 112445 }, { "epoch": 18.3442088091354, "grad_norm": 0.6919834017753601, "learning_rate": 2.0737969399790392e-05, "loss": 0.0116, "num_input_tokens_seen": 242556544, "step": 112450 }, { "epoch": 18.345024469820554, "grad_norm": 0.009739162400364876, "learning_rate": 2.0717687179642896e-05, "loss": 0.0009, "num_input_tokens_seen": 242566784, "step": 112455 }, { "epoch": 18.34584013050571, "grad_norm": 0.03250671178102493, "learning_rate": 2.0697414672759596e-05, "loss": 0.0028, "num_input_tokens_seen": 242577280, "step": 112460 }, { "epoch": 18.346655791190866, "grad_norm": 0.0008226912468671799, "learning_rate": 2.0677151879551103e-05, "loss": 0.0011, "num_input_tokens_seen": 242588736, "step": 112465 }, { "epoch": 18.347471451876018, "grad_norm": 0.012095787562429905, "learning_rate": 2.0656898800428313e-05, "loss": 0.0017, "num_input_tokens_seen": 242600160, "step": 112470 }, { "epoch": 18.348287112561174, "grad_norm": 0.033984698355197906, "learning_rate": 2.0636655435801455e-05, "loss": 0.0064, "num_input_tokens_seen": 242611040, "step": 112475 }, { "epoch": 18.34910277324633, "grad_norm": 0.057545460760593414, "learning_rate": 2.061642178608092e-05, "loss": 0.0039, "num_input_tokens_seen": 242621696, "step": 112480 }, { "epoch": 18.349918433931485, "grad_norm": 0.004836331587284803, "learning_rate": 2.0596197851676768e-05, "loss": 0.0022, "num_input_tokens_seen": 242633312, "step": 112485 }, { "epoch": 18.35073409461664, "grad_norm": 0.02767900563776493, "learning_rate": 2.057598363299884e-05, "loss": 0.0019, "num_input_tokens_seen": 242644384, "step": 112490 }, { "epoch": 18.351549755301793, "grad_norm": 0.0036001421976834536, "learning_rate": 2.055577913045675e-05, "loss": 0.0015, "num_input_tokens_seen": 242654784, "step": 112495 }, { "epoch": 18.35236541598695, "grad_norm": 0.026656100526452065, "learning_rate": 2.0535584344460066e-05, "loss": 0.003, "num_input_tokens_seen": 242665568, "step": 112500 }, { "epoch": 18.353181076672104, "grad_norm": 0.00042158691212534904, "learning_rate": 2.0515399275417958e-05, "loss": 0.0058, "num_input_tokens_seen": 242677088, "step": 112505 }, { "epoch": 18.35399673735726, "grad_norm": 1.08430016040802, "learning_rate": 2.0495223923739593e-05, "loss": 0.0393, "num_input_tokens_seen": 242686784, "step": 112510 }, { "epoch": 18.354812398042416, "grad_norm": 0.0033995662815868855, "learning_rate": 2.0475058289833815e-05, "loss": 0.0052, "num_input_tokens_seen": 242697312, "step": 112515 }, { "epoch": 18.355628058727568, "grad_norm": 0.0035734970588237047, "learning_rate": 2.045490237410924e-05, "loss": 0.0014, "num_input_tokens_seen": 242707520, "step": 112520 }, { "epoch": 18.356443719412724, "grad_norm": 0.013985952362418175, "learning_rate": 2.043475617697449e-05, "loss": 0.0037, "num_input_tokens_seen": 242717024, "step": 112525 }, { "epoch": 18.35725938009788, "grad_norm": 0.0044988770969212055, "learning_rate": 2.0414619698837677e-05, "loss": 0.0291, "num_input_tokens_seen": 242728832, "step": 112530 }, { "epoch": 18.358075040783035, "grad_norm": 0.003514474956318736, "learning_rate": 2.0394492940107144e-05, "loss": 0.0026, "num_input_tokens_seen": 242739456, "step": 112535 }, { "epoch": 18.35889070146819, "grad_norm": 0.005525980610400438, "learning_rate": 2.0374375901190456e-05, "loss": 0.002, "num_input_tokens_seen": 242750624, "step": 112540 }, { "epoch": 18.359706362153343, "grad_norm": 0.0016800418961793184, "learning_rate": 2.0354268582495673e-05, "loss": 0.0044, "num_input_tokens_seen": 242761536, "step": 112545 }, { "epoch": 18.3605220228385, "grad_norm": 0.001727626658976078, "learning_rate": 2.0334170984429966e-05, "loss": 0.0029, "num_input_tokens_seen": 242771872, "step": 112550 }, { "epoch": 18.361337683523654, "grad_norm": 0.8358365893363953, "learning_rate": 2.0314083107400904e-05, "loss": 0.0718, "num_input_tokens_seen": 242782624, "step": 112555 }, { "epoch": 18.36215334420881, "grad_norm": 0.02232564240694046, "learning_rate": 2.0294004951815324e-05, "loss": 0.0022, "num_input_tokens_seen": 242792928, "step": 112560 }, { "epoch": 18.362969004893966, "grad_norm": 0.05416397005319595, "learning_rate": 2.027393651808046e-05, "loss": 0.0028, "num_input_tokens_seen": 242803616, "step": 112565 }, { "epoch": 18.363784665579118, "grad_norm": 0.013503114692866802, "learning_rate": 2.0253877806602648e-05, "loss": 0.0642, "num_input_tokens_seen": 242813376, "step": 112570 }, { "epoch": 18.364600326264274, "grad_norm": 0.19286523759365082, "learning_rate": 2.0233828817788792e-05, "loss": 0.1323, "num_input_tokens_seen": 242823968, "step": 112575 }, { "epoch": 18.36541598694943, "grad_norm": 0.0009346117149107158, "learning_rate": 2.0213789552044893e-05, "loss": 0.0052, "num_input_tokens_seen": 242834880, "step": 112580 }, { "epoch": 18.366231647634585, "grad_norm": 0.0042688255198299885, "learning_rate": 2.0193760009777295e-05, "loss": 0.0012, "num_input_tokens_seen": 242845216, "step": 112585 }, { "epoch": 18.36704730831974, "grad_norm": 0.002021209103986621, "learning_rate": 2.0173740191391732e-05, "loss": 0.002, "num_input_tokens_seen": 242855008, "step": 112590 }, { "epoch": 18.367862969004893, "grad_norm": 0.03425022214651108, "learning_rate": 2.0153730097294153e-05, "loss": 0.0069, "num_input_tokens_seen": 242866304, "step": 112595 }, { "epoch": 18.36867862969005, "grad_norm": 0.0010670105693861842, "learning_rate": 2.0133729727889794e-05, "loss": 0.0515, "num_input_tokens_seen": 242877856, "step": 112600 }, { "epoch": 18.369494290375204, "grad_norm": 2.0345396995544434, "learning_rate": 2.0113739083584327e-05, "loss": 0.0802, "num_input_tokens_seen": 242888288, "step": 112605 }, { "epoch": 18.37030995106036, "grad_norm": 0.026290442794561386, "learning_rate": 2.0093758164782595e-05, "loss": 0.0018, "num_input_tokens_seen": 242898656, "step": 112610 }, { "epoch": 18.371125611745512, "grad_norm": 0.5926750898361206, "learning_rate": 2.0073786971889662e-05, "loss": 0.055, "num_input_tokens_seen": 242909152, "step": 112615 }, { "epoch": 18.371941272430668, "grad_norm": 0.2070351392030716, "learning_rate": 2.0053825505310318e-05, "loss": 0.0032, "num_input_tokens_seen": 242920256, "step": 112620 }, { "epoch": 18.372756933115824, "grad_norm": 0.11103552579879761, "learning_rate": 2.0033873765449018e-05, "loss": 0.0055, "num_input_tokens_seen": 242931072, "step": 112625 }, { "epoch": 18.37357259380098, "grad_norm": 0.059849560260772705, "learning_rate": 2.0013931752710214e-05, "loss": 0.0041, "num_input_tokens_seen": 242940640, "step": 112630 }, { "epoch": 18.374388254486135, "grad_norm": 0.03814975544810295, "learning_rate": 1.9993999467497913e-05, "loss": 0.1002, "num_input_tokens_seen": 242951552, "step": 112635 }, { "epoch": 18.375203915171287, "grad_norm": 0.016246533021330833, "learning_rate": 1.9974076910216188e-05, "loss": 0.0021, "num_input_tokens_seen": 242962848, "step": 112640 }, { "epoch": 18.376019575856443, "grad_norm": 0.0009120986214838922, "learning_rate": 1.995416408126871e-05, "loss": 0.0038, "num_input_tokens_seen": 242974400, "step": 112645 }, { "epoch": 18.3768352365416, "grad_norm": 0.000944986823014915, "learning_rate": 1.9934260981059103e-05, "loss": 0.0063, "num_input_tokens_seen": 242985984, "step": 112650 }, { "epoch": 18.377650897226754, "grad_norm": 0.01470465213060379, "learning_rate": 1.9914367609990713e-05, "loss": 0.0023, "num_input_tokens_seen": 242996672, "step": 112655 }, { "epoch": 18.37846655791191, "grad_norm": 0.03713277354836464, "learning_rate": 1.9894483968466715e-05, "loss": 0.03, "num_input_tokens_seen": 243007392, "step": 112660 }, { "epoch": 18.379282218597062, "grad_norm": 0.047064829617738724, "learning_rate": 1.9874610056890007e-05, "loss": 0.0037, "num_input_tokens_seen": 243018720, "step": 112665 }, { "epoch": 18.380097879282218, "grad_norm": 0.2868853509426117, "learning_rate": 1.9854745875663438e-05, "loss": 0.0055, "num_input_tokens_seen": 243029984, "step": 112670 }, { "epoch": 18.380913539967374, "grad_norm": 0.24530574679374695, "learning_rate": 1.983489142518946e-05, "loss": 0.1003, "num_input_tokens_seen": 243040672, "step": 112675 }, { "epoch": 18.38172920065253, "grad_norm": 0.04451248049736023, "learning_rate": 1.9815046705870697e-05, "loss": 0.0077, "num_input_tokens_seen": 243051520, "step": 112680 }, { "epoch": 18.382544861337685, "grad_norm": 0.43512970209121704, "learning_rate": 1.979521171810905e-05, "loss": 0.0069, "num_input_tokens_seen": 243061888, "step": 112685 }, { "epoch": 18.383360522022837, "grad_norm": 0.07339402288198471, "learning_rate": 1.9775386462306756e-05, "loss": 0.0038, "num_input_tokens_seen": 243072288, "step": 112690 }, { "epoch": 18.384176182707993, "grad_norm": 0.011859784834086895, "learning_rate": 1.9755570938865263e-05, "loss": 0.0018, "num_input_tokens_seen": 243083744, "step": 112695 }, { "epoch": 18.38499184339315, "grad_norm": 0.0009272433235310018, "learning_rate": 1.9735765148186536e-05, "loss": 0.0009, "num_input_tokens_seen": 243094592, "step": 112700 }, { "epoch": 18.385807504078304, "grad_norm": 0.004980906844139099, "learning_rate": 1.9715969090671693e-05, "loss": 0.0083, "num_input_tokens_seen": 243105536, "step": 112705 }, { "epoch": 18.38662316476346, "grad_norm": 0.007497982122004032, "learning_rate": 1.969618276672208e-05, "loss": 0.005, "num_input_tokens_seen": 243116096, "step": 112710 }, { "epoch": 18.387438825448612, "grad_norm": 0.035160358995199203, "learning_rate": 1.9676406176738547e-05, "loss": 0.004, "num_input_tokens_seen": 243126304, "step": 112715 }, { "epoch": 18.388254486133768, "grad_norm": 0.005718106869608164, "learning_rate": 1.965663932112205e-05, "loss": 0.0017, "num_input_tokens_seen": 243138176, "step": 112720 }, { "epoch": 18.389070146818923, "grad_norm": 0.0014913963386788964, "learning_rate": 1.96368822002731e-05, "loss": 0.0023, "num_input_tokens_seen": 243149536, "step": 112725 }, { "epoch": 18.38988580750408, "grad_norm": 0.08392902463674545, "learning_rate": 1.9617134814592096e-05, "loss": 0.0034, "num_input_tokens_seen": 243160224, "step": 112730 }, { "epoch": 18.390701468189235, "grad_norm": 0.0005805535474792123, "learning_rate": 1.9597397164479282e-05, "loss": 0.0015, "num_input_tokens_seen": 243171200, "step": 112735 }, { "epoch": 18.391517128874387, "grad_norm": 0.07632136344909668, "learning_rate": 1.957766925033466e-05, "loss": 0.0019, "num_input_tokens_seen": 243182272, "step": 112740 }, { "epoch": 18.392332789559543, "grad_norm": 1.7706077098846436, "learning_rate": 1.9557951072557978e-05, "loss": 0.0984, "num_input_tokens_seen": 243192608, "step": 112745 }, { "epoch": 18.3931484502447, "grad_norm": 0.011132840067148209, "learning_rate": 1.9538242631548965e-05, "loss": 0.0011, "num_input_tokens_seen": 243203200, "step": 112750 }, { "epoch": 18.393964110929854, "grad_norm": 0.010769308544695377, "learning_rate": 1.9518543927706968e-05, "loss": 0.0103, "num_input_tokens_seen": 243214176, "step": 112755 }, { "epoch": 18.39477977161501, "grad_norm": 0.006955175660550594, "learning_rate": 1.949885496143117e-05, "loss": 0.0101, "num_input_tokens_seen": 243224736, "step": 112760 }, { "epoch": 18.395595432300162, "grad_norm": 0.0006246392149478197, "learning_rate": 1.947917573312069e-05, "loss": 0.0533, "num_input_tokens_seen": 243236352, "step": 112765 }, { "epoch": 18.396411092985318, "grad_norm": 0.15145614743232727, "learning_rate": 1.945950624317422e-05, "loss": 0.0045, "num_input_tokens_seen": 243246720, "step": 112770 }, { "epoch": 18.397226753670473, "grad_norm": 1.0007070302963257, "learning_rate": 1.943984649199054e-05, "loss": 0.063, "num_input_tokens_seen": 243256160, "step": 112775 }, { "epoch": 18.39804241435563, "grad_norm": 0.0006350878393277526, "learning_rate": 1.9420196479967957e-05, "loss": 0.0018, "num_input_tokens_seen": 243266368, "step": 112780 }, { "epoch": 18.39885807504078, "grad_norm": 0.011603937484323978, "learning_rate": 1.9400556207504805e-05, "loss": 0.0015, "num_input_tokens_seen": 243277280, "step": 112785 }, { "epoch": 18.399673735725937, "grad_norm": 0.01365097425878048, "learning_rate": 1.9380925674998995e-05, "loss": 0.0068, "num_input_tokens_seen": 243287008, "step": 112790 }, { "epoch": 18.400489396411093, "grad_norm": 0.002807130804285407, "learning_rate": 1.9361304882848487e-05, "loss": 0.0022, "num_input_tokens_seen": 243298048, "step": 112795 }, { "epoch": 18.40130505709625, "grad_norm": 0.029686810448765755, "learning_rate": 1.9341693831450847e-05, "loss": 0.0051, "num_input_tokens_seen": 243309184, "step": 112800 }, { "epoch": 18.402120717781404, "grad_norm": 0.1771334707736969, "learning_rate": 1.9322092521203537e-05, "loss": 0.0036, "num_input_tokens_seen": 243321120, "step": 112805 }, { "epoch": 18.402936378466556, "grad_norm": 0.009403710253536701, "learning_rate": 1.93025009525038e-05, "loss": 0.0054, "num_input_tokens_seen": 243333248, "step": 112810 }, { "epoch": 18.403752039151712, "grad_norm": 0.009118743240833282, "learning_rate": 1.92829191257487e-05, "loss": 0.0016, "num_input_tokens_seen": 243344256, "step": 112815 }, { "epoch": 18.404567699836868, "grad_norm": 0.045625537633895874, "learning_rate": 1.9263347041335033e-05, "loss": 0.0044, "num_input_tokens_seen": 243355296, "step": 112820 }, { "epoch": 18.405383360522023, "grad_norm": 0.12229486554861069, "learning_rate": 1.9243784699659538e-05, "loss": 0.0133, "num_input_tokens_seen": 243365504, "step": 112825 }, { "epoch": 18.40619902120718, "grad_norm": 0.01625112071633339, "learning_rate": 1.9224232101118623e-05, "loss": 0.0036, "num_input_tokens_seen": 243376064, "step": 112830 }, { "epoch": 18.40701468189233, "grad_norm": 0.11025586724281311, "learning_rate": 1.9204689246108576e-05, "loss": 0.003, "num_input_tokens_seen": 243387040, "step": 112835 }, { "epoch": 18.407830342577487, "grad_norm": 0.008641725406050682, "learning_rate": 1.9185156135025417e-05, "loss": 0.0046, "num_input_tokens_seen": 243398304, "step": 112840 }, { "epoch": 18.408646003262643, "grad_norm": 0.09586096554994583, "learning_rate": 1.9165632768264994e-05, "loss": 0.0025, "num_input_tokens_seen": 243408640, "step": 112845 }, { "epoch": 18.4094616639478, "grad_norm": 0.002056152792647481, "learning_rate": 1.9146119146223052e-05, "loss": 0.0727, "num_input_tokens_seen": 243420416, "step": 112850 }, { "epoch": 18.410277324632954, "grad_norm": 0.03802042827010155, "learning_rate": 1.9126615269294988e-05, "loss": 0.0068, "num_input_tokens_seen": 243430464, "step": 112855 }, { "epoch": 18.411092985318106, "grad_norm": 0.010652488097548485, "learning_rate": 1.9107121137876106e-05, "loss": 0.0754, "num_input_tokens_seen": 243441344, "step": 112860 }, { "epoch": 18.411908646003262, "grad_norm": 0.13868050277233124, "learning_rate": 1.908763675236147e-05, "loss": 0.0024, "num_input_tokens_seen": 243451648, "step": 112865 }, { "epoch": 18.412724306688418, "grad_norm": 0.6044180989265442, "learning_rate": 1.906816211314599e-05, "loss": 0.0094, "num_input_tokens_seen": 243461408, "step": 112870 }, { "epoch": 18.413539967373573, "grad_norm": 0.07722927629947662, "learning_rate": 1.9048697220624244e-05, "loss": 0.0038, "num_input_tokens_seen": 243472512, "step": 112875 }, { "epoch": 18.41435562805873, "grad_norm": 0.001185476896353066, "learning_rate": 1.9029242075190856e-05, "loss": 0.0058, "num_input_tokens_seen": 243483296, "step": 112880 }, { "epoch": 18.41517128874388, "grad_norm": 0.016079241409897804, "learning_rate": 1.9009796677239953e-05, "loss": 0.0025, "num_input_tokens_seen": 243494624, "step": 112885 }, { "epoch": 18.415986949429037, "grad_norm": 0.024342065677046776, "learning_rate": 1.8990361027165726e-05, "loss": 0.001, "num_input_tokens_seen": 243505088, "step": 112890 }, { "epoch": 18.416802610114193, "grad_norm": 0.001138005405664444, "learning_rate": 1.8970935125362076e-05, "loss": 0.0008, "num_input_tokens_seen": 243516832, "step": 112895 }, { "epoch": 18.41761827079935, "grad_norm": 0.007794965989887714, "learning_rate": 1.8951518972222637e-05, "loss": 0.0022, "num_input_tokens_seen": 243527008, "step": 112900 }, { "epoch": 18.418433931484504, "grad_norm": 0.0013235649093985558, "learning_rate": 1.893211256814087e-05, "loss": 0.0031, "num_input_tokens_seen": 243537856, "step": 112905 }, { "epoch": 18.419249592169656, "grad_norm": 1.4929618835449219, "learning_rate": 1.891271591351018e-05, "loss": 0.1131, "num_input_tokens_seen": 243547936, "step": 112910 }, { "epoch": 18.420065252854812, "grad_norm": 0.012194301933050156, "learning_rate": 1.8893329008723593e-05, "loss": 0.0041, "num_input_tokens_seen": 243558976, "step": 112915 }, { "epoch": 18.420880913539968, "grad_norm": 0.0011607806663960218, "learning_rate": 1.8873951854173955e-05, "loss": 0.0024, "num_input_tokens_seen": 243569280, "step": 112920 }, { "epoch": 18.421696574225123, "grad_norm": 0.0017784551018849015, "learning_rate": 1.885458445025412e-05, "loss": 0.1254, "num_input_tokens_seen": 243581056, "step": 112925 }, { "epoch": 18.42251223491028, "grad_norm": 0.00435857567936182, "learning_rate": 1.883522679735644e-05, "loss": 0.0017, "num_input_tokens_seen": 243591808, "step": 112930 }, { "epoch": 18.42332789559543, "grad_norm": 0.0025807777419686317, "learning_rate": 1.8815878895873328e-05, "loss": 0.0041, "num_input_tokens_seen": 243601952, "step": 112935 }, { "epoch": 18.424143556280587, "grad_norm": 0.005668019410222769, "learning_rate": 1.87965407461968e-05, "loss": 0.0023, "num_input_tokens_seen": 243613088, "step": 112940 }, { "epoch": 18.424959216965743, "grad_norm": 0.3155018091201782, "learning_rate": 1.877721234871893e-05, "loss": 0.0155, "num_input_tokens_seen": 243622208, "step": 112945 }, { "epoch": 18.4257748776509, "grad_norm": 0.025972172617912292, "learning_rate": 1.8757893703831243e-05, "loss": 0.0015, "num_input_tokens_seen": 243633408, "step": 112950 }, { "epoch": 18.42659053833605, "grad_norm": 0.005825090687721968, "learning_rate": 1.8738584811925417e-05, "loss": 0.0019, "num_input_tokens_seen": 243642208, "step": 112955 }, { "epoch": 18.427406199021206, "grad_norm": 0.027793731540441513, "learning_rate": 1.8719285673392594e-05, "loss": 0.0057, "num_input_tokens_seen": 243652704, "step": 112960 }, { "epoch": 18.428221859706362, "grad_norm": 0.0019792530220001936, "learning_rate": 1.869999628862401e-05, "loss": 0.003, "num_input_tokens_seen": 243664000, "step": 112965 }, { "epoch": 18.429037520391518, "grad_norm": 0.006568220444023609, "learning_rate": 1.8680716658010633e-05, "loss": 0.0964, "num_input_tokens_seen": 243674208, "step": 112970 }, { "epoch": 18.429853181076673, "grad_norm": 0.0012691962765529752, "learning_rate": 1.8661446781943093e-05, "loss": 0.002, "num_input_tokens_seen": 243683808, "step": 112975 }, { "epoch": 18.430668841761825, "grad_norm": 0.002401526551693678, "learning_rate": 1.8642186660811965e-05, "loss": 0.0007, "num_input_tokens_seen": 243694656, "step": 112980 }, { "epoch": 18.43148450244698, "grad_norm": 0.0013001378392800689, "learning_rate": 1.862293629500761e-05, "loss": 0.0109, "num_input_tokens_seen": 243705664, "step": 112985 }, { "epoch": 18.432300163132137, "grad_norm": 0.21196968853473663, "learning_rate": 1.8603695684920042e-05, "loss": 0.0026, "num_input_tokens_seen": 243715776, "step": 112990 }, { "epoch": 18.433115823817293, "grad_norm": 0.16872073709964752, "learning_rate": 1.858446483093934e-05, "loss": 0.0054, "num_input_tokens_seen": 243726208, "step": 112995 }, { "epoch": 18.43393148450245, "grad_norm": 0.006972313392907381, "learning_rate": 1.856524373345514e-05, "loss": 0.0048, "num_input_tokens_seen": 243737952, "step": 113000 }, { "epoch": 18.4347471451876, "grad_norm": 0.0046642255038022995, "learning_rate": 1.8546032392857014e-05, "loss": 0.0041, "num_input_tokens_seen": 243749152, "step": 113005 }, { "epoch": 18.435562805872756, "grad_norm": 0.035737112164497375, "learning_rate": 1.8526830809534377e-05, "loss": 0.0029, "num_input_tokens_seen": 243759168, "step": 113010 }, { "epoch": 18.436378466557912, "grad_norm": 0.0005362089723348618, "learning_rate": 1.8507638983876252e-05, "loss": 0.0137, "num_input_tokens_seen": 243770368, "step": 113015 }, { "epoch": 18.437194127243067, "grad_norm": 0.04410313814878464, "learning_rate": 1.84884569162716e-05, "loss": 0.0048, "num_input_tokens_seen": 243781056, "step": 113020 }, { "epoch": 18.438009787928223, "grad_norm": 0.024450242519378662, "learning_rate": 1.8469284607109282e-05, "loss": 0.0508, "num_input_tokens_seen": 243790976, "step": 113025 }, { "epoch": 18.438825448613375, "grad_norm": 0.007414923515170813, "learning_rate": 1.8450122056777762e-05, "loss": 0.1465, "num_input_tokens_seen": 243800864, "step": 113030 }, { "epoch": 18.43964110929853, "grad_norm": 0.005496028810739517, "learning_rate": 1.8430969265665398e-05, "loss": 0.0078, "num_input_tokens_seen": 243812288, "step": 113035 }, { "epoch": 18.440456769983687, "grad_norm": 0.013889149762690067, "learning_rate": 1.8411826234160324e-05, "loss": 0.0022, "num_input_tokens_seen": 243822272, "step": 113040 }, { "epoch": 18.441272430668842, "grad_norm": 0.3773200213909149, "learning_rate": 1.8392692962650504e-05, "loss": 0.0119, "num_input_tokens_seen": 243832672, "step": 113045 }, { "epoch": 18.442088091353998, "grad_norm": 0.031535159796476364, "learning_rate": 1.8373569451523853e-05, "loss": 0.0015, "num_input_tokens_seen": 243844256, "step": 113050 }, { "epoch": 18.44290375203915, "grad_norm": 0.04749482497572899, "learning_rate": 1.8354455701167672e-05, "loss": 0.002, "num_input_tokens_seen": 243855296, "step": 113055 }, { "epoch": 18.443719412724306, "grad_norm": 0.00463978061452508, "learning_rate": 1.833535171196954e-05, "loss": 0.0079, "num_input_tokens_seen": 243865248, "step": 113060 }, { "epoch": 18.44453507340946, "grad_norm": 0.11854441463947296, "learning_rate": 1.831625748431648e-05, "loss": 0.1423, "num_input_tokens_seen": 243876096, "step": 113065 }, { "epoch": 18.445350734094617, "grad_norm": 0.034699294716119766, "learning_rate": 1.829717301859557e-05, "loss": 0.0443, "num_input_tokens_seen": 243886720, "step": 113070 }, { "epoch": 18.446166394779773, "grad_norm": 0.012228133156895638, "learning_rate": 1.8278098315193504e-05, "loss": 0.0046, "num_input_tokens_seen": 243896832, "step": 113075 }, { "epoch": 18.446982055464925, "grad_norm": 0.0031143715605139732, "learning_rate": 1.8259033374496915e-05, "loss": 0.0062, "num_input_tokens_seen": 243907008, "step": 113080 }, { "epoch": 18.44779771615008, "grad_norm": 0.22734366357326508, "learning_rate": 1.8239978196892105e-05, "loss": 0.0034, "num_input_tokens_seen": 243917632, "step": 113085 }, { "epoch": 18.448613376835237, "grad_norm": 0.0005688347155228257, "learning_rate": 1.8220932782765377e-05, "loss": 0.0029, "num_input_tokens_seen": 243929376, "step": 113090 }, { "epoch": 18.449429037520392, "grad_norm": 0.008131057024002075, "learning_rate": 1.8201897132502476e-05, "loss": 0.0094, "num_input_tokens_seen": 243939520, "step": 113095 }, { "epoch": 18.450244698205548, "grad_norm": 0.002863385248929262, "learning_rate": 1.8182871246489487e-05, "loss": 0.001, "num_input_tokens_seen": 243951040, "step": 113100 }, { "epoch": 18.4510603588907, "grad_norm": 0.015208118595182896, "learning_rate": 1.8163855125111707e-05, "loss": 0.0951, "num_input_tokens_seen": 243961216, "step": 113105 }, { "epoch": 18.451876019575856, "grad_norm": 0.004697436932474375, "learning_rate": 1.8144848768754717e-05, "loss": 0.0034, "num_input_tokens_seen": 243970400, "step": 113110 }, { "epoch": 18.45269168026101, "grad_norm": 0.0008406491833738983, "learning_rate": 1.8125852177803658e-05, "loss": 0.0723, "num_input_tokens_seen": 243981312, "step": 113115 }, { "epoch": 18.453507340946167, "grad_norm": 0.11335010826587677, "learning_rate": 1.8106865352643498e-05, "loss": 0.0131, "num_input_tokens_seen": 243991360, "step": 113120 }, { "epoch": 18.454323001631323, "grad_norm": 0.0015210369601845741, "learning_rate": 1.808788829365904e-05, "loss": 0.0008, "num_input_tokens_seen": 244002688, "step": 113125 }, { "epoch": 18.455138662316475, "grad_norm": 0.05409540235996246, "learning_rate": 1.8068921001234862e-05, "loss": 0.0035, "num_input_tokens_seen": 244013184, "step": 113130 }, { "epoch": 18.45595432300163, "grad_norm": 0.001313163316808641, "learning_rate": 1.804996347575538e-05, "loss": 0.0023, "num_input_tokens_seen": 244024256, "step": 113135 }, { "epoch": 18.456769983686787, "grad_norm": 0.00569185009226203, "learning_rate": 1.8031015717604793e-05, "loss": 0.0013, "num_input_tokens_seen": 244035680, "step": 113140 }, { "epoch": 18.457585644371942, "grad_norm": 0.5659423470497131, "learning_rate": 1.8012077727167065e-05, "loss": 0.024, "num_input_tokens_seen": 244047296, "step": 113145 }, { "epoch": 18.458401305057095, "grad_norm": 0.0032691452652215958, "learning_rate": 1.7993149504826056e-05, "loss": 0.0008, "num_input_tokens_seen": 244059520, "step": 113150 }, { "epoch": 18.45921696574225, "grad_norm": 0.07515059411525726, "learning_rate": 1.7974231050965352e-05, "loss": 0.0128, "num_input_tokens_seen": 244069280, "step": 113155 }, { "epoch": 18.460032626427406, "grad_norm": 0.014925154857337475, "learning_rate": 1.7955322365968253e-05, "loss": 0.0405, "num_input_tokens_seen": 244080544, "step": 113160 }, { "epoch": 18.46084828711256, "grad_norm": 0.01167026162147522, "learning_rate": 1.793642345021823e-05, "loss": 0.001, "num_input_tokens_seen": 244092384, "step": 113165 }, { "epoch": 18.461663947797717, "grad_norm": 0.1667666733264923, "learning_rate": 1.7917534304097983e-05, "loss": 0.0924, "num_input_tokens_seen": 244104512, "step": 113170 }, { "epoch": 18.46247960848287, "grad_norm": 0.0021475821267813444, "learning_rate": 1.7898654927990587e-05, "loss": 0.0021, "num_input_tokens_seen": 244115872, "step": 113175 }, { "epoch": 18.463295269168025, "grad_norm": 0.007752015721052885, "learning_rate": 1.7879785322278408e-05, "loss": 0.0073, "num_input_tokens_seen": 244126432, "step": 113180 }, { "epoch": 18.46411092985318, "grad_norm": 0.363170325756073, "learning_rate": 1.786092548734408e-05, "loss": 0.0077, "num_input_tokens_seen": 244137248, "step": 113185 }, { "epoch": 18.464926590538337, "grad_norm": 0.0029976861551404, "learning_rate": 1.7842075423569692e-05, "loss": 0.0005, "num_input_tokens_seen": 244148288, "step": 113190 }, { "epoch": 18.465742251223492, "grad_norm": 0.05666213110089302, "learning_rate": 1.782323513133738e-05, "loss": 0.0031, "num_input_tokens_seen": 244158176, "step": 113195 }, { "epoch": 18.466557911908644, "grad_norm": 0.016328366473317146, "learning_rate": 1.7804404611028778e-05, "loss": 0.0091, "num_input_tokens_seen": 244169344, "step": 113200 }, { "epoch": 18.4673735725938, "grad_norm": 0.0075811175629496574, "learning_rate": 1.7785583863025757e-05, "loss": 0.1265, "num_input_tokens_seen": 244178656, "step": 113205 }, { "epoch": 18.468189233278956, "grad_norm": 0.009348376654088497, "learning_rate": 1.776677288770945e-05, "loss": 0.0034, "num_input_tokens_seen": 244189664, "step": 113210 }, { "epoch": 18.46900489396411, "grad_norm": 0.0046129473485052586, "learning_rate": 1.7747971685461383e-05, "loss": 0.0012, "num_input_tokens_seen": 244199712, "step": 113215 }, { "epoch": 18.469820554649267, "grad_norm": 0.0035806423984467983, "learning_rate": 1.772918025666237e-05, "loss": 0.0098, "num_input_tokens_seen": 244211040, "step": 113220 }, { "epoch": 18.47063621533442, "grad_norm": 0.04108661040663719, "learning_rate": 1.7710398601693432e-05, "loss": 0.0052, "num_input_tokens_seen": 244221920, "step": 113225 }, { "epoch": 18.471451876019575, "grad_norm": 0.04100160300731659, "learning_rate": 1.769162672093494e-05, "loss": 0.0024, "num_input_tokens_seen": 244232896, "step": 113230 }, { "epoch": 18.47226753670473, "grad_norm": 0.0016091152792796493, "learning_rate": 1.7672864614767636e-05, "loss": 0.0008, "num_input_tokens_seen": 244244608, "step": 113235 }, { "epoch": 18.473083197389887, "grad_norm": 0.08778524398803711, "learning_rate": 1.7654112283571446e-05, "loss": 0.0024, "num_input_tokens_seen": 244255744, "step": 113240 }, { "epoch": 18.473898858075042, "grad_norm": 0.08874652534723282, "learning_rate": 1.7635369727726726e-05, "loss": 0.0029, "num_input_tokens_seen": 244267040, "step": 113245 }, { "epoch": 18.474714518760194, "grad_norm": 0.1096988320350647, "learning_rate": 1.7616636947613063e-05, "loss": 0.0031, "num_input_tokens_seen": 244278016, "step": 113250 }, { "epoch": 18.47553017944535, "grad_norm": 0.00367941427975893, "learning_rate": 1.759791394361021e-05, "loss": 0.0015, "num_input_tokens_seen": 244288160, "step": 113255 }, { "epoch": 18.476345840130506, "grad_norm": 0.0013774127000942826, "learning_rate": 1.757920071609764e-05, "loss": 0.0041, "num_input_tokens_seen": 244298912, "step": 113260 }, { "epoch": 18.47716150081566, "grad_norm": 0.0033665935043245554, "learning_rate": 1.75604972654545e-05, "loss": 0.0014, "num_input_tokens_seen": 244308544, "step": 113265 }, { "epoch": 18.477977161500817, "grad_norm": 0.022598180919885635, "learning_rate": 1.754180359205998e-05, "loss": 0.0068, "num_input_tokens_seen": 244319648, "step": 113270 }, { "epoch": 18.47879282218597, "grad_norm": 0.00526468874886632, "learning_rate": 1.752311969629278e-05, "loss": 0.0025, "num_input_tokens_seen": 244330496, "step": 113275 }, { "epoch": 18.479608482871125, "grad_norm": 0.004576224833726883, "learning_rate": 1.7504445578531703e-05, "loss": 0.0036, "num_input_tokens_seen": 244341344, "step": 113280 }, { "epoch": 18.48042414355628, "grad_norm": 0.000379458797397092, "learning_rate": 1.7485781239155063e-05, "loss": 0.0012, "num_input_tokens_seen": 244353280, "step": 113285 }, { "epoch": 18.481239804241437, "grad_norm": 0.0032728025689721107, "learning_rate": 1.7467126678541223e-05, "loss": 0.0074, "num_input_tokens_seen": 244364608, "step": 113290 }, { "epoch": 18.482055464926592, "grad_norm": 0.0028666744474321604, "learning_rate": 1.7448481897068158e-05, "loss": 0.0015, "num_input_tokens_seen": 244375424, "step": 113295 }, { "epoch": 18.482871125611744, "grad_norm": 0.3370775878429413, "learning_rate": 1.742984689511379e-05, "loss": 0.0087, "num_input_tokens_seen": 244386528, "step": 113300 }, { "epoch": 18.4836867862969, "grad_norm": 0.016171569004654884, "learning_rate": 1.7411221673055644e-05, "loss": 0.0069, "num_input_tokens_seen": 244397024, "step": 113305 }, { "epoch": 18.484502446982056, "grad_norm": 0.006438122130930424, "learning_rate": 1.739260623127148e-05, "loss": 0.0012, "num_input_tokens_seen": 244407104, "step": 113310 }, { "epoch": 18.48531810766721, "grad_norm": 0.010684693232178688, "learning_rate": 1.737400057013827e-05, "loss": 0.0406, "num_input_tokens_seen": 244416928, "step": 113315 }, { "epoch": 18.486133768352367, "grad_norm": 0.002083291532471776, "learning_rate": 1.735540469003327e-05, "loss": 0.0039, "num_input_tokens_seen": 244427456, "step": 113320 }, { "epoch": 18.48694942903752, "grad_norm": 0.0019530777353793383, "learning_rate": 1.733681859133318e-05, "loss": 0.0101, "num_input_tokens_seen": 244439488, "step": 113325 }, { "epoch": 18.487765089722675, "grad_norm": 0.004786877892911434, "learning_rate": 1.7318242274414864e-05, "loss": 0.0032, "num_input_tokens_seen": 244451552, "step": 113330 }, { "epoch": 18.48858075040783, "grad_norm": 0.008693159557878971, "learning_rate": 1.7299675739654575e-05, "loss": 0.0015, "num_input_tokens_seen": 244463360, "step": 113335 }, { "epoch": 18.489396411092986, "grad_norm": 0.022262364625930786, "learning_rate": 1.7281118987428847e-05, "loss": 0.0346, "num_input_tokens_seen": 244473952, "step": 113340 }, { "epoch": 18.49021207177814, "grad_norm": 0.004205009434372187, "learning_rate": 1.7262572018113488e-05, "loss": 0.0016, "num_input_tokens_seen": 244484480, "step": 113345 }, { "epoch": 18.491027732463294, "grad_norm": 0.01991439424455166, "learning_rate": 1.7244034832084587e-05, "loss": 0.0028, "num_input_tokens_seen": 244494592, "step": 113350 }, { "epoch": 18.49184339314845, "grad_norm": 0.07236351072788239, "learning_rate": 1.722550742971768e-05, "loss": 0.0026, "num_input_tokens_seen": 244506400, "step": 113355 }, { "epoch": 18.492659053833606, "grad_norm": 0.07305751740932465, "learning_rate": 1.720698981138835e-05, "loss": 0.0022, "num_input_tokens_seen": 244516576, "step": 113360 }, { "epoch": 18.49347471451876, "grad_norm": 0.009688764810562134, "learning_rate": 1.7188481977471804e-05, "loss": 0.0011, "num_input_tokens_seen": 244528480, "step": 113365 }, { "epoch": 18.494290375203914, "grad_norm": 0.0031463911291211843, "learning_rate": 1.716998392834318e-05, "loss": 0.0218, "num_input_tokens_seen": 244538496, "step": 113370 }, { "epoch": 18.49510603588907, "grad_norm": 0.021728230640292168, "learning_rate": 1.715149566437735e-05, "loss": 0.0266, "num_input_tokens_seen": 244548992, "step": 113375 }, { "epoch": 18.495921696574225, "grad_norm": 0.10809872299432755, "learning_rate": 1.7133017185949007e-05, "loss": 0.0209, "num_input_tokens_seen": 244559040, "step": 113380 }, { "epoch": 18.49673735725938, "grad_norm": 0.0025282602291554213, "learning_rate": 1.711454849343258e-05, "loss": 0.0028, "num_input_tokens_seen": 244569920, "step": 113385 }, { "epoch": 18.497553017944536, "grad_norm": 0.0007333853864111006, "learning_rate": 1.709608958720249e-05, "loss": 0.0023, "num_input_tokens_seen": 244581984, "step": 113390 }, { "epoch": 18.49836867862969, "grad_norm": 0.009200424887239933, "learning_rate": 1.7077640467632714e-05, "loss": 0.002, "num_input_tokens_seen": 244593184, "step": 113395 }, { "epoch": 18.499184339314844, "grad_norm": 0.04049834981560707, "learning_rate": 1.705920113509718e-05, "loss": 0.0068, "num_input_tokens_seen": 244602976, "step": 113400 }, { "epoch": 18.5, "grad_norm": 0.0006515970453619957, "learning_rate": 1.7040771589969583e-05, "loss": 0.0148, "num_input_tokens_seen": 244613408, "step": 113405 }, { "epoch": 18.500815660685156, "grad_norm": 0.3578934967517853, "learning_rate": 1.7022351832623407e-05, "loss": 0.0395, "num_input_tokens_seen": 244623968, "step": 113410 }, { "epoch": 18.50163132137031, "grad_norm": 0.0022567554842680693, "learning_rate": 1.7003941863432014e-05, "loss": 0.0016, "num_input_tokens_seen": 244636320, "step": 113415 }, { "epoch": 18.502446982055464, "grad_norm": 0.004416691605001688, "learning_rate": 1.6985541682768445e-05, "loss": 0.0011, "num_input_tokens_seen": 244648384, "step": 113420 }, { "epoch": 18.50326264274062, "grad_norm": 0.0022294630762189627, "learning_rate": 1.696715129100562e-05, "loss": 0.0244, "num_input_tokens_seen": 244658656, "step": 113425 }, { "epoch": 18.504078303425775, "grad_norm": 0.012804738245904446, "learning_rate": 1.6948770688516248e-05, "loss": 0.0709, "num_input_tokens_seen": 244669568, "step": 113430 }, { "epoch": 18.50489396411093, "grad_norm": 0.0020197993144392967, "learning_rate": 1.6930399875672853e-05, "loss": 0.0429, "num_input_tokens_seen": 244680000, "step": 113435 }, { "epoch": 18.505709624796086, "grad_norm": 0.026356181129813194, "learning_rate": 1.69120388528477e-05, "loss": 0.0065, "num_input_tokens_seen": 244691040, "step": 113440 }, { "epoch": 18.50652528548124, "grad_norm": 0.15312574803829193, "learning_rate": 1.6893687620412933e-05, "loss": 0.1615, "num_input_tokens_seen": 244702848, "step": 113445 }, { "epoch": 18.507340946166394, "grad_norm": 0.0031374595127999783, "learning_rate": 1.687534617874037e-05, "loss": 0.0062, "num_input_tokens_seen": 244714720, "step": 113450 }, { "epoch": 18.50815660685155, "grad_norm": 0.006842391565442085, "learning_rate": 1.685701452820193e-05, "loss": 0.0051, "num_input_tokens_seen": 244726688, "step": 113455 }, { "epoch": 18.508972267536706, "grad_norm": 0.011867827735841274, "learning_rate": 1.6838692669168876e-05, "loss": 0.0026, "num_input_tokens_seen": 244737792, "step": 113460 }, { "epoch": 18.50978792822186, "grad_norm": 0.006348334718495607, "learning_rate": 1.682038060201274e-05, "loss": 0.0012, "num_input_tokens_seen": 244748288, "step": 113465 }, { "epoch": 18.510603588907014, "grad_norm": 0.004518447443842888, "learning_rate": 1.680207832710451e-05, "loss": 0.0029, "num_input_tokens_seen": 244760352, "step": 113470 }, { "epoch": 18.51141924959217, "grad_norm": 0.001782942097634077, "learning_rate": 1.6783785844815157e-05, "loss": 0.0006, "num_input_tokens_seen": 244769728, "step": 113475 }, { "epoch": 18.512234910277325, "grad_norm": 0.0037633555475622416, "learning_rate": 1.6765503155515394e-05, "loss": 0.0012, "num_input_tokens_seen": 244781472, "step": 113480 }, { "epoch": 18.51305057096248, "grad_norm": 0.423205703496933, "learning_rate": 1.6747230259575696e-05, "loss": 0.0086, "num_input_tokens_seen": 244790528, "step": 113485 }, { "epoch": 18.513866231647633, "grad_norm": 0.01220904290676117, "learning_rate": 1.6728967157366492e-05, "loss": 0.0014, "num_input_tokens_seen": 244802112, "step": 113490 }, { "epoch": 18.51468189233279, "grad_norm": 0.002544856397435069, "learning_rate": 1.671071384925782e-05, "loss": 0.0048, "num_input_tokens_seen": 244812480, "step": 113495 }, { "epoch": 18.515497553017944, "grad_norm": 0.0719989687204361, "learning_rate": 1.66924703356196e-05, "loss": 0.0034, "num_input_tokens_seen": 244823488, "step": 113500 }, { "epoch": 18.5163132137031, "grad_norm": 0.011464669369161129, "learning_rate": 1.6674236616821602e-05, "loss": 0.0012, "num_input_tokens_seen": 244835136, "step": 113505 }, { "epoch": 18.517128874388256, "grad_norm": 0.002127256942912936, "learning_rate": 1.6656012693233357e-05, "loss": 0.0042, "num_input_tokens_seen": 244845888, "step": 113510 }, { "epoch": 18.517944535073408, "grad_norm": 0.02756613865494728, "learning_rate": 1.6637798565224127e-05, "loss": 0.0073, "num_input_tokens_seen": 244856608, "step": 113515 }, { "epoch": 18.518760195758563, "grad_norm": 0.11734737455844879, "learning_rate": 1.6619594233163172e-05, "loss": 0.0032, "num_input_tokens_seen": 244867008, "step": 113520 }, { "epoch": 18.51957585644372, "grad_norm": 0.02897774800658226, "learning_rate": 1.6601399697419306e-05, "loss": 0.002, "num_input_tokens_seen": 244878976, "step": 113525 }, { "epoch": 18.520391517128875, "grad_norm": 0.004680606070905924, "learning_rate": 1.658321495836135e-05, "loss": 0.0012, "num_input_tokens_seen": 244890912, "step": 113530 }, { "epoch": 18.52120717781403, "grad_norm": 0.027629360556602478, "learning_rate": 1.6565040016357725e-05, "loss": 0.0021, "num_input_tokens_seen": 244902080, "step": 113535 }, { "epoch": 18.522022838499183, "grad_norm": 0.011530987918376923, "learning_rate": 1.654687487177692e-05, "loss": 0.0037, "num_input_tokens_seen": 244913568, "step": 113540 }, { "epoch": 18.52283849918434, "grad_norm": 0.27173224091529846, "learning_rate": 1.6528719524986967e-05, "loss": 0.0081, "num_input_tokens_seen": 244924960, "step": 113545 }, { "epoch": 18.523654159869494, "grad_norm": 0.007531892508268356, "learning_rate": 1.6510573976355858e-05, "loss": 0.0017, "num_input_tokens_seen": 244934432, "step": 113550 }, { "epoch": 18.52446982055465, "grad_norm": 0.012404780834913254, "learning_rate": 1.6492438226251295e-05, "loss": 0.08, "num_input_tokens_seen": 244944832, "step": 113555 }, { "epoch": 18.525285481239806, "grad_norm": 0.05060427263379097, "learning_rate": 1.647431227504087e-05, "loss": 0.0039, "num_input_tokens_seen": 244955360, "step": 113560 }, { "epoch": 18.526101141924958, "grad_norm": 0.16690051555633545, "learning_rate": 1.645619612309185e-05, "loss": 0.0084, "num_input_tokens_seen": 244965536, "step": 113565 }, { "epoch": 18.526916802610113, "grad_norm": 0.04897492751479149, "learning_rate": 1.6438089770771435e-05, "loss": 0.0073, "num_input_tokens_seen": 244976256, "step": 113570 }, { "epoch": 18.52773246329527, "grad_norm": 0.48422157764434814, "learning_rate": 1.6419993218446673e-05, "loss": 0.0071, "num_input_tokens_seen": 244987424, "step": 113575 }, { "epoch": 18.528548123980425, "grad_norm": 0.009023602120578289, "learning_rate": 1.640190646648404e-05, "loss": 0.0466, "num_input_tokens_seen": 244998880, "step": 113580 }, { "epoch": 18.52936378466558, "grad_norm": 0.03680630773305893, "learning_rate": 1.638382951525047e-05, "loss": 0.0026, "num_input_tokens_seen": 245009504, "step": 113585 }, { "epoch": 18.530179445350733, "grad_norm": 0.006731228902935982, "learning_rate": 1.6365762365111947e-05, "loss": 0.0083, "num_input_tokens_seen": 245020960, "step": 113590 }, { "epoch": 18.53099510603589, "grad_norm": 0.06975117325782776, "learning_rate": 1.6347705016434844e-05, "loss": 0.0016, "num_input_tokens_seen": 245031680, "step": 113595 }, { "epoch": 18.531810766721044, "grad_norm": 0.3293974697589874, "learning_rate": 1.6329657469585037e-05, "loss": 0.0055, "num_input_tokens_seen": 245043904, "step": 113600 }, { "epoch": 18.5326264274062, "grad_norm": 0.03506282716989517, "learning_rate": 1.6311619724928283e-05, "loss": 0.0021, "num_input_tokens_seen": 245054560, "step": 113605 }, { "epoch": 18.533442088091356, "grad_norm": 0.07564103603363037, "learning_rate": 1.6293591782830186e-05, "loss": 0.0089, "num_input_tokens_seen": 245063712, "step": 113610 }, { "epoch": 18.534257748776508, "grad_norm": 0.005781334359198809, "learning_rate": 1.6275573643656115e-05, "loss": 0.0053, "num_input_tokens_seen": 245074176, "step": 113615 }, { "epoch": 18.535073409461663, "grad_norm": 0.0017030327580869198, "learning_rate": 1.6257565307771115e-05, "loss": 0.0012, "num_input_tokens_seen": 245083616, "step": 113620 }, { "epoch": 18.53588907014682, "grad_norm": 0.13453902304172516, "learning_rate": 1.6239566775540283e-05, "loss": 0.0099, "num_input_tokens_seen": 245094784, "step": 113625 }, { "epoch": 18.536704730831975, "grad_norm": 0.004103922285139561, "learning_rate": 1.6221578047328322e-05, "loss": 0.0034, "num_input_tokens_seen": 245104480, "step": 113630 }, { "epoch": 18.53752039151713, "grad_norm": 0.05958361178636551, "learning_rate": 1.6203599123499778e-05, "loss": 0.0023, "num_input_tokens_seen": 245114752, "step": 113635 }, { "epoch": 18.538336052202283, "grad_norm": 0.004203310701996088, "learning_rate": 1.6185630004419027e-05, "loss": 0.0039, "num_input_tokens_seen": 245125536, "step": 113640 }, { "epoch": 18.53915171288744, "grad_norm": 0.0008924403227865696, "learning_rate": 1.6167670690450276e-05, "loss": 0.0631, "num_input_tokens_seen": 245136064, "step": 113645 }, { "epoch": 18.539967373572594, "grad_norm": 0.9982555508613586, "learning_rate": 1.6149721181957456e-05, "loss": 0.1572, "num_input_tokens_seen": 245147744, "step": 113650 }, { "epoch": 18.54078303425775, "grad_norm": 0.013280803337693214, "learning_rate": 1.6131781479304332e-05, "loss": 0.0139, "num_input_tokens_seen": 245158624, "step": 113655 }, { "epoch": 18.541598694942905, "grad_norm": 0.11209940910339355, "learning_rate": 1.61138515828545e-05, "loss": 0.0728, "num_input_tokens_seen": 245169888, "step": 113660 }, { "epoch": 18.542414355628058, "grad_norm": 0.011560685001313686, "learning_rate": 1.6095931492971282e-05, "loss": 0.005, "num_input_tokens_seen": 245181728, "step": 113665 }, { "epoch": 18.543230016313213, "grad_norm": 0.003503490472212434, "learning_rate": 1.6078021210017945e-05, "loss": 0.0107, "num_input_tokens_seen": 245192960, "step": 113670 }, { "epoch": 18.54404567699837, "grad_norm": 0.009766645729541779, "learning_rate": 1.6060120734357366e-05, "loss": 0.0077, "num_input_tokens_seen": 245203296, "step": 113675 }, { "epoch": 18.544861337683525, "grad_norm": 0.008662141859531403, "learning_rate": 1.604223006635236e-05, "loss": 0.0591, "num_input_tokens_seen": 245213952, "step": 113680 }, { "epoch": 18.545676998368677, "grad_norm": 0.2341245710849762, "learning_rate": 1.6024349206365475e-05, "loss": 0.0135, "num_input_tokens_seen": 245224768, "step": 113685 }, { "epoch": 18.546492659053833, "grad_norm": 0.0036223565693944693, "learning_rate": 1.6006478154759197e-05, "loss": 0.0042, "num_input_tokens_seen": 245233408, "step": 113690 }, { "epoch": 18.54730831973899, "grad_norm": 0.04902312532067299, "learning_rate": 1.598861691189557e-05, "loss": 0.0019, "num_input_tokens_seen": 245245696, "step": 113695 }, { "epoch": 18.548123980424144, "grad_norm": 0.14736567437648773, "learning_rate": 1.5970765478136696e-05, "loss": 0.0121, "num_input_tokens_seen": 245255968, "step": 113700 }, { "epoch": 18.5489396411093, "grad_norm": 0.013301811181008816, "learning_rate": 1.5952923853844224e-05, "loss": 0.0022, "num_input_tokens_seen": 245267520, "step": 113705 }, { "epoch": 18.549755301794452, "grad_norm": 0.05732986703515053, "learning_rate": 1.5935092039379874e-05, "loss": 0.002, "num_input_tokens_seen": 245279040, "step": 113710 }, { "epoch": 18.550570962479608, "grad_norm": 0.003212499665096402, "learning_rate": 1.5917270035104903e-05, "loss": 0.0012, "num_input_tokens_seen": 245290176, "step": 113715 }, { "epoch": 18.551386623164763, "grad_norm": 0.006301900837570429, "learning_rate": 1.5899457841380637e-05, "loss": 0.0058, "num_input_tokens_seen": 245299392, "step": 113720 }, { "epoch": 18.55220228384992, "grad_norm": 0.0034554116427898407, "learning_rate": 1.5881655458567847e-05, "loss": 0.0015, "num_input_tokens_seen": 245309152, "step": 113725 }, { "epoch": 18.553017944535075, "grad_norm": 0.005437372252345085, "learning_rate": 1.5863862887027626e-05, "loss": 0.003, "num_input_tokens_seen": 245321184, "step": 113730 }, { "epoch": 18.553833605220227, "grad_norm": 0.008455879986286163, "learning_rate": 1.5846080127120244e-05, "loss": 0.0026, "num_input_tokens_seen": 245331296, "step": 113735 }, { "epoch": 18.554649265905383, "grad_norm": 0.0008267336525022984, "learning_rate": 1.58283071792063e-05, "loss": 0.0082, "num_input_tokens_seen": 245342304, "step": 113740 }, { "epoch": 18.55546492659054, "grad_norm": 0.0015720766969025135, "learning_rate": 1.581054404364596e-05, "loss": 0.0015, "num_input_tokens_seen": 245352928, "step": 113745 }, { "epoch": 18.556280587275694, "grad_norm": 0.005089359357953072, "learning_rate": 1.5792790720799144e-05, "loss": 0.0015, "num_input_tokens_seen": 245363200, "step": 113750 }, { "epoch": 18.55709624796085, "grad_norm": 0.0023557040840387344, "learning_rate": 1.5775047211025685e-05, "loss": 0.0012, "num_input_tokens_seen": 245374240, "step": 113755 }, { "epoch": 18.557911908646002, "grad_norm": 0.0005374934989959002, "learning_rate": 1.575731351468518e-05, "loss": 0.0086, "num_input_tokens_seen": 245384960, "step": 113760 }, { "epoch": 18.558727569331158, "grad_norm": 0.0007552157621830702, "learning_rate": 1.5739589632137006e-05, "loss": 0.0054, "num_input_tokens_seen": 245396384, "step": 113765 }, { "epoch": 18.559543230016313, "grad_norm": 0.01587565988302231, "learning_rate": 1.572187556374044e-05, "loss": 0.0025, "num_input_tokens_seen": 245407328, "step": 113770 }, { "epoch": 18.56035889070147, "grad_norm": 0.14203910529613495, "learning_rate": 1.5704171309854354e-05, "loss": 0.004, "num_input_tokens_seen": 245418848, "step": 113775 }, { "epoch": 18.561174551386625, "grad_norm": 0.2624348998069763, "learning_rate": 1.568647687083763e-05, "loss": 0.0071, "num_input_tokens_seen": 245430016, "step": 113780 }, { "epoch": 18.561990212071777, "grad_norm": 0.029735475778579712, "learning_rate": 1.5668792247048868e-05, "loss": 0.0019, "num_input_tokens_seen": 245440544, "step": 113785 }, { "epoch": 18.562805872756933, "grad_norm": 0.033883173018693924, "learning_rate": 1.565111743884634e-05, "loss": 0.0018, "num_input_tokens_seen": 245451616, "step": 113790 }, { "epoch": 18.563621533442088, "grad_norm": 0.004889798816293478, "learning_rate": 1.5633452446588537e-05, "loss": 0.0167, "num_input_tokens_seen": 245463264, "step": 113795 }, { "epoch": 18.564437194127244, "grad_norm": 0.2526243329048157, "learning_rate": 1.5615797270633114e-05, "loss": 0.035, "num_input_tokens_seen": 245474048, "step": 113800 }, { "epoch": 18.5652528548124, "grad_norm": 0.0007484579109586775, "learning_rate": 1.5598151911338176e-05, "loss": 0.001, "num_input_tokens_seen": 245483712, "step": 113805 }, { "epoch": 18.56606851549755, "grad_norm": 0.0022926160600036383, "learning_rate": 1.5580516369061103e-05, "loss": 0.0508, "num_input_tokens_seen": 245494208, "step": 113810 }, { "epoch": 18.566884176182707, "grad_norm": 0.1527080535888672, "learning_rate": 1.55628906441595e-05, "loss": 0.0096, "num_input_tokens_seen": 245504416, "step": 113815 }, { "epoch": 18.567699836867863, "grad_norm": 0.06151263043284416, "learning_rate": 1.5545274736990354e-05, "loss": 0.0081, "num_input_tokens_seen": 245515680, "step": 113820 }, { "epoch": 18.56851549755302, "grad_norm": 0.04166337102651596, "learning_rate": 1.5527668647910886e-05, "loss": 0.0015, "num_input_tokens_seen": 245527296, "step": 113825 }, { "epoch": 18.569331158238175, "grad_norm": 0.0181441567838192, "learning_rate": 1.5510072377277696e-05, "loss": 0.0025, "num_input_tokens_seen": 245537600, "step": 113830 }, { "epoch": 18.570146818923327, "grad_norm": 0.153161883354187, "learning_rate": 1.5492485925447663e-05, "loss": 0.0621, "num_input_tokens_seen": 245548704, "step": 113835 }, { "epoch": 18.570962479608482, "grad_norm": 0.010166927240788937, "learning_rate": 1.5474909292776895e-05, "loss": 0.0867, "num_input_tokens_seen": 245560096, "step": 113840 }, { "epoch": 18.571778140293638, "grad_norm": 0.015028668567538261, "learning_rate": 1.5457342479621883e-05, "loss": 0.0016, "num_input_tokens_seen": 245571072, "step": 113845 }, { "epoch": 18.572593800978794, "grad_norm": 0.028898989781737328, "learning_rate": 1.5439785486338396e-05, "loss": 0.003, "num_input_tokens_seen": 245582848, "step": 113850 }, { "epoch": 18.57340946166395, "grad_norm": 0.027609117329120636, "learning_rate": 1.5422238313282434e-05, "loss": 0.0099, "num_input_tokens_seen": 245592416, "step": 113855 }, { "epoch": 18.5742251223491, "grad_norm": 0.09636738896369934, "learning_rate": 1.540470096080948e-05, "loss": 0.0032, "num_input_tokens_seen": 245603168, "step": 113860 }, { "epoch": 18.575040783034257, "grad_norm": 0.0074952333234250546, "learning_rate": 1.538717342927509e-05, "loss": 0.0668, "num_input_tokens_seen": 245614880, "step": 113865 }, { "epoch": 18.575856443719413, "grad_norm": 0.011603385210037231, "learning_rate": 1.536965571903437e-05, "loss": 0.0029, "num_input_tokens_seen": 245626048, "step": 113870 }, { "epoch": 18.57667210440457, "grad_norm": 0.01362020056694746, "learning_rate": 1.535214783044242e-05, "loss": 0.0036, "num_input_tokens_seen": 245636512, "step": 113875 }, { "epoch": 18.57748776508972, "grad_norm": 0.008852330036461353, "learning_rate": 1.5334649763853903e-05, "loss": 0.007, "num_input_tokens_seen": 245647840, "step": 113880 }, { "epoch": 18.578303425774877, "grad_norm": 0.05802656710147858, "learning_rate": 1.5317161519623647e-05, "loss": 0.0018, "num_input_tokens_seen": 245658592, "step": 113885 }, { "epoch": 18.579119086460032, "grad_norm": 0.004298294894397259, "learning_rate": 1.529968309810592e-05, "loss": 0.0101, "num_input_tokens_seen": 245669312, "step": 113890 }, { "epoch": 18.579934747145188, "grad_norm": 0.0026160171255469322, "learning_rate": 1.5282214499655055e-05, "loss": 0.0034, "num_input_tokens_seen": 245679808, "step": 113895 }, { "epoch": 18.580750407830344, "grad_norm": 0.02582189254462719, "learning_rate": 1.526475572462499e-05, "loss": 0.0014, "num_input_tokens_seen": 245690400, "step": 113900 }, { "epoch": 18.581566068515496, "grad_norm": 0.007931591011583805, "learning_rate": 1.5247306773369552e-05, "loss": 0.0027, "num_input_tokens_seen": 245703104, "step": 113905 }, { "epoch": 18.58238172920065, "grad_norm": 0.007966511882841587, "learning_rate": 1.5229867646242457e-05, "loss": 0.0021, "num_input_tokens_seen": 245715776, "step": 113910 }, { "epoch": 18.583197389885807, "grad_norm": 0.37654855847358704, "learning_rate": 1.5212438343597036e-05, "loss": 0.0196, "num_input_tokens_seen": 245726944, "step": 113915 }, { "epoch": 18.584013050570963, "grad_norm": 0.00602776138111949, "learning_rate": 1.5195018865786559e-05, "loss": 0.0014, "num_input_tokens_seen": 245738048, "step": 113920 }, { "epoch": 18.58482871125612, "grad_norm": 0.0004316051781643182, "learning_rate": 1.5177609213164023e-05, "loss": 0.0027, "num_input_tokens_seen": 245748736, "step": 113925 }, { "epoch": 18.58564437194127, "grad_norm": 0.0009807369206100702, "learning_rate": 1.5160209386082314e-05, "loss": 0.0116, "num_input_tokens_seen": 245759840, "step": 113930 }, { "epoch": 18.586460032626427, "grad_norm": 0.0086582126095891, "learning_rate": 1.5142819384893925e-05, "loss": 0.0016, "num_input_tokens_seen": 245770144, "step": 113935 }, { "epoch": 18.587275693311582, "grad_norm": 0.019097885116934776, "learning_rate": 1.512543920995152e-05, "loss": 0.1267, "num_input_tokens_seen": 245780288, "step": 113940 }, { "epoch": 18.588091353996738, "grad_norm": 1.0397331714630127, "learning_rate": 1.5108068861607094e-05, "loss": 0.0982, "num_input_tokens_seen": 245791264, "step": 113945 }, { "epoch": 18.588907014681894, "grad_norm": 0.0023237664718180895, "learning_rate": 1.5090708340212867e-05, "loss": 0.0122, "num_input_tokens_seen": 245802176, "step": 113950 }, { "epoch": 18.589722675367046, "grad_norm": 0.008761776611208916, "learning_rate": 1.5073357646120501e-05, "loss": 0.0015, "num_input_tokens_seen": 245813536, "step": 113955 }, { "epoch": 18.5905383360522, "grad_norm": 0.025383733212947845, "learning_rate": 1.5056016779681825e-05, "loss": 0.0108, "num_input_tokens_seen": 245822944, "step": 113960 }, { "epoch": 18.591353996737357, "grad_norm": 0.03018474392592907, "learning_rate": 1.5038685741248059e-05, "loss": 0.0027, "num_input_tokens_seen": 245833248, "step": 113965 }, { "epoch": 18.592169657422513, "grad_norm": 0.011699194088578224, "learning_rate": 1.502136453117059e-05, "loss": 0.0012, "num_input_tokens_seen": 245845856, "step": 113970 }, { "epoch": 18.59298531810767, "grad_norm": 0.005503134336322546, "learning_rate": 1.5004053149800356e-05, "loss": 0.0033, "num_input_tokens_seen": 245856960, "step": 113975 }, { "epoch": 18.59380097879282, "grad_norm": 0.04958203434944153, "learning_rate": 1.4986751597488357e-05, "loss": 0.0049, "num_input_tokens_seen": 245866880, "step": 113980 }, { "epoch": 18.594616639477977, "grad_norm": 0.016657941043376923, "learning_rate": 1.4969459874585034e-05, "loss": 0.0074, "num_input_tokens_seen": 245878816, "step": 113985 }, { "epoch": 18.595432300163132, "grad_norm": 0.017392927780747414, "learning_rate": 1.495217798144094e-05, "loss": 0.0017, "num_input_tokens_seen": 245888864, "step": 113990 }, { "epoch": 18.596247960848288, "grad_norm": 0.001096212537959218, "learning_rate": 1.4934905918406239e-05, "loss": 0.0458, "num_input_tokens_seen": 245899296, "step": 113995 }, { "epoch": 18.597063621533444, "grad_norm": 0.015800196677446365, "learning_rate": 1.491764368583104e-05, "loss": 0.0056, "num_input_tokens_seen": 245910624, "step": 114000 }, { "epoch": 18.597879282218596, "grad_norm": 0.034441787749528885, "learning_rate": 1.4900391284065229e-05, "loss": 0.0099, "num_input_tokens_seen": 245920576, "step": 114005 }, { "epoch": 18.59869494290375, "grad_norm": 1.2270455360412598, "learning_rate": 1.4883148713458306e-05, "loss": 0.0742, "num_input_tokens_seen": 245930848, "step": 114010 }, { "epoch": 18.599510603588907, "grad_norm": 0.033619172871112823, "learning_rate": 1.4865915974359823e-05, "loss": 0.0044, "num_input_tokens_seen": 245943296, "step": 114015 }, { "epoch": 18.600326264274063, "grad_norm": 0.008595656603574753, "learning_rate": 1.4848693067119e-05, "loss": 0.0134, "num_input_tokens_seen": 245953696, "step": 114020 }, { "epoch": 18.601141924959215, "grad_norm": 0.0012199173215776682, "learning_rate": 1.483147999208484e-05, "loss": 0.0017, "num_input_tokens_seen": 245963456, "step": 114025 }, { "epoch": 18.60195758564437, "grad_norm": 0.030602358281612396, "learning_rate": 1.4814276749606226e-05, "loss": 0.0025, "num_input_tokens_seen": 245972384, "step": 114030 }, { "epoch": 18.602773246329527, "grad_norm": 0.002436590613797307, "learning_rate": 1.4797083340031769e-05, "loss": 0.0025, "num_input_tokens_seen": 245982912, "step": 114035 }, { "epoch": 18.603588907014682, "grad_norm": 0.059467632323503494, "learning_rate": 1.477989976370997e-05, "loss": 0.0049, "num_input_tokens_seen": 245993472, "step": 114040 }, { "epoch": 18.604404567699838, "grad_norm": 0.32938385009765625, "learning_rate": 1.4762726020989047e-05, "loss": 0.0069, "num_input_tokens_seen": 246004896, "step": 114045 }, { "epoch": 18.605220228384994, "grad_norm": 0.07093621045351028, "learning_rate": 1.4745562112217059e-05, "loss": 0.0044, "num_input_tokens_seen": 246016544, "step": 114050 }, { "epoch": 18.606035889070146, "grad_norm": 0.12244976311922073, "learning_rate": 1.4728408037741836e-05, "loss": 0.0034, "num_input_tokens_seen": 246027680, "step": 114055 }, { "epoch": 18.6068515497553, "grad_norm": 0.0049955593422055244, "learning_rate": 1.4711263797911045e-05, "loss": 0.0011, "num_input_tokens_seen": 246038208, "step": 114060 }, { "epoch": 18.607667210440457, "grad_norm": 0.005156893283128738, "learning_rate": 1.469412939307213e-05, "loss": 0.0021, "num_input_tokens_seen": 246049056, "step": 114065 }, { "epoch": 18.608482871125613, "grad_norm": 0.00047960117808543146, "learning_rate": 1.4677004823572316e-05, "loss": 0.0024, "num_input_tokens_seen": 246059200, "step": 114070 }, { "epoch": 18.609298531810765, "grad_norm": 0.00609675282612443, "learning_rate": 1.4659890089758654e-05, "loss": 0.0045, "num_input_tokens_seen": 246069728, "step": 114075 }, { "epoch": 18.61011419249592, "grad_norm": 0.038690097630023956, "learning_rate": 1.4642785191978036e-05, "loss": 0.0352, "num_input_tokens_seen": 246080224, "step": 114080 }, { "epoch": 18.610929853181077, "grad_norm": 0.001327752717770636, "learning_rate": 1.462569013057713e-05, "loss": 0.0184, "num_input_tokens_seen": 246089568, "step": 114085 }, { "epoch": 18.611745513866232, "grad_norm": 0.028049714863300323, "learning_rate": 1.4608604905902268e-05, "loss": 0.0017, "num_input_tokens_seen": 246100384, "step": 114090 }, { "epoch": 18.612561174551388, "grad_norm": 0.011499990709125996, "learning_rate": 1.4591529518299896e-05, "loss": 0.0017, "num_input_tokens_seen": 246111520, "step": 114095 }, { "epoch": 18.61337683523654, "grad_norm": 2.199402332305908, "learning_rate": 1.4574463968115903e-05, "loss": 0.0297, "num_input_tokens_seen": 246122592, "step": 114100 }, { "epoch": 18.614192495921696, "grad_norm": 0.031588152050971985, "learning_rate": 1.4557408255696181e-05, "loss": 0.0121, "num_input_tokens_seen": 246132320, "step": 114105 }, { "epoch": 18.61500815660685, "grad_norm": 0.009587142616510391, "learning_rate": 1.4540362381386452e-05, "loss": 0.001, "num_input_tokens_seen": 246143328, "step": 114110 }, { "epoch": 18.615823817292007, "grad_norm": 0.004696622025221586, "learning_rate": 1.4523326345532163e-05, "loss": 0.0048, "num_input_tokens_seen": 246154304, "step": 114115 }, { "epoch": 18.616639477977163, "grad_norm": 0.8757311701774597, "learning_rate": 1.450630014847848e-05, "loss": 0.0321, "num_input_tokens_seen": 246164800, "step": 114120 }, { "epoch": 18.617455138662315, "grad_norm": 0.007999480701982975, "learning_rate": 1.4489283790570518e-05, "loss": 0.002, "num_input_tokens_seen": 246175744, "step": 114125 }, { "epoch": 18.61827079934747, "grad_norm": 0.002791090402752161, "learning_rate": 1.4472277272153167e-05, "loss": 0.0034, "num_input_tokens_seen": 246185856, "step": 114130 }, { "epoch": 18.619086460032626, "grad_norm": 0.0009678204078227282, "learning_rate": 1.445528059357104e-05, "loss": 0.0021, "num_input_tokens_seen": 246197280, "step": 114135 }, { "epoch": 18.619902120717782, "grad_norm": 0.017024708911776543, "learning_rate": 1.4438293755168585e-05, "loss": 0.0044, "num_input_tokens_seen": 246208000, "step": 114140 }, { "epoch": 18.620717781402938, "grad_norm": 0.0006294417544268072, "learning_rate": 1.4421316757290082e-05, "loss": 0.0038, "num_input_tokens_seen": 246219264, "step": 114145 }, { "epoch": 18.62153344208809, "grad_norm": 0.005605780053883791, "learning_rate": 1.4404349600279642e-05, "loss": 0.0055, "num_input_tokens_seen": 246229920, "step": 114150 }, { "epoch": 18.622349102773246, "grad_norm": 0.07736977934837341, "learning_rate": 1.4387392284481049e-05, "loss": 0.0211, "num_input_tokens_seen": 246238880, "step": 114155 }, { "epoch": 18.6231647634584, "grad_norm": 1.042648196220398, "learning_rate": 1.437044481023797e-05, "loss": 0.1288, "num_input_tokens_seen": 246250144, "step": 114160 }, { "epoch": 18.623980424143557, "grad_norm": 0.0009783267742022872, "learning_rate": 1.4353507177893964e-05, "loss": 0.001, "num_input_tokens_seen": 246261440, "step": 114165 }, { "epoch": 18.624796084828713, "grad_norm": 0.2376711517572403, "learning_rate": 1.4336579387792148e-05, "loss": 0.0297, "num_input_tokens_seen": 246273280, "step": 114170 }, { "epoch": 18.625611745513865, "grad_norm": 0.0005417669308371842, "learning_rate": 1.4319661440275689e-05, "loss": 0.0025, "num_input_tokens_seen": 246283744, "step": 114175 }, { "epoch": 18.62642740619902, "grad_norm": 0.022347012534737587, "learning_rate": 1.4302753335687423e-05, "loss": 0.003, "num_input_tokens_seen": 246292832, "step": 114180 }, { "epoch": 18.627243066884176, "grad_norm": 0.0007118910434655845, "learning_rate": 1.4285855074370025e-05, "loss": 0.0021, "num_input_tokens_seen": 246303104, "step": 114185 }, { "epoch": 18.628058727569332, "grad_norm": 0.3159797787666321, "learning_rate": 1.4268966656665938e-05, "loss": 0.004, "num_input_tokens_seen": 246313312, "step": 114190 }, { "epoch": 18.628874388254488, "grad_norm": 0.001411781762726605, "learning_rate": 1.4252088082917391e-05, "loss": 0.0051, "num_input_tokens_seen": 246324320, "step": 114195 }, { "epoch": 18.62969004893964, "grad_norm": 0.5962813496589661, "learning_rate": 1.4235219353466555e-05, "loss": 0.0108, "num_input_tokens_seen": 246335232, "step": 114200 }, { "epoch": 18.630505709624796, "grad_norm": 0.0061396942473948, "learning_rate": 1.4218360468655212e-05, "loss": 0.0009, "num_input_tokens_seen": 246345056, "step": 114205 }, { "epoch": 18.63132137030995, "grad_norm": 0.006456305272877216, "learning_rate": 1.4201511428824976e-05, "loss": 0.0022, "num_input_tokens_seen": 246356032, "step": 114210 }, { "epoch": 18.632137030995107, "grad_norm": 0.017149146646261215, "learning_rate": 1.4184672234317463e-05, "loss": 0.0067, "num_input_tokens_seen": 246366944, "step": 114215 }, { "epoch": 18.63295269168026, "grad_norm": 0.0018400782719254494, "learning_rate": 1.4167842885473903e-05, "loss": 0.0005, "num_input_tokens_seen": 246377824, "step": 114220 }, { "epoch": 18.633768352365415, "grad_norm": 0.09079362452030182, "learning_rate": 1.4151023382635298e-05, "loss": 0.1441, "num_input_tokens_seen": 246387520, "step": 114225 }, { "epoch": 18.63458401305057, "grad_norm": 0.08388044685125351, "learning_rate": 1.4134213726142541e-05, "loss": 0.002, "num_input_tokens_seen": 246398336, "step": 114230 }, { "epoch": 18.635399673735726, "grad_norm": 0.23914304375648499, "learning_rate": 1.4117413916336307e-05, "loss": 0.015, "num_input_tokens_seen": 246409312, "step": 114235 }, { "epoch": 18.636215334420882, "grad_norm": 0.0027765545528382063, "learning_rate": 1.4100623953557045e-05, "loss": 0.0024, "num_input_tokens_seen": 246419904, "step": 114240 }, { "epoch": 18.637030995106034, "grad_norm": 0.00591387739405036, "learning_rate": 1.4083843838145095e-05, "loss": 0.0118, "num_input_tokens_seen": 246430976, "step": 114245 }, { "epoch": 18.63784665579119, "grad_norm": 0.01001641433686018, "learning_rate": 1.4067073570440458e-05, "loss": 0.0013, "num_input_tokens_seen": 246441280, "step": 114250 }, { "epoch": 18.638662316476346, "grad_norm": 0.0038873902522027493, "learning_rate": 1.4050313150782978e-05, "loss": 0.0021, "num_input_tokens_seen": 246453248, "step": 114255 }, { "epoch": 18.6394779771615, "grad_norm": 0.007375560700893402, "learning_rate": 1.4033562579512438e-05, "loss": 0.0033, "num_input_tokens_seen": 246462816, "step": 114260 }, { "epoch": 18.640293637846657, "grad_norm": 0.0023261315654963255, "learning_rate": 1.4016821856968232e-05, "loss": 0.0037, "num_input_tokens_seen": 246473920, "step": 114265 }, { "epoch": 18.64110929853181, "grad_norm": 0.0009641236392781138, "learning_rate": 1.4000090983489588e-05, "loss": 0.0038, "num_input_tokens_seen": 246484256, "step": 114270 }, { "epoch": 18.641924959216965, "grad_norm": 0.004181442782282829, "learning_rate": 1.3983369959415682e-05, "loss": 0.0249, "num_input_tokens_seen": 246494688, "step": 114275 }, { "epoch": 18.64274061990212, "grad_norm": 0.004303324036300182, "learning_rate": 1.3966658785085352e-05, "loss": 0.061, "num_input_tokens_seen": 246506432, "step": 114280 }, { "epoch": 18.643556280587276, "grad_norm": 0.0010069162817671895, "learning_rate": 1.394995746083727e-05, "loss": 0.0014, "num_input_tokens_seen": 246516032, "step": 114285 }, { "epoch": 18.644371941272432, "grad_norm": 0.008468493819236755, "learning_rate": 1.3933265987009836e-05, "loss": 0.0027, "num_input_tokens_seen": 246526944, "step": 114290 }, { "epoch": 18.645187601957584, "grad_norm": 0.005815332289785147, "learning_rate": 1.3916584363941442e-05, "loss": 0.0027, "num_input_tokens_seen": 246536192, "step": 114295 }, { "epoch": 18.64600326264274, "grad_norm": 0.00613406952470541, "learning_rate": 1.3899912591970099e-05, "loss": 0.0026, "num_input_tokens_seen": 246546432, "step": 114300 }, { "epoch": 18.646818923327896, "grad_norm": 0.003470706520602107, "learning_rate": 1.3883250671433645e-05, "loss": 0.008, "num_input_tokens_seen": 246557376, "step": 114305 }, { "epoch": 18.64763458401305, "grad_norm": 0.03748178109526634, "learning_rate": 1.3866598602669866e-05, "loss": 0.0508, "num_input_tokens_seen": 246567680, "step": 114310 }, { "epoch": 18.648450244698207, "grad_norm": 0.06075272709131241, "learning_rate": 1.3849956386016049e-05, "loss": 0.0034, "num_input_tokens_seen": 246578336, "step": 114315 }, { "epoch": 18.64926590538336, "grad_norm": 0.027381382882595062, "learning_rate": 1.3833324021809756e-05, "loss": 0.0031, "num_input_tokens_seen": 246590080, "step": 114320 }, { "epoch": 18.650081566068515, "grad_norm": 0.03411688655614853, "learning_rate": 1.3816701510387775e-05, "loss": 0.0053, "num_input_tokens_seen": 246600160, "step": 114325 }, { "epoch": 18.65089722675367, "grad_norm": 0.007338962983340025, "learning_rate": 1.3800088852087166e-05, "loss": 0.0008, "num_input_tokens_seen": 246610848, "step": 114330 }, { "epoch": 18.651712887438826, "grad_norm": 0.18375210464000702, "learning_rate": 1.3783486047244497e-05, "loss": 0.0082, "num_input_tokens_seen": 246622784, "step": 114335 }, { "epoch": 18.652528548123982, "grad_norm": 0.0033818951342254877, "learning_rate": 1.3766893096196386e-05, "loss": 0.0016, "num_input_tokens_seen": 246632992, "step": 114340 }, { "epoch": 18.653344208809134, "grad_norm": 0.3213283121585846, "learning_rate": 1.3750309999278899e-05, "loss": 0.0081, "num_input_tokens_seen": 246641408, "step": 114345 }, { "epoch": 18.65415986949429, "grad_norm": 0.00027173393755219877, "learning_rate": 1.373373675682832e-05, "loss": 0.0028, "num_input_tokens_seen": 246652032, "step": 114350 }, { "epoch": 18.654975530179446, "grad_norm": 0.01266220398247242, "learning_rate": 1.371717336918038e-05, "loss": 0.0034, "num_input_tokens_seen": 246662624, "step": 114355 }, { "epoch": 18.6557911908646, "grad_norm": 0.09006364643573761, "learning_rate": 1.3700619836670813e-05, "loss": 0.0029, "num_input_tokens_seen": 246673824, "step": 114360 }, { "epoch": 18.656606851549757, "grad_norm": 0.0017075908835977316, "learning_rate": 1.3684076159635129e-05, "loss": 0.0283, "num_input_tokens_seen": 246684672, "step": 114365 }, { "epoch": 18.65742251223491, "grad_norm": 0.259032279253006, "learning_rate": 1.3667542338408611e-05, "loss": 0.0046, "num_input_tokens_seen": 246696128, "step": 114370 }, { "epoch": 18.658238172920065, "grad_norm": 0.11919786781072617, "learning_rate": 1.3651018373326219e-05, "loss": 0.0061, "num_input_tokens_seen": 246706400, "step": 114375 }, { "epoch": 18.65905383360522, "grad_norm": 0.008529772982001305, "learning_rate": 1.3634504264723013e-05, "loss": 0.0027, "num_input_tokens_seen": 246717152, "step": 114380 }, { "epoch": 18.659869494290376, "grad_norm": 0.012453092262148857, "learning_rate": 1.3618000012933506e-05, "loss": 0.0034, "num_input_tokens_seen": 246728288, "step": 114385 }, { "epoch": 18.660685154975532, "grad_norm": 0.011291971430182457, "learning_rate": 1.3601505618292264e-05, "loss": 0.003, "num_input_tokens_seen": 246739744, "step": 114390 }, { "epoch": 18.661500815660684, "grad_norm": 0.01382684800773859, "learning_rate": 1.3585021081133575e-05, "loss": 0.0028, "num_input_tokens_seen": 246751136, "step": 114395 }, { "epoch": 18.66231647634584, "grad_norm": 0.00242988346144557, "learning_rate": 1.3568546401791449e-05, "loss": 0.0228, "num_input_tokens_seen": 246762656, "step": 114400 }, { "epoch": 18.663132137030995, "grad_norm": 0.007501264102756977, "learning_rate": 1.355208158059984e-05, "loss": 0.0083, "num_input_tokens_seen": 246774304, "step": 114405 }, { "epoch": 18.66394779771615, "grad_norm": 0.0035917186178267, "learning_rate": 1.3535626617892426e-05, "loss": 0.0018, "num_input_tokens_seen": 246784352, "step": 114410 }, { "epoch": 18.664763458401303, "grad_norm": 0.0012000981951132417, "learning_rate": 1.3519181514002665e-05, "loss": 0.0177, "num_input_tokens_seen": 246794592, "step": 114415 }, { "epoch": 18.66557911908646, "grad_norm": 0.0010066869435831904, "learning_rate": 1.3502746269263788e-05, "loss": 0.0038, "num_input_tokens_seen": 246804992, "step": 114420 }, { "epoch": 18.666394779771615, "grad_norm": 0.003891819855198264, "learning_rate": 1.3486320884008918e-05, "loss": 0.0066, "num_input_tokens_seen": 246816032, "step": 114425 }, { "epoch": 18.66721044045677, "grad_norm": 0.03920121490955353, "learning_rate": 1.3469905358570956e-05, "loss": 0.0053, "num_input_tokens_seen": 246827008, "step": 114430 }, { "epoch": 18.668026101141926, "grad_norm": 0.17713934183120728, "learning_rate": 1.3453499693282633e-05, "loss": 0.1019, "num_input_tokens_seen": 246836896, "step": 114435 }, { "epoch": 18.66884176182708, "grad_norm": 0.9315683841705322, "learning_rate": 1.3437103888476244e-05, "loss": 0.0103, "num_input_tokens_seen": 246846720, "step": 114440 }, { "epoch": 18.669657422512234, "grad_norm": 0.0075207240879535675, "learning_rate": 1.342071794448435e-05, "loss": 0.0044, "num_input_tokens_seen": 246857408, "step": 114445 }, { "epoch": 18.67047308319739, "grad_norm": 0.1635553538799286, "learning_rate": 1.340434186163869e-05, "loss": 0.0047, "num_input_tokens_seen": 246867072, "step": 114450 }, { "epoch": 18.671288743882545, "grad_norm": 0.003152323653921485, "learning_rate": 1.33879756402715e-05, "loss": 0.0386, "num_input_tokens_seen": 246877536, "step": 114455 }, { "epoch": 18.6721044045677, "grad_norm": 0.3928254544734955, "learning_rate": 1.3371619280714175e-05, "loss": 0.007, "num_input_tokens_seen": 246889376, "step": 114460 }, { "epoch": 18.672920065252853, "grad_norm": 0.017926042899489403, "learning_rate": 1.3355272783298455e-05, "loss": 0.0097, "num_input_tokens_seen": 246900480, "step": 114465 }, { "epoch": 18.67373572593801, "grad_norm": 2.1085751056671143, "learning_rate": 1.3338936148355351e-05, "loss": 0.1037, "num_input_tokens_seen": 246912416, "step": 114470 }, { "epoch": 18.674551386623165, "grad_norm": 0.005590209271758795, "learning_rate": 1.3322609376216155e-05, "loss": 0.0021, "num_input_tokens_seen": 246922912, "step": 114475 }, { "epoch": 18.67536704730832, "grad_norm": 0.0006414468516595662, "learning_rate": 1.33062924672116e-05, "loss": 0.0118, "num_input_tokens_seen": 246934304, "step": 114480 }, { "epoch": 18.676182707993476, "grad_norm": 0.006771451327949762, "learning_rate": 1.3289985421672534e-05, "loss": 0.0402, "num_input_tokens_seen": 246945376, "step": 114485 }, { "epoch": 18.67699836867863, "grad_norm": 0.14315979182720184, "learning_rate": 1.3273688239929248e-05, "loss": 0.004, "num_input_tokens_seen": 246955776, "step": 114490 }, { "epoch": 18.677814029363784, "grad_norm": 0.0036652134731411934, "learning_rate": 1.3257400922312258e-05, "loss": 0.059, "num_input_tokens_seen": 246967168, "step": 114495 }, { "epoch": 18.67862969004894, "grad_norm": 0.01581701636314392, "learning_rate": 1.3241123469151406e-05, "loss": 0.094, "num_input_tokens_seen": 246978464, "step": 114500 }, { "epoch": 18.679445350734095, "grad_norm": 0.009054360911250114, "learning_rate": 1.322485588077671e-05, "loss": 0.0017, "num_input_tokens_seen": 246989376, "step": 114505 }, { "epoch": 18.68026101141925, "grad_norm": 0.015612986870110035, "learning_rate": 1.3208598157517849e-05, "loss": 0.0021, "num_input_tokens_seen": 247000640, "step": 114510 }, { "epoch": 18.681076672104403, "grad_norm": 0.2937453091144562, "learning_rate": 1.3192350299704225e-05, "loss": 0.0091, "num_input_tokens_seen": 247011296, "step": 114515 }, { "epoch": 18.68189233278956, "grad_norm": 0.005547434091567993, "learning_rate": 1.3176112307665245e-05, "loss": 0.0017, "num_input_tokens_seen": 247022688, "step": 114520 }, { "epoch": 18.682707993474715, "grad_norm": 0.20111241936683655, "learning_rate": 1.315988418172992e-05, "loss": 0.0109, "num_input_tokens_seen": 247032960, "step": 114525 }, { "epoch": 18.68352365415987, "grad_norm": 0.0362079031765461, "learning_rate": 1.3143665922227155e-05, "loss": 0.0039, "num_input_tokens_seen": 247042368, "step": 114530 }, { "epoch": 18.684339314845026, "grad_norm": 0.11285007745027542, "learning_rate": 1.3127457529485576e-05, "loss": 0.0057, "num_input_tokens_seen": 247053792, "step": 114535 }, { "epoch": 18.68515497553018, "grad_norm": 1.8377408981323242, "learning_rate": 1.3111259003833753e-05, "loss": 0.0301, "num_input_tokens_seen": 247065280, "step": 114540 }, { "epoch": 18.685970636215334, "grad_norm": 0.0024892932269722223, "learning_rate": 1.3095070345599924e-05, "loss": 0.0153, "num_input_tokens_seen": 247076192, "step": 114545 }, { "epoch": 18.68678629690049, "grad_norm": 0.8574090003967285, "learning_rate": 1.3078891555112161e-05, "loss": 0.1828, "num_input_tokens_seen": 247085536, "step": 114550 }, { "epoch": 18.687601957585645, "grad_norm": 0.04280076548457146, "learning_rate": 1.306272263269831e-05, "loss": 0.1131, "num_input_tokens_seen": 247095040, "step": 114555 }, { "epoch": 18.6884176182708, "grad_norm": 0.0008235589484684169, "learning_rate": 1.3046563578686222e-05, "loss": 0.001, "num_input_tokens_seen": 247106304, "step": 114560 }, { "epoch": 18.689233278955953, "grad_norm": 0.016166958957910538, "learning_rate": 1.303041439340319e-05, "loss": 0.0027, "num_input_tokens_seen": 247117408, "step": 114565 }, { "epoch": 18.69004893964111, "grad_norm": 0.14784908294677734, "learning_rate": 1.3014275077176618e-05, "loss": 0.0078, "num_input_tokens_seen": 247127136, "step": 114570 }, { "epoch": 18.690864600326265, "grad_norm": 0.004585585091263056, "learning_rate": 1.2998145630333469e-05, "loss": 0.005, "num_input_tokens_seen": 247136128, "step": 114575 }, { "epoch": 18.69168026101142, "grad_norm": 0.005209977272897959, "learning_rate": 1.2982026053200813e-05, "loss": 0.0023, "num_input_tokens_seen": 247146720, "step": 114580 }, { "epoch": 18.692495921696576, "grad_norm": 0.009345420636236668, "learning_rate": 1.2965916346105166e-05, "loss": 0.0015, "num_input_tokens_seen": 247158272, "step": 114585 }, { "epoch": 18.693311582381728, "grad_norm": 0.049502335488796234, "learning_rate": 1.2949816509373102e-05, "loss": 0.0046, "num_input_tokens_seen": 247169344, "step": 114590 }, { "epoch": 18.694127243066884, "grad_norm": 0.021334391087293625, "learning_rate": 1.2933726543330804e-05, "loss": 0.0974, "num_input_tokens_seen": 247179968, "step": 114595 }, { "epoch": 18.69494290375204, "grad_norm": 0.0025282034184783697, "learning_rate": 1.2917646448304509e-05, "loss": 0.0668, "num_input_tokens_seen": 247191264, "step": 114600 }, { "epoch": 18.695758564437195, "grad_norm": 0.027276240289211273, "learning_rate": 1.2901576224619959e-05, "loss": 0.0023, "num_input_tokens_seen": 247201376, "step": 114605 }, { "epoch": 18.696574225122347, "grad_norm": 0.0018339460948482156, "learning_rate": 1.2885515872602949e-05, "loss": 0.0831, "num_input_tokens_seen": 247212480, "step": 114610 }, { "epoch": 18.697389885807503, "grad_norm": 0.009036364033818245, "learning_rate": 1.2869465392578828e-05, "loss": 0.001, "num_input_tokens_seen": 247222944, "step": 114615 }, { "epoch": 18.69820554649266, "grad_norm": 0.06548304855823517, "learning_rate": 1.2853424784873059e-05, "loss": 0.0072, "num_input_tokens_seen": 247233216, "step": 114620 }, { "epoch": 18.699021207177815, "grad_norm": 0.07296621054410934, "learning_rate": 1.2837394049810547e-05, "loss": 0.0033, "num_input_tokens_seen": 247244032, "step": 114625 }, { "epoch": 18.69983686786297, "grad_norm": 0.005188798997551203, "learning_rate": 1.2821373187716311e-05, "loss": 0.0278, "num_input_tokens_seen": 247253664, "step": 114630 }, { "epoch": 18.700652528548122, "grad_norm": 0.01822066865861416, "learning_rate": 1.2805362198914872e-05, "loss": 0.055, "num_input_tokens_seen": 247264448, "step": 114635 }, { "epoch": 18.701468189233278, "grad_norm": 0.029625875875353813, "learning_rate": 1.2789361083730911e-05, "loss": 0.0019, "num_input_tokens_seen": 247273440, "step": 114640 }, { "epoch": 18.702283849918434, "grad_norm": 0.007063397206366062, "learning_rate": 1.2773369842488614e-05, "loss": 0.0404, "num_input_tokens_seen": 247284992, "step": 114645 }, { "epoch": 18.70309951060359, "grad_norm": 0.0051968879997730255, "learning_rate": 1.2757388475512055e-05, "loss": 0.0078, "num_input_tokens_seen": 247295808, "step": 114650 }, { "epoch": 18.703915171288745, "grad_norm": 0.06897776573896408, "learning_rate": 1.2741416983125143e-05, "loss": 0.0033, "num_input_tokens_seen": 247306208, "step": 114655 }, { "epoch": 18.704730831973897, "grad_norm": 0.005071122664958239, "learning_rate": 1.2725455365651507e-05, "loss": 0.0013, "num_input_tokens_seen": 247317920, "step": 114660 }, { "epoch": 18.705546492659053, "grad_norm": 0.025943227112293243, "learning_rate": 1.270950362341472e-05, "loss": 0.0027, "num_input_tokens_seen": 247328384, "step": 114665 }, { "epoch": 18.70636215334421, "grad_norm": 0.0012658167397603393, "learning_rate": 1.269356175673797e-05, "loss": 0.008, "num_input_tokens_seen": 247338944, "step": 114670 }, { "epoch": 18.707177814029365, "grad_norm": 0.020685292780399323, "learning_rate": 1.2677629765944387e-05, "loss": 0.0013, "num_input_tokens_seen": 247349216, "step": 114675 }, { "epoch": 18.70799347471452, "grad_norm": 0.009101249277591705, "learning_rate": 1.266170765135688e-05, "loss": 0.002, "num_input_tokens_seen": 247359104, "step": 114680 }, { "epoch": 18.708809135399672, "grad_norm": 0.005121287424117327, "learning_rate": 1.2645795413298078e-05, "loss": 0.0062, "num_input_tokens_seen": 247369376, "step": 114685 }, { "epoch": 18.709624796084828, "grad_norm": 0.057890310883522034, "learning_rate": 1.2629893052090502e-05, "loss": 0.0038, "num_input_tokens_seen": 247379328, "step": 114690 }, { "epoch": 18.710440456769984, "grad_norm": 0.1745368093252182, "learning_rate": 1.2614000568056395e-05, "loss": 0.0032, "num_input_tokens_seen": 247390016, "step": 114695 }, { "epoch": 18.71125611745514, "grad_norm": 0.009156850166618824, "learning_rate": 1.259811796151783e-05, "loss": 0.0036, "num_input_tokens_seen": 247401952, "step": 114700 }, { "epoch": 18.712071778140295, "grad_norm": 0.14167004823684692, "learning_rate": 1.258224523279683e-05, "loss": 0.0058, "num_input_tokens_seen": 247412960, "step": 114705 }, { "epoch": 18.712887438825447, "grad_norm": 0.002328514354303479, "learning_rate": 1.2566382382214859e-05, "loss": 0.0009, "num_input_tokens_seen": 247424576, "step": 114710 }, { "epoch": 18.713703099510603, "grad_norm": 0.008223442360758781, "learning_rate": 1.2550529410093548e-05, "loss": 0.0044, "num_input_tokens_seen": 247435488, "step": 114715 }, { "epoch": 18.71451876019576, "grad_norm": 0.003988505341112614, "learning_rate": 1.2534686316754085e-05, "loss": 0.0007, "num_input_tokens_seen": 247445952, "step": 114720 }, { "epoch": 18.715334420880914, "grad_norm": 0.02723725512623787, "learning_rate": 1.2518853102517657e-05, "loss": 0.0024, "num_input_tokens_seen": 247456704, "step": 114725 }, { "epoch": 18.71615008156607, "grad_norm": 0.0030995721463114023, "learning_rate": 1.250302976770501e-05, "loss": 0.0028, "num_input_tokens_seen": 247466912, "step": 114730 }, { "epoch": 18.716965742251222, "grad_norm": 0.004374995827674866, "learning_rate": 1.248721631263705e-05, "loss": 0.0764, "num_input_tokens_seen": 247478272, "step": 114735 }, { "epoch": 18.717781402936378, "grad_norm": 0.0018350764876231551, "learning_rate": 1.2471412737633914e-05, "loss": 0.0012, "num_input_tokens_seen": 247488480, "step": 114740 }, { "epoch": 18.718597063621534, "grad_norm": 0.13558359444141388, "learning_rate": 1.2455619043016175e-05, "loss": 0.0044, "num_input_tokens_seen": 247498848, "step": 114745 }, { "epoch": 18.71941272430669, "grad_norm": 0.0006049941875971854, "learning_rate": 1.2439835229103803e-05, "loss": 0.0005, "num_input_tokens_seen": 247508320, "step": 114750 }, { "epoch": 18.72022838499184, "grad_norm": 0.07029488682746887, "learning_rate": 1.242406129621665e-05, "loss": 0.0041, "num_input_tokens_seen": 247517120, "step": 114755 }, { "epoch": 18.721044045676997, "grad_norm": 0.01675412803888321, "learning_rate": 1.240829724467446e-05, "loss": 0.0022, "num_input_tokens_seen": 247527104, "step": 114760 }, { "epoch": 18.721859706362153, "grad_norm": 0.11928557604551315, "learning_rate": 1.2392543074796702e-05, "loss": 0.0076, "num_input_tokens_seen": 247536864, "step": 114765 }, { "epoch": 18.72267536704731, "grad_norm": 0.48988014459609985, "learning_rate": 1.2376798786902621e-05, "loss": 0.0068, "num_input_tokens_seen": 247547904, "step": 114770 }, { "epoch": 18.723491027732464, "grad_norm": 0.0007810079259797931, "learning_rate": 1.2361064381311293e-05, "loss": 0.0029, "num_input_tokens_seen": 247559040, "step": 114775 }, { "epoch": 18.724306688417617, "grad_norm": 0.03635910525918007, "learning_rate": 1.2345339858341576e-05, "loss": 0.0046, "num_input_tokens_seen": 247570560, "step": 114780 }, { "epoch": 18.725122349102772, "grad_norm": 0.000600889150518924, "learning_rate": 1.2329625218312213e-05, "loss": 0.0031, "num_input_tokens_seen": 247581920, "step": 114785 }, { "epoch": 18.725938009787928, "grad_norm": 0.018115883693099022, "learning_rate": 1.2313920461541672e-05, "loss": 0.001, "num_input_tokens_seen": 247592896, "step": 114790 }, { "epoch": 18.726753670473084, "grad_norm": 0.12685595452785492, "learning_rate": 1.22982255883482e-05, "loss": 0.0067, "num_input_tokens_seen": 247604512, "step": 114795 }, { "epoch": 18.72756933115824, "grad_norm": 0.03018014505505562, "learning_rate": 1.2282540599049873e-05, "loss": 0.1212, "num_input_tokens_seen": 247614784, "step": 114800 }, { "epoch": 18.72838499184339, "grad_norm": 0.0024490493815392256, "learning_rate": 1.2266865493964551e-05, "loss": 0.0008, "num_input_tokens_seen": 247625408, "step": 114805 }, { "epoch": 18.729200652528547, "grad_norm": 0.07000678777694702, "learning_rate": 1.2251200273409923e-05, "loss": 0.0018, "num_input_tokens_seen": 247635616, "step": 114810 }, { "epoch": 18.730016313213703, "grad_norm": 0.010413183830678463, "learning_rate": 1.2235544937703513e-05, "loss": 0.003, "num_input_tokens_seen": 247647136, "step": 114815 }, { "epoch": 18.73083197389886, "grad_norm": 0.014890529215335846, "learning_rate": 1.2219899487162567e-05, "loss": 0.004, "num_input_tokens_seen": 247658784, "step": 114820 }, { "epoch": 18.731647634584014, "grad_norm": 0.004546916577965021, "learning_rate": 1.2204263922104108e-05, "loss": 0.0237, "num_input_tokens_seen": 247670688, "step": 114825 }, { "epoch": 18.732463295269167, "grad_norm": 0.00583751080557704, "learning_rate": 1.2188638242845108e-05, "loss": 0.001, "num_input_tokens_seen": 247681728, "step": 114830 }, { "epoch": 18.733278955954322, "grad_norm": 0.017790179699659348, "learning_rate": 1.2173022449702142e-05, "loss": 0.0012, "num_input_tokens_seen": 247691968, "step": 114835 }, { "epoch": 18.734094616639478, "grad_norm": 1.1295139789581299, "learning_rate": 1.215741654299174e-05, "loss": 0.0749, "num_input_tokens_seen": 247702880, "step": 114840 }, { "epoch": 18.734910277324634, "grad_norm": 0.006016951519995928, "learning_rate": 1.214182052303009e-05, "loss": 0.0009, "num_input_tokens_seen": 247713568, "step": 114845 }, { "epoch": 18.73572593800979, "grad_norm": 0.9011142253875732, "learning_rate": 1.2126234390133439e-05, "loss": 0.0487, "num_input_tokens_seen": 247723360, "step": 114850 }, { "epoch": 18.73654159869494, "grad_norm": 0.005538031458854675, "learning_rate": 1.2110658144617538e-05, "loss": 0.0044, "num_input_tokens_seen": 247735328, "step": 114855 }, { "epoch": 18.737357259380097, "grad_norm": 0.018627788871526718, "learning_rate": 1.2095091786798074e-05, "loss": 0.0834, "num_input_tokens_seen": 247745696, "step": 114860 }, { "epoch": 18.738172920065253, "grad_norm": 0.0018085705814883113, "learning_rate": 1.207953531699052e-05, "loss": 0.0066, "num_input_tokens_seen": 247756736, "step": 114865 }, { "epoch": 18.73898858075041, "grad_norm": 0.022579705342650414, "learning_rate": 1.206398873551018e-05, "loss": 0.0028, "num_input_tokens_seen": 247767712, "step": 114870 }, { "epoch": 18.739804241435564, "grad_norm": 0.008336147293448448, "learning_rate": 1.2048452042672075e-05, "loss": 0.0048, "num_input_tokens_seen": 247778336, "step": 114875 }, { "epoch": 18.740619902120716, "grad_norm": 0.0016348216449841857, "learning_rate": 1.2032925238791071e-05, "loss": 0.0246, "num_input_tokens_seen": 247788736, "step": 114880 }, { "epoch": 18.741435562805872, "grad_norm": 0.0044553461484611034, "learning_rate": 1.2017408324181911e-05, "loss": 0.0093, "num_input_tokens_seen": 247798752, "step": 114885 }, { "epoch": 18.742251223491028, "grad_norm": 0.002034853445366025, "learning_rate": 1.2001901299159013e-05, "loss": 0.0037, "num_input_tokens_seen": 247809088, "step": 114890 }, { "epoch": 18.743066884176184, "grad_norm": 0.02008631080389023, "learning_rate": 1.1986404164036679e-05, "loss": 0.0046, "num_input_tokens_seen": 247818336, "step": 114895 }, { "epoch": 18.74388254486134, "grad_norm": 0.022153202444314957, "learning_rate": 1.1970916919128937e-05, "loss": 0.0013, "num_input_tokens_seen": 247828448, "step": 114900 }, { "epoch": 18.74469820554649, "grad_norm": 0.020743364468216896, "learning_rate": 1.1955439564749649e-05, "loss": 0.0035, "num_input_tokens_seen": 247837632, "step": 114905 }, { "epoch": 18.745513866231647, "grad_norm": 0.005820127669721842, "learning_rate": 1.1939972101212503e-05, "loss": 0.0016, "num_input_tokens_seen": 247848640, "step": 114910 }, { "epoch": 18.746329526916803, "grad_norm": 0.001899397000670433, "learning_rate": 1.1924514528831032e-05, "loss": 0.0043, "num_input_tokens_seen": 247858400, "step": 114915 }, { "epoch": 18.74714518760196, "grad_norm": 0.0008723632781766355, "learning_rate": 1.190906684791837e-05, "loss": 0.0048, "num_input_tokens_seen": 247869632, "step": 114920 }, { "epoch": 18.747960848287114, "grad_norm": 0.066276416182518, "learning_rate": 1.1893629058787714e-05, "loss": 0.0024, "num_input_tokens_seen": 247881024, "step": 114925 }, { "epoch": 18.748776508972266, "grad_norm": 0.052281104028224945, "learning_rate": 1.187820116175181e-05, "loss": 0.0045, "num_input_tokens_seen": 247892192, "step": 114930 }, { "epoch": 18.749592169657422, "grad_norm": 0.0022571063600480556, "learning_rate": 1.1862783157123413e-05, "loss": 0.0031, "num_input_tokens_seen": 247903136, "step": 114935 }, { "epoch": 18.750407830342578, "grad_norm": 0.004913630895316601, "learning_rate": 1.1847375045214992e-05, "loss": 0.0675, "num_input_tokens_seen": 247912672, "step": 114940 }, { "epoch": 18.751223491027734, "grad_norm": 0.017598306760191917, "learning_rate": 1.1831976826338742e-05, "loss": 0.0019, "num_input_tokens_seen": 247923968, "step": 114945 }, { "epoch": 18.752039151712886, "grad_norm": 0.016010930761694908, "learning_rate": 1.1816588500806802e-05, "loss": 0.0133, "num_input_tokens_seen": 247935520, "step": 114950 }, { "epoch": 18.75285481239804, "grad_norm": 0.0033077681437134743, "learning_rate": 1.1801210068930923e-05, "loss": 0.0017, "num_input_tokens_seen": 247945536, "step": 114955 }, { "epoch": 18.753670473083197, "grad_norm": 0.10296429693698883, "learning_rate": 1.1785841531022968e-05, "loss": 0.0018, "num_input_tokens_seen": 247957312, "step": 114960 }, { "epoch": 18.754486133768353, "grad_norm": 0.0016397037543356419, "learning_rate": 1.177048288739413e-05, "loss": 0.0694, "num_input_tokens_seen": 247969024, "step": 114965 }, { "epoch": 18.75530179445351, "grad_norm": 0.002550773788243532, "learning_rate": 1.1755134138355995e-05, "loss": 0.0017, "num_input_tokens_seen": 247979456, "step": 114970 }, { "epoch": 18.75611745513866, "grad_norm": 0.011164713650941849, "learning_rate": 1.1739795284219256e-05, "loss": 0.0015, "num_input_tokens_seen": 247990784, "step": 114975 }, { "epoch": 18.756933115823816, "grad_norm": 0.001113231643103063, "learning_rate": 1.172446632529517e-05, "loss": 0.0032, "num_input_tokens_seen": 248002560, "step": 114980 }, { "epoch": 18.757748776508972, "grad_norm": 0.00174132629763335, "learning_rate": 1.1709147261894037e-05, "loss": 0.0031, "num_input_tokens_seen": 248013952, "step": 114985 }, { "epoch": 18.758564437194128, "grad_norm": 0.00089851493248716, "learning_rate": 1.1693838094326502e-05, "loss": 0.008, "num_input_tokens_seen": 248025120, "step": 114990 }, { "epoch": 18.759380097879284, "grad_norm": 0.08875664323568344, "learning_rate": 1.1678538822902817e-05, "loss": 0.0065, "num_input_tokens_seen": 248036640, "step": 114995 }, { "epoch": 18.760195758564436, "grad_norm": 0.0121017349883914, "learning_rate": 1.1663249447933067e-05, "loss": 0.0013, "num_input_tokens_seen": 248046688, "step": 115000 }, { "epoch": 18.76101141924959, "grad_norm": 0.0004884064546786249, "learning_rate": 1.1647969969727e-05, "loss": 0.0016, "num_input_tokens_seen": 248057600, "step": 115005 }, { "epoch": 18.761827079934747, "grad_norm": 0.003829386318102479, "learning_rate": 1.1632700388594375e-05, "loss": 0.0015, "num_input_tokens_seen": 248067136, "step": 115010 }, { "epoch": 18.762642740619903, "grad_norm": 0.0018088167998939753, "learning_rate": 1.1617440704844661e-05, "loss": 0.002, "num_input_tokens_seen": 248077920, "step": 115015 }, { "epoch": 18.76345840130506, "grad_norm": 0.0024925859179347754, "learning_rate": 1.1602190918787004e-05, "loss": 0.0068, "num_input_tokens_seen": 248087968, "step": 115020 }, { "epoch": 18.76427406199021, "grad_norm": 0.014874942600727081, "learning_rate": 1.1586951030730542e-05, "loss": 0.0079, "num_input_tokens_seen": 248098432, "step": 115025 }, { "epoch": 18.765089722675366, "grad_norm": 0.016050072386860847, "learning_rate": 1.1571721040984084e-05, "loss": 0.0094, "num_input_tokens_seen": 248107936, "step": 115030 }, { "epoch": 18.765905383360522, "grad_norm": 0.030814247205853462, "learning_rate": 1.1556500949856386e-05, "loss": 0.0021, "num_input_tokens_seen": 248118400, "step": 115035 }, { "epoch": 18.766721044045678, "grad_norm": 0.0023874815087765455, "learning_rate": 1.1541290757655754e-05, "loss": 0.001, "num_input_tokens_seen": 248130208, "step": 115040 }, { "epoch": 18.767536704730833, "grad_norm": 0.04324791952967644, "learning_rate": 1.1526090464690553e-05, "loss": 0.0015, "num_input_tokens_seen": 248141664, "step": 115045 }, { "epoch": 18.768352365415986, "grad_norm": 0.11483463644981384, "learning_rate": 1.1510900071268815e-05, "loss": 0.019, "num_input_tokens_seen": 248151840, "step": 115050 }, { "epoch": 18.76916802610114, "grad_norm": 0.15023289620876312, "learning_rate": 1.149571957769835e-05, "loss": 0.0031, "num_input_tokens_seen": 248162048, "step": 115055 }, { "epoch": 18.769983686786297, "grad_norm": 0.002688201842829585, "learning_rate": 1.1480548984286853e-05, "loss": 0.0384, "num_input_tokens_seen": 248172800, "step": 115060 }, { "epoch": 18.770799347471453, "grad_norm": 0.024533575400710106, "learning_rate": 1.1465388291341804e-05, "loss": 0.002, "num_input_tokens_seen": 248183296, "step": 115065 }, { "epoch": 18.77161500815661, "grad_norm": 0.0018382304115220904, "learning_rate": 1.145023749917029e-05, "loss": 0.0026, "num_input_tokens_seen": 248194560, "step": 115070 }, { "epoch": 18.77243066884176, "grad_norm": 0.02986457571387291, "learning_rate": 1.143509660807962e-05, "loss": 0.0027, "num_input_tokens_seen": 248204224, "step": 115075 }, { "epoch": 18.773246329526916, "grad_norm": 0.01146732084453106, "learning_rate": 1.1419965618376383e-05, "loss": 0.0013, "num_input_tokens_seen": 248213856, "step": 115080 }, { "epoch": 18.774061990212072, "grad_norm": 1.1866114139556885, "learning_rate": 1.1404844530367498e-05, "loss": 0.0545, "num_input_tokens_seen": 248223936, "step": 115085 }, { "epoch": 18.774877650897228, "grad_norm": 0.003607213729992509, "learning_rate": 1.138973334435911e-05, "loss": 0.005, "num_input_tokens_seen": 248235296, "step": 115090 }, { "epoch": 18.775693311582383, "grad_norm": 0.007150751538574696, "learning_rate": 1.1374632060657753e-05, "loss": 0.0021, "num_input_tokens_seen": 248246624, "step": 115095 }, { "epoch": 18.776508972267536, "grad_norm": 0.0269037876278162, "learning_rate": 1.1359540679569236e-05, "loss": 0.003, "num_input_tokens_seen": 248257184, "step": 115100 }, { "epoch": 18.77732463295269, "grad_norm": 0.13129253685474396, "learning_rate": 1.1344459201399592e-05, "loss": 0.0029, "num_input_tokens_seen": 248268096, "step": 115105 }, { "epoch": 18.778140293637847, "grad_norm": 0.001095607760362327, "learning_rate": 1.1329387626454358e-05, "loss": 0.0007, "num_input_tokens_seen": 248279136, "step": 115110 }, { "epoch": 18.778955954323003, "grad_norm": 0.0034238805528730154, "learning_rate": 1.1314325955039007e-05, "loss": 0.0077, "num_input_tokens_seen": 248289504, "step": 115115 }, { "epoch": 18.77977161500816, "grad_norm": 0.001256258925423026, "learning_rate": 1.1299274187458741e-05, "loss": 0.0026, "num_input_tokens_seen": 248300672, "step": 115120 }, { "epoch": 18.78058727569331, "grad_norm": 0.06746986508369446, "learning_rate": 1.1284232324018761e-05, "loss": 0.0048, "num_input_tokens_seen": 248311584, "step": 115125 }, { "epoch": 18.781402936378466, "grad_norm": 0.0014895633794367313, "learning_rate": 1.1269200365023657e-05, "loss": 0.0006, "num_input_tokens_seen": 248322080, "step": 115130 }, { "epoch": 18.782218597063622, "grad_norm": 1.192189335823059, "learning_rate": 1.125417831077824e-05, "loss": 0.0761, "num_input_tokens_seen": 248332544, "step": 115135 }, { "epoch": 18.783034257748778, "grad_norm": 0.001713824924081564, "learning_rate": 1.1239166161586933e-05, "loss": 0.0007, "num_input_tokens_seen": 248343008, "step": 115140 }, { "epoch": 18.78384991843393, "grad_norm": 1.0211992263793945, "learning_rate": 1.1224163917753993e-05, "loss": 0.0877, "num_input_tokens_seen": 248353600, "step": 115145 }, { "epoch": 18.784665579119086, "grad_norm": 0.16758735477924347, "learning_rate": 1.1209171579583399e-05, "loss": 0.0047, "num_input_tokens_seen": 248365024, "step": 115150 }, { "epoch": 18.78548123980424, "grad_norm": 0.0007796662393957376, "learning_rate": 1.1194189147379018e-05, "loss": 0.0046, "num_input_tokens_seen": 248376448, "step": 115155 }, { "epoch": 18.786296900489397, "grad_norm": 0.013082359917461872, "learning_rate": 1.1179216621444499e-05, "loss": 0.0012, "num_input_tokens_seen": 248387584, "step": 115160 }, { "epoch": 18.787112561174553, "grad_norm": 0.004847257398068905, "learning_rate": 1.1164254002083262e-05, "loss": 0.0022, "num_input_tokens_seen": 248399296, "step": 115165 }, { "epoch": 18.787928221859705, "grad_norm": 0.006595268379896879, "learning_rate": 1.1149301289598569e-05, "loss": 0.0015, "num_input_tokens_seen": 248409600, "step": 115170 }, { "epoch": 18.78874388254486, "grad_norm": 0.009936106391251087, "learning_rate": 1.1134358484293395e-05, "loss": 0.0074, "num_input_tokens_seen": 248418912, "step": 115175 }, { "epoch": 18.789559543230016, "grad_norm": 0.0037287110462784767, "learning_rate": 1.1119425586470667e-05, "loss": 0.0082, "num_input_tokens_seen": 248429600, "step": 115180 }, { "epoch": 18.790375203915172, "grad_norm": 0.025336049497127533, "learning_rate": 1.1104502596432863e-05, "loss": 0.0026, "num_input_tokens_seen": 248440224, "step": 115185 }, { "epoch": 18.791190864600328, "grad_norm": 0.002936782781034708, "learning_rate": 1.1089589514482635e-05, "loss": 0.0156, "num_input_tokens_seen": 248450208, "step": 115190 }, { "epoch": 18.79200652528548, "grad_norm": 0.05204629898071289, "learning_rate": 1.1074686340922068e-05, "loss": 0.0016, "num_input_tokens_seen": 248461920, "step": 115195 }, { "epoch": 18.792822185970635, "grad_norm": 0.0007248996407724917, "learning_rate": 1.105979307605326e-05, "loss": 0.0014, "num_input_tokens_seen": 248472544, "step": 115200 }, { "epoch": 18.79363784665579, "grad_norm": 0.009234776720404625, "learning_rate": 1.104490972017791e-05, "loss": 0.0031, "num_input_tokens_seen": 248483296, "step": 115205 }, { "epoch": 18.794453507340947, "grad_norm": 0.0467919185757637, "learning_rate": 1.1030036273597888e-05, "loss": 0.0119, "num_input_tokens_seen": 248493248, "step": 115210 }, { "epoch": 18.795269168026103, "grad_norm": 0.15880519151687622, "learning_rate": 1.1015172736614343e-05, "loss": 0.0049, "num_input_tokens_seen": 248503872, "step": 115215 }, { "epoch": 18.796084828711255, "grad_norm": 0.20169824361801147, "learning_rate": 1.1000319109528755e-05, "loss": 0.0168, "num_input_tokens_seen": 248515360, "step": 115220 }, { "epoch": 18.79690048939641, "grad_norm": 0.0016364585608243942, "learning_rate": 1.0985475392641941e-05, "loss": 0.0089, "num_input_tokens_seen": 248526432, "step": 115225 }, { "epoch": 18.797716150081566, "grad_norm": 0.03288798779249191, "learning_rate": 1.0970641586254937e-05, "loss": 0.0018, "num_input_tokens_seen": 248536320, "step": 115230 }, { "epoch": 18.798531810766722, "grad_norm": 0.0009012518567033112, "learning_rate": 1.0955817690668169e-05, "loss": 0.006, "num_input_tokens_seen": 248547936, "step": 115235 }, { "epoch": 18.799347471451878, "grad_norm": 0.09589342027902603, "learning_rate": 1.094100370618223e-05, "loss": 0.0169, "num_input_tokens_seen": 248559136, "step": 115240 }, { "epoch": 18.80016313213703, "grad_norm": 0.0008315709419548512, "learning_rate": 1.0926199633097156e-05, "loss": 0.001, "num_input_tokens_seen": 248569152, "step": 115245 }, { "epoch": 18.800978792822185, "grad_norm": 0.007832744158804417, "learning_rate": 1.091140547171321e-05, "loss": 0.0209, "num_input_tokens_seen": 248580608, "step": 115250 }, { "epoch": 18.80179445350734, "grad_norm": 0.00847557932138443, "learning_rate": 1.0896621222329983e-05, "loss": 0.002, "num_input_tokens_seen": 248591872, "step": 115255 }, { "epoch": 18.802610114192497, "grad_norm": 0.010743658989667892, "learning_rate": 1.0881846885247293e-05, "loss": 0.0052, "num_input_tokens_seen": 248599968, "step": 115260 }, { "epoch": 18.803425774877653, "grad_norm": 0.025661129504442215, "learning_rate": 1.0867082460764343e-05, "loss": 0.0031, "num_input_tokens_seen": 248610400, "step": 115265 }, { "epoch": 18.804241435562805, "grad_norm": 0.0013557036872953176, "learning_rate": 1.0852327949180618e-05, "loss": 0.1194, "num_input_tokens_seen": 248621408, "step": 115270 }, { "epoch": 18.80505709624796, "grad_norm": 0.00702410563826561, "learning_rate": 1.0837583350794878e-05, "loss": 0.004, "num_input_tokens_seen": 248631328, "step": 115275 }, { "epoch": 18.805872756933116, "grad_norm": 0.001059781527146697, "learning_rate": 1.0822848665906104e-05, "loss": 0.0112, "num_input_tokens_seen": 248643392, "step": 115280 }, { "epoch": 18.806688417618272, "grad_norm": 0.050288084894418716, "learning_rate": 1.0808123894812838e-05, "loss": 0.0061, "num_input_tokens_seen": 248652832, "step": 115285 }, { "epoch": 18.807504078303424, "grad_norm": 0.010964504443109035, "learning_rate": 1.0793409037813562e-05, "loss": 0.0808, "num_input_tokens_seen": 248664128, "step": 115290 }, { "epoch": 18.80831973898858, "grad_norm": 1.875773549079895, "learning_rate": 1.0778704095206427e-05, "loss": 0.0457, "num_input_tokens_seen": 248674528, "step": 115295 }, { "epoch": 18.809135399673735, "grad_norm": 0.008540058508515358, "learning_rate": 1.0764009067289526e-05, "loss": 0.0009, "num_input_tokens_seen": 248685216, "step": 115300 }, { "epoch": 18.80995106035889, "grad_norm": 0.012429107911884785, "learning_rate": 1.0749323954360568e-05, "loss": 0.0008, "num_input_tokens_seen": 248695424, "step": 115305 }, { "epoch": 18.810766721044047, "grad_norm": 0.0063535356894135475, "learning_rate": 1.0734648756717258e-05, "loss": 0.0049, "num_input_tokens_seen": 248706208, "step": 115310 }, { "epoch": 18.8115823817292, "grad_norm": 0.004071129951626062, "learning_rate": 1.0719983474656914e-05, "loss": 0.0257, "num_input_tokens_seen": 248716800, "step": 115315 }, { "epoch": 18.812398042414355, "grad_norm": 0.021871037781238556, "learning_rate": 1.0705328108476852e-05, "loss": 0.0075, "num_input_tokens_seen": 248728544, "step": 115320 }, { "epoch": 18.81321370309951, "grad_norm": 0.03381072357296944, "learning_rate": 1.0690682658474004e-05, "loss": 0.0019, "num_input_tokens_seen": 248737664, "step": 115325 }, { "epoch": 18.814029363784666, "grad_norm": 0.24163702130317688, "learning_rate": 1.0676047124945187e-05, "loss": 0.024, "num_input_tokens_seen": 248748800, "step": 115330 }, { "epoch": 18.81484502446982, "grad_norm": 0.48827773332595825, "learning_rate": 1.0661421508187109e-05, "loss": 0.0108, "num_input_tokens_seen": 248758464, "step": 115335 }, { "epoch": 18.815660685154974, "grad_norm": 0.04895069822669029, "learning_rate": 1.0646805808495974e-05, "loss": 0.0069, "num_input_tokens_seen": 248769760, "step": 115340 }, { "epoch": 18.81647634584013, "grad_norm": 0.033234771341085434, "learning_rate": 1.0632200026168215e-05, "loss": 0.0016, "num_input_tokens_seen": 248780640, "step": 115345 }, { "epoch": 18.817292006525285, "grad_norm": 0.11137734353542328, "learning_rate": 1.061760416149965e-05, "loss": 0.0042, "num_input_tokens_seen": 248792256, "step": 115350 }, { "epoch": 18.81810766721044, "grad_norm": 0.06093854457139969, "learning_rate": 1.0603018214786264e-05, "loss": 0.004, "num_input_tokens_seen": 248803136, "step": 115355 }, { "epoch": 18.818923327895597, "grad_norm": 0.0050347293727099895, "learning_rate": 1.0588442186323433e-05, "loss": 0.001, "num_input_tokens_seen": 248812864, "step": 115360 }, { "epoch": 18.81973898858075, "grad_norm": 0.0018477152334526181, "learning_rate": 1.0573876076406807e-05, "loss": 0.0012, "num_input_tokens_seen": 248824352, "step": 115365 }, { "epoch": 18.820554649265905, "grad_norm": 0.0020881600212305784, "learning_rate": 1.055931988533132e-05, "loss": 0.0029, "num_input_tokens_seen": 248835520, "step": 115370 }, { "epoch": 18.82137030995106, "grad_norm": 0.0012075596023350954, "learning_rate": 1.0544773613392289e-05, "loss": 0.0012, "num_input_tokens_seen": 248847680, "step": 115375 }, { "epoch": 18.822185970636216, "grad_norm": 0.022528348490595818, "learning_rate": 1.0530237260884146e-05, "loss": 0.006, "num_input_tokens_seen": 248858112, "step": 115380 }, { "epoch": 18.82300163132137, "grad_norm": 0.0023832619190216064, "learning_rate": 1.051571082810182e-05, "loss": 0.0046, "num_input_tokens_seen": 248868160, "step": 115385 }, { "epoch": 18.823817292006524, "grad_norm": 0.0546661801636219, "learning_rate": 1.0501194315339523e-05, "loss": 0.0014, "num_input_tokens_seen": 248880288, "step": 115390 }, { "epoch": 18.82463295269168, "grad_norm": 0.0013886182568967342, "learning_rate": 1.048668772289152e-05, "loss": 0.0073, "num_input_tokens_seen": 248890912, "step": 115395 }, { "epoch": 18.825448613376835, "grad_norm": 0.02315945364534855, "learning_rate": 1.0472191051051738e-05, "loss": 0.1714, "num_input_tokens_seen": 248901184, "step": 115400 }, { "epoch": 18.82626427406199, "grad_norm": 0.0006052025710232556, "learning_rate": 1.0457704300114057e-05, "loss": 0.0072, "num_input_tokens_seen": 248911264, "step": 115405 }, { "epoch": 18.827079934747147, "grad_norm": 0.003722458379343152, "learning_rate": 1.0443227470372018e-05, "loss": 0.0019, "num_input_tokens_seen": 248922080, "step": 115410 }, { "epoch": 18.8278955954323, "grad_norm": 0.003230731701478362, "learning_rate": 1.0428760562119e-05, "loss": 0.1179, "num_input_tokens_seen": 248931168, "step": 115415 }, { "epoch": 18.828711256117455, "grad_norm": 0.09946080297231674, "learning_rate": 1.041430357564821e-05, "loss": 0.0058, "num_input_tokens_seen": 248942048, "step": 115420 }, { "epoch": 18.82952691680261, "grad_norm": 0.009610078297555447, "learning_rate": 1.0399856511252692e-05, "loss": 0.0033, "num_input_tokens_seen": 248952544, "step": 115425 }, { "epoch": 18.830342577487766, "grad_norm": 0.003259522607550025, "learning_rate": 1.0385419369225157e-05, "loss": 0.0237, "num_input_tokens_seen": 248964224, "step": 115430 }, { "epoch": 18.83115823817292, "grad_norm": 0.0024400127585977316, "learning_rate": 1.0370992149858205e-05, "loss": 0.0014, "num_input_tokens_seen": 248974752, "step": 115435 }, { "epoch": 18.831973898858074, "grad_norm": 0.016571413725614548, "learning_rate": 1.0356574853444211e-05, "loss": 0.0021, "num_input_tokens_seen": 248984448, "step": 115440 }, { "epoch": 18.83278955954323, "grad_norm": 1.5505924224853516, "learning_rate": 1.0342167480275444e-05, "loss": 0.0533, "num_input_tokens_seen": 248996288, "step": 115445 }, { "epoch": 18.833605220228385, "grad_norm": 0.079538993537426, "learning_rate": 1.032777003064378e-05, "loss": 0.0027, "num_input_tokens_seen": 249006336, "step": 115450 }, { "epoch": 18.83442088091354, "grad_norm": 0.0026444753166288137, "learning_rate": 1.0313382504841096e-05, "loss": 0.0333, "num_input_tokens_seen": 249016224, "step": 115455 }, { "epoch": 18.835236541598697, "grad_norm": 0.016765626147389412, "learning_rate": 1.0299004903158882e-05, "loss": 0.0042, "num_input_tokens_seen": 249026656, "step": 115460 }, { "epoch": 18.83605220228385, "grad_norm": 0.03925272077322006, "learning_rate": 1.0284637225888626e-05, "loss": 0.0026, "num_input_tokens_seen": 249037568, "step": 115465 }, { "epoch": 18.836867862969005, "grad_norm": 0.12720951437950134, "learning_rate": 1.0270279473321375e-05, "loss": 0.0032, "num_input_tokens_seen": 249049184, "step": 115470 }, { "epoch": 18.83768352365416, "grad_norm": 0.006453736685216427, "learning_rate": 1.0255931645748174e-05, "loss": 0.0017, "num_input_tokens_seen": 249059840, "step": 115475 }, { "epoch": 18.838499184339316, "grad_norm": 0.008658915758132935, "learning_rate": 1.0241593743459898e-05, "loss": 0.0015, "num_input_tokens_seen": 249070784, "step": 115480 }, { "epoch": 18.839314845024468, "grad_norm": 0.0008843439863994718, "learning_rate": 1.0227265766746874e-05, "loss": 0.0025, "num_input_tokens_seen": 249082464, "step": 115485 }, { "epoch": 18.840130505709624, "grad_norm": 0.025189083069562912, "learning_rate": 1.0212947715899757e-05, "loss": 0.013, "num_input_tokens_seen": 249092896, "step": 115490 }, { "epoch": 18.84094616639478, "grad_norm": 0.0011144388699904084, "learning_rate": 1.0198639591208535e-05, "loss": 0.0009, "num_input_tokens_seen": 249104000, "step": 115495 }, { "epoch": 18.841761827079935, "grad_norm": 0.017848897725343704, "learning_rate": 1.0184341392963259e-05, "loss": 0.009, "num_input_tokens_seen": 249116032, "step": 115500 }, { "epoch": 18.84257748776509, "grad_norm": 0.48548492789268494, "learning_rate": 1.0170053121453694e-05, "loss": 0.0213, "num_input_tokens_seen": 249126720, "step": 115505 }, { "epoch": 18.843393148450243, "grad_norm": 0.06440817564725876, "learning_rate": 1.0155774776969385e-05, "loss": 0.0018, "num_input_tokens_seen": 249137536, "step": 115510 }, { "epoch": 18.8442088091354, "grad_norm": 0.14905716478824615, "learning_rate": 1.0141506359799712e-05, "loss": 0.0078, "num_input_tokens_seen": 249148832, "step": 115515 }, { "epoch": 18.845024469820554, "grad_norm": 0.0022266735322773457, "learning_rate": 1.0127247870233836e-05, "loss": 0.0574, "num_input_tokens_seen": 249158784, "step": 115520 }, { "epoch": 18.84584013050571, "grad_norm": 0.6542752385139465, "learning_rate": 1.011299930856069e-05, "loss": 0.046, "num_input_tokens_seen": 249167616, "step": 115525 }, { "epoch": 18.846655791190866, "grad_norm": 0.003549334593117237, "learning_rate": 1.0098760675069151e-05, "loss": 0.0014, "num_input_tokens_seen": 249180032, "step": 115530 }, { "epoch": 18.847471451876018, "grad_norm": 0.021953493356704712, "learning_rate": 1.0084531970047662e-05, "loss": 0.005, "num_input_tokens_seen": 249190848, "step": 115535 }, { "epoch": 18.848287112561174, "grad_norm": 0.005867693107575178, "learning_rate": 1.0070313193784653e-05, "loss": 0.0029, "num_input_tokens_seen": 249200416, "step": 115540 }, { "epoch": 18.84910277324633, "grad_norm": 0.005468722432851791, "learning_rate": 1.0056104346568285e-05, "loss": 0.0008, "num_input_tokens_seen": 249211328, "step": 115545 }, { "epoch": 18.849918433931485, "grad_norm": 0.02604471519589424, "learning_rate": 1.0041905428686493e-05, "loss": 0.0029, "num_input_tokens_seen": 249220832, "step": 115550 }, { "epoch": 18.85073409461664, "grad_norm": 0.009239173494279385, "learning_rate": 1.0027716440427049e-05, "loss": 0.0029, "num_input_tokens_seen": 249232512, "step": 115555 }, { "epoch": 18.851549755301793, "grad_norm": 0.000523728143889457, "learning_rate": 1.0013537382077443e-05, "loss": 0.0013, "num_input_tokens_seen": 249243776, "step": 115560 }, { "epoch": 18.85236541598695, "grad_norm": 0.1039673388004303, "learning_rate": 9.999368253925167e-06, "loss": 0.0366, "num_input_tokens_seen": 249255328, "step": 115565 }, { "epoch": 18.853181076672104, "grad_norm": 0.0017294472781941295, "learning_rate": 9.985209056257272e-06, "loss": 0.0049, "num_input_tokens_seen": 249265728, "step": 115570 }, { "epoch": 18.85399673735726, "grad_norm": 0.0008751714485697448, "learning_rate": 9.971059789360749e-06, "loss": 0.0026, "num_input_tokens_seen": 249275808, "step": 115575 }, { "epoch": 18.854812398042416, "grad_norm": 0.0032133180648088455, "learning_rate": 9.956920453522366e-06, "loss": 0.0305, "num_input_tokens_seen": 249286976, "step": 115580 }, { "epoch": 18.855628058727568, "grad_norm": 0.0070322491228580475, "learning_rate": 9.942791049028621e-06, "loss": 0.0099, "num_input_tokens_seen": 249298912, "step": 115585 }, { "epoch": 18.856443719412724, "grad_norm": 0.08816249668598175, "learning_rate": 9.928671576165893e-06, "loss": 0.0052, "num_input_tokens_seen": 249308992, "step": 115590 }, { "epoch": 18.85725938009788, "grad_norm": 0.02860182523727417, "learning_rate": 9.914562035220287e-06, "loss": 0.0021, "num_input_tokens_seen": 249319456, "step": 115595 }, { "epoch": 18.858075040783035, "grad_norm": 0.054430607706308365, "learning_rate": 9.900462426477908e-06, "loss": 0.0103, "num_input_tokens_seen": 249331648, "step": 115600 }, { "epoch": 18.85889070146819, "grad_norm": 0.01901131309568882, "learning_rate": 9.886372750224304e-06, "loss": 0.0031, "num_input_tokens_seen": 249342656, "step": 115605 }, { "epoch": 18.859706362153343, "grad_norm": 0.0022008593659847975, "learning_rate": 9.872293006745192e-06, "loss": 0.005, "num_input_tokens_seen": 249354592, "step": 115610 }, { "epoch": 18.8605220228385, "grad_norm": 0.015517137013375759, "learning_rate": 9.858223196325789e-06, "loss": 0.0037, "num_input_tokens_seen": 249365344, "step": 115615 }, { "epoch": 18.861337683523654, "grad_norm": 0.5070440769195557, "learning_rate": 9.844163319251253e-06, "loss": 0.0041, "num_input_tokens_seen": 249374240, "step": 115620 }, { "epoch": 18.86215334420881, "grad_norm": 0.09401752799749374, "learning_rate": 9.830113375806582e-06, "loss": 0.002, "num_input_tokens_seen": 249383808, "step": 115625 }, { "epoch": 18.862969004893966, "grad_norm": 0.024383682757616043, "learning_rate": 9.816073366276545e-06, "loss": 0.0072, "num_input_tokens_seen": 249393664, "step": 115630 }, { "epoch": 18.863784665579118, "grad_norm": 0.0016977563500404358, "learning_rate": 9.802043290945529e-06, "loss": 0.001, "num_input_tokens_seen": 249404768, "step": 115635 }, { "epoch": 18.864600326264274, "grad_norm": 0.018630797043442726, "learning_rate": 9.788023150098024e-06, "loss": 0.0043, "num_input_tokens_seen": 249415392, "step": 115640 }, { "epoch": 18.86541598694943, "grad_norm": 0.04330010712146759, "learning_rate": 9.774012944018085e-06, "loss": 0.0015, "num_input_tokens_seen": 249427168, "step": 115645 }, { "epoch": 18.866231647634585, "grad_norm": 0.003567884908989072, "learning_rate": 9.760012672989704e-06, "loss": 0.0024, "num_input_tokens_seen": 249439104, "step": 115650 }, { "epoch": 18.86704730831974, "grad_norm": 0.003598837647587061, "learning_rate": 9.746022337296546e-06, "loss": 0.0099, "num_input_tokens_seen": 249450240, "step": 115655 }, { "epoch": 18.867862969004893, "grad_norm": 0.0035808219108730555, "learning_rate": 9.732041937222157e-06, "loss": 0.0365, "num_input_tokens_seen": 249461696, "step": 115660 }, { "epoch": 18.86867862969005, "grad_norm": 0.03885788470506668, "learning_rate": 9.718071473049927e-06, "loss": 0.0283, "num_input_tokens_seen": 249472064, "step": 115665 }, { "epoch": 18.869494290375204, "grad_norm": 0.0013579197693616152, "learning_rate": 9.70411094506296e-06, "loss": 0.1235, "num_input_tokens_seen": 249481888, "step": 115670 }, { "epoch": 18.87030995106036, "grad_norm": 0.0016899093752726912, "learning_rate": 9.690160353544142e-06, "loss": 0.0014, "num_input_tokens_seen": 249492288, "step": 115675 }, { "epoch": 18.871125611745512, "grad_norm": 0.13482151925563812, "learning_rate": 9.67621969877619e-06, "loss": 0.0032, "num_input_tokens_seen": 249502304, "step": 115680 }, { "epoch": 18.871941272430668, "grad_norm": 0.5752891302108765, "learning_rate": 9.66228898104171e-06, "loss": 0.0084, "num_input_tokens_seen": 249514240, "step": 115685 }, { "epoch": 18.872756933115824, "grad_norm": 0.002397983567789197, "learning_rate": 9.64836820062298e-06, "loss": 0.0126, "num_input_tokens_seen": 249525696, "step": 115690 }, { "epoch": 18.87357259380098, "grad_norm": 0.06476953625679016, "learning_rate": 9.634457357802107e-06, "loss": 0.0022, "num_input_tokens_seen": 249536384, "step": 115695 }, { "epoch": 18.874388254486135, "grad_norm": 0.008530079387128353, "learning_rate": 9.62055645286103e-06, "loss": 0.0359, "num_input_tokens_seen": 249548160, "step": 115700 }, { "epoch": 18.875203915171287, "grad_norm": 0.01979628950357437, "learning_rate": 9.606665486081522e-06, "loss": 0.0018, "num_input_tokens_seen": 249558432, "step": 115705 }, { "epoch": 18.876019575856443, "grad_norm": 0.024095594882965088, "learning_rate": 9.592784457744918e-06, "loss": 0.0087, "num_input_tokens_seen": 249569824, "step": 115710 }, { "epoch": 18.8768352365416, "grad_norm": 0.025035196915268898, "learning_rate": 9.578913368132824e-06, "loss": 0.0018, "num_input_tokens_seen": 249579968, "step": 115715 }, { "epoch": 18.877650897226754, "grad_norm": 0.03322688117623329, "learning_rate": 9.565052217526072e-06, "loss": 0.007, "num_input_tokens_seen": 249591552, "step": 115720 }, { "epoch": 18.87846655791191, "grad_norm": 0.00032601566636003554, "learning_rate": 9.551201006205767e-06, "loss": 0.0008, "num_input_tokens_seen": 249601600, "step": 115725 }, { "epoch": 18.879282218597062, "grad_norm": 0.0033398610539734364, "learning_rate": 9.537359734452466e-06, "loss": 0.0197, "num_input_tokens_seen": 249611968, "step": 115730 }, { "epoch": 18.880097879282218, "grad_norm": 0.06887746602296829, "learning_rate": 9.523528402546888e-06, "loss": 0.0024, "num_input_tokens_seen": 249622432, "step": 115735 }, { "epoch": 18.880913539967374, "grad_norm": 0.28180527687072754, "learning_rate": 9.509707010769086e-06, "loss": 0.0052, "num_input_tokens_seen": 249633856, "step": 115740 }, { "epoch": 18.88172920065253, "grad_norm": 0.0023468483705073595, "learning_rate": 9.495895559399449e-06, "loss": 0.002, "num_input_tokens_seen": 249644672, "step": 115745 }, { "epoch": 18.882544861337685, "grad_norm": 0.002144427038729191, "learning_rate": 9.482094048717637e-06, "loss": 0.0015, "num_input_tokens_seen": 249656224, "step": 115750 }, { "epoch": 18.883360522022837, "grad_norm": 0.007314638234674931, "learning_rate": 9.468302479003487e-06, "loss": 0.0017, "num_input_tokens_seen": 249666272, "step": 115755 }, { "epoch": 18.884176182707993, "grad_norm": 0.42862388491630554, "learning_rate": 9.45452085053644e-06, "loss": 0.0069, "num_input_tokens_seen": 249675776, "step": 115760 }, { "epoch": 18.88499184339315, "grad_norm": 0.004564585164189339, "learning_rate": 9.44074916359583e-06, "loss": 0.0059, "num_input_tokens_seen": 249686432, "step": 115765 }, { "epoch": 18.885807504078304, "grad_norm": 0.007316834758967161, "learning_rate": 9.42698741846082e-06, "loss": 0.0066, "num_input_tokens_seen": 249697472, "step": 115770 }, { "epoch": 18.88662316476346, "grad_norm": 0.11382170766592026, "learning_rate": 9.413235615410188e-06, "loss": 0.0035, "num_input_tokens_seen": 249708160, "step": 115775 }, { "epoch": 18.887438825448612, "grad_norm": 0.0668792724609375, "learning_rate": 9.39949375472271e-06, "loss": 0.0047, "num_input_tokens_seen": 249718816, "step": 115780 }, { "epoch": 18.888254486133768, "grad_norm": 0.006758835166692734, "learning_rate": 9.385761836676832e-06, "loss": 0.0025, "num_input_tokens_seen": 249730880, "step": 115785 }, { "epoch": 18.889070146818923, "grad_norm": 0.010912146419286728, "learning_rate": 9.37203986155094e-06, "loss": 0.0008, "num_input_tokens_seen": 249740832, "step": 115790 }, { "epoch": 18.88988580750408, "grad_norm": 0.24447159469127655, "learning_rate": 9.358327829623038e-06, "loss": 0.0099, "num_input_tokens_seen": 249752448, "step": 115795 }, { "epoch": 18.890701468189235, "grad_norm": 0.02637798897922039, "learning_rate": 9.344625741171009e-06, "loss": 0.0057, "num_input_tokens_seen": 249763776, "step": 115800 }, { "epoch": 18.891517128874387, "grad_norm": 0.066844642162323, "learning_rate": 9.330933596472635e-06, "loss": 0.0032, "num_input_tokens_seen": 249774560, "step": 115805 }, { "epoch": 18.892332789559543, "grad_norm": 0.0016681354027241468, "learning_rate": 9.317251395805304e-06, "loss": 0.0015, "num_input_tokens_seen": 249785408, "step": 115810 }, { "epoch": 18.8931484502447, "grad_norm": 0.004231530707329512, "learning_rate": 9.303579139446349e-06, "loss": 0.0022, "num_input_tokens_seen": 249795008, "step": 115815 }, { "epoch": 18.893964110929854, "grad_norm": 0.0003896152484230697, "learning_rate": 9.28991682767294e-06, "loss": 0.0006, "num_input_tokens_seen": 249806368, "step": 115820 }, { "epoch": 18.894779771615006, "grad_norm": 0.014759872108697891, "learning_rate": 9.27626446076174e-06, "loss": 0.0028, "num_input_tokens_seen": 249817056, "step": 115825 }, { "epoch": 18.895595432300162, "grad_norm": 0.020037462934851646, "learning_rate": 9.2626220389897e-06, "loss": 0.007, "num_input_tokens_seen": 249828544, "step": 115830 }, { "epoch": 18.896411092985318, "grad_norm": 0.002802329370751977, "learning_rate": 9.248989562633037e-06, "loss": 0.0017, "num_input_tokens_seen": 249838976, "step": 115835 }, { "epoch": 18.897226753670473, "grad_norm": 0.003205228829756379, "learning_rate": 9.235367031968312e-06, "loss": 0.0025, "num_input_tokens_seen": 249848800, "step": 115840 }, { "epoch": 18.89804241435563, "grad_norm": 0.002557715168222785, "learning_rate": 9.221754447271302e-06, "loss": 0.0022, "num_input_tokens_seen": 249859104, "step": 115845 }, { "epoch": 18.898858075040785, "grad_norm": 0.0033851321786642075, "learning_rate": 9.208151808818177e-06, "loss": 0.0354, "num_input_tokens_seen": 249870400, "step": 115850 }, { "epoch": 18.899673735725937, "grad_norm": 0.006178013514727354, "learning_rate": 9.194559116884327e-06, "loss": 0.0024, "num_input_tokens_seen": 249881408, "step": 115855 }, { "epoch": 18.900489396411093, "grad_norm": 0.006866784766316414, "learning_rate": 9.18097637174553e-06, "loss": 0.0008, "num_input_tokens_seen": 249893216, "step": 115860 }, { "epoch": 18.90130505709625, "grad_norm": 0.015050943940877914, "learning_rate": 9.167403573676736e-06, "loss": 0.0055, "num_input_tokens_seen": 249903104, "step": 115865 }, { "epoch": 18.902120717781404, "grad_norm": 0.002230555983260274, "learning_rate": 9.153840722953278e-06, "loss": 0.0085, "num_input_tokens_seen": 249914432, "step": 115870 }, { "epoch": 18.902936378466556, "grad_norm": 0.006018503103405237, "learning_rate": 9.14028781984988e-06, "loss": 0.0023, "num_input_tokens_seen": 249925664, "step": 115875 }, { "epoch": 18.903752039151712, "grad_norm": 0.4204626977443695, "learning_rate": 9.126744864641267e-06, "loss": 0.031, "num_input_tokens_seen": 249935712, "step": 115880 }, { "epoch": 18.904567699836868, "grad_norm": 0.7488633394241333, "learning_rate": 9.113211857601833e-06, "loss": 0.3829, "num_input_tokens_seen": 249946688, "step": 115885 }, { "epoch": 18.905383360522023, "grad_norm": 0.007819092832505703, "learning_rate": 9.099688799005967e-06, "loss": 0.0872, "num_input_tokens_seen": 249958208, "step": 115890 }, { "epoch": 18.90619902120718, "grad_norm": 0.017484666779637337, "learning_rate": 9.086175689127618e-06, "loss": 0.0013, "num_input_tokens_seen": 249969088, "step": 115895 }, { "epoch": 18.90701468189233, "grad_norm": 0.019806895405054092, "learning_rate": 9.072672528240733e-06, "loss": 0.001, "num_input_tokens_seen": 249980000, "step": 115900 }, { "epoch": 18.907830342577487, "grad_norm": 0.01013483852148056, "learning_rate": 9.059179316618871e-06, "loss": 0.0014, "num_input_tokens_seen": 249990784, "step": 115905 }, { "epoch": 18.908646003262643, "grad_norm": 0.0024809723254293203, "learning_rate": 9.045696054535535e-06, "loss": 0.001, "num_input_tokens_seen": 250000864, "step": 115910 }, { "epoch": 18.9094616639478, "grad_norm": 0.301296591758728, "learning_rate": 9.032222742264008e-06, "loss": 0.0096, "num_input_tokens_seen": 250011872, "step": 115915 }, { "epoch": 18.910277324632954, "grad_norm": 0.04038350284099579, "learning_rate": 9.018759380077346e-06, "loss": 0.0036, "num_input_tokens_seen": 250022208, "step": 115920 }, { "epoch": 18.911092985318106, "grad_norm": 0.0124030327424407, "learning_rate": 9.005305968248334e-06, "loss": 0.0016, "num_input_tokens_seen": 250034016, "step": 115925 }, { "epoch": 18.911908646003262, "grad_norm": 0.0019749896600842476, "learning_rate": 8.991862507049698e-06, "loss": 0.0147, "num_input_tokens_seen": 250045216, "step": 115930 }, { "epoch": 18.912724306688418, "grad_norm": 0.001440156833268702, "learning_rate": 8.978428996753885e-06, "loss": 0.0032, "num_input_tokens_seen": 250055552, "step": 115935 }, { "epoch": 18.913539967373573, "grad_norm": 0.0008463303674943745, "learning_rate": 8.965005437633067e-06, "loss": 0.0022, "num_input_tokens_seen": 250067104, "step": 115940 }, { "epoch": 18.91435562805873, "grad_norm": 0.0022829356603324413, "learning_rate": 8.95159182995936e-06, "loss": 0.0008, "num_input_tokens_seen": 250078016, "step": 115945 }, { "epoch": 18.91517128874388, "grad_norm": 0.012311785481870174, "learning_rate": 8.938188174004602e-06, "loss": 0.0019, "num_input_tokens_seen": 250089248, "step": 115950 }, { "epoch": 18.915986949429037, "grad_norm": 0.009466729126870632, "learning_rate": 8.924794470040354e-06, "loss": 0.012, "num_input_tokens_seen": 250099840, "step": 115955 }, { "epoch": 18.916802610114193, "grad_norm": 0.009992366656661034, "learning_rate": 8.91141071833812e-06, "loss": 0.0033, "num_input_tokens_seen": 250110784, "step": 115960 }, { "epoch": 18.91761827079935, "grad_norm": 0.023043444380164146, "learning_rate": 8.89803691916924e-06, "loss": 0.0072, "num_input_tokens_seen": 250121344, "step": 115965 }, { "epoch": 18.918433931484504, "grad_norm": 0.04372687265276909, "learning_rate": 8.88467307280455e-06, "loss": 0.0015, "num_input_tokens_seen": 250133344, "step": 115970 }, { "epoch": 18.919249592169656, "grad_norm": 0.006103368941694498, "learning_rate": 8.871319179515058e-06, "loss": 0.0049, "num_input_tokens_seen": 250143296, "step": 115975 }, { "epoch": 18.920065252854812, "grad_norm": 0.01576423831284046, "learning_rate": 8.857975239571215e-06, "loss": 0.0023, "num_input_tokens_seen": 250155232, "step": 115980 }, { "epoch": 18.920880913539968, "grad_norm": 0.0007975206826813519, "learning_rate": 8.84464125324369e-06, "loss": 0.0056, "num_input_tokens_seen": 250167200, "step": 115985 }, { "epoch": 18.921696574225123, "grad_norm": 0.029342642053961754, "learning_rate": 8.831317220802493e-06, "loss": 0.0009, "num_input_tokens_seen": 250178496, "step": 115990 }, { "epoch": 18.92251223491028, "grad_norm": 0.08800439536571503, "learning_rate": 8.818003142517794e-06, "loss": 0.0027, "num_input_tokens_seen": 250190304, "step": 115995 }, { "epoch": 18.92332789559543, "grad_norm": 0.008746287785470486, "learning_rate": 8.804699018659324e-06, "loss": 0.0037, "num_input_tokens_seen": 250200896, "step": 116000 }, { "epoch": 18.924143556280587, "grad_norm": 0.009568284265697002, "learning_rate": 8.79140484949681e-06, "loss": 0.0031, "num_input_tokens_seen": 250209696, "step": 116005 }, { "epoch": 18.924959216965743, "grad_norm": 0.17504137754440308, "learning_rate": 8.778120635299537e-06, "loss": 0.0053, "num_input_tokens_seen": 250220096, "step": 116010 }, { "epoch": 18.9257748776509, "grad_norm": 0.028957204893231392, "learning_rate": 8.7648463763369e-06, "loss": 0.0019, "num_input_tokens_seen": 250230432, "step": 116015 }, { "epoch": 18.92659053833605, "grad_norm": 0.013375704176723957, "learning_rate": 8.751582072877739e-06, "loss": 0.0074, "num_input_tokens_seen": 250240768, "step": 116020 }, { "epoch": 18.927406199021206, "grad_norm": 0.007944837212562561, "learning_rate": 8.738327725191064e-06, "loss": 0.0019, "num_input_tokens_seen": 250250944, "step": 116025 }, { "epoch": 18.928221859706362, "grad_norm": 0.0099667152389884, "learning_rate": 8.725083333545326e-06, "loss": 0.0054, "num_input_tokens_seen": 250261696, "step": 116030 }, { "epoch": 18.929037520391518, "grad_norm": 0.08507489413022995, "learning_rate": 8.711848898208974e-06, "loss": 0.004, "num_input_tokens_seen": 250272640, "step": 116035 }, { "epoch": 18.929853181076673, "grad_norm": 0.002138556446880102, "learning_rate": 8.698624419450296e-06, "loss": 0.0009, "num_input_tokens_seen": 250284064, "step": 116040 }, { "epoch": 18.930668841761825, "grad_norm": 0.9311119318008423, "learning_rate": 8.685409897537244e-06, "loss": 0.1325, "num_input_tokens_seen": 250294976, "step": 116045 }, { "epoch": 18.93148450244698, "grad_norm": 0.007951601408421993, "learning_rate": 8.672205332737603e-06, "loss": 0.0023, "num_input_tokens_seen": 250306848, "step": 116050 }, { "epoch": 18.932300163132137, "grad_norm": 0.078665591776371, "learning_rate": 8.65901072531905e-06, "loss": 0.066, "num_input_tokens_seen": 250315424, "step": 116055 }, { "epoch": 18.933115823817293, "grad_norm": 0.0007486925460398197, "learning_rate": 8.64582607554898e-06, "loss": 0.004, "num_input_tokens_seen": 250326688, "step": 116060 }, { "epoch": 18.93393148450245, "grad_norm": 0.005215236451476812, "learning_rate": 8.632651383694513e-06, "loss": 0.0007, "num_input_tokens_seen": 250336800, "step": 116065 }, { "epoch": 18.9347471451876, "grad_norm": 0.0007327625062316656, "learning_rate": 8.619486650022768e-06, "loss": 0.0023, "num_input_tokens_seen": 250346528, "step": 116070 }, { "epoch": 18.935562805872756, "grad_norm": 0.004965245258063078, "learning_rate": 8.606331874800421e-06, "loss": 0.0007, "num_input_tokens_seen": 250357120, "step": 116075 }, { "epoch": 18.936378466557912, "grad_norm": 0.08158113807439804, "learning_rate": 8.593187058294205e-06, "loss": 0.0024, "num_input_tokens_seen": 250369024, "step": 116080 }, { "epoch": 18.937194127243067, "grad_norm": 0.0014401074731722474, "learning_rate": 8.580052200770405e-06, "loss": 0.0011, "num_input_tokens_seen": 250380256, "step": 116085 }, { "epoch": 18.938009787928223, "grad_norm": 0.013578815385699272, "learning_rate": 8.566927302495254e-06, "loss": 0.0058, "num_input_tokens_seen": 250391456, "step": 116090 }, { "epoch": 18.938825448613375, "grad_norm": 0.002787172794342041, "learning_rate": 8.553812363734759e-06, "loss": 0.002, "num_input_tokens_seen": 250402272, "step": 116095 }, { "epoch": 18.93964110929853, "grad_norm": 0.0021521078888326883, "learning_rate": 8.54070738475471e-06, "loss": 0.0091, "num_input_tokens_seen": 250411488, "step": 116100 }, { "epoch": 18.940456769983687, "grad_norm": 0.09405086189508438, "learning_rate": 8.527612365820613e-06, "loss": 0.0923, "num_input_tokens_seen": 250423104, "step": 116105 }, { "epoch": 18.941272430668842, "grad_norm": 0.021962931379675865, "learning_rate": 8.514527307198038e-06, "loss": 0.0022, "num_input_tokens_seen": 250432992, "step": 116110 }, { "epoch": 18.942088091353998, "grad_norm": 0.003239113837480545, "learning_rate": 8.501452209151995e-06, "loss": 0.0012, "num_input_tokens_seen": 250444288, "step": 116115 }, { "epoch": 18.94290375203915, "grad_norm": 0.006468859035521746, "learning_rate": 8.488387071947601e-06, "loss": 0.0008, "num_input_tokens_seen": 250454656, "step": 116120 }, { "epoch": 18.943719412724306, "grad_norm": 0.012379714287817478, "learning_rate": 8.47533189584948e-06, "loss": 0.0013, "num_input_tokens_seen": 250465152, "step": 116125 }, { "epoch": 18.94453507340946, "grad_norm": 0.0033938849810510874, "learning_rate": 8.46228668112231e-06, "loss": 0.026, "num_input_tokens_seen": 250476416, "step": 116130 }, { "epoch": 18.945350734094617, "grad_norm": 0.005319104064255953, "learning_rate": 8.449251428030492e-06, "loss": 0.0015, "num_input_tokens_seen": 250486848, "step": 116135 }, { "epoch": 18.946166394779773, "grad_norm": 0.030385605990886688, "learning_rate": 8.436226136838198e-06, "loss": 0.0031, "num_input_tokens_seen": 250497120, "step": 116140 }, { "epoch": 18.946982055464925, "grad_norm": 0.021137241274118423, "learning_rate": 8.423210807809333e-06, "loss": 0.001, "num_input_tokens_seen": 250507200, "step": 116145 }, { "epoch": 18.94779771615008, "grad_norm": 0.008405787870287895, "learning_rate": 8.410205441207741e-06, "loss": 0.0032, "num_input_tokens_seen": 250517632, "step": 116150 }, { "epoch": 18.948613376835237, "grad_norm": 0.0010001793270930648, "learning_rate": 8.397210037296931e-06, "loss": 0.0005, "num_input_tokens_seen": 250527904, "step": 116155 }, { "epoch": 18.949429037520392, "grad_norm": 0.007627187296748161, "learning_rate": 8.384224596340306e-06, "loss": 0.0068, "num_input_tokens_seen": 250539680, "step": 116160 }, { "epoch": 18.950244698205548, "grad_norm": 0.0020545762963593006, "learning_rate": 8.371249118601043e-06, "loss": 0.0018, "num_input_tokens_seen": 250550144, "step": 116165 }, { "epoch": 18.9510603588907, "grad_norm": 0.006114541087299585, "learning_rate": 8.358283604342098e-06, "loss": 0.0196, "num_input_tokens_seen": 250561920, "step": 116170 }, { "epoch": 18.951876019575856, "grad_norm": 0.0308638084679842, "learning_rate": 8.345328053826207e-06, "loss": 0.0047, "num_input_tokens_seen": 250571744, "step": 116175 }, { "epoch": 18.95269168026101, "grad_norm": 0.09067867696285248, "learning_rate": 8.33238246731599e-06, "loss": 0.0039, "num_input_tokens_seen": 250583264, "step": 116180 }, { "epoch": 18.953507340946167, "grad_norm": 0.0020279581658542156, "learning_rate": 8.319446845073741e-06, "loss": 0.0015, "num_input_tokens_seen": 250594688, "step": 116185 }, { "epoch": 18.954323001631323, "grad_norm": 0.006511158775538206, "learning_rate": 8.306521187361638e-06, "loss": 0.0052, "num_input_tokens_seen": 250605696, "step": 116190 }, { "epoch": 18.955138662316475, "grad_norm": 0.006900846492499113, "learning_rate": 8.293605494441636e-06, "loss": 0.003, "num_input_tokens_seen": 250615488, "step": 116195 }, { "epoch": 18.95595432300163, "grad_norm": 0.012329554185271263, "learning_rate": 8.280699766575528e-06, "loss": 0.0013, "num_input_tokens_seen": 250625632, "step": 116200 }, { "epoch": 18.956769983686787, "grad_norm": 0.0082809217274189, "learning_rate": 8.26780400402477e-06, "loss": 0.0012, "num_input_tokens_seen": 250636416, "step": 116205 }, { "epoch": 18.957585644371942, "grad_norm": 0.5010150074958801, "learning_rate": 8.254918207050821e-06, "loss": 0.0134, "num_input_tokens_seen": 250646048, "step": 116210 }, { "epoch": 18.958401305057095, "grad_norm": 0.05712961405515671, "learning_rate": 8.242042375914748e-06, "loss": 0.008, "num_input_tokens_seen": 250657440, "step": 116215 }, { "epoch": 18.95921696574225, "grad_norm": 0.003811068367213011, "learning_rate": 8.229176510877512e-06, "loss": 0.0051, "num_input_tokens_seen": 250669696, "step": 116220 }, { "epoch": 18.960032626427406, "grad_norm": 0.8903943300247192, "learning_rate": 8.216320612199901e-06, "loss": 0.093, "num_input_tokens_seen": 250680800, "step": 116225 }, { "epoch": 18.96084828711256, "grad_norm": 0.016154255717992783, "learning_rate": 8.203474680142431e-06, "loss": 0.0016, "num_input_tokens_seen": 250692992, "step": 116230 }, { "epoch": 18.961663947797717, "grad_norm": 0.17833995819091797, "learning_rate": 8.190638714965393e-06, "loss": 0.0066, "num_input_tokens_seen": 250703584, "step": 116235 }, { "epoch": 18.96247960848287, "grad_norm": 0.006108742672950029, "learning_rate": 8.177812716928967e-06, "loss": 0.0032, "num_input_tokens_seen": 250714624, "step": 116240 }, { "epoch": 18.963295269168025, "grad_norm": 0.021284479647874832, "learning_rate": 8.164996686293114e-06, "loss": 0.0042, "num_input_tokens_seen": 250725920, "step": 116245 }, { "epoch": 18.96411092985318, "grad_norm": 0.007907040417194366, "learning_rate": 8.152190623317569e-06, "loss": 0.0014, "num_input_tokens_seen": 250735456, "step": 116250 }, { "epoch": 18.964926590538337, "grad_norm": 0.07404635101556778, "learning_rate": 8.13939452826179e-06, "loss": 0.0031, "num_input_tokens_seen": 250746464, "step": 116255 }, { "epoch": 18.965742251223492, "grad_norm": 0.0012358606327325106, "learning_rate": 8.126608401385183e-06, "loss": 0.1067, "num_input_tokens_seen": 250759488, "step": 116260 }, { "epoch": 18.966557911908644, "grad_norm": 0.0033821011893451214, "learning_rate": 8.113832242946818e-06, "loss": 0.001, "num_input_tokens_seen": 250769216, "step": 116265 }, { "epoch": 18.9673735725938, "grad_norm": 0.2067692130804062, "learning_rate": 8.101066053205653e-06, "loss": 0.0056, "num_input_tokens_seen": 250780000, "step": 116270 }, { "epoch": 18.968189233278956, "grad_norm": 0.02143809013068676, "learning_rate": 8.08830983242037e-06, "loss": 0.0147, "num_input_tokens_seen": 250789408, "step": 116275 }, { "epoch": 18.96900489396411, "grad_norm": 0.012229821644723415, "learning_rate": 8.0755635808496e-06, "loss": 0.0371, "num_input_tokens_seen": 250800128, "step": 116280 }, { "epoch": 18.969820554649267, "grad_norm": 1.0263516902923584, "learning_rate": 8.062827298751518e-06, "loss": 0.0252, "num_input_tokens_seen": 250812704, "step": 116285 }, { "epoch": 18.97063621533442, "grad_norm": 0.008975377306342125, "learning_rate": 8.050100986384312e-06, "loss": 0.0031, "num_input_tokens_seen": 250825088, "step": 116290 }, { "epoch": 18.971451876019575, "grad_norm": 0.009711428545415401, "learning_rate": 8.037384644005941e-06, "loss": 0.0015, "num_input_tokens_seen": 250836192, "step": 116295 }, { "epoch": 18.97226753670473, "grad_norm": 0.23430676758289337, "learning_rate": 8.024678271874031e-06, "loss": 0.0075, "num_input_tokens_seen": 250845408, "step": 116300 }, { "epoch": 18.973083197389887, "grad_norm": 0.11744371801614761, "learning_rate": 8.011981870246099e-06, "loss": 0.0058, "num_input_tokens_seen": 250857280, "step": 116305 }, { "epoch": 18.973898858075042, "grad_norm": 0.0030789999291300774, "learning_rate": 7.99929543937955e-06, "loss": 0.0023, "num_input_tokens_seen": 250868512, "step": 116310 }, { "epoch": 18.974714518760194, "grad_norm": 0.048179615288972855, "learning_rate": 7.9866189795314e-06, "loss": 0.0086, "num_input_tokens_seen": 250879232, "step": 116315 }, { "epoch": 18.97553017944535, "grad_norm": 0.034057579934597015, "learning_rate": 7.973952490958559e-06, "loss": 0.0011, "num_input_tokens_seen": 250890304, "step": 116320 }, { "epoch": 18.976345840130506, "grad_norm": 0.006413894705474377, "learning_rate": 7.961295973917759e-06, "loss": 0.0026, "num_input_tokens_seen": 250900224, "step": 116325 }, { "epoch": 18.97716150081566, "grad_norm": 0.6238510012626648, "learning_rate": 7.948649428665522e-06, "loss": 0.0374, "num_input_tokens_seen": 250911520, "step": 116330 }, { "epoch": 18.977977161500817, "grad_norm": 0.07288864254951477, "learning_rate": 7.936012855458085e-06, "loss": 0.0016, "num_input_tokens_seen": 250921472, "step": 116335 }, { "epoch": 18.97879282218597, "grad_norm": 0.008380493149161339, "learning_rate": 7.923386254551523e-06, "loss": 0.0081, "num_input_tokens_seen": 250933536, "step": 116340 }, { "epoch": 18.979608482871125, "grad_norm": 0.2320074439048767, "learning_rate": 7.910769626201908e-06, "loss": 0.0086, "num_input_tokens_seen": 250942944, "step": 116345 }, { "epoch": 18.98042414355628, "grad_norm": 0.036740388721227646, "learning_rate": 7.898162970664702e-06, "loss": 0.0019, "num_input_tokens_seen": 250953216, "step": 116350 }, { "epoch": 18.981239804241437, "grad_norm": 0.008007435128092766, "learning_rate": 7.88556628819559e-06, "loss": 0.0021, "num_input_tokens_seen": 250964384, "step": 116355 }, { "epoch": 18.982055464926592, "grad_norm": 0.03796574845910072, "learning_rate": 7.872979579049644e-06, "loss": 0.0041, "num_input_tokens_seen": 250975648, "step": 116360 }, { "epoch": 18.982871125611744, "grad_norm": 0.007252459414303303, "learning_rate": 7.860402843482218e-06, "loss": 0.0084, "num_input_tokens_seen": 250985728, "step": 116365 }, { "epoch": 18.9836867862969, "grad_norm": 0.0018981442553922534, "learning_rate": 7.847836081747939e-06, "loss": 0.0615, "num_input_tokens_seen": 250996480, "step": 116370 }, { "epoch": 18.984502446982056, "grad_norm": 0.001245081890374422, "learning_rate": 7.83527929410166e-06, "loss": 0.0009, "num_input_tokens_seen": 251006208, "step": 116375 }, { "epoch": 18.98531810766721, "grad_norm": 0.001177369267679751, "learning_rate": 7.822732480797734e-06, "loss": 0.0012, "num_input_tokens_seen": 251016096, "step": 116380 }, { "epoch": 18.986133768352367, "grad_norm": 0.024029146879911423, "learning_rate": 7.810195642090568e-06, "loss": 0.005, "num_input_tokens_seen": 251028448, "step": 116385 }, { "epoch": 18.98694942903752, "grad_norm": 0.010108845308423042, "learning_rate": 7.797668778234179e-06, "loss": 0.0008, "num_input_tokens_seen": 251038560, "step": 116390 }, { "epoch": 18.987765089722675, "grad_norm": 0.03382923826575279, "learning_rate": 7.785151889482422e-06, "loss": 0.0028, "num_input_tokens_seen": 251048000, "step": 116395 }, { "epoch": 18.98858075040783, "grad_norm": 2.420494794845581, "learning_rate": 7.772644976088982e-06, "loss": 0.0366, "num_input_tokens_seen": 251058272, "step": 116400 }, { "epoch": 18.989396411092986, "grad_norm": 0.008531792089343071, "learning_rate": 7.760148038307324e-06, "loss": 0.001, "num_input_tokens_seen": 251069504, "step": 116405 }, { "epoch": 18.99021207177814, "grad_norm": 0.03108024038374424, "learning_rate": 7.747661076390688e-06, "loss": 0.0281, "num_input_tokens_seen": 251080640, "step": 116410 }, { "epoch": 18.991027732463294, "grad_norm": 0.0020375594031065702, "learning_rate": 7.735184090592206e-06, "loss": 0.0019, "num_input_tokens_seen": 251090560, "step": 116415 }, { "epoch": 18.99184339314845, "grad_norm": 0.003627994330599904, "learning_rate": 7.722717081164677e-06, "loss": 0.0006, "num_input_tokens_seen": 251101856, "step": 116420 }, { "epoch": 18.992659053833606, "grad_norm": 0.02929912880063057, "learning_rate": 7.710260048360784e-06, "loss": 0.0066, "num_input_tokens_seen": 251111360, "step": 116425 }, { "epoch": 18.99347471451876, "grad_norm": 0.0008582132286392152, "learning_rate": 7.697812992432996e-06, "loss": 0.1148, "num_input_tokens_seen": 251122432, "step": 116430 }, { "epoch": 18.994290375203914, "grad_norm": 0.017039980739355087, "learning_rate": 7.685375913633607e-06, "loss": 0.0032, "num_input_tokens_seen": 251133376, "step": 116435 }, { "epoch": 18.99510603588907, "grad_norm": 0.004629854578524828, "learning_rate": 7.67294881221453e-06, "loss": 0.0028, "num_input_tokens_seen": 251142400, "step": 116440 }, { "epoch": 18.995921696574225, "grad_norm": 0.011425500735640526, "learning_rate": 7.660531688427729e-06, "loss": 0.0006, "num_input_tokens_seen": 251154016, "step": 116445 }, { "epoch": 18.99673735725938, "grad_norm": 0.001014662440866232, "learning_rate": 7.648124542524892e-06, "loss": 0.0006, "num_input_tokens_seen": 251165536, "step": 116450 }, { "epoch": 18.997553017944536, "grad_norm": 0.00871925987303257, "learning_rate": 7.635727374757318e-06, "loss": 0.007, "num_input_tokens_seen": 251174976, "step": 116455 }, { "epoch": 18.99836867862969, "grad_norm": 0.0005812650779262185, "learning_rate": 7.623340185376415e-06, "loss": 0.0008, "num_input_tokens_seen": 251186272, "step": 116460 }, { "epoch": 18.999184339314844, "grad_norm": 0.00275573437102139, "learning_rate": 7.6109629746330955e-06, "loss": 0.0056, "num_input_tokens_seen": 251197344, "step": 116465 }, { "epoch": 19.0, "grad_norm": 0.004681569058448076, "learning_rate": 7.5985957427782695e-06, "loss": 0.0047, "num_input_tokens_seen": 251207552, "step": 116470 }, { "epoch": 19.0, "eval_loss": 0.2634807229042053, "eval_runtime": 104.8012, "eval_samples_per_second": 26.002, "eval_steps_per_second": 6.508, "num_input_tokens_seen": 251207552, "step": 116470 }, { "epoch": 19.000815660685156, "grad_norm": 0.018297944217920303, "learning_rate": 7.5862384900625135e-06, "loss": 0.0028, "num_input_tokens_seen": 251219264, "step": 116475 }, { "epoch": 19.00163132137031, "grad_norm": 0.011280841194093227, "learning_rate": 7.573891216736406e-06, "loss": 0.0034, "num_input_tokens_seen": 251230400, "step": 116480 }, { "epoch": 19.002446982055464, "grad_norm": 0.007486789487302303, "learning_rate": 7.561553923049969e-06, "loss": 0.0028, "num_input_tokens_seen": 251241088, "step": 116485 }, { "epoch": 19.00326264274062, "grad_norm": 0.01644994504749775, "learning_rate": 7.549226609253446e-06, "loss": 0.0017, "num_input_tokens_seen": 251250976, "step": 116490 }, { "epoch": 19.004078303425775, "grad_norm": 0.11348441243171692, "learning_rate": 7.536909275596471e-06, "loss": 0.0041, "num_input_tokens_seen": 251261952, "step": 116495 }, { "epoch": 19.00489396411093, "grad_norm": 0.004390157759189606, "learning_rate": 7.524601922328844e-06, "loss": 0.0332, "num_input_tokens_seen": 251272096, "step": 116500 }, { "epoch": 19.005709624796086, "grad_norm": 0.0027737815398722887, "learning_rate": 7.512304549699811e-06, "loss": 0.0068, "num_input_tokens_seen": 251282560, "step": 116505 }, { "epoch": 19.00652528548124, "grad_norm": 0.06997373700141907, "learning_rate": 7.500017157958838e-06, "loss": 0.004, "num_input_tokens_seen": 251294272, "step": 116510 }, { "epoch": 19.007340946166394, "grad_norm": 0.0003149318799842149, "learning_rate": 7.487739747354672e-06, "loss": 0.0016, "num_input_tokens_seen": 251303520, "step": 116515 }, { "epoch": 19.00815660685155, "grad_norm": 0.15804758667945862, "learning_rate": 7.475472318136334e-06, "loss": 0.0081, "num_input_tokens_seen": 251315424, "step": 116520 }, { "epoch": 19.008972267536706, "grad_norm": 0.03621334955096245, "learning_rate": 7.4632148705522374e-06, "loss": 0.0018, "num_input_tokens_seen": 251325248, "step": 116525 }, { "epoch": 19.00978792822186, "grad_norm": 0.009401818737387657, "learning_rate": 7.450967404851017e-06, "loss": 0.0014, "num_input_tokens_seen": 251337344, "step": 116530 }, { "epoch": 19.010603588907014, "grad_norm": 0.03038276731967926, "learning_rate": 7.438729921280752e-06, "loss": 0.0082, "num_input_tokens_seen": 251347968, "step": 116535 }, { "epoch": 19.01141924959217, "grad_norm": 0.13571631908416748, "learning_rate": 7.42650242008952e-06, "loss": 0.0029, "num_input_tokens_seen": 251359616, "step": 116540 }, { "epoch": 19.012234910277325, "grad_norm": 0.0018942217575386167, "learning_rate": 7.41428490152507e-06, "loss": 0.001, "num_input_tokens_seen": 251371296, "step": 116545 }, { "epoch": 19.01305057096248, "grad_norm": 0.0177732203155756, "learning_rate": 7.402077365835036e-06, "loss": 0.0016, "num_input_tokens_seen": 251381280, "step": 116550 }, { "epoch": 19.013866231647636, "grad_norm": 0.011855092830955982, "learning_rate": 7.389879813266831e-06, "loss": 0.0411, "num_input_tokens_seen": 251390688, "step": 116555 }, { "epoch": 19.01468189233279, "grad_norm": 0.05330047383904457, "learning_rate": 7.377692244067591e-06, "loss": 0.0148, "num_input_tokens_seen": 251400960, "step": 116560 }, { "epoch": 19.015497553017944, "grad_norm": 0.0013384429039433599, "learning_rate": 7.36551465848434e-06, "loss": 0.005, "num_input_tokens_seen": 251411200, "step": 116565 }, { "epoch": 19.0163132137031, "grad_norm": 0.0011979796690866351, "learning_rate": 7.353347056763937e-06, "loss": 0.0049, "num_input_tokens_seen": 251421824, "step": 116570 }, { "epoch": 19.017128874388256, "grad_norm": 0.00457855686545372, "learning_rate": 7.341189439152907e-06, "loss": 0.0029, "num_input_tokens_seen": 251432800, "step": 116575 }, { "epoch": 19.017944535073408, "grad_norm": 0.008155317977070808, "learning_rate": 7.329041805897551e-06, "loss": 0.009, "num_input_tokens_seen": 251444384, "step": 116580 }, { "epoch": 19.018760195758563, "grad_norm": 0.04514889419078827, "learning_rate": 7.316904157244342e-06, "loss": 0.0027, "num_input_tokens_seen": 251455648, "step": 116585 }, { "epoch": 19.01957585644372, "grad_norm": 0.008646034635603428, "learning_rate": 7.304776493438914e-06, "loss": 0.0018, "num_input_tokens_seen": 251466432, "step": 116590 }, { "epoch": 19.020391517128875, "grad_norm": 0.03563907742500305, "learning_rate": 7.2926588147273484e-06, "loss": 0.003, "num_input_tokens_seen": 251476672, "step": 116595 }, { "epoch": 19.02120717781403, "grad_norm": 0.0012868959456682205, "learning_rate": 7.280551121355005e-06, "loss": 0.0046, "num_input_tokens_seen": 251487232, "step": 116600 }, { "epoch": 19.022022838499183, "grad_norm": 0.005586009938269854, "learning_rate": 7.268453413567467e-06, "loss": 0.0097, "num_input_tokens_seen": 251498336, "step": 116605 }, { "epoch": 19.02283849918434, "grad_norm": 0.011918015778064728, "learning_rate": 7.256365691609645e-06, "loss": 0.0018, "num_input_tokens_seen": 251509600, "step": 116610 }, { "epoch": 19.023654159869494, "grad_norm": 0.013791882432997227, "learning_rate": 7.244287955726791e-06, "loss": 0.0013, "num_input_tokens_seen": 251520288, "step": 116615 }, { "epoch": 19.02446982055465, "grad_norm": 0.0010393774136900902, "learning_rate": 7.232220206163431e-06, "loss": 0.0058, "num_input_tokens_seen": 251530976, "step": 116620 }, { "epoch": 19.025285481239806, "grad_norm": 0.006047057453542948, "learning_rate": 7.220162443164369e-06, "loss": 0.0036, "num_input_tokens_seen": 251541600, "step": 116625 }, { "epoch": 19.026101141924958, "grad_norm": 0.0013951618457213044, "learning_rate": 7.2081146669737416e-06, "loss": 0.1085, "num_input_tokens_seen": 251551456, "step": 116630 }, { "epoch": 19.026916802610113, "grad_norm": 0.2098870724439621, "learning_rate": 7.196076877835911e-06, "loss": 0.0076, "num_input_tokens_seen": 251563808, "step": 116635 }, { "epoch": 19.02773246329527, "grad_norm": 0.0034503669012337923, "learning_rate": 7.1840490759946805e-06, "loss": 0.0012, "num_input_tokens_seen": 251573696, "step": 116640 }, { "epoch": 19.028548123980425, "grad_norm": 0.020296858623623848, "learning_rate": 7.172031261693967e-06, "loss": 0.0035, "num_input_tokens_seen": 251584640, "step": 116645 }, { "epoch": 19.02936378466558, "grad_norm": 0.03558698669075966, "learning_rate": 7.160023435177132e-06, "loss": 0.0036, "num_input_tokens_seen": 251596768, "step": 116650 }, { "epoch": 19.030179445350733, "grad_norm": 0.011863305233418941, "learning_rate": 7.148025596687702e-06, "loss": 0.0086, "num_input_tokens_seen": 251609056, "step": 116655 }, { "epoch": 19.03099510603589, "grad_norm": 1.1137291193008423, "learning_rate": 7.136037746468704e-06, "loss": 0.0318, "num_input_tokens_seen": 251619232, "step": 116660 }, { "epoch": 19.031810766721044, "grad_norm": 0.08766020089387894, "learning_rate": 7.124059884763168e-06, "loss": 0.005, "num_input_tokens_seen": 251630752, "step": 116665 }, { "epoch": 19.0326264274062, "grad_norm": 0.0116335554048419, "learning_rate": 7.112092011813842e-06, "loss": 0.0013, "num_input_tokens_seen": 251640768, "step": 116670 }, { "epoch": 19.033442088091356, "grad_norm": 0.020776070654392242, "learning_rate": 7.1001341278632e-06, "loss": 0.0022, "num_input_tokens_seen": 251650048, "step": 116675 }, { "epoch": 19.034257748776508, "grad_norm": 0.07412869483232498, "learning_rate": 7.08818623315366e-06, "loss": 0.0044, "num_input_tokens_seen": 251661248, "step": 116680 }, { "epoch": 19.035073409461663, "grad_norm": 0.003236723132431507, "learning_rate": 7.076248327927359e-06, "loss": 0.0018, "num_input_tokens_seen": 251672352, "step": 116685 }, { "epoch": 19.03588907014682, "grad_norm": 0.013173513114452362, "learning_rate": 7.064320412426162e-06, "loss": 0.0156, "num_input_tokens_seen": 251682688, "step": 116690 }, { "epoch": 19.036704730831975, "grad_norm": 0.02193308062851429, "learning_rate": 7.052402486891818e-06, "loss": 0.0096, "num_input_tokens_seen": 251694080, "step": 116695 }, { "epoch": 19.03752039151713, "grad_norm": 0.003878583898767829, "learning_rate": 7.040494551565912e-06, "loss": 0.0031, "num_input_tokens_seen": 251704704, "step": 116700 }, { "epoch": 19.038336052202283, "grad_norm": 0.0030068554915487766, "learning_rate": 7.028596606689808e-06, "loss": 0.0032, "num_input_tokens_seen": 251715744, "step": 116705 }, { "epoch": 19.03915171288744, "grad_norm": 0.007508622948080301, "learning_rate": 7.016708652504477e-06, "loss": 0.0015, "num_input_tokens_seen": 251728096, "step": 116710 }, { "epoch": 19.039967373572594, "grad_norm": 0.02960384637117386, "learning_rate": 7.004830689251007e-06, "loss": 0.0022, "num_input_tokens_seen": 251738784, "step": 116715 }, { "epoch": 19.04078303425775, "grad_norm": 0.0021867721807211637, "learning_rate": 6.992962717170038e-06, "loss": 0.0008, "num_input_tokens_seen": 251748800, "step": 116720 }, { "epoch": 19.041598694942905, "grad_norm": 0.006749243475496769, "learning_rate": 6.981104736502042e-06, "loss": 0.0064, "num_input_tokens_seen": 251759680, "step": 116725 }, { "epoch": 19.042414355628058, "grad_norm": 0.12050947546958923, "learning_rate": 6.969256747487496e-06, "loss": 0.0092, "num_input_tokens_seen": 251770272, "step": 116730 }, { "epoch": 19.043230016313213, "grad_norm": 0.04304780066013336, "learning_rate": 6.957418750366318e-06, "loss": 0.0025, "num_input_tokens_seen": 251779904, "step": 116735 }, { "epoch": 19.04404567699837, "grad_norm": 0.007385640870779753, "learning_rate": 6.945590745378594e-06, "loss": 0.003, "num_input_tokens_seen": 251791296, "step": 116740 }, { "epoch": 19.044861337683525, "grad_norm": 0.0005054351058788598, "learning_rate": 6.9337727327639096e-06, "loss": 0.0062, "num_input_tokens_seen": 251801120, "step": 116745 }, { "epoch": 19.045676998368677, "grad_norm": 0.06654557585716248, "learning_rate": 6.921964712761853e-06, "loss": 0.0063, "num_input_tokens_seen": 251812032, "step": 116750 }, { "epoch": 19.046492659053833, "grad_norm": 0.0005299762124195695, "learning_rate": 6.910166685611674e-06, "loss": 0.0015, "num_input_tokens_seen": 251823968, "step": 116755 }, { "epoch": 19.04730831973899, "grad_norm": 0.026475977152585983, "learning_rate": 6.898378651552517e-06, "loss": 0.0078, "num_input_tokens_seen": 251833920, "step": 116760 }, { "epoch": 19.048123980424144, "grad_norm": 0.9855684638023376, "learning_rate": 6.88660061082319e-06, "loss": 0.0505, "num_input_tokens_seen": 251845024, "step": 116765 }, { "epoch": 19.0489396411093, "grad_norm": 0.0023344147484749556, "learning_rate": 6.874832563662559e-06, "loss": 0.0021, "num_input_tokens_seen": 251855776, "step": 116770 }, { "epoch": 19.049755301794452, "grad_norm": 0.014367961324751377, "learning_rate": 6.863074510308931e-06, "loss": 0.0058, "num_input_tokens_seen": 251866048, "step": 116775 }, { "epoch": 19.050570962479608, "grad_norm": 0.003604233730584383, "learning_rate": 6.851326451000783e-06, "loss": 0.019, "num_input_tokens_seen": 251877056, "step": 116780 }, { "epoch": 19.051386623164763, "grad_norm": 1.4208050966262817, "learning_rate": 6.839588385976036e-06, "loss": 0.061, "num_input_tokens_seen": 251887552, "step": 116785 }, { "epoch": 19.05220228384992, "grad_norm": 0.032239846885204315, "learning_rate": 6.827860315472667e-06, "loss": 0.0044, "num_input_tokens_seen": 251897760, "step": 116790 }, { "epoch": 19.053017944535075, "grad_norm": 0.02500569261610508, "learning_rate": 6.816142239728373e-06, "loss": 0.0657, "num_input_tokens_seen": 251907872, "step": 116795 }, { "epoch": 19.053833605220227, "grad_norm": 0.0011021732352674007, "learning_rate": 6.804434158980577e-06, "loss": 0.0538, "num_input_tokens_seen": 251917248, "step": 116800 }, { "epoch": 19.054649265905383, "grad_norm": 0.0004683133738581091, "learning_rate": 6.792736073466587e-06, "loss": 0.0021, "num_input_tokens_seen": 251927872, "step": 116805 }, { "epoch": 19.05546492659054, "grad_norm": 0.012258251197636127, "learning_rate": 6.781047983423439e-06, "loss": 0.001, "num_input_tokens_seen": 251938112, "step": 116810 }, { "epoch": 19.056280587275694, "grad_norm": 0.000691449735313654, "learning_rate": 6.769369889088106e-06, "loss": 0.0016, "num_input_tokens_seen": 251949216, "step": 116815 }, { "epoch": 19.05709624796085, "grad_norm": 0.41087207198143005, "learning_rate": 6.75770179069718e-06, "loss": 0.0135, "num_input_tokens_seen": 251960864, "step": 116820 }, { "epoch": 19.057911908646002, "grad_norm": 0.013794134370982647, "learning_rate": 6.746043688487136e-06, "loss": 0.0013, "num_input_tokens_seen": 251972608, "step": 116825 }, { "epoch": 19.058727569331158, "grad_norm": 0.0021597216837108135, "learning_rate": 6.734395582694286e-06, "loss": 0.002, "num_input_tokens_seen": 251983552, "step": 116830 }, { "epoch": 19.059543230016313, "grad_norm": 0.08024912327528, "learning_rate": 6.722757473554608e-06, "loss": 0.0032, "num_input_tokens_seen": 251994400, "step": 116835 }, { "epoch": 19.06035889070147, "grad_norm": 0.06934983283281326, "learning_rate": 6.71112936130408e-06, "loss": 0.0606, "num_input_tokens_seen": 252005408, "step": 116840 }, { "epoch": 19.061174551386625, "grad_norm": 0.0015880359569564462, "learning_rate": 6.6995112461782355e-06, "loss": 0.0025, "num_input_tokens_seen": 252015232, "step": 116845 }, { "epoch": 19.061990212071777, "grad_norm": 0.0008570001809857786, "learning_rate": 6.6879031284126646e-06, "loss": 0.0377, "num_input_tokens_seen": 252026880, "step": 116850 }, { "epoch": 19.062805872756933, "grad_norm": 0.16858051717281342, "learning_rate": 6.676305008242512e-06, "loss": 0.0074, "num_input_tokens_seen": 252038592, "step": 116855 }, { "epoch": 19.063621533442088, "grad_norm": 0.005522600840777159, "learning_rate": 6.664716885902811e-06, "loss": 0.0078, "num_input_tokens_seen": 252049248, "step": 116860 }, { "epoch": 19.064437194127244, "grad_norm": 0.0033199612516909838, "learning_rate": 6.653138761628541e-06, "loss": 0.0048, "num_input_tokens_seen": 252059872, "step": 116865 }, { "epoch": 19.0652528548124, "grad_norm": 0.00818253867328167, "learning_rate": 6.641570635654182e-06, "loss": 0.0059, "num_input_tokens_seen": 252070848, "step": 116870 }, { "epoch": 19.06606851549755, "grad_norm": 0.05451688915491104, "learning_rate": 6.630012508214322e-06, "loss": 0.0026, "num_input_tokens_seen": 252080832, "step": 116875 }, { "epoch": 19.066884176182707, "grad_norm": 0.000597434351220727, "learning_rate": 6.618464379543166e-06, "loss": 0.0053, "num_input_tokens_seen": 252092256, "step": 116880 }, { "epoch": 19.067699836867863, "grad_norm": 0.03946010395884514, "learning_rate": 6.6069262498746895e-06, "loss": 0.0045, "num_input_tokens_seen": 252102112, "step": 116885 }, { "epoch": 19.06851549755302, "grad_norm": 0.18442970514297485, "learning_rate": 6.595398119442764e-06, "loss": 0.0016, "num_input_tokens_seen": 252113184, "step": 116890 }, { "epoch": 19.069331158238175, "grad_norm": 0.0022198837250471115, "learning_rate": 6.583879988481034e-06, "loss": 0.0007, "num_input_tokens_seen": 252124704, "step": 116895 }, { "epoch": 19.070146818923327, "grad_norm": 0.0005130788777023554, "learning_rate": 6.572371857222925e-06, "loss": 0.0447, "num_input_tokens_seen": 252134240, "step": 116900 }, { "epoch": 19.070962479608482, "grad_norm": 0.0011946576414629817, "learning_rate": 6.560873725901695e-06, "loss": 0.0198, "num_input_tokens_seen": 252144736, "step": 116905 }, { "epoch": 19.071778140293638, "grad_norm": 0.007925347425043583, "learning_rate": 6.5493855947502674e-06, "loss": 0.0011, "num_input_tokens_seen": 252156352, "step": 116910 }, { "epoch": 19.072593800978794, "grad_norm": 0.005744624882936478, "learning_rate": 6.537907464001569e-06, "loss": 0.001, "num_input_tokens_seen": 252167904, "step": 116915 }, { "epoch": 19.07340946166395, "grad_norm": 0.0015677028568461537, "learning_rate": 6.5264393338881345e-06, "loss": 0.0006, "num_input_tokens_seen": 252178272, "step": 116920 }, { "epoch": 19.0742251223491, "grad_norm": 0.004115057643502951, "learning_rate": 6.514981204642445e-06, "loss": 0.0008, "num_input_tokens_seen": 252188928, "step": 116925 }, { "epoch": 19.075040783034257, "grad_norm": 0.0012294430052861571, "learning_rate": 6.503533076496704e-06, "loss": 0.0086, "num_input_tokens_seen": 252199808, "step": 116930 }, { "epoch": 19.075856443719413, "grad_norm": 0.0017336987657472491, "learning_rate": 6.492094949682892e-06, "loss": 0.0205, "num_input_tokens_seen": 252210208, "step": 116935 }, { "epoch": 19.07667210440457, "grad_norm": 0.28334999084472656, "learning_rate": 6.480666824432879e-06, "loss": 0.007, "num_input_tokens_seen": 252221152, "step": 116940 }, { "epoch": 19.07748776508972, "grad_norm": 0.031605396419763565, "learning_rate": 6.469248700978148e-06, "loss": 0.0333, "num_input_tokens_seen": 252231616, "step": 116945 }, { "epoch": 19.078303425774877, "grad_norm": 0.0064923204481601715, "learning_rate": 6.457840579550234e-06, "loss": 0.0236, "num_input_tokens_seen": 252242400, "step": 116950 }, { "epoch": 19.079119086460032, "grad_norm": 0.0026066426653414965, "learning_rate": 6.4464424603802865e-06, "loss": 0.0038, "num_input_tokens_seen": 252252352, "step": 116955 }, { "epoch": 19.079934747145188, "grad_norm": 0.009466147981584072, "learning_rate": 6.435054343699287e-06, "loss": 0.0071, "num_input_tokens_seen": 252262400, "step": 116960 }, { "epoch": 19.080750407830344, "grad_norm": 0.0048973639495670795, "learning_rate": 6.423676229738051e-06, "loss": 0.0023, "num_input_tokens_seen": 252273440, "step": 116965 }, { "epoch": 19.081566068515496, "grad_norm": 0.031225355342030525, "learning_rate": 6.412308118727117e-06, "loss": 0.0031, "num_input_tokens_seen": 252284736, "step": 116970 }, { "epoch": 19.08238172920065, "grad_norm": 0.1073719784617424, "learning_rate": 6.400950010896966e-06, "loss": 0.0054, "num_input_tokens_seen": 252294816, "step": 116975 }, { "epoch": 19.083197389885807, "grad_norm": 0.004490416031330824, "learning_rate": 6.389601906477693e-06, "loss": 0.001, "num_input_tokens_seen": 252305152, "step": 116980 }, { "epoch": 19.084013050570963, "grad_norm": 0.17070934176445007, "learning_rate": 6.378263805699391e-06, "loss": 0.0122, "num_input_tokens_seen": 252315648, "step": 116985 }, { "epoch": 19.08482871125612, "grad_norm": 0.0028736640233546495, "learning_rate": 6.36693570879171e-06, "loss": 0.0021, "num_input_tokens_seen": 252326624, "step": 116990 }, { "epoch": 19.08564437194127, "grad_norm": 0.1844363957643509, "learning_rate": 6.355617615984355e-06, "loss": 0.0055, "num_input_tokens_seen": 252336608, "step": 116995 }, { "epoch": 19.086460032626427, "grad_norm": 0.0075249457731842995, "learning_rate": 6.344309527506587e-06, "loss": 0.0037, "num_input_tokens_seen": 252347232, "step": 117000 }, { "epoch": 19.087275693311582, "grad_norm": 0.12839020788669586, "learning_rate": 6.333011443587722e-06, "loss": 0.0029, "num_input_tokens_seen": 252359168, "step": 117005 }, { "epoch": 19.088091353996738, "grad_norm": 0.0014731186674907804, "learning_rate": 6.3217233644565216e-06, "loss": 0.0034, "num_input_tokens_seen": 252371008, "step": 117010 }, { "epoch": 19.088907014681894, "grad_norm": 0.004167306236922741, "learning_rate": 6.3104452903419704e-06, "loss": 0.0009, "num_input_tokens_seen": 252381440, "step": 117015 }, { "epoch": 19.089722675367046, "grad_norm": 0.0023143987637013197, "learning_rate": 6.299177221472496e-06, "loss": 0.0464, "num_input_tokens_seen": 252393152, "step": 117020 }, { "epoch": 19.0905383360522, "grad_norm": 0.06549499183893204, "learning_rate": 6.287919158076472e-06, "loss": 0.0068, "num_input_tokens_seen": 252404736, "step": 117025 }, { "epoch": 19.091353996737357, "grad_norm": 0.005502500105649233, "learning_rate": 6.2766711003821035e-06, "loss": 0.0037, "num_input_tokens_seen": 252416256, "step": 117030 }, { "epoch": 19.092169657422513, "grad_norm": 0.008037969470024109, "learning_rate": 6.265433048617375e-06, "loss": 0.0071, "num_input_tokens_seen": 252426048, "step": 117035 }, { "epoch": 19.09298531810767, "grad_norm": 0.7470915913581848, "learning_rate": 6.254205003009938e-06, "loss": 0.2566, "num_input_tokens_seen": 252436928, "step": 117040 }, { "epoch": 19.09380097879282, "grad_norm": 0.475132554769516, "learning_rate": 6.242986963787445e-06, "loss": 0.0215, "num_input_tokens_seen": 252448960, "step": 117045 }, { "epoch": 19.094616639477977, "grad_norm": 0.005477204918861389, "learning_rate": 6.231778931177157e-06, "loss": 0.0043, "num_input_tokens_seen": 252459264, "step": 117050 }, { "epoch": 19.095432300163132, "grad_norm": 0.0025322719011455774, "learning_rate": 6.220580905406226e-06, "loss": 0.003, "num_input_tokens_seen": 252469632, "step": 117055 }, { "epoch": 19.096247960848288, "grad_norm": 0.3419979512691498, "learning_rate": 6.209392886701692e-06, "loss": 0.2325, "num_input_tokens_seen": 252480704, "step": 117060 }, { "epoch": 19.097063621533444, "grad_norm": 0.00174994976259768, "learning_rate": 6.198214875290209e-06, "loss": 0.0014, "num_input_tokens_seen": 252490976, "step": 117065 }, { "epoch": 19.097879282218596, "grad_norm": 0.0044839829206466675, "learning_rate": 6.187046871398316e-06, "loss": 0.003, "num_input_tokens_seen": 252502688, "step": 117070 }, { "epoch": 19.09869494290375, "grad_norm": 0.0014323730720207095, "learning_rate": 6.175888875252389e-06, "loss": 0.0039, "num_input_tokens_seen": 252514144, "step": 117075 }, { "epoch": 19.099510603588907, "grad_norm": 0.10081123560667038, "learning_rate": 6.1647408870785236e-06, "loss": 0.0036, "num_input_tokens_seen": 252525952, "step": 117080 }, { "epoch": 19.100326264274063, "grad_norm": 0.032982561737298965, "learning_rate": 6.1536029071025955e-06, "loss": 0.0823, "num_input_tokens_seen": 252537152, "step": 117085 }, { "epoch": 19.10114192495922, "grad_norm": 0.0012489905348047614, "learning_rate": 6.142474935550535e-06, "loss": 0.0042, "num_input_tokens_seen": 252548288, "step": 117090 }, { "epoch": 19.10195758564437, "grad_norm": 0.018998196348547935, "learning_rate": 6.131356972647606e-06, "loss": 0.001, "num_input_tokens_seen": 252559648, "step": 117095 }, { "epoch": 19.102773246329527, "grad_norm": 0.014564228244125843, "learning_rate": 6.120249018619295e-06, "loss": 0.0488, "num_input_tokens_seen": 252568352, "step": 117100 }, { "epoch": 19.103588907014682, "grad_norm": 0.0015452841762453318, "learning_rate": 6.109151073690644e-06, "loss": 0.0567, "num_input_tokens_seen": 252578880, "step": 117105 }, { "epoch": 19.104404567699838, "grad_norm": 0.0030026016756892204, "learning_rate": 6.0980631380866405e-06, "loss": 0.0036, "num_input_tokens_seen": 252589216, "step": 117110 }, { "epoch": 19.10522022838499, "grad_norm": 0.023995254188776016, "learning_rate": 6.086985212031881e-06, "loss": 0.0029, "num_input_tokens_seen": 252598944, "step": 117115 }, { "epoch": 19.106035889070146, "grad_norm": 0.00553120207041502, "learning_rate": 6.075917295750965e-06, "loss": 0.0628, "num_input_tokens_seen": 252610080, "step": 117120 }, { "epoch": 19.1068515497553, "grad_norm": 0.057313837110996246, "learning_rate": 6.064859389468158e-06, "loss": 0.0025, "num_input_tokens_seen": 252621088, "step": 117125 }, { "epoch": 19.107667210440457, "grad_norm": 0.007232170086354017, "learning_rate": 6.053811493407613e-06, "loss": 0.0239, "num_input_tokens_seen": 252631808, "step": 117130 }, { "epoch": 19.108482871125613, "grad_norm": 0.3389255106449127, "learning_rate": 6.04277360779315e-06, "loss": 0.0527, "num_input_tokens_seen": 252641920, "step": 117135 }, { "epoch": 19.109298531810765, "grad_norm": 2.1491196155548096, "learning_rate": 6.031745732848593e-06, "loss": 0.0911, "num_input_tokens_seen": 252653056, "step": 117140 }, { "epoch": 19.11011419249592, "grad_norm": 0.007699198089540005, "learning_rate": 6.02072786879726e-06, "loss": 0.004, "num_input_tokens_seen": 252664800, "step": 117145 }, { "epoch": 19.110929853181077, "grad_norm": 0.006537728011608124, "learning_rate": 6.009720015862585e-06, "loss": 0.0067, "num_input_tokens_seen": 252675872, "step": 117150 }, { "epoch": 19.111745513866232, "grad_norm": 0.002166560385376215, "learning_rate": 5.9987221742675566e-06, "loss": 0.0017, "num_input_tokens_seen": 252684352, "step": 117155 }, { "epoch": 19.112561174551388, "grad_norm": 0.0009376151137985289, "learning_rate": 5.987734344235107e-06, "loss": 0.0014, "num_input_tokens_seen": 252695264, "step": 117160 }, { "epoch": 19.11337683523654, "grad_norm": 0.04573298618197441, "learning_rate": 5.976756525987948e-06, "loss": 0.0343, "num_input_tokens_seen": 252705728, "step": 117165 }, { "epoch": 19.114192495921696, "grad_norm": 0.002298894105479121, "learning_rate": 5.965788719748566e-06, "loss": 0.0056, "num_input_tokens_seen": 252717440, "step": 117170 }, { "epoch": 19.11500815660685, "grad_norm": 0.2270878404378891, "learning_rate": 5.954830925739174e-06, "loss": 0.0051, "num_input_tokens_seen": 252728640, "step": 117175 }, { "epoch": 19.115823817292007, "grad_norm": 0.5213251113891602, "learning_rate": 5.943883144181872e-06, "loss": 0.0122, "num_input_tokens_seen": 252739168, "step": 117180 }, { "epoch": 19.116639477977163, "grad_norm": 0.0009164363727904856, "learning_rate": 5.932945375298537e-06, "loss": 0.0024, "num_input_tokens_seen": 252750080, "step": 117185 }, { "epoch": 19.117455138662315, "grad_norm": 0.2582615613937378, "learning_rate": 5.922017619310826e-06, "loss": 0.0059, "num_input_tokens_seen": 252760736, "step": 117190 }, { "epoch": 19.11827079934747, "grad_norm": 0.0007387922960333526, "learning_rate": 5.911099876440173e-06, "loss": 0.0069, "num_input_tokens_seen": 252770976, "step": 117195 }, { "epoch": 19.119086460032626, "grad_norm": 0.0013459996553137898, "learning_rate": 5.900192146907957e-06, "loss": 0.0019, "num_input_tokens_seen": 252781760, "step": 117200 }, { "epoch": 19.119902120717782, "grad_norm": 0.0284243356436491, "learning_rate": 5.889294430935111e-06, "loss": 0.0103, "num_input_tokens_seen": 252793024, "step": 117205 }, { "epoch": 19.120717781402938, "grad_norm": 0.014018491841852665, "learning_rate": 5.8784067287424584e-06, "loss": 0.0032, "num_input_tokens_seen": 252803648, "step": 117210 }, { "epoch": 19.12153344208809, "grad_norm": 0.014758551493287086, "learning_rate": 5.8675290405508785e-06, "loss": 0.003, "num_input_tokens_seen": 252813568, "step": 117215 }, { "epoch": 19.122349102773246, "grad_norm": 0.4107888340950012, "learning_rate": 5.856661366580584e-06, "loss": 0.0218, "num_input_tokens_seen": 252824928, "step": 117220 }, { "epoch": 19.1231647634584, "grad_norm": 0.21927295625209808, "learning_rate": 5.845803707051955e-06, "loss": 0.0052, "num_input_tokens_seen": 252835264, "step": 117225 }, { "epoch": 19.123980424143557, "grad_norm": 0.02431410364806652, "learning_rate": 5.834956062184926e-06, "loss": 0.0024, "num_input_tokens_seen": 252846016, "step": 117230 }, { "epoch": 19.124796084828713, "grad_norm": 0.021583184599876404, "learning_rate": 5.824118432199488e-06, "loss": 0.0642, "num_input_tokens_seen": 252857344, "step": 117235 }, { "epoch": 19.125611745513865, "grad_norm": 0.01124032773077488, "learning_rate": 5.813290817315131e-06, "loss": 0.0146, "num_input_tokens_seen": 252868448, "step": 117240 }, { "epoch": 19.12642740619902, "grad_norm": 0.004287295043468475, "learning_rate": 5.8024732177514585e-06, "loss": 0.0092, "num_input_tokens_seen": 252879392, "step": 117245 }, { "epoch": 19.127243066884176, "grad_norm": 0.005989894736558199, "learning_rate": 5.791665633727461e-06, "loss": 0.0017, "num_input_tokens_seen": 252889792, "step": 117250 }, { "epoch": 19.128058727569332, "grad_norm": 0.004384719766676426, "learning_rate": 5.780868065462408e-06, "loss": 0.0014, "num_input_tokens_seen": 252900704, "step": 117255 }, { "epoch": 19.128874388254488, "grad_norm": 0.04409981146454811, "learning_rate": 5.770080513174958e-06, "loss": 0.0021, "num_input_tokens_seen": 252911168, "step": 117260 }, { "epoch": 19.12969004893964, "grad_norm": 0.00992228277027607, "learning_rate": 5.75930297708388e-06, "loss": 0.0034, "num_input_tokens_seen": 252921248, "step": 117265 }, { "epoch": 19.130505709624796, "grad_norm": 0.2759452760219574, "learning_rate": 5.748535457407444e-06, "loss": 0.0092, "num_input_tokens_seen": 252932000, "step": 117270 }, { "epoch": 19.13132137030995, "grad_norm": 0.23765501379966736, "learning_rate": 5.737777954364032e-06, "loss": 0.0057, "num_input_tokens_seen": 252942720, "step": 117275 }, { "epoch": 19.132137030995107, "grad_norm": 0.04268978536128998, "learning_rate": 5.727030468171468e-06, "loss": 0.0027, "num_input_tokens_seen": 252953376, "step": 117280 }, { "epoch": 19.13295269168026, "grad_norm": 0.03579249233007431, "learning_rate": 5.71629299904769e-06, "loss": 0.0074, "num_input_tokens_seen": 252963840, "step": 117285 }, { "epoch": 19.133768352365415, "grad_norm": 0.1813647300004959, "learning_rate": 5.705565547210301e-06, "loss": 0.0094, "num_input_tokens_seen": 252975584, "step": 117290 }, { "epoch": 19.13458401305057, "grad_norm": 0.0008507302263751626, "learning_rate": 5.694848112876683e-06, "loss": 0.0093, "num_input_tokens_seen": 252985472, "step": 117295 }, { "epoch": 19.135399673735726, "grad_norm": 0.004285555332899094, "learning_rate": 5.684140696263995e-06, "loss": 0.0013, "num_input_tokens_seen": 252997216, "step": 117300 }, { "epoch": 19.136215334420882, "grad_norm": 0.007268323563039303, "learning_rate": 5.673443297589287e-06, "loss": 0.0015, "num_input_tokens_seen": 253008928, "step": 117305 }, { "epoch": 19.137030995106034, "grad_norm": 0.015987036749720573, "learning_rate": 5.662755917069384e-06, "loss": 0.0656, "num_input_tokens_seen": 253020352, "step": 117310 }, { "epoch": 19.13784665579119, "grad_norm": 0.017393572255969048, "learning_rate": 5.652078554920836e-06, "loss": 0.0008, "num_input_tokens_seen": 253031744, "step": 117315 }, { "epoch": 19.138662316476346, "grad_norm": 0.06776174157857895, "learning_rate": 5.6414112113600254e-06, "loss": 0.0021, "num_input_tokens_seen": 253042880, "step": 117320 }, { "epoch": 19.1394779771615, "grad_norm": 0.0012039857683703303, "learning_rate": 5.630753886603168e-06, "loss": 0.0025, "num_input_tokens_seen": 253053696, "step": 117325 }, { "epoch": 19.140293637846657, "grad_norm": 0.000722131400834769, "learning_rate": 5.6201065808662025e-06, "loss": 0.0031, "num_input_tokens_seen": 253064608, "step": 117330 }, { "epoch": 19.14110929853181, "grad_norm": 0.02362147718667984, "learning_rate": 5.609469294364955e-06, "loss": 0.0024, "num_input_tokens_seen": 253074976, "step": 117335 }, { "epoch": 19.141924959216965, "grad_norm": 0.0032658616546541452, "learning_rate": 5.598842027315032e-06, "loss": 0.0014, "num_input_tokens_seen": 253084416, "step": 117340 }, { "epoch": 19.14274061990212, "grad_norm": 0.000858460261952132, "learning_rate": 5.588224779931761e-06, "loss": 0.0007, "num_input_tokens_seen": 253094784, "step": 117345 }, { "epoch": 19.143556280587276, "grad_norm": 0.010698369704186916, "learning_rate": 5.577617552430303e-06, "loss": 0.0012, "num_input_tokens_seen": 253105408, "step": 117350 }, { "epoch": 19.144371941272432, "grad_norm": 0.5947797298431396, "learning_rate": 5.567020345025597e-06, "loss": 0.0139, "num_input_tokens_seen": 253116096, "step": 117355 }, { "epoch": 19.145187601957584, "grad_norm": 0.047917693853378296, "learning_rate": 5.556433157932528e-06, "loss": 0.0034, "num_input_tokens_seen": 253126272, "step": 117360 }, { "epoch": 19.14600326264274, "grad_norm": 0.0012306952849030495, "learning_rate": 5.5458559913655335e-06, "loss": 0.1208, "num_input_tokens_seen": 253135840, "step": 117365 }, { "epoch": 19.146818923327896, "grad_norm": 0.028121288865804672, "learning_rate": 5.5352888455390546e-06, "loss": 0.0033, "num_input_tokens_seen": 253146656, "step": 117370 }, { "epoch": 19.14763458401305, "grad_norm": 0.0027935479301959276, "learning_rate": 5.524731720667197e-06, "loss": 0.0012, "num_input_tokens_seen": 253156864, "step": 117375 }, { "epoch": 19.148450244698207, "grad_norm": 0.01684625819325447, "learning_rate": 5.514184616964013e-06, "loss": 0.1028, "num_input_tokens_seen": 253167232, "step": 117380 }, { "epoch": 19.14926590538336, "grad_norm": 0.06111212447285652, "learning_rate": 5.503647534643108e-06, "loss": 0.0282, "num_input_tokens_seen": 253178208, "step": 117385 }, { "epoch": 19.150081566068515, "grad_norm": 0.6507181525230408, "learning_rate": 5.493120473918145e-06, "loss": 0.026, "num_input_tokens_seen": 253187936, "step": 117390 }, { "epoch": 19.15089722675367, "grad_norm": 0.004325888119637966, "learning_rate": 5.4826034350023426e-06, "loss": 0.0164, "num_input_tokens_seen": 253198976, "step": 117395 }, { "epoch": 19.151712887438826, "grad_norm": 0.30134132504463196, "learning_rate": 5.472096418108974e-06, "loss": 0.0073, "num_input_tokens_seen": 253210016, "step": 117400 }, { "epoch": 19.152528548123982, "grad_norm": 0.048335928469896317, "learning_rate": 5.461599423450924e-06, "loss": 0.0056, "num_input_tokens_seen": 253220192, "step": 117405 }, { "epoch": 19.153344208809134, "grad_norm": 0.0022684589494019747, "learning_rate": 5.451112451240914e-06, "loss": 0.0009, "num_input_tokens_seen": 253230816, "step": 117410 }, { "epoch": 19.15415986949429, "grad_norm": 0.00986073911190033, "learning_rate": 5.440635501691493e-06, "loss": 0.0024, "num_input_tokens_seen": 253243040, "step": 117415 }, { "epoch": 19.154975530179446, "grad_norm": 0.0014062334084883332, "learning_rate": 5.4301685750149935e-06, "loss": 0.0019, "num_input_tokens_seen": 253251584, "step": 117420 }, { "epoch": 19.1557911908646, "grad_norm": 0.012442125007510185, "learning_rate": 5.419711671423577e-06, "loss": 0.0088, "num_input_tokens_seen": 253262656, "step": 117425 }, { "epoch": 19.156606851549757, "grad_norm": 0.01120783668011427, "learning_rate": 5.409264791129076e-06, "loss": 0.0037, "num_input_tokens_seen": 253272576, "step": 117430 }, { "epoch": 19.15742251223491, "grad_norm": 0.49347108602523804, "learning_rate": 5.398827934343264e-06, "loss": 0.005, "num_input_tokens_seen": 253282464, "step": 117435 }, { "epoch": 19.158238172920065, "grad_norm": 0.0008438205695711076, "learning_rate": 5.38840110127764e-06, "loss": 0.0014, "num_input_tokens_seen": 253294016, "step": 117440 }, { "epoch": 19.15905383360522, "grad_norm": 0.05520523712038994, "learning_rate": 5.377984292143534e-06, "loss": 0.0022, "num_input_tokens_seen": 253305376, "step": 117445 }, { "epoch": 19.159869494290376, "grad_norm": 0.023860836401581764, "learning_rate": 5.367577507152055e-06, "loss": 0.0033, "num_input_tokens_seen": 253316000, "step": 117450 }, { "epoch": 19.160685154975532, "grad_norm": 0.0024914753157645464, "learning_rate": 5.35718074651409e-06, "loss": 0.0048, "num_input_tokens_seen": 253327520, "step": 117455 }, { "epoch": 19.161500815660684, "grad_norm": 0.002385669155046344, "learning_rate": 5.346794010440359e-06, "loss": 0.0063, "num_input_tokens_seen": 253337568, "step": 117460 }, { "epoch": 19.16231647634584, "grad_norm": 0.24927610158920288, "learning_rate": 5.336417299141361e-06, "loss": 0.0198, "num_input_tokens_seen": 253348960, "step": 117465 }, { "epoch": 19.163132137030995, "grad_norm": 0.2020181566476822, "learning_rate": 5.326050612827426e-06, "loss": 0.0715, "num_input_tokens_seen": 253359936, "step": 117470 }, { "epoch": 19.16394779771615, "grad_norm": 0.0018167552771046758, "learning_rate": 5.315693951708555e-06, "loss": 0.0239, "num_input_tokens_seen": 253371808, "step": 117475 }, { "epoch": 19.164763458401303, "grad_norm": 0.019198672845959663, "learning_rate": 5.305347315994747e-06, "loss": 0.0087, "num_input_tokens_seen": 253382720, "step": 117480 }, { "epoch": 19.16557911908646, "grad_norm": 0.029304277151823044, "learning_rate": 5.295010705895609e-06, "loss": 0.0013, "num_input_tokens_seen": 253393120, "step": 117485 }, { "epoch": 19.166394779771615, "grad_norm": 0.0020602981094270945, "learning_rate": 5.284684121620697e-06, "loss": 0.0007, "num_input_tokens_seen": 253404640, "step": 117490 }, { "epoch": 19.16721044045677, "grad_norm": 0.0025340435095131397, "learning_rate": 5.2743675633792345e-06, "loss": 0.0397, "num_input_tokens_seen": 253415712, "step": 117495 }, { "epoch": 19.168026101141926, "grad_norm": 0.10034231841564178, "learning_rate": 5.264061031380274e-06, "loss": 0.002, "num_input_tokens_seen": 253425088, "step": 117500 }, { "epoch": 19.16884176182708, "grad_norm": 0.09268267452716827, "learning_rate": 5.253764525832761e-06, "loss": 0.0033, "num_input_tokens_seen": 253434592, "step": 117505 }, { "epoch": 19.169657422512234, "grad_norm": 0.00356184970587492, "learning_rate": 5.243478046945305e-06, "loss": 0.0021, "num_input_tokens_seen": 253447104, "step": 117510 }, { "epoch": 19.17047308319739, "grad_norm": 0.08627818524837494, "learning_rate": 5.233201594926462e-06, "loss": 0.0059, "num_input_tokens_seen": 253457120, "step": 117515 }, { "epoch": 19.171288743882545, "grad_norm": 0.23849062621593475, "learning_rate": 5.222935169984455e-06, "loss": 0.017, "num_input_tokens_seen": 253469088, "step": 117520 }, { "epoch": 19.1721044045677, "grad_norm": 0.0035332636907696724, "learning_rate": 5.212678772327284e-06, "loss": 0.0041, "num_input_tokens_seen": 253479584, "step": 117525 }, { "epoch": 19.172920065252853, "grad_norm": 0.007696598302572966, "learning_rate": 5.202432402162893e-06, "loss": 0.0166, "num_input_tokens_seen": 253491552, "step": 117530 }, { "epoch": 19.17373572593801, "grad_norm": 0.004243612289428711, "learning_rate": 5.192196059698895e-06, "loss": 0.0031, "num_input_tokens_seen": 253500864, "step": 117535 }, { "epoch": 19.174551386623165, "grad_norm": 0.00108912936411798, "learning_rate": 5.18196974514279e-06, "loss": 0.0039, "num_input_tokens_seen": 253510432, "step": 117540 }, { "epoch": 19.17536704730832, "grad_norm": 0.12107149511575699, "learning_rate": 5.1717534587017445e-06, "loss": 0.0048, "num_input_tokens_seen": 253521248, "step": 117545 }, { "epoch": 19.176182707993476, "grad_norm": 2.5671796798706055, "learning_rate": 5.161547200582872e-06, "loss": 0.1452, "num_input_tokens_seen": 253531744, "step": 117550 }, { "epoch": 19.17699836867863, "grad_norm": 0.0007515217876061797, "learning_rate": 5.151350970993007e-06, "loss": 0.0007, "num_input_tokens_seen": 253542016, "step": 117555 }, { "epoch": 19.177814029363784, "grad_norm": 0.0018741495441645384, "learning_rate": 5.141164770138707e-06, "loss": 0.0012, "num_input_tokens_seen": 253553216, "step": 117560 }, { "epoch": 19.17862969004894, "grad_norm": 0.037890490144491196, "learning_rate": 5.130988598226527e-06, "loss": 0.0046, "num_input_tokens_seen": 253563360, "step": 117565 }, { "epoch": 19.179445350734095, "grad_norm": 0.03155801445245743, "learning_rate": 5.120822455462637e-06, "loss": 0.0092, "num_input_tokens_seen": 253573792, "step": 117570 }, { "epoch": 19.18026101141925, "grad_norm": 0.5582737326622009, "learning_rate": 5.110666342053094e-06, "loss": 0.0133, "num_input_tokens_seen": 253584864, "step": 117575 }, { "epoch": 19.181076672104403, "grad_norm": 0.03286183997988701, "learning_rate": 5.100520258203734e-06, "loss": 0.0025, "num_input_tokens_seen": 253594880, "step": 117580 }, { "epoch": 19.18189233278956, "grad_norm": 0.7584715485572815, "learning_rate": 5.090384204120113e-06, "loss": 0.0116, "num_input_tokens_seen": 253604896, "step": 117585 }, { "epoch": 19.182707993474715, "grad_norm": 0.0029940763488411903, "learning_rate": 5.08025818000768e-06, "loss": 0.0025, "num_input_tokens_seen": 253614624, "step": 117590 }, { "epoch": 19.18352365415987, "grad_norm": 0.8081431984901428, "learning_rate": 5.0701421860717135e-06, "loss": 0.034, "num_input_tokens_seen": 253626272, "step": 117595 }, { "epoch": 19.184339314845026, "grad_norm": 0.005797778721898794, "learning_rate": 5.060036222517161e-06, "loss": 0.0064, "num_input_tokens_seen": 253637344, "step": 117600 }, { "epoch": 19.18515497553018, "grad_norm": 0.05412788316607475, "learning_rate": 5.049940289548804e-06, "loss": 0.0027, "num_input_tokens_seen": 253648800, "step": 117605 }, { "epoch": 19.185970636215334, "grad_norm": 0.034146640449762344, "learning_rate": 5.039854387371368e-06, "loss": 0.0733, "num_input_tokens_seen": 253659648, "step": 117610 }, { "epoch": 19.18678629690049, "grad_norm": 0.06328219920396805, "learning_rate": 5.0297785161891315e-06, "loss": 0.0054, "num_input_tokens_seen": 253670144, "step": 117615 }, { "epoch": 19.187601957585645, "grad_norm": 0.01284455880522728, "learning_rate": 5.019712676206323e-06, "loss": 0.0102, "num_input_tokens_seen": 253680928, "step": 117620 }, { "epoch": 19.1884176182708, "grad_norm": 0.20250487327575684, "learning_rate": 5.009656867627055e-06, "loss": 0.0011, "num_input_tokens_seen": 253690304, "step": 117625 }, { "epoch": 19.189233278955953, "grad_norm": 0.04685455933213234, "learning_rate": 4.999611090654943e-06, "loss": 0.0201, "num_input_tokens_seen": 253702080, "step": 117630 }, { "epoch": 19.19004893964111, "grad_norm": 0.05707656964659691, "learning_rate": 4.989575345493713e-06, "loss": 0.0026, "num_input_tokens_seen": 253713440, "step": 117635 }, { "epoch": 19.190864600326265, "grad_norm": 0.0013502791989594698, "learning_rate": 4.979549632346702e-06, "loss": 0.0137, "num_input_tokens_seen": 253725152, "step": 117640 }, { "epoch": 19.19168026101142, "grad_norm": 0.9162588119506836, "learning_rate": 4.969533951417082e-06, "loss": 0.0143, "num_input_tokens_seen": 253734976, "step": 117645 }, { "epoch": 19.192495921696572, "grad_norm": 0.00969085842370987, "learning_rate": 4.959528302907857e-06, "loss": 0.098, "num_input_tokens_seen": 253745152, "step": 117650 }, { "epoch": 19.193311582381728, "grad_norm": 0.02400083653628826, "learning_rate": 4.949532687021751e-06, "loss": 0.0028, "num_input_tokens_seen": 253754112, "step": 117655 }, { "epoch": 19.194127243066884, "grad_norm": 0.013779951259493828, "learning_rate": 4.939547103961439e-06, "loss": 0.0033, "num_input_tokens_seen": 253765056, "step": 117660 }, { "epoch": 19.19494290375204, "grad_norm": 0.08369942009449005, "learning_rate": 4.929571553929202e-06, "loss": 0.0049, "num_input_tokens_seen": 253774752, "step": 117665 }, { "epoch": 19.195758564437195, "grad_norm": 0.0046674944460392, "learning_rate": 4.919606037127267e-06, "loss": 0.0009, "num_input_tokens_seen": 253785184, "step": 117670 }, { "epoch": 19.196574225122347, "grad_norm": 0.02739865705370903, "learning_rate": 4.909650553757583e-06, "loss": 0.0088, "num_input_tokens_seen": 253795808, "step": 117675 }, { "epoch": 19.197389885807503, "grad_norm": 0.005417009815573692, "learning_rate": 4.8997051040218235e-06, "loss": 0.0023, "num_input_tokens_seen": 253807200, "step": 117680 }, { "epoch": 19.19820554649266, "grad_norm": 0.11733714491128922, "learning_rate": 4.889769688121715e-06, "loss": 0.005, "num_input_tokens_seen": 253817888, "step": 117685 }, { "epoch": 19.199021207177815, "grad_norm": 0.006482226308435202, "learning_rate": 4.87984430625843e-06, "loss": 0.0059, "num_input_tokens_seen": 253827424, "step": 117690 }, { "epoch": 19.19983686786297, "grad_norm": 0.008640163578093052, "learning_rate": 4.869928958633252e-06, "loss": 0.0017, "num_input_tokens_seen": 253838176, "step": 117695 }, { "epoch": 19.200652528548122, "grad_norm": 0.007539651822298765, "learning_rate": 4.860023645447076e-06, "loss": 0.0019, "num_input_tokens_seen": 253849152, "step": 117700 }, { "epoch": 19.201468189233278, "grad_norm": 0.15219734609127045, "learning_rate": 4.85012836690063e-06, "loss": 0.0051, "num_input_tokens_seen": 253860352, "step": 117705 }, { "epoch": 19.202283849918434, "grad_norm": 0.02061980962753296, "learning_rate": 4.840243123194477e-06, "loss": 0.0055, "num_input_tokens_seen": 253871456, "step": 117710 }, { "epoch": 19.20309951060359, "grad_norm": 0.00217765336856246, "learning_rate": 4.83036791452901e-06, "loss": 0.001, "num_input_tokens_seen": 253881760, "step": 117715 }, { "epoch": 19.203915171288745, "grad_norm": 0.12017388641834259, "learning_rate": 4.820502741104238e-06, "loss": 0.0031, "num_input_tokens_seen": 253893184, "step": 117720 }, { "epoch": 19.204730831973897, "grad_norm": 0.05387655645608902, "learning_rate": 4.810647603120166e-06, "loss": 0.1077, "num_input_tokens_seen": 253902720, "step": 117725 }, { "epoch": 19.205546492659053, "grad_norm": 0.0011076066875830293, "learning_rate": 4.800802500776524e-06, "loss": 0.0025, "num_input_tokens_seen": 253913248, "step": 117730 }, { "epoch": 19.20636215334421, "grad_norm": 0.05855204537510872, "learning_rate": 4.790967434272819e-06, "loss": 0.0021, "num_input_tokens_seen": 253923616, "step": 117735 }, { "epoch": 19.207177814029365, "grad_norm": 0.008936426602303982, "learning_rate": 4.781142403808392e-06, "loss": 0.0025, "num_input_tokens_seen": 253933440, "step": 117740 }, { "epoch": 19.20799347471452, "grad_norm": 0.024678170680999756, "learning_rate": 4.771327409582305e-06, "loss": 0.0012, "num_input_tokens_seen": 253943168, "step": 117745 }, { "epoch": 19.208809135399672, "grad_norm": 0.002226482843980193, "learning_rate": 4.761522451793565e-06, "loss": 0.0006, "num_input_tokens_seen": 253955328, "step": 117750 }, { "epoch": 19.209624796084828, "grad_norm": 0.017716385424137115, "learning_rate": 4.751727530640793e-06, "loss": 0.006, "num_input_tokens_seen": 253966688, "step": 117755 }, { "epoch": 19.210440456769984, "grad_norm": 0.5473878383636475, "learning_rate": 4.74194264632255e-06, "loss": 0.0303, "num_input_tokens_seen": 253978176, "step": 117760 }, { "epoch": 19.21125611745514, "grad_norm": 0.004080644808709621, "learning_rate": 4.732167799037068e-06, "loss": 0.0067, "num_input_tokens_seen": 253988736, "step": 117765 }, { "epoch": 19.212071778140295, "grad_norm": 0.12208492308855057, "learning_rate": 4.722402988982577e-06, "loss": 0.0028, "num_input_tokens_seen": 253997856, "step": 117770 }, { "epoch": 19.212887438825447, "grad_norm": 0.001441897009499371, "learning_rate": 4.7126482163568075e-06, "loss": 0.0028, "num_input_tokens_seen": 254007936, "step": 117775 }, { "epoch": 19.213703099510603, "grad_norm": 0.0012886013137176633, "learning_rate": 4.702903481357601e-06, "loss": 0.0007, "num_input_tokens_seen": 254017760, "step": 117780 }, { "epoch": 19.21451876019576, "grad_norm": 0.0016197813674807549, "learning_rate": 4.693168784182356e-06, "loss": 0.0018, "num_input_tokens_seen": 254027424, "step": 117785 }, { "epoch": 19.215334420880914, "grad_norm": 0.008906791917979717, "learning_rate": 4.6834441250284135e-06, "loss": 0.004, "num_input_tokens_seen": 254038304, "step": 117790 }, { "epoch": 19.21615008156607, "grad_norm": 0.005568936467170715, "learning_rate": 4.673729504092783e-06, "loss": 0.0031, "num_input_tokens_seen": 254049504, "step": 117795 }, { "epoch": 19.216965742251222, "grad_norm": 0.0010722267907112837, "learning_rate": 4.664024921572419e-06, "loss": 0.0179, "num_input_tokens_seen": 254060704, "step": 117800 }, { "epoch": 19.217781402936378, "grad_norm": 0.006914776284247637, "learning_rate": 4.654330377663996e-06, "loss": 0.0011, "num_input_tokens_seen": 254072320, "step": 117805 }, { "epoch": 19.218597063621534, "grad_norm": 0.015252461656928062, "learning_rate": 4.644645872563913e-06, "loss": 0.0035, "num_input_tokens_seen": 254082688, "step": 117810 }, { "epoch": 19.21941272430669, "grad_norm": 0.01141372136771679, "learning_rate": 4.634971406468514e-06, "loss": 0.0019, "num_input_tokens_seen": 254094080, "step": 117815 }, { "epoch": 19.22022838499184, "grad_norm": 0.04872119426727295, "learning_rate": 4.625306979573807e-06, "loss": 0.004, "num_input_tokens_seen": 254104928, "step": 117820 }, { "epoch": 19.221044045676997, "grad_norm": 0.0028321316931396723, "learning_rate": 4.615652592075747e-06, "loss": 0.0078, "num_input_tokens_seen": 254117760, "step": 117825 }, { "epoch": 19.221859706362153, "grad_norm": 0.008213963359594345, "learning_rate": 4.606008244169846e-06, "loss": 0.014, "num_input_tokens_seen": 254128896, "step": 117830 }, { "epoch": 19.22267536704731, "grad_norm": 0.009492597542703152, "learning_rate": 4.596373936051667e-06, "loss": 0.0026, "num_input_tokens_seen": 254140960, "step": 117835 }, { "epoch": 19.223491027732464, "grad_norm": 0.003203229047358036, "learning_rate": 4.586749667916446e-06, "loss": 0.003, "num_input_tokens_seen": 254152640, "step": 117840 }, { "epoch": 19.224306688417617, "grad_norm": 0.001191652030684054, "learning_rate": 4.57713543995919e-06, "loss": 0.0039, "num_input_tokens_seen": 254162912, "step": 117845 }, { "epoch": 19.225122349102772, "grad_norm": 0.06796732544898987, "learning_rate": 4.567531252374801e-06, "loss": 0.0051, "num_input_tokens_seen": 254174048, "step": 117850 }, { "epoch": 19.225938009787928, "grad_norm": 0.0674242153763771, "learning_rate": 4.557937105357901e-06, "loss": 0.003, "num_input_tokens_seen": 254184160, "step": 117855 }, { "epoch": 19.226753670473084, "grad_norm": 0.0026770527474582195, "learning_rate": 4.54835299910289e-06, "loss": 0.0011, "num_input_tokens_seen": 254194496, "step": 117860 }, { "epoch": 19.22756933115824, "grad_norm": 1.078954815864563, "learning_rate": 4.5387789338040555e-06, "loss": 0.1206, "num_input_tokens_seen": 254205312, "step": 117865 }, { "epoch": 19.22838499184339, "grad_norm": 0.0185884777456522, "learning_rate": 4.529214909655355e-06, "loss": 0.0022, "num_input_tokens_seen": 254217152, "step": 117870 }, { "epoch": 19.229200652528547, "grad_norm": 0.0012162190396338701, "learning_rate": 4.519660926850744e-06, "loss": 0.0028, "num_input_tokens_seen": 254227456, "step": 117875 }, { "epoch": 19.230016313213703, "grad_norm": 0.0021928332280367613, "learning_rate": 4.510116985583679e-06, "loss": 0.0013, "num_input_tokens_seen": 254238464, "step": 117880 }, { "epoch": 19.23083197389886, "grad_norm": 0.0030982759781181812, "learning_rate": 4.500583086047782e-06, "loss": 0.0013, "num_input_tokens_seen": 254248896, "step": 117885 }, { "epoch": 19.231647634584014, "grad_norm": 0.0018363152630627155, "learning_rate": 4.491059228436012e-06, "loss": 0.0011, "num_input_tokens_seen": 254259072, "step": 117890 }, { "epoch": 19.232463295269167, "grad_norm": 0.000521830574143678, "learning_rate": 4.481545412941657e-06, "loss": 0.0031, "num_input_tokens_seen": 254270624, "step": 117895 }, { "epoch": 19.233278955954322, "grad_norm": 0.2740010619163513, "learning_rate": 4.472041639757285e-06, "loss": 0.0067, "num_input_tokens_seen": 254282912, "step": 117900 }, { "epoch": 19.234094616639478, "grad_norm": 0.0027572151739150286, "learning_rate": 4.462547909075687e-06, "loss": 0.0016, "num_input_tokens_seen": 254294752, "step": 117905 }, { "epoch": 19.234910277324634, "grad_norm": 0.03951915726065636, "learning_rate": 4.453064221089154e-06, "loss": 0.003, "num_input_tokens_seen": 254306528, "step": 117910 }, { "epoch": 19.23572593800979, "grad_norm": 0.02961686998605728, "learning_rate": 4.44359057598992e-06, "loss": 0.0027, "num_input_tokens_seen": 254316928, "step": 117915 }, { "epoch": 19.23654159869494, "grad_norm": 0.002008619485422969, "learning_rate": 4.434126973969998e-06, "loss": 0.0025, "num_input_tokens_seen": 254328512, "step": 117920 }, { "epoch": 19.237357259380097, "grad_norm": 0.0008880678797140718, "learning_rate": 4.424673415221181e-06, "loss": 0.0025, "num_input_tokens_seen": 254339584, "step": 117925 }, { "epoch": 19.238172920065253, "grad_norm": 0.0590706542134285, "learning_rate": 4.41522989993498e-06, "loss": 0.0053, "num_input_tokens_seen": 254350240, "step": 117930 }, { "epoch": 19.23898858075041, "grad_norm": 0.23491306602954865, "learning_rate": 4.405796428302855e-06, "loss": 0.0057, "num_input_tokens_seen": 254359904, "step": 117935 }, { "epoch": 19.239804241435564, "grad_norm": 0.0030116024427115917, "learning_rate": 4.396373000515986e-06, "loss": 0.0052, "num_input_tokens_seen": 254371712, "step": 117940 }, { "epoch": 19.240619902120716, "grad_norm": 0.0023326079826802015, "learning_rate": 4.3869596167653296e-06, "loss": 0.036, "num_input_tokens_seen": 254382240, "step": 117945 }, { "epoch": 19.241435562805872, "grad_norm": 0.003536358941346407, "learning_rate": 4.377556277241679e-06, "loss": 0.0013, "num_input_tokens_seen": 254392960, "step": 117950 }, { "epoch": 19.242251223491028, "grad_norm": 0.17972618341445923, "learning_rate": 4.368162982135604e-06, "loss": 0.0209, "num_input_tokens_seen": 254405216, "step": 117955 }, { "epoch": 19.243066884176184, "grad_norm": 0.001351052662357688, "learning_rate": 4.3587797316373965e-06, "loss": 0.0015, "num_input_tokens_seen": 254415456, "step": 117960 }, { "epoch": 19.24388254486134, "grad_norm": 0.0041278330609202385, "learning_rate": 4.34940652593735e-06, "loss": 0.0011, "num_input_tokens_seen": 254427584, "step": 117965 }, { "epoch": 19.24469820554649, "grad_norm": 0.002829907927662134, "learning_rate": 4.34004336522531e-06, "loss": 0.008, "num_input_tokens_seen": 254437824, "step": 117970 }, { "epoch": 19.245513866231647, "grad_norm": 0.012118655256927013, "learning_rate": 4.330690249691127e-06, "loss": 0.1022, "num_input_tokens_seen": 254449120, "step": 117975 }, { "epoch": 19.246329526916803, "grad_norm": 0.00830063782632351, "learning_rate": 4.321347179524316e-06, "loss": 0.002, "num_input_tokens_seen": 254461216, "step": 117980 }, { "epoch": 19.24714518760196, "grad_norm": 0.0008738120668567717, "learning_rate": 4.312014154914113e-06, "loss": 0.0011, "num_input_tokens_seen": 254471936, "step": 117985 }, { "epoch": 19.247960848287114, "grad_norm": 0.010977339930832386, "learning_rate": 4.302691176049922e-06, "loss": 0.0049, "num_input_tokens_seen": 254482784, "step": 117990 }, { "epoch": 19.248776508972266, "grad_norm": 0.004213379696011543, "learning_rate": 4.293378243120371e-06, "loss": 0.0244, "num_input_tokens_seen": 254492704, "step": 117995 }, { "epoch": 19.249592169657422, "grad_norm": 0.005055021494626999, "learning_rate": 4.284075356314476e-06, "loss": 0.0009, "num_input_tokens_seen": 254503584, "step": 118000 }, { "epoch": 19.250407830342578, "grad_norm": 0.05547285079956055, "learning_rate": 4.2747825158205855e-06, "loss": 0.0034, "num_input_tokens_seen": 254511360, "step": 118005 }, { "epoch": 19.251223491027734, "grad_norm": 0.0026612533256411552, "learning_rate": 4.265499721827159e-06, "loss": 0.0023, "num_input_tokens_seen": 254521952, "step": 118010 }, { "epoch": 19.252039151712886, "grad_norm": 0.0351443849503994, "learning_rate": 4.256226974522215e-06, "loss": 0.0034, "num_input_tokens_seen": 254532160, "step": 118015 }, { "epoch": 19.25285481239804, "grad_norm": 0.0037611257284879684, "learning_rate": 4.246964274093767e-06, "loss": 0.0034, "num_input_tokens_seen": 254542176, "step": 118020 }, { "epoch": 19.253670473083197, "grad_norm": 0.16852305829524994, "learning_rate": 4.237711620729501e-06, "loss": 0.0054, "num_input_tokens_seen": 254553952, "step": 118025 }, { "epoch": 19.254486133768353, "grad_norm": 0.0017705989303067327, "learning_rate": 4.228469014616931e-06, "loss": 0.0012, "num_input_tokens_seen": 254563520, "step": 118030 }, { "epoch": 19.25530179445351, "grad_norm": 0.10526105761528015, "learning_rate": 4.219236455943298e-06, "loss": 0.0058, "num_input_tokens_seen": 254575552, "step": 118035 }, { "epoch": 19.25611745513866, "grad_norm": 0.038174819201231, "learning_rate": 4.210013944895841e-06, "loss": 0.0056, "num_input_tokens_seen": 254586432, "step": 118040 }, { "epoch": 19.256933115823816, "grad_norm": 0.0014995165402069688, "learning_rate": 4.2008014816613534e-06, "loss": 0.0025, "num_input_tokens_seen": 254596512, "step": 118045 }, { "epoch": 19.257748776508972, "grad_norm": 0.06956733763217926, "learning_rate": 4.191599066426632e-06, "loss": 0.0031, "num_input_tokens_seen": 254607296, "step": 118050 }, { "epoch": 19.258564437194128, "grad_norm": 0.002530378755182028, "learning_rate": 4.182406699378138e-06, "loss": 0.0035, "num_input_tokens_seen": 254616832, "step": 118055 }, { "epoch": 19.259380097879284, "grad_norm": 0.24716614186763763, "learning_rate": 4.173224380702112e-06, "loss": 0.0059, "num_input_tokens_seen": 254627104, "step": 118060 }, { "epoch": 19.260195758564436, "grad_norm": 0.06442822515964508, "learning_rate": 4.164052110584737e-06, "loss": 0.0024, "num_input_tokens_seen": 254638464, "step": 118065 }, { "epoch": 19.26101141924959, "grad_norm": 0.0006701275706291199, "learning_rate": 4.154889889211866e-06, "loss": 0.0269, "num_input_tokens_seen": 254649600, "step": 118070 }, { "epoch": 19.261827079934747, "grad_norm": 0.0043263803236186504, "learning_rate": 4.145737716769182e-06, "loss": 0.0012, "num_input_tokens_seen": 254660384, "step": 118075 }, { "epoch": 19.262642740619903, "grad_norm": 0.0007175590726546943, "learning_rate": 4.136595593442149e-06, "loss": 0.0048, "num_input_tokens_seen": 254671360, "step": 118080 }, { "epoch": 19.26345840130506, "grad_norm": 0.10473741590976715, "learning_rate": 4.1274635194160086e-06, "loss": 0.0047, "num_input_tokens_seen": 254682464, "step": 118085 }, { "epoch": 19.26427406199021, "grad_norm": 0.0012198301265016198, "learning_rate": 4.118341494875944e-06, "loss": 0.0059, "num_input_tokens_seen": 254693472, "step": 118090 }, { "epoch": 19.265089722675366, "grad_norm": 0.00916962418705225, "learning_rate": 4.1092295200066966e-06, "loss": 0.0013, "num_input_tokens_seen": 254704992, "step": 118095 }, { "epoch": 19.265905383360522, "grad_norm": 0.012095187790691853, "learning_rate": 4.100127594993064e-06, "loss": 0.003, "num_input_tokens_seen": 254715296, "step": 118100 }, { "epoch": 19.266721044045678, "grad_norm": 0.08755134791135788, "learning_rate": 4.091035720019398e-06, "loss": 0.0039, "num_input_tokens_seen": 254725600, "step": 118105 }, { "epoch": 19.267536704730833, "grad_norm": 0.027095254510641098, "learning_rate": 4.081953895269996e-06, "loss": 0.0031, "num_input_tokens_seen": 254736768, "step": 118110 }, { "epoch": 19.268352365415986, "grad_norm": 0.003155405167490244, "learning_rate": 4.072882120928933e-06, "loss": 0.0043, "num_input_tokens_seen": 254748032, "step": 118115 }, { "epoch": 19.26916802610114, "grad_norm": 0.01777806133031845, "learning_rate": 4.063820397180007e-06, "loss": 0.0357, "num_input_tokens_seen": 254758720, "step": 118120 }, { "epoch": 19.269983686786297, "grad_norm": 0.005692991428077221, "learning_rate": 4.054768724206958e-06, "loss": 0.0013, "num_input_tokens_seen": 254769888, "step": 118125 }, { "epoch": 19.270799347471453, "grad_norm": 0.009835735894739628, "learning_rate": 4.045727102193087e-06, "loss": 0.0015, "num_input_tokens_seen": 254780640, "step": 118130 }, { "epoch": 19.27161500815661, "grad_norm": 0.01440081000328064, "learning_rate": 4.036695531321799e-06, "loss": 0.0027, "num_input_tokens_seen": 254791584, "step": 118135 }, { "epoch": 19.27243066884176, "grad_norm": 0.0032137101516127586, "learning_rate": 4.027674011776006e-06, "loss": 0.0015, "num_input_tokens_seen": 254802880, "step": 118140 }, { "epoch": 19.273246329526916, "grad_norm": 0.03580562025308609, "learning_rate": 4.018662543738616e-06, "loss": 0.0021, "num_input_tokens_seen": 254812992, "step": 118145 }, { "epoch": 19.274061990212072, "grad_norm": 0.006409763358533382, "learning_rate": 4.009661127392206e-06, "loss": 0.003, "num_input_tokens_seen": 254825184, "step": 118150 }, { "epoch": 19.274877650897228, "grad_norm": 0.0009287481661885977, "learning_rate": 4.00066976291924e-06, "loss": 0.0051, "num_input_tokens_seen": 254835840, "step": 118155 }, { "epoch": 19.275693311582383, "grad_norm": 0.06812703609466553, "learning_rate": 3.9916884505019065e-06, "loss": 0.0015, "num_input_tokens_seen": 254846432, "step": 118160 }, { "epoch": 19.276508972267536, "grad_norm": 0.002685232786461711, "learning_rate": 3.982717190322227e-06, "loss": 0.0113, "num_input_tokens_seen": 254857120, "step": 118165 }, { "epoch": 19.27732463295269, "grad_norm": 0.007741945795714855, "learning_rate": 3.973755982562055e-06, "loss": 0.0007, "num_input_tokens_seen": 254870560, "step": 118170 }, { "epoch": 19.278140293637847, "grad_norm": 0.047024499624967575, "learning_rate": 3.964804827402913e-06, "loss": 0.0018, "num_input_tokens_seen": 254882048, "step": 118175 }, { "epoch": 19.278955954323003, "grad_norm": 0.012042994610965252, "learning_rate": 3.955863725026321e-06, "loss": 0.0084, "num_input_tokens_seen": 254892384, "step": 118180 }, { "epoch": 19.27977161500816, "grad_norm": 0.001222998253069818, "learning_rate": 3.946932675613413e-06, "loss": 0.0035, "num_input_tokens_seen": 254903360, "step": 118185 }, { "epoch": 19.28058727569331, "grad_norm": 0.0008421693928539753, "learning_rate": 3.93801167934521e-06, "loss": 0.0021, "num_input_tokens_seen": 254913952, "step": 118190 }, { "epoch": 19.281402936378466, "grad_norm": 0.0010170169407501817, "learning_rate": 3.929100736402513e-06, "loss": 0.0085, "num_input_tokens_seen": 254923424, "step": 118195 }, { "epoch": 19.282218597063622, "grad_norm": 0.031334735453128815, "learning_rate": 3.920199846965844e-06, "loss": 0.0024, "num_input_tokens_seen": 254934912, "step": 118200 }, { "epoch": 19.283034257748778, "grad_norm": 0.041090648621320724, "learning_rate": 3.911309011215725e-06, "loss": 0.0041, "num_input_tokens_seen": 254946272, "step": 118205 }, { "epoch": 19.28384991843393, "grad_norm": 0.0029840560164302588, "learning_rate": 3.902428229332233e-06, "loss": 0.0027, "num_input_tokens_seen": 254957888, "step": 118210 }, { "epoch": 19.284665579119086, "grad_norm": 0.015510083176195621, "learning_rate": 3.8935575014953374e-06, "loss": 0.0039, "num_input_tokens_seen": 254967776, "step": 118215 }, { "epoch": 19.28548123980424, "grad_norm": 0.08233433216810226, "learning_rate": 3.884696827884893e-06, "loss": 0.0032, "num_input_tokens_seen": 254979616, "step": 118220 }, { "epoch": 19.286296900489397, "grad_norm": 0.24252121150493622, "learning_rate": 3.8758462086804225e-06, "loss": 0.0062, "num_input_tokens_seen": 254991328, "step": 118225 }, { "epoch": 19.287112561174553, "grad_norm": 0.004716339986771345, "learning_rate": 3.867005644061283e-06, "loss": 0.0007, "num_input_tokens_seen": 255001184, "step": 118230 }, { "epoch": 19.287928221859705, "grad_norm": 0.04423563554883003, "learning_rate": 3.8581751342067205e-06, "loss": 0.0028, "num_input_tokens_seen": 255012864, "step": 118235 }, { "epoch": 19.28874388254486, "grad_norm": 0.061014335602521896, "learning_rate": 3.849354679295591e-06, "loss": 0.0021, "num_input_tokens_seen": 255023232, "step": 118240 }, { "epoch": 19.289559543230016, "grad_norm": 0.004110171925276518, "learning_rate": 3.840544279506753e-06, "loss": 0.0838, "num_input_tokens_seen": 255035232, "step": 118245 }, { "epoch": 19.290375203915172, "grad_norm": 0.22601604461669922, "learning_rate": 3.831743935018672e-06, "loss": 0.0033, "num_input_tokens_seen": 255046784, "step": 118250 }, { "epoch": 19.291190864600328, "grad_norm": 0.002615095116198063, "learning_rate": 3.822953646009708e-06, "loss": 0.0008, "num_input_tokens_seen": 255057888, "step": 118255 }, { "epoch": 19.29200652528548, "grad_norm": 0.011257930658757687, "learning_rate": 3.8141734126580505e-06, "loss": 0.0032, "num_input_tokens_seen": 255068704, "step": 118260 }, { "epoch": 19.292822185970635, "grad_norm": 0.04294012114405632, "learning_rate": 3.805403235141669e-06, "loss": 0.0034, "num_input_tokens_seen": 255079840, "step": 118265 }, { "epoch": 19.29363784665579, "grad_norm": 0.0013795472914353013, "learning_rate": 3.7966431136381985e-06, "loss": 0.0009, "num_input_tokens_seen": 255090784, "step": 118270 }, { "epoch": 19.294453507340947, "grad_norm": 0.0066407211124897, "learning_rate": 3.7878930483252195e-06, "loss": 0.0085, "num_input_tokens_seen": 255101760, "step": 118275 }, { "epoch": 19.295269168026103, "grad_norm": 0.0032084493432193995, "learning_rate": 3.7791530393801456e-06, "loss": 0.0007, "num_input_tokens_seen": 255111552, "step": 118280 }, { "epoch": 19.296084828711255, "grad_norm": 0.008687667548656464, "learning_rate": 3.7704230869800015e-06, "loss": 0.1227, "num_input_tokens_seen": 255121920, "step": 118285 }, { "epoch": 19.29690048939641, "grad_norm": 0.009135226719081402, "learning_rate": 3.7617031913017573e-06, "loss": 0.0021, "num_input_tokens_seen": 255132224, "step": 118290 }, { "epoch": 19.297716150081566, "grad_norm": 0.08637550473213196, "learning_rate": 3.752993352522105e-06, "loss": 0.0058, "num_input_tokens_seen": 255143392, "step": 118295 }, { "epoch": 19.298531810766722, "grad_norm": 0.0354999415576458, "learning_rate": 3.7442935708176253e-06, "loss": 0.0065, "num_input_tokens_seen": 255153888, "step": 118300 }, { "epoch": 19.299347471451878, "grad_norm": 0.08332519978284836, "learning_rate": 3.7356038463645105e-06, "loss": 0.0093, "num_input_tokens_seen": 255164832, "step": 118305 }, { "epoch": 19.30016313213703, "grad_norm": 0.0005716729792766273, "learning_rate": 3.7269241793390084e-06, "loss": 0.0011, "num_input_tokens_seen": 255174464, "step": 118310 }, { "epoch": 19.300978792822185, "grad_norm": 0.002202606061473489, "learning_rate": 3.7182545699169236e-06, "loss": 0.0098, "num_input_tokens_seen": 255184800, "step": 118315 }, { "epoch": 19.30179445350734, "grad_norm": 0.00437677139416337, "learning_rate": 3.7095950182739478e-06, "loss": 0.0059, "num_input_tokens_seen": 255196000, "step": 118320 }, { "epoch": 19.302610114192497, "grad_norm": 0.044026076793670654, "learning_rate": 3.700945524585664e-06, "loss": 0.0023, "num_input_tokens_seen": 255207232, "step": 118325 }, { "epoch": 19.303425774877653, "grad_norm": 0.06210612505674362, "learning_rate": 3.6923060890273195e-06, "loss": 0.0026, "num_input_tokens_seen": 255218112, "step": 118330 }, { "epoch": 19.304241435562805, "grad_norm": 0.24252726137638092, "learning_rate": 3.683676711773998e-06, "loss": 0.0077, "num_input_tokens_seen": 255229216, "step": 118335 }, { "epoch": 19.30505709624796, "grad_norm": 0.001352749764919281, "learning_rate": 3.6750573930005583e-06, "loss": 0.0014, "num_input_tokens_seen": 255238752, "step": 118340 }, { "epoch": 19.305872756933116, "grad_norm": 0.003706161165609956, "learning_rate": 3.66644813288175e-06, "loss": 0.0039, "num_input_tokens_seen": 255248064, "step": 118345 }, { "epoch": 19.306688417618272, "grad_norm": 0.016326528042554855, "learning_rate": 3.6578489315919893e-06, "loss": 0.001, "num_input_tokens_seen": 255260032, "step": 118350 }, { "epoch": 19.307504078303428, "grad_norm": 0.010051500052213669, "learning_rate": 3.6492597893056367e-06, "loss": 0.1427, "num_input_tokens_seen": 255270176, "step": 118355 }, { "epoch": 19.30831973898858, "grad_norm": 0.022094812244176865, "learning_rate": 3.6406807061966085e-06, "loss": 0.0033, "num_input_tokens_seen": 255281568, "step": 118360 }, { "epoch": 19.309135399673735, "grad_norm": 0.004617269616574049, "learning_rate": 3.6321116824388767e-06, "loss": 0.0214, "num_input_tokens_seen": 255293504, "step": 118365 }, { "epoch": 19.30995106035889, "grad_norm": 0.09398304671049118, "learning_rate": 3.6235527182061912e-06, "loss": 0.0039, "num_input_tokens_seen": 255304288, "step": 118370 }, { "epoch": 19.310766721044047, "grad_norm": 0.018080953508615494, "learning_rate": 3.615003813671802e-06, "loss": 0.0062, "num_input_tokens_seen": 255316192, "step": 118375 }, { "epoch": 19.3115823817292, "grad_norm": 0.005143174901604652, "learning_rate": 3.6064649690091268e-06, "loss": 0.0718, "num_input_tokens_seen": 255326880, "step": 118380 }, { "epoch": 19.312398042414355, "grad_norm": 0.005171927623450756, "learning_rate": 3.5979361843910817e-06, "loss": 0.0011, "num_input_tokens_seen": 255338240, "step": 118385 }, { "epoch": 19.31321370309951, "grad_norm": 0.0005775060853920877, "learning_rate": 3.589417459990696e-06, "loss": 0.0015, "num_input_tokens_seen": 255348512, "step": 118390 }, { "epoch": 19.314029363784666, "grad_norm": 0.0005351380095817149, "learning_rate": 3.580908795980442e-06, "loss": 0.004, "num_input_tokens_seen": 255359488, "step": 118395 }, { "epoch": 19.31484502446982, "grad_norm": 0.010033232159912586, "learning_rate": 3.572410192532849e-06, "loss": 0.009, "num_input_tokens_seen": 255370016, "step": 118400 }, { "epoch": 19.315660685154974, "grad_norm": 0.029634885489940643, "learning_rate": 3.563921649820112e-06, "loss": 0.0042, "num_input_tokens_seen": 255380576, "step": 118405 }, { "epoch": 19.31647634584013, "grad_norm": 0.0038884247187525034, "learning_rate": 3.555443168014261e-06, "loss": 0.0017, "num_input_tokens_seen": 255391136, "step": 118410 }, { "epoch": 19.317292006525285, "grad_norm": 0.001331888954155147, "learning_rate": 3.5469747472871574e-06, "loss": 0.001, "num_input_tokens_seen": 255402784, "step": 118415 }, { "epoch": 19.31810766721044, "grad_norm": 0.10643607378005981, "learning_rate": 3.5385163878103864e-06, "loss": 0.0027, "num_input_tokens_seen": 255413408, "step": 118420 }, { "epoch": 19.318923327895597, "grad_norm": 0.017755325883626938, "learning_rate": 3.5300680897554226e-06, "loss": 0.0028, "num_input_tokens_seen": 255423552, "step": 118425 }, { "epoch": 19.31973898858075, "grad_norm": 0.10430113226175308, "learning_rate": 3.5216298532934068e-06, "loss": 0.0049, "num_input_tokens_seen": 255435008, "step": 118430 }, { "epoch": 19.320554649265905, "grad_norm": 0.0009032901143655181, "learning_rate": 3.5132016785954235e-06, "loss": 0.0012, "num_input_tokens_seen": 255446336, "step": 118435 }, { "epoch": 19.32137030995106, "grad_norm": 0.004010402597486973, "learning_rate": 3.504783565832226e-06, "loss": 0.0016, "num_input_tokens_seen": 255455936, "step": 118440 }, { "epoch": 19.322185970636216, "grad_norm": 0.005608629435300827, "learning_rate": 3.496375515174455e-06, "loss": 0.0027, "num_input_tokens_seen": 255467232, "step": 118445 }, { "epoch": 19.32300163132137, "grad_norm": 0.006776916328817606, "learning_rate": 3.4879775267925297e-06, "loss": 0.0012, "num_input_tokens_seen": 255478048, "step": 118450 }, { "epoch": 19.323817292006524, "grad_norm": 0.022793499752879143, "learning_rate": 3.4795896008565363e-06, "loss": 0.0213, "num_input_tokens_seen": 255488800, "step": 118455 }, { "epoch": 19.32463295269168, "grad_norm": 0.002545322524383664, "learning_rate": 3.4712117375365615e-06, "loss": 0.0015, "num_input_tokens_seen": 255498560, "step": 118460 }, { "epoch": 19.325448613376835, "grad_norm": 0.09427519887685776, "learning_rate": 3.4628439370024133e-06, "loss": 0.0037, "num_input_tokens_seen": 255508800, "step": 118465 }, { "epoch": 19.32626427406199, "grad_norm": 0.0024652944412082434, "learning_rate": 3.454486199423568e-06, "loss": 0.0075, "num_input_tokens_seen": 255520512, "step": 118470 }, { "epoch": 19.327079934747147, "grad_norm": 0.0055308775044977665, "learning_rate": 3.4461385249695e-06, "loss": 0.1262, "num_input_tokens_seen": 255530592, "step": 118475 }, { "epoch": 19.3278955954323, "grad_norm": 0.14644497632980347, "learning_rate": 3.4378009138093524e-06, "loss": 0.003, "num_input_tokens_seen": 255542720, "step": 118480 }, { "epoch": 19.328711256117455, "grad_norm": 0.001656314474530518, "learning_rate": 3.429473366112157e-06, "loss": 0.0099, "num_input_tokens_seen": 255553856, "step": 118485 }, { "epoch": 19.32952691680261, "grad_norm": 0.1507488638162613, "learning_rate": 3.421155882046556e-06, "loss": 0.0046, "num_input_tokens_seen": 255564736, "step": 118490 }, { "epoch": 19.330342577487766, "grad_norm": 0.007428924553096294, "learning_rate": 3.4128484617812482e-06, "loss": 0.0018, "num_input_tokens_seen": 255576416, "step": 118495 }, { "epoch": 19.33115823817292, "grad_norm": 0.0035626955796033144, "learning_rate": 3.404551105484488e-06, "loss": 0.0384, "num_input_tokens_seen": 255586144, "step": 118500 }, { "epoch": 19.331973898858074, "grad_norm": 0.0005969221238046885, "learning_rate": 3.3962638133245296e-06, "loss": 0.0749, "num_input_tokens_seen": 255597120, "step": 118505 }, { "epoch": 19.33278955954323, "grad_norm": 0.0013191952602937818, "learning_rate": 3.3879865854691825e-06, "loss": 0.1228, "num_input_tokens_seen": 255607648, "step": 118510 }, { "epoch": 19.333605220228385, "grad_norm": 0.0011316253803670406, "learning_rate": 3.3797194220863694e-06, "loss": 0.0864, "num_input_tokens_seen": 255619744, "step": 118515 }, { "epoch": 19.33442088091354, "grad_norm": 0.023691197857260704, "learning_rate": 3.371462323343455e-06, "loss": 0.0018, "num_input_tokens_seen": 255630208, "step": 118520 }, { "epoch": 19.335236541598697, "grad_norm": 0.00782823283225298, "learning_rate": 3.3632152894079727e-06, "loss": 0.002, "num_input_tokens_seen": 255640256, "step": 118525 }, { "epoch": 19.33605220228385, "grad_norm": 0.0032837321050465107, "learning_rate": 3.3549783204469e-06, "loss": 0.0823, "num_input_tokens_seen": 255650688, "step": 118530 }, { "epoch": 19.336867862969005, "grad_norm": 0.01362606044858694, "learning_rate": 3.3467514166272696e-06, "loss": 0.0024, "num_input_tokens_seen": 255661920, "step": 118535 }, { "epoch": 19.33768352365416, "grad_norm": 0.007564127445220947, "learning_rate": 3.338534578115726e-06, "loss": 0.0185, "num_input_tokens_seen": 255670688, "step": 118540 }, { "epoch": 19.338499184339316, "grad_norm": 0.0023412788286805153, "learning_rate": 3.3303278050789143e-06, "loss": 0.0028, "num_input_tokens_seen": 255682176, "step": 118545 }, { "epoch": 19.339314845024468, "grad_norm": 0.14904625713825226, "learning_rate": 3.3221310976829787e-06, "loss": 0.0079, "num_input_tokens_seen": 255691680, "step": 118550 }, { "epoch": 19.340130505709624, "grad_norm": 0.007661188952624798, "learning_rate": 3.313944456094231e-06, "loss": 0.0024, "num_input_tokens_seen": 255702432, "step": 118555 }, { "epoch": 19.34094616639478, "grad_norm": 0.06741590052843094, "learning_rate": 3.3057678804784276e-06, "loss": 0.004, "num_input_tokens_seen": 255712320, "step": 118560 }, { "epoch": 19.341761827079935, "grad_norm": 0.0012314959894865751, "learning_rate": 3.29760137100138e-06, "loss": 0.0022, "num_input_tokens_seen": 255723776, "step": 118565 }, { "epoch": 19.34257748776509, "grad_norm": 0.0017186448676511645, "learning_rate": 3.289444927828511e-06, "loss": 0.0609, "num_input_tokens_seen": 255736384, "step": 118570 }, { "epoch": 19.343393148450243, "grad_norm": 0.0011038166703656316, "learning_rate": 3.281298551125189e-06, "loss": 0.0053, "num_input_tokens_seen": 255747104, "step": 118575 }, { "epoch": 19.3442088091354, "grad_norm": 0.0036464787553995848, "learning_rate": 3.2731622410565043e-06, "loss": 0.0022, "num_input_tokens_seen": 255758688, "step": 118580 }, { "epoch": 19.345024469820554, "grad_norm": 0.0017774697626009583, "learning_rate": 3.265035997787269e-06, "loss": 0.0007, "num_input_tokens_seen": 255769632, "step": 118585 }, { "epoch": 19.34584013050571, "grad_norm": 0.001611288171261549, "learning_rate": 3.256919821482296e-06, "loss": 0.0027, "num_input_tokens_seen": 255779232, "step": 118590 }, { "epoch": 19.346655791190866, "grad_norm": 0.03137814253568649, "learning_rate": 3.2488137123059537e-06, "loss": 0.003, "num_input_tokens_seen": 255790784, "step": 118595 }, { "epoch": 19.347471451876018, "grad_norm": 0.05481543764472008, "learning_rate": 3.2407176704226102e-06, "loss": 0.0049, "num_input_tokens_seen": 255801952, "step": 118600 }, { "epoch": 19.348287112561174, "grad_norm": 0.0003398183034732938, "learning_rate": 3.2326316959962463e-06, "loss": 0.0025, "num_input_tokens_seen": 255811808, "step": 118605 }, { "epoch": 19.34910277324633, "grad_norm": 0.011275216937065125, "learning_rate": 3.224555789190897e-06, "loss": 0.0034, "num_input_tokens_seen": 255822880, "step": 118610 }, { "epoch": 19.349918433931485, "grad_norm": 0.15232455730438232, "learning_rate": 3.216489950170043e-06, "loss": 0.0021, "num_input_tokens_seen": 255833920, "step": 118615 }, { "epoch": 19.35073409461664, "grad_norm": 0.008598522283136845, "learning_rate": 3.208434179097275e-06, "loss": 0.0023, "num_input_tokens_seen": 255844544, "step": 118620 }, { "epoch": 19.351549755301793, "grad_norm": 0.04623036831617355, "learning_rate": 3.200388476135796e-06, "loss": 0.0052, "num_input_tokens_seen": 255856704, "step": 118625 }, { "epoch": 19.35236541598695, "grad_norm": 0.07567715644836426, "learning_rate": 3.1923528414487535e-06, "loss": 0.0022, "num_input_tokens_seen": 255867968, "step": 118630 }, { "epoch": 19.353181076672104, "grad_norm": 0.06720583885908127, "learning_rate": 3.184327275198795e-06, "loss": 0.0917, "num_input_tokens_seen": 255877280, "step": 118635 }, { "epoch": 19.35399673735726, "grad_norm": 0.0007300156285054982, "learning_rate": 3.1763117775487903e-06, "loss": 0.0016, "num_input_tokens_seen": 255888288, "step": 118640 }, { "epoch": 19.354812398042416, "grad_norm": 0.02508467808365822, "learning_rate": 3.168306348661054e-06, "loss": 0.0107, "num_input_tokens_seen": 255898720, "step": 118645 }, { "epoch": 19.355628058727568, "grad_norm": 0.006044514011591673, "learning_rate": 3.160310988697901e-06, "loss": 0.0016, "num_input_tokens_seen": 255909984, "step": 118650 }, { "epoch": 19.356443719412724, "grad_norm": 0.025566769763827324, "learning_rate": 3.152325697821312e-06, "loss": 0.0016, "num_input_tokens_seen": 255920096, "step": 118655 }, { "epoch": 19.35725938009788, "grad_norm": 0.1835334599018097, "learning_rate": 3.1443504761931585e-06, "loss": 0.0144, "num_input_tokens_seen": 255931712, "step": 118660 }, { "epoch": 19.358075040783035, "grad_norm": 0.02440604381263256, "learning_rate": 3.1363853239750327e-06, "loss": 0.0025, "num_input_tokens_seen": 255943520, "step": 118665 }, { "epoch": 19.35889070146819, "grad_norm": 0.28867626190185547, "learning_rate": 3.1284302413283615e-06, "loss": 0.0437, "num_input_tokens_seen": 255954336, "step": 118670 }, { "epoch": 19.359706362153343, "grad_norm": 0.15877529978752136, "learning_rate": 3.1204852284143493e-06, "loss": 0.0067, "num_input_tokens_seen": 255963776, "step": 118675 }, { "epoch": 19.3605220228385, "grad_norm": 0.09603649377822876, "learning_rate": 3.1125502853941444e-06, "loss": 0.0026, "num_input_tokens_seen": 255975392, "step": 118680 }, { "epoch": 19.361337683523654, "grad_norm": 0.0020709510426968336, "learning_rate": 3.1046254124283413e-06, "loss": 0.0018, "num_input_tokens_seen": 255986176, "step": 118685 }, { "epoch": 19.36215334420881, "grad_norm": 0.0032279056031256914, "learning_rate": 3.0967106096777e-06, "loss": 0.003, "num_input_tokens_seen": 255996992, "step": 118690 }, { "epoch": 19.362969004893966, "grad_norm": 0.009630155749619007, "learning_rate": 3.088805877302592e-06, "loss": 0.0049, "num_input_tokens_seen": 256007136, "step": 118695 }, { "epoch": 19.363784665579118, "grad_norm": 0.0014223603066056967, "learning_rate": 3.0809112154632226e-06, "loss": 0.0016, "num_input_tokens_seen": 256018336, "step": 118700 }, { "epoch": 19.364600326264274, "grad_norm": 0.03396604582667351, "learning_rate": 3.073026624319575e-06, "loss": 0.0032, "num_input_tokens_seen": 256029376, "step": 118705 }, { "epoch": 19.36541598694943, "grad_norm": 0.012623466551303864, "learning_rate": 3.06515210403141e-06, "loss": 0.0034, "num_input_tokens_seen": 256039136, "step": 118710 }, { "epoch": 19.366231647634585, "grad_norm": 0.14400139451026917, "learning_rate": 3.0572876547583785e-06, "loss": 0.0089, "num_input_tokens_seen": 256051040, "step": 118715 }, { "epoch": 19.36704730831974, "grad_norm": 0.004602317698299885, "learning_rate": 3.0494332766597967e-06, "loss": 0.0042, "num_input_tokens_seen": 256060992, "step": 118720 }, { "epoch": 19.367862969004893, "grad_norm": 0.0029588222969323397, "learning_rate": 3.0415889698949262e-06, "loss": 0.0009, "num_input_tokens_seen": 256073056, "step": 118725 }, { "epoch": 19.36867862969005, "grad_norm": 0.0022659054957330227, "learning_rate": 3.0337547346226404e-06, "loss": 0.0033, "num_input_tokens_seen": 256083840, "step": 118730 }, { "epoch": 19.369494290375204, "grad_norm": 0.005076737608760595, "learning_rate": 3.025930571001756e-06, "loss": 0.0008, "num_input_tokens_seen": 256095744, "step": 118735 }, { "epoch": 19.37030995106036, "grad_norm": 0.04462633654475212, "learning_rate": 3.018116479190869e-06, "loss": 0.0055, "num_input_tokens_seen": 256105408, "step": 118740 }, { "epoch": 19.371125611745512, "grad_norm": 0.05156286433339119, "learning_rate": 3.0103124593483522e-06, "loss": 0.0111, "num_input_tokens_seen": 256116928, "step": 118745 }, { "epoch": 19.371941272430668, "grad_norm": 0.016167784109711647, "learning_rate": 3.002518511632246e-06, "loss": 0.0111, "num_input_tokens_seen": 256127872, "step": 118750 }, { "epoch": 19.372756933115824, "grad_norm": 0.0009412463987246156, "learning_rate": 2.9947346362006466e-06, "loss": 0.0008, "num_input_tokens_seen": 256139520, "step": 118755 }, { "epoch": 19.37357259380098, "grad_norm": 0.008702821098268032, "learning_rate": 2.986960833211205e-06, "loss": 0.0026, "num_input_tokens_seen": 256151392, "step": 118760 }, { "epoch": 19.374388254486135, "grad_norm": 0.024824324995279312, "learning_rate": 2.9791971028215737e-06, "loss": 0.001, "num_input_tokens_seen": 256161440, "step": 118765 }, { "epoch": 19.375203915171287, "grad_norm": 0.003195522353053093, "learning_rate": 2.9714434451889595e-06, "loss": 0.0012, "num_input_tokens_seen": 256171008, "step": 118770 }, { "epoch": 19.376019575856443, "grad_norm": 0.0006611187127418816, "learning_rate": 2.9636998604706255e-06, "loss": 0.0007, "num_input_tokens_seen": 256180992, "step": 118775 }, { "epoch": 19.3768352365416, "grad_norm": 0.05272280424833298, "learning_rate": 2.955966348823391e-06, "loss": 0.0016, "num_input_tokens_seen": 256191552, "step": 118780 }, { "epoch": 19.377650897226754, "grad_norm": 0.7046160101890564, "learning_rate": 2.948242910404131e-06, "loss": 0.0197, "num_input_tokens_seen": 256202496, "step": 118785 }, { "epoch": 19.37846655791191, "grad_norm": 0.023265032097697258, "learning_rate": 2.9405295453692195e-06, "loss": 0.0204, "num_input_tokens_seen": 256214080, "step": 118790 }, { "epoch": 19.379282218597062, "grad_norm": 0.007516664918512106, "learning_rate": 2.9328262538750316e-06, "loss": 0.0038, "num_input_tokens_seen": 256224800, "step": 118795 }, { "epoch": 19.380097879282218, "grad_norm": 0.0016055069863796234, "learning_rate": 2.9251330360777205e-06, "loss": 0.0043, "num_input_tokens_seen": 256235712, "step": 118800 }, { "epoch": 19.380913539967374, "grad_norm": 0.0021142070181667805, "learning_rate": 2.9174498921331616e-06, "loss": 0.0049, "num_input_tokens_seen": 256245984, "step": 118805 }, { "epoch": 19.38172920065253, "grad_norm": 0.02773747779428959, "learning_rate": 2.909776822197063e-06, "loss": 0.0029, "num_input_tokens_seen": 256256064, "step": 118810 }, { "epoch": 19.382544861337685, "grad_norm": 0.05096176639199257, "learning_rate": 2.902113826424968e-06, "loss": 0.004, "num_input_tokens_seen": 256267232, "step": 118815 }, { "epoch": 19.383360522022837, "grad_norm": 0.0023822637740522623, "learning_rate": 2.8944609049721406e-06, "loss": 0.0015, "num_input_tokens_seen": 256277856, "step": 118820 }, { "epoch": 19.384176182707993, "grad_norm": 0.8270858526229858, "learning_rate": 2.8868180579936787e-06, "loss": 0.032, "num_input_tokens_seen": 256287808, "step": 118825 }, { "epoch": 19.38499184339315, "grad_norm": 0.021089231595396996, "learning_rate": 2.8791852856445143e-06, "loss": 0.002, "num_input_tokens_seen": 256298592, "step": 118830 }, { "epoch": 19.385807504078304, "grad_norm": 0.030118092894554138, "learning_rate": 2.8715625880792463e-06, "loss": 0.0022, "num_input_tokens_seen": 256308640, "step": 118835 }, { "epoch": 19.38662316476346, "grad_norm": 0.025761593133211136, "learning_rate": 2.8639499654524724e-06, "loss": 0.0013, "num_input_tokens_seen": 256320160, "step": 118840 }, { "epoch": 19.387438825448612, "grad_norm": 0.010964738205075264, "learning_rate": 2.856347417918348e-06, "loss": 0.0011, "num_input_tokens_seen": 256331648, "step": 118845 }, { "epoch": 19.388254486133768, "grad_norm": 0.018883679062128067, "learning_rate": 2.8487549456310824e-06, "loss": 0.0012, "num_input_tokens_seen": 256343328, "step": 118850 }, { "epoch": 19.389070146818923, "grad_norm": 0.03913642466068268, "learning_rate": 2.841172548744442e-06, "loss": 0.0043, "num_input_tokens_seen": 256353728, "step": 118855 }, { "epoch": 19.38988580750408, "grad_norm": 0.010447255335748196, "learning_rate": 2.8336002274121365e-06, "loss": 0.0092, "num_input_tokens_seen": 256363840, "step": 118860 }, { "epoch": 19.390701468189235, "grad_norm": 0.0033557710703462362, "learning_rate": 2.8260379817875993e-06, "loss": 0.0009, "num_input_tokens_seen": 256373888, "step": 118865 }, { "epoch": 19.391517128874387, "grad_norm": 0.03197970986366272, "learning_rate": 2.818485812024152e-06, "loss": 0.003, "num_input_tokens_seen": 256384992, "step": 118870 }, { "epoch": 19.392332789559543, "grad_norm": 1.0519121885299683, "learning_rate": 2.810943718274783e-06, "loss": 0.0855, "num_input_tokens_seen": 256395680, "step": 118875 }, { "epoch": 19.3931484502447, "grad_norm": 0.030041197314858437, "learning_rate": 2.8034117006924264e-06, "loss": 0.0018, "num_input_tokens_seen": 256406912, "step": 118880 }, { "epoch": 19.393964110929854, "grad_norm": 0.0942625105381012, "learning_rate": 2.795889759429626e-06, "loss": 0.0034, "num_input_tokens_seen": 256418208, "step": 118885 }, { "epoch": 19.39477977161501, "grad_norm": 0.3594015836715698, "learning_rate": 2.788377894638816e-06, "loss": 0.0063, "num_input_tokens_seen": 256429088, "step": 118890 }, { "epoch": 19.395595432300162, "grad_norm": 0.14045129716396332, "learning_rate": 2.7808761064723186e-06, "loss": 0.0069, "num_input_tokens_seen": 256438624, "step": 118895 }, { "epoch": 19.396411092985318, "grad_norm": 0.029000164940953255, "learning_rate": 2.773384395082179e-06, "loss": 0.0139, "num_input_tokens_seen": 256450112, "step": 118900 }, { "epoch": 19.397226753670473, "grad_norm": 0.7675994634628296, "learning_rate": 2.765902760620165e-06, "loss": 0.1749, "num_input_tokens_seen": 256459680, "step": 118905 }, { "epoch": 19.39804241435563, "grad_norm": 0.012273434549570084, "learning_rate": 2.758431203237877e-06, "loss": 0.0008, "num_input_tokens_seen": 256469440, "step": 118910 }, { "epoch": 19.39885807504078, "grad_norm": 0.010265165939927101, "learning_rate": 2.7509697230868048e-06, "loss": 0.0011, "num_input_tokens_seen": 256480768, "step": 118915 }, { "epoch": 19.399673735725937, "grad_norm": 0.011919491924345493, "learning_rate": 2.7435183203181613e-06, "loss": 0.0021, "num_input_tokens_seen": 256492064, "step": 118920 }, { "epoch": 19.400489396411093, "grad_norm": 0.0033093541860580444, "learning_rate": 2.7360769950828814e-06, "loss": 0.009, "num_input_tokens_seen": 256503904, "step": 118925 }, { "epoch": 19.40130505709625, "grad_norm": 0.2095029354095459, "learning_rate": 2.728645747531844e-06, "loss": 0.004, "num_input_tokens_seen": 256514336, "step": 118930 }, { "epoch": 19.402120717781404, "grad_norm": 0.030316771939396858, "learning_rate": 2.721224577815651e-06, "loss": 0.0041, "num_input_tokens_seen": 256524192, "step": 118935 }, { "epoch": 19.402936378466556, "grad_norm": 0.008953796699643135, "learning_rate": 2.713813486084682e-06, "loss": 0.003, "num_input_tokens_seen": 256534912, "step": 118940 }, { "epoch": 19.403752039151712, "grad_norm": 0.07936040312051773, "learning_rate": 2.7064124724891505e-06, "loss": 0.0038, "num_input_tokens_seen": 256545344, "step": 118945 }, { "epoch": 19.404567699836868, "grad_norm": 0.014342640526592731, "learning_rate": 2.6990215371789916e-06, "loss": 0.0056, "num_input_tokens_seen": 256555744, "step": 118950 }, { "epoch": 19.405383360522023, "grad_norm": 0.00343996356241405, "learning_rate": 2.691640680304086e-06, "loss": 0.001, "num_input_tokens_seen": 256567200, "step": 118955 }, { "epoch": 19.40619902120718, "grad_norm": 0.001286724698729813, "learning_rate": 2.684269902013925e-06, "loss": 0.006, "num_input_tokens_seen": 256577664, "step": 118960 }, { "epoch": 19.40701468189233, "grad_norm": 0.008780188858509064, "learning_rate": 2.676909202457889e-06, "loss": 0.0015, "num_input_tokens_seen": 256587776, "step": 118965 }, { "epoch": 19.407830342577487, "grad_norm": 0.020756250247359276, "learning_rate": 2.6695585817852476e-06, "loss": 0.0525, "num_input_tokens_seen": 256598464, "step": 118970 }, { "epoch": 19.408646003262643, "grad_norm": 0.0012134790886193514, "learning_rate": 2.6622180401448815e-06, "loss": 0.004, "num_input_tokens_seen": 256608992, "step": 118975 }, { "epoch": 19.4094616639478, "grad_norm": 0.08771555125713348, "learning_rate": 2.6548875776856163e-06, "loss": 0.0021, "num_input_tokens_seen": 256619264, "step": 118980 }, { "epoch": 19.410277324632954, "grad_norm": 0.10372212529182434, "learning_rate": 2.6475671945559442e-06, "loss": 0.1096, "num_input_tokens_seen": 256630432, "step": 118985 }, { "epoch": 19.411092985318106, "grad_norm": 0.0011598593555390835, "learning_rate": 2.6402568909042467e-06, "loss": 0.0024, "num_input_tokens_seen": 256642208, "step": 118990 }, { "epoch": 19.411908646003262, "grad_norm": 0.010293975472450256, "learning_rate": 2.6329566668787384e-06, "loss": 0.0822, "num_input_tokens_seen": 256652768, "step": 118995 }, { "epoch": 19.412724306688418, "grad_norm": 0.10590193420648575, "learning_rate": 2.625666522627301e-06, "loss": 0.0027, "num_input_tokens_seen": 256664032, "step": 119000 }, { "epoch": 19.413539967373573, "grad_norm": 0.003040950745344162, "learning_rate": 2.6183864582976503e-06, "loss": 0.0022, "num_input_tokens_seen": 256674656, "step": 119005 }, { "epoch": 19.41435562805873, "grad_norm": 0.16197501122951508, "learning_rate": 2.611116474037445e-06, "loss": 0.0061, "num_input_tokens_seen": 256684800, "step": 119010 }, { "epoch": 19.41517128874388, "grad_norm": 0.1702406257390976, "learning_rate": 2.603856569993901e-06, "loss": 0.0046, "num_input_tokens_seen": 256695872, "step": 119015 }, { "epoch": 19.415986949429037, "grad_norm": 0.012812474742531776, "learning_rate": 2.596606746314234e-06, "loss": 0.0014, "num_input_tokens_seen": 256705568, "step": 119020 }, { "epoch": 19.416802610114193, "grad_norm": 0.00394494878128171, "learning_rate": 2.589367003145271e-06, "loss": 0.0314, "num_input_tokens_seen": 256717056, "step": 119025 }, { "epoch": 19.41761827079935, "grad_norm": 0.0014385912800207734, "learning_rate": 2.5821373406338387e-06, "loss": 0.0027, "num_input_tokens_seen": 256728064, "step": 119030 }, { "epoch": 19.418433931484504, "grad_norm": 0.0008900863467715681, "learning_rate": 2.574917758926376e-06, "loss": 0.0026, "num_input_tokens_seen": 256738208, "step": 119035 }, { "epoch": 19.419249592169656, "grad_norm": 0.02794579416513443, "learning_rate": 2.5677082581692657e-06, "loss": 0.0016, "num_input_tokens_seen": 256749472, "step": 119040 }, { "epoch": 19.420065252854812, "grad_norm": 0.09478487819433212, "learning_rate": 2.5605088385085573e-06, "loss": 0.0052, "num_input_tokens_seen": 256760576, "step": 119045 }, { "epoch": 19.420880913539968, "grad_norm": 0.0305741298943758, "learning_rate": 2.553319500090245e-06, "loss": 0.0039, "num_input_tokens_seen": 256771040, "step": 119050 }, { "epoch": 19.421696574225123, "grad_norm": 0.17244242131710052, "learning_rate": 2.5461402430599357e-06, "loss": 0.0044, "num_input_tokens_seen": 256782528, "step": 119055 }, { "epoch": 19.42251223491028, "grad_norm": 0.003994576167315245, "learning_rate": 2.5389710675631227e-06, "loss": 0.1824, "num_input_tokens_seen": 256793440, "step": 119060 }, { "epoch": 19.42332789559543, "grad_norm": 0.000680187193211168, "learning_rate": 2.5318119737451905e-06, "loss": 0.0123, "num_input_tokens_seen": 256803392, "step": 119065 }, { "epoch": 19.424143556280587, "grad_norm": 0.006508481688797474, "learning_rate": 2.524662961751134e-06, "loss": 0.0019, "num_input_tokens_seen": 256813920, "step": 119070 }, { "epoch": 19.424959216965743, "grad_norm": 0.007876231335103512, "learning_rate": 2.517524031725893e-06, "loss": 0.0062, "num_input_tokens_seen": 256825568, "step": 119075 }, { "epoch": 19.4257748776509, "grad_norm": 0.020631087943911552, "learning_rate": 2.5103951838141292e-06, "loss": 0.0048, "num_input_tokens_seen": 256836160, "step": 119080 }, { "epoch": 19.42659053833605, "grad_norm": 0.0014763151993975043, "learning_rate": 2.503276418160283e-06, "loss": 0.001, "num_input_tokens_seen": 256845984, "step": 119085 }, { "epoch": 19.427406199021206, "grad_norm": 0.0038314054254442453, "learning_rate": 2.496167734908683e-06, "loss": 0.0021, "num_input_tokens_seen": 256857888, "step": 119090 }, { "epoch": 19.428221859706362, "grad_norm": 0.017192743718624115, "learning_rate": 2.489069134203381e-06, "loss": 0.0063, "num_input_tokens_seen": 256868576, "step": 119095 }, { "epoch": 19.429037520391518, "grad_norm": 0.0005124272429384291, "learning_rate": 2.481980616188262e-06, "loss": 0.0067, "num_input_tokens_seen": 256878784, "step": 119100 }, { "epoch": 19.429853181076673, "grad_norm": 0.025685638189315796, "learning_rate": 2.474902181006877e-06, "loss": 0.0051, "num_input_tokens_seen": 256889760, "step": 119105 }, { "epoch": 19.430668841761825, "grad_norm": 0.04253138601779938, "learning_rate": 2.467833828802779e-06, "loss": 0.0028, "num_input_tokens_seen": 256899968, "step": 119110 }, { "epoch": 19.43148450244698, "grad_norm": 0.2884300649166107, "learning_rate": 2.4607755597192417e-06, "loss": 0.0068, "num_input_tokens_seen": 256911200, "step": 119115 }, { "epoch": 19.432300163132137, "grad_norm": 0.0007615702343173325, "learning_rate": 2.453727373899206e-06, "loss": 0.0017, "num_input_tokens_seen": 256922688, "step": 119120 }, { "epoch": 19.433115823817293, "grad_norm": 0.035302285104990005, "learning_rate": 2.4466892714856137e-06, "loss": 0.0014, "num_input_tokens_seen": 256933792, "step": 119125 }, { "epoch": 19.43393148450245, "grad_norm": 0.0032260376028716564, "learning_rate": 2.439661252621017e-06, "loss": 0.0018, "num_input_tokens_seen": 256945728, "step": 119130 }, { "epoch": 19.4347471451876, "grad_norm": 0.0007269604247994721, "learning_rate": 2.4326433174479133e-06, "loss": 0.0108, "num_input_tokens_seen": 256956480, "step": 119135 }, { "epoch": 19.435562805872756, "grad_norm": 0.018233370035886765, "learning_rate": 2.4256354661084666e-06, "loss": 0.0407, "num_input_tokens_seen": 256967360, "step": 119140 }, { "epoch": 19.436378466557912, "grad_norm": 0.008074983023107052, "learning_rate": 2.4186376987447857e-06, "loss": 0.0024, "num_input_tokens_seen": 256979232, "step": 119145 }, { "epoch": 19.437194127243067, "grad_norm": 0.006428489927202463, "learning_rate": 2.41165001549859e-06, "loss": 0.001, "num_input_tokens_seen": 256990112, "step": 119150 }, { "epoch": 19.438009787928223, "grad_norm": 0.000663550803437829, "learning_rate": 2.4046724165115998e-06, "loss": 0.0016, "num_input_tokens_seen": 257001312, "step": 119155 }, { "epoch": 19.438825448613375, "grad_norm": 0.0024166356306523085, "learning_rate": 2.3977049019250907e-06, "loss": 0.0012, "num_input_tokens_seen": 257011776, "step": 119160 }, { "epoch": 19.43964110929853, "grad_norm": 0.04791110008955002, "learning_rate": 2.3907474718803944e-06, "loss": 0.0049, "num_input_tokens_seen": 257023584, "step": 119165 }, { "epoch": 19.440456769983687, "grad_norm": 0.0316590741276741, "learning_rate": 2.383800126518454e-06, "loss": 0.0099, "num_input_tokens_seen": 257034400, "step": 119170 }, { "epoch": 19.441272430668842, "grad_norm": 0.00524046178907156, "learning_rate": 2.3768628659801005e-06, "loss": 0.0158, "num_input_tokens_seen": 257045024, "step": 119175 }, { "epoch": 19.442088091353998, "grad_norm": 0.004806924611330032, "learning_rate": 2.3699356904058334e-06, "loss": 0.0008, "num_input_tokens_seen": 257055424, "step": 119180 }, { "epoch": 19.44290375203915, "grad_norm": 0.0069340248592197895, "learning_rate": 2.363018599936151e-06, "loss": 0.0083, "num_input_tokens_seen": 257065824, "step": 119185 }, { "epoch": 19.443719412724306, "grad_norm": 0.00811630580574274, "learning_rate": 2.3561115947111635e-06, "loss": 0.0014, "num_input_tokens_seen": 257076480, "step": 119190 }, { "epoch": 19.44453507340946, "grad_norm": 0.001696476829238236, "learning_rate": 2.349214674870925e-06, "loss": 0.0017, "num_input_tokens_seen": 257086848, "step": 119195 }, { "epoch": 19.445350734094617, "grad_norm": 0.02390192076563835, "learning_rate": 2.3423278405551583e-06, "loss": 0.0071, "num_input_tokens_seen": 257097824, "step": 119200 }, { "epoch": 19.446166394779773, "grad_norm": 0.005989675875753164, "learning_rate": 2.335451091903418e-06, "loss": 0.002, "num_input_tokens_seen": 257108096, "step": 119205 }, { "epoch": 19.446982055464925, "grad_norm": 0.008413761854171753, "learning_rate": 2.3285844290550916e-06, "loss": 0.0011, "num_input_tokens_seen": 257118816, "step": 119210 }, { "epoch": 19.44779771615008, "grad_norm": 0.006310771219432354, "learning_rate": 2.321727852149402e-06, "loss": 0.0006, "num_input_tokens_seen": 257129952, "step": 119215 }, { "epoch": 19.448613376835237, "grad_norm": 0.09782808274030685, "learning_rate": 2.314881361325183e-06, "loss": 0.003, "num_input_tokens_seen": 257140448, "step": 119220 }, { "epoch": 19.449429037520392, "grad_norm": 0.01110011525452137, "learning_rate": 2.308044956721267e-06, "loss": 0.0596, "num_input_tokens_seen": 257151392, "step": 119225 }, { "epoch": 19.450244698205548, "grad_norm": 0.004148659761995077, "learning_rate": 2.30121863847621e-06, "loss": 0.0013, "num_input_tokens_seen": 257162560, "step": 119230 }, { "epoch": 19.4510603588907, "grad_norm": 0.17019915580749512, "learning_rate": 2.294402406728291e-06, "loss": 0.003, "num_input_tokens_seen": 257172768, "step": 119235 }, { "epoch": 19.451876019575856, "grad_norm": 0.005454510450363159, "learning_rate": 2.2875962616157318e-06, "loss": 0.0038, "num_input_tokens_seen": 257183840, "step": 119240 }, { "epoch": 19.45269168026101, "grad_norm": 0.006874216254800558, "learning_rate": 2.2808002032763676e-06, "loss": 0.013, "num_input_tokens_seen": 257194720, "step": 119245 }, { "epoch": 19.453507340946167, "grad_norm": 0.11752831935882568, "learning_rate": 2.2740142318480873e-06, "loss": 0.0032, "num_input_tokens_seen": 257205664, "step": 119250 }, { "epoch": 19.454323001631323, "grad_norm": 0.027066411450505257, "learning_rate": 2.267238347468226e-06, "loss": 0.0083, "num_input_tokens_seen": 257217120, "step": 119255 }, { "epoch": 19.455138662316475, "grad_norm": 0.010607726871967316, "learning_rate": 2.2604725502742286e-06, "loss": 0.0047, "num_input_tokens_seen": 257227296, "step": 119260 }, { "epoch": 19.45595432300163, "grad_norm": 0.004806202836334705, "learning_rate": 2.2537168404032082e-06, "loss": 0.0019, "num_input_tokens_seen": 257237280, "step": 119265 }, { "epoch": 19.456769983686787, "grad_norm": 0.004285057075321674, "learning_rate": 2.2469712179920555e-06, "loss": 0.0406, "num_input_tokens_seen": 257248768, "step": 119270 }, { "epoch": 19.457585644371942, "grad_norm": 0.0013414152199402452, "learning_rate": 2.2402356831774383e-06, "loss": 0.0026, "num_input_tokens_seen": 257260000, "step": 119275 }, { "epoch": 19.458401305057095, "grad_norm": 0.010863806121051311, "learning_rate": 2.2335102360959148e-06, "loss": 0.0094, "num_input_tokens_seen": 257270112, "step": 119280 }, { "epoch": 19.45921696574225, "grad_norm": 0.15078891813755035, "learning_rate": 2.226794876883764e-06, "loss": 0.0035, "num_input_tokens_seen": 257281632, "step": 119285 }, { "epoch": 19.460032626427406, "grad_norm": 0.004961119033396244, "learning_rate": 2.2200896056771004e-06, "loss": 0.0007, "num_input_tokens_seen": 257291520, "step": 119290 }, { "epoch": 19.46084828711256, "grad_norm": 0.003951911348849535, "learning_rate": 2.2133944226117587e-06, "loss": 0.0006, "num_input_tokens_seen": 257302336, "step": 119295 }, { "epoch": 19.461663947797717, "grad_norm": 0.002165642101317644, "learning_rate": 2.2067093278235194e-06, "loss": 0.0043, "num_input_tokens_seen": 257312608, "step": 119300 }, { "epoch": 19.46247960848287, "grad_norm": 0.019960639998316765, "learning_rate": 2.2000343214477746e-06, "loss": 0.0019, "num_input_tokens_seen": 257323936, "step": 119305 }, { "epoch": 19.463295269168025, "grad_norm": 0.07938145846128464, "learning_rate": 2.1933694036198605e-06, "loss": 0.0111, "num_input_tokens_seen": 257335008, "step": 119310 }, { "epoch": 19.46411092985318, "grad_norm": 0.7556980848312378, "learning_rate": 2.1867145744747796e-06, "loss": 0.0139, "num_input_tokens_seen": 257344928, "step": 119315 }, { "epoch": 19.464926590538337, "grad_norm": 0.0383114218711853, "learning_rate": 2.1800698341475355e-06, "loss": 0.0029, "num_input_tokens_seen": 257356448, "step": 119320 }, { "epoch": 19.465742251223492, "grad_norm": 0.0009741922258399427, "learning_rate": 2.173435182772632e-06, "loss": 0.0012, "num_input_tokens_seen": 257366560, "step": 119325 }, { "epoch": 19.466557911908644, "grad_norm": 0.01230474654585123, "learning_rate": 2.166810620484627e-06, "loss": 0.008, "num_input_tokens_seen": 257378176, "step": 119330 }, { "epoch": 19.4673735725938, "grad_norm": 0.015972265973687172, "learning_rate": 2.160196147417748e-06, "loss": 0.0015, "num_input_tokens_seen": 257388672, "step": 119335 }, { "epoch": 19.468189233278956, "grad_norm": 0.10081072896718979, "learning_rate": 2.153591763706053e-06, "loss": 0.0898, "num_input_tokens_seen": 257399072, "step": 119340 }, { "epoch": 19.46900489396411, "grad_norm": 0.0022433120757341385, "learning_rate": 2.1469974694833805e-06, "loss": 0.0029, "num_input_tokens_seen": 257409856, "step": 119345 }, { "epoch": 19.469820554649267, "grad_norm": 0.002184277633205056, "learning_rate": 2.140413264883401e-06, "loss": 0.0668, "num_input_tokens_seen": 257419904, "step": 119350 }, { "epoch": 19.47063621533442, "grad_norm": 0.012972038239240646, "learning_rate": 2.1338391500394516e-06, "loss": 0.0046, "num_input_tokens_seen": 257429344, "step": 119355 }, { "epoch": 19.471451876019575, "grad_norm": 0.0042672185227274895, "learning_rate": 2.1272751250849263e-06, "loss": 0.1142, "num_input_tokens_seen": 257439104, "step": 119360 }, { "epoch": 19.47226753670473, "grad_norm": 0.03050355240702629, "learning_rate": 2.120721190152719e-06, "loss": 0.0015, "num_input_tokens_seen": 257450592, "step": 119365 }, { "epoch": 19.473083197389887, "grad_norm": 0.05795154348015785, "learning_rate": 2.114177345375723e-06, "loss": 0.0017, "num_input_tokens_seen": 257461664, "step": 119370 }, { "epoch": 19.473898858075042, "grad_norm": 0.07662321627140045, "learning_rate": 2.1076435908864986e-06, "loss": 0.1395, "num_input_tokens_seen": 257472160, "step": 119375 }, { "epoch": 19.474714518760194, "grad_norm": 0.04658502712845802, "learning_rate": 2.1011199268175517e-06, "loss": 0.0056, "num_input_tokens_seen": 257482176, "step": 119380 }, { "epoch": 19.47553017944535, "grad_norm": 0.12562593817710876, "learning_rate": 2.0946063533009986e-06, "loss": 0.0063, "num_input_tokens_seen": 257492512, "step": 119385 }, { "epoch": 19.476345840130506, "grad_norm": 0.1496136337518692, "learning_rate": 2.0881028704688997e-06, "loss": 0.0043, "num_input_tokens_seen": 257502304, "step": 119390 }, { "epoch": 19.47716150081566, "grad_norm": 0.04551282525062561, "learning_rate": 2.0816094784530394e-06, "loss": 0.0017, "num_input_tokens_seen": 257512928, "step": 119395 }, { "epoch": 19.477977161500817, "grad_norm": 0.050266966223716736, "learning_rate": 2.075126177385034e-06, "loss": 0.003, "num_input_tokens_seen": 257523840, "step": 119400 }, { "epoch": 19.47879282218597, "grad_norm": 0.004219160880893469, "learning_rate": 2.0686529673962784e-06, "loss": 0.1401, "num_input_tokens_seen": 257533632, "step": 119405 }, { "epoch": 19.479608482871125, "grad_norm": 0.012626801617443562, "learning_rate": 2.06218984861789e-06, "loss": 0.001, "num_input_tokens_seen": 257543840, "step": 119410 }, { "epoch": 19.48042414355628, "grad_norm": 0.0016090667340904474, "learning_rate": 2.0557368211809314e-06, "loss": 0.0211, "num_input_tokens_seen": 257555040, "step": 119415 }, { "epoch": 19.481239804241437, "grad_norm": 0.038332611322402954, "learning_rate": 2.0492938852161304e-06, "loss": 0.0077, "num_input_tokens_seen": 257564448, "step": 119420 }, { "epoch": 19.482055464926592, "grad_norm": 0.008477047085762024, "learning_rate": 2.042861040854105e-06, "loss": 0.001, "num_input_tokens_seen": 257575680, "step": 119425 }, { "epoch": 19.482871125611744, "grad_norm": 0.001152245094999671, "learning_rate": 2.0364382882251952e-06, "loss": 0.117, "num_input_tokens_seen": 257586688, "step": 119430 }, { "epoch": 19.4836867862969, "grad_norm": 0.000707261438947171, "learning_rate": 2.030025627459575e-06, "loss": 0.0065, "num_input_tokens_seen": 257597312, "step": 119435 }, { "epoch": 19.484502446982056, "grad_norm": 0.0017191810766234994, "learning_rate": 2.023623058687196e-06, "loss": 0.0018, "num_input_tokens_seen": 257609184, "step": 119440 }, { "epoch": 19.48531810766721, "grad_norm": 0.0017286689253523946, "learning_rate": 2.0172305820378434e-06, "loss": 0.0008, "num_input_tokens_seen": 257620256, "step": 119445 }, { "epoch": 19.486133768352367, "grad_norm": 0.011426905170083046, "learning_rate": 2.010848197641024e-06, "loss": 0.0038, "num_input_tokens_seen": 257630880, "step": 119450 }, { "epoch": 19.48694942903752, "grad_norm": 0.007682662457227707, "learning_rate": 2.0044759056261354e-06, "loss": 0.0211, "num_input_tokens_seen": 257642944, "step": 119455 }, { "epoch": 19.487765089722675, "grad_norm": 0.0007816300494596362, "learning_rate": 1.9981137061222954e-06, "loss": 0.004, "num_input_tokens_seen": 257654176, "step": 119460 }, { "epoch": 19.48858075040783, "grad_norm": 0.06774532794952393, "learning_rate": 1.9917615992584017e-06, "loss": 0.0012, "num_input_tokens_seen": 257665568, "step": 119465 }, { "epoch": 19.489396411092986, "grad_norm": 0.031107915565371513, "learning_rate": 1.985419585163295e-06, "loss": 0.0048, "num_input_tokens_seen": 257677088, "step": 119470 }, { "epoch": 19.49021207177814, "grad_norm": 0.24358516931533813, "learning_rate": 1.9790876639653733e-06, "loss": 0.0233, "num_input_tokens_seen": 257687168, "step": 119475 }, { "epoch": 19.491027732463294, "grad_norm": 0.0011304056970402598, "learning_rate": 1.972765835793089e-06, "loss": 0.0057, "num_input_tokens_seen": 257699232, "step": 119480 }, { "epoch": 19.49184339314845, "grad_norm": 0.006914403289556503, "learning_rate": 1.9664541007744508e-06, "loss": 0.0007, "num_input_tokens_seen": 257710368, "step": 119485 }, { "epoch": 19.492659053833606, "grad_norm": 0.0006439753342419863, "learning_rate": 1.960152459037412e-06, "loss": 0.0043, "num_input_tokens_seen": 257721856, "step": 119490 }, { "epoch": 19.49347471451876, "grad_norm": 0.002651756629347801, "learning_rate": 1.953860910709704e-06, "loss": 0.0016, "num_input_tokens_seen": 257732608, "step": 119495 }, { "epoch": 19.494290375203914, "grad_norm": 0.12680502235889435, "learning_rate": 1.9475794559188354e-06, "loss": 0.0559, "num_input_tokens_seen": 257741952, "step": 119500 }, { "epoch": 19.49510603588907, "grad_norm": 0.3291623592376709, "learning_rate": 1.9413080947920934e-06, "loss": 0.01, "num_input_tokens_seen": 257753376, "step": 119505 }, { "epoch": 19.495921696574225, "grad_norm": 0.04630269855260849, "learning_rate": 1.9350468274565434e-06, "loss": 0.0084, "num_input_tokens_seen": 257764352, "step": 119510 }, { "epoch": 19.49673735725938, "grad_norm": 0.001846547587774694, "learning_rate": 1.9287956540391395e-06, "loss": 0.0028, "num_input_tokens_seen": 257775936, "step": 119515 }, { "epoch": 19.497553017944536, "grad_norm": 0.34959709644317627, "learning_rate": 1.9225545746665575e-06, "loss": 0.009, "num_input_tokens_seen": 257787040, "step": 119520 }, { "epoch": 19.49836867862969, "grad_norm": 0.0007856850861571729, "learning_rate": 1.9163235894651965e-06, "loss": 0.0023, "num_input_tokens_seen": 257797216, "step": 119525 }, { "epoch": 19.499184339314844, "grad_norm": 0.2688422203063965, "learning_rate": 1.9101026985614558e-06, "loss": 0.0119, "num_input_tokens_seen": 257808320, "step": 119530 }, { "epoch": 19.5, "grad_norm": 0.0008199987350963056, "learning_rate": 1.903891902081345e-06, "loss": 0.0213, "num_input_tokens_seen": 257819040, "step": 119535 }, { "epoch": 19.500815660685156, "grad_norm": 0.0011780294589698315, "learning_rate": 1.8976912001507084e-06, "loss": 0.0073, "num_input_tokens_seen": 257829472, "step": 119540 }, { "epoch": 19.50163132137031, "grad_norm": 0.07872146368026733, "learning_rate": 1.8915005928953344e-06, "loss": 0.0054, "num_input_tokens_seen": 257840800, "step": 119545 }, { "epoch": 19.502446982055464, "grad_norm": 0.0024738647043704987, "learning_rate": 1.8853200804405113e-06, "loss": 0.0074, "num_input_tokens_seen": 257851712, "step": 119550 }, { "epoch": 19.50326264274062, "grad_norm": 0.005437144543975592, "learning_rate": 1.879149662911639e-06, "loss": 0.0018, "num_input_tokens_seen": 257862336, "step": 119555 }, { "epoch": 19.504078303425775, "grad_norm": 0.056144557893276215, "learning_rate": 1.8729893404336728e-06, "loss": 0.0038, "num_input_tokens_seen": 257872896, "step": 119560 }, { "epoch": 19.50489396411093, "grad_norm": 0.0034959709737449884, "learning_rate": 1.8668391131315133e-06, "loss": 0.0009, "num_input_tokens_seen": 257884416, "step": 119565 }, { "epoch": 19.505709624796086, "grad_norm": 0.08308064192533493, "learning_rate": 1.8606989811297824e-06, "loss": 0.002, "num_input_tokens_seen": 257895296, "step": 119570 }, { "epoch": 19.50652528548124, "grad_norm": 0.004397088196128607, "learning_rate": 1.8545689445528813e-06, "loss": 0.0013, "num_input_tokens_seen": 257906080, "step": 119575 }, { "epoch": 19.507340946166394, "grad_norm": 0.0049890740774571896, "learning_rate": 1.8484490035251544e-06, "loss": 0.0022, "num_input_tokens_seen": 257915744, "step": 119580 }, { "epoch": 19.50815660685155, "grad_norm": 0.0006847847835160792, "learning_rate": 1.842339158170503e-06, "loss": 0.0027, "num_input_tokens_seen": 257926688, "step": 119585 }, { "epoch": 19.508972267536706, "grad_norm": 0.03053983300924301, "learning_rate": 1.8362394086128276e-06, "loss": 0.0282, "num_input_tokens_seen": 257936896, "step": 119590 }, { "epoch": 19.50978792822186, "grad_norm": 0.016011381521821022, "learning_rate": 1.8301497549757518e-06, "loss": 0.0014, "num_input_tokens_seen": 257947456, "step": 119595 }, { "epoch": 19.510603588907014, "grad_norm": 0.05162233114242554, "learning_rate": 1.8240701973826213e-06, "loss": 0.0037, "num_input_tokens_seen": 257957376, "step": 119600 }, { "epoch": 19.51141924959217, "grad_norm": 0.0011370930587872863, "learning_rate": 1.8180007359567263e-06, "loss": 0.0065, "num_input_tokens_seen": 257969248, "step": 119605 }, { "epoch": 19.512234910277325, "grad_norm": 0.00228711636736989, "learning_rate": 1.8119413708210243e-06, "loss": 0.0016, "num_input_tokens_seen": 257980032, "step": 119610 }, { "epoch": 19.51305057096248, "grad_norm": 0.013576788827776909, "learning_rate": 1.8058921020983055e-06, "loss": 0.0007, "num_input_tokens_seen": 257991840, "step": 119615 }, { "epoch": 19.513866231647633, "grad_norm": 0.006445596925914288, "learning_rate": 1.7998529299111944e-06, "loss": 0.0162, "num_input_tokens_seen": 258003904, "step": 119620 }, { "epoch": 19.51468189233279, "grad_norm": 0.00770383607596159, "learning_rate": 1.7938238543820928e-06, "loss": 0.0024, "num_input_tokens_seen": 258014784, "step": 119625 }, { "epoch": 19.515497553017944, "grad_norm": 0.5669142603874207, "learning_rate": 1.7878048756331256e-06, "loss": 0.0146, "num_input_tokens_seen": 258025664, "step": 119630 }, { "epoch": 19.5163132137031, "grad_norm": 0.003933437168598175, "learning_rate": 1.7817959937863615e-06, "loss": 0.0033, "num_input_tokens_seen": 258035776, "step": 119635 }, { "epoch": 19.517128874388256, "grad_norm": 0.019525030627846718, "learning_rate": 1.7757972089635367e-06, "loss": 0.0102, "num_input_tokens_seen": 258046528, "step": 119640 }, { "epoch": 19.517944535073408, "grad_norm": 0.024743445217609406, "learning_rate": 1.7698085212862203e-06, "loss": 0.0341, "num_input_tokens_seen": 258058464, "step": 119645 }, { "epoch": 19.518760195758563, "grad_norm": 0.0018806976731866598, "learning_rate": 1.76382993087576e-06, "loss": 0.0084, "num_input_tokens_seen": 258069696, "step": 119650 }, { "epoch": 19.51957585644372, "grad_norm": 0.0038691849913448095, "learning_rate": 1.7578614378533365e-06, "loss": 0.0017, "num_input_tokens_seen": 258080352, "step": 119655 }, { "epoch": 19.520391517128875, "grad_norm": 0.0027784884441643953, "learning_rate": 1.751903042339964e-06, "loss": 0.0046, "num_input_tokens_seen": 258091136, "step": 119660 }, { "epoch": 19.52120717781403, "grad_norm": 0.020127762109041214, "learning_rate": 1.745954744456324e-06, "loss": 0.0048, "num_input_tokens_seen": 258101120, "step": 119665 }, { "epoch": 19.522022838499183, "grad_norm": 0.01012109499424696, "learning_rate": 1.7400165443229865e-06, "loss": 0.0069, "num_input_tokens_seen": 258110752, "step": 119670 }, { "epoch": 19.52283849918434, "grad_norm": 0.012885812669992447, "learning_rate": 1.7340884420603e-06, "loss": 0.0019, "num_input_tokens_seen": 258121696, "step": 119675 }, { "epoch": 19.523654159869494, "grad_norm": 0.024637741968035698, "learning_rate": 1.7281704377884454e-06, "loss": 0.0011, "num_input_tokens_seen": 258132832, "step": 119680 }, { "epoch": 19.52446982055465, "grad_norm": 0.01336624100804329, "learning_rate": 1.7222625316272723e-06, "loss": 0.002, "num_input_tokens_seen": 258143392, "step": 119685 }, { "epoch": 19.525285481239806, "grad_norm": 0.025070851668715477, "learning_rate": 1.7163647236965728e-06, "loss": 0.0018, "num_input_tokens_seen": 258153568, "step": 119690 }, { "epoch": 19.526101141924958, "grad_norm": 0.013639749027788639, "learning_rate": 1.7104770141158631e-06, "loss": 0.002, "num_input_tokens_seen": 258164384, "step": 119695 }, { "epoch": 19.526916802610113, "grad_norm": 0.025079362094402313, "learning_rate": 1.704599403004492e-06, "loss": 0.0102, "num_input_tokens_seen": 258175040, "step": 119700 }, { "epoch": 19.52773246329527, "grad_norm": 0.031152775511145592, "learning_rate": 1.6987318904814753e-06, "loss": 0.0016, "num_input_tokens_seen": 258184672, "step": 119705 }, { "epoch": 19.528548123980425, "grad_norm": 0.0022300644777715206, "learning_rate": 1.6928744766658844e-06, "loss": 0.0679, "num_input_tokens_seen": 258194528, "step": 119710 }, { "epoch": 19.52936378466558, "grad_norm": 0.0010773834073916078, "learning_rate": 1.687027161676291e-06, "loss": 0.0009, "num_input_tokens_seen": 258204192, "step": 119715 }, { "epoch": 19.530179445350733, "grad_norm": 0.023418370634317398, "learning_rate": 1.6811899456312119e-06, "loss": 0.0024, "num_input_tokens_seen": 258215136, "step": 119720 }, { "epoch": 19.53099510603589, "grad_norm": 1.448304295539856, "learning_rate": 1.6753628286490518e-06, "loss": 0.0632, "num_input_tokens_seen": 258226976, "step": 119725 }, { "epoch": 19.531810766721044, "grad_norm": 0.06279526650905609, "learning_rate": 1.6695458108477724e-06, "loss": 0.007, "num_input_tokens_seen": 258236864, "step": 119730 }, { "epoch": 19.5326264274062, "grad_norm": 0.0028894804418087006, "learning_rate": 1.66373889234539e-06, "loss": 0.0022, "num_input_tokens_seen": 258248288, "step": 119735 }, { "epoch": 19.533442088091356, "grad_norm": 0.1574600636959076, "learning_rate": 1.6579420732594774e-06, "loss": 0.0029, "num_input_tokens_seen": 258259040, "step": 119740 }, { "epoch": 19.534257748776508, "grad_norm": 0.004936246667057276, "learning_rate": 1.6521553537075518e-06, "loss": 0.001, "num_input_tokens_seen": 258269216, "step": 119745 }, { "epoch": 19.535073409461663, "grad_norm": 0.03724940866231918, "learning_rate": 1.646378733806908e-06, "loss": 0.0105, "num_input_tokens_seen": 258280224, "step": 119750 }, { "epoch": 19.53588907014682, "grad_norm": 0.020494816824793816, "learning_rate": 1.6406122136746193e-06, "loss": 0.0016, "num_input_tokens_seen": 258289952, "step": 119755 }, { "epoch": 19.536704730831975, "grad_norm": 0.05111588537693024, "learning_rate": 1.634855793427481e-06, "loss": 0.0015, "num_input_tokens_seen": 258301280, "step": 119760 }, { "epoch": 19.53752039151713, "grad_norm": 0.0015057544223964214, "learning_rate": 1.6291094731822886e-06, "loss": 0.0014, "num_input_tokens_seen": 258310912, "step": 119765 }, { "epoch": 19.538336052202283, "grad_norm": 0.001634570537135005, "learning_rate": 1.6233732530553935e-06, "loss": 0.0017, "num_input_tokens_seen": 258321472, "step": 119770 }, { "epoch": 19.53915171288744, "grad_norm": 0.020351288840174675, "learning_rate": 1.6176471331630915e-06, "loss": 0.0051, "num_input_tokens_seen": 258332672, "step": 119775 }, { "epoch": 19.539967373572594, "grad_norm": 0.12417326122522354, "learning_rate": 1.6119311136213455e-06, "loss": 0.0169, "num_input_tokens_seen": 258343648, "step": 119780 }, { "epoch": 19.54078303425775, "grad_norm": 0.016062038019299507, "learning_rate": 1.6062251945461737e-06, "loss": 0.0048, "num_input_tokens_seen": 258354880, "step": 119785 }, { "epoch": 19.541598694942905, "grad_norm": 0.01808302290737629, "learning_rate": 1.6005293760530393e-06, "loss": 0.0013, "num_input_tokens_seen": 258364064, "step": 119790 }, { "epoch": 19.542414355628058, "grad_norm": 0.01824459806084633, "learning_rate": 1.594843658257461e-06, "loss": 0.0018, "num_input_tokens_seen": 258375968, "step": 119795 }, { "epoch": 19.543230016313213, "grad_norm": 0.056755926460027695, "learning_rate": 1.5891680412746246e-06, "loss": 0.0022, "num_input_tokens_seen": 258385984, "step": 119800 }, { "epoch": 19.54404567699837, "grad_norm": 0.005223503801971674, "learning_rate": 1.5835025252196044e-06, "loss": 0.0034, "num_input_tokens_seen": 258396992, "step": 119805 }, { "epoch": 19.544861337683525, "grad_norm": 0.001604521065019071, "learning_rate": 1.5778471102071423e-06, "loss": 0.0028, "num_input_tokens_seen": 258408352, "step": 119810 }, { "epoch": 19.545676998368677, "grad_norm": 0.0014112128410488367, "learning_rate": 1.572201796351924e-06, "loss": 0.0021, "num_input_tokens_seen": 258419040, "step": 119815 }, { "epoch": 19.546492659053833, "grad_norm": 0.00901867263019085, "learning_rate": 1.5665665837683584e-06, "loss": 0.0049, "num_input_tokens_seen": 258430016, "step": 119820 }, { "epoch": 19.54730831973899, "grad_norm": 0.018750082701444626, "learning_rate": 1.5609414725706317e-06, "loss": 0.0028, "num_input_tokens_seen": 258440512, "step": 119825 }, { "epoch": 19.548123980424144, "grad_norm": 0.03270319104194641, "learning_rate": 1.5553264628727082e-06, "loss": 0.0024, "num_input_tokens_seen": 258451456, "step": 119830 }, { "epoch": 19.5489396411093, "grad_norm": 0.07036089897155762, "learning_rate": 1.5497215547884414e-06, "loss": 0.0034, "num_input_tokens_seen": 258461824, "step": 119835 }, { "epoch": 19.549755301794452, "grad_norm": 0.008530604653060436, "learning_rate": 1.544126748431407e-06, "loss": 0.0056, "num_input_tokens_seen": 258472032, "step": 119840 }, { "epoch": 19.550570962479608, "grad_norm": 0.009274479933083057, "learning_rate": 1.538542043914959e-06, "loss": 0.0025, "num_input_tokens_seen": 258482176, "step": 119845 }, { "epoch": 19.551386623164763, "grad_norm": 0.0013693161308765411, "learning_rate": 1.5329674413522843e-06, "loss": 0.0046, "num_input_tokens_seen": 258492256, "step": 119850 }, { "epoch": 19.55220228384992, "grad_norm": 0.018925661221146584, "learning_rate": 1.527402940856404e-06, "loss": 0.001, "num_input_tokens_seen": 258502976, "step": 119855 }, { "epoch": 19.553017944535075, "grad_norm": 0.0008114072843454778, "learning_rate": 1.5218485425400607e-06, "loss": 0.0035, "num_input_tokens_seen": 258513632, "step": 119860 }, { "epoch": 19.553833605220227, "grad_norm": 0.010868213139474392, "learning_rate": 1.516304246515776e-06, "loss": 0.0014, "num_input_tokens_seen": 258525056, "step": 119865 }, { "epoch": 19.554649265905383, "grad_norm": 0.034939125180244446, "learning_rate": 1.5107700528960156e-06, "loss": 0.0404, "num_input_tokens_seen": 258536672, "step": 119870 }, { "epoch": 19.55546492659054, "grad_norm": 0.04563318192958832, "learning_rate": 1.505245961792856e-06, "loss": 0.0043, "num_input_tokens_seen": 258547360, "step": 119875 }, { "epoch": 19.556280587275694, "grad_norm": 0.0006017862469889224, "learning_rate": 1.4997319733182636e-06, "loss": 0.0015, "num_input_tokens_seen": 258557728, "step": 119880 }, { "epoch": 19.55709624796085, "grad_norm": 0.043354861438274384, "learning_rate": 1.494228087583982e-06, "loss": 0.0038, "num_input_tokens_seen": 258568064, "step": 119885 }, { "epoch": 19.557911908646002, "grad_norm": 0.0026747905649244785, "learning_rate": 1.4887343047016444e-06, "loss": 0.0034, "num_input_tokens_seen": 258578912, "step": 119890 }, { "epoch": 19.558727569331158, "grad_norm": 0.03258626163005829, "learning_rate": 1.4832506247824396e-06, "loss": 0.0033, "num_input_tokens_seen": 258589344, "step": 119895 }, { "epoch": 19.559543230016313, "grad_norm": 0.0023476590868085623, "learning_rate": 1.4777770479376118e-06, "loss": 0.0022, "num_input_tokens_seen": 258600704, "step": 119900 }, { "epoch": 19.56035889070147, "grad_norm": 0.007997504435479641, "learning_rate": 1.472313574278017e-06, "loss": 0.0009, "num_input_tokens_seen": 258610432, "step": 119905 }, { "epoch": 19.561174551386625, "grad_norm": 0.00283032632432878, "learning_rate": 1.4668602039144551e-06, "loss": 0.0056, "num_input_tokens_seen": 258620288, "step": 119910 }, { "epoch": 19.561990212071777, "grad_norm": 0.00397408427670598, "learning_rate": 1.4614169369573382e-06, "loss": 0.0054, "num_input_tokens_seen": 258631776, "step": 119915 }, { "epoch": 19.562805872756933, "grad_norm": 0.1410626322031021, "learning_rate": 1.4559837735171333e-06, "loss": 0.003, "num_input_tokens_seen": 258642720, "step": 119920 }, { "epoch": 19.563621533442088, "grad_norm": 0.19208964705467224, "learning_rate": 1.450560713703808e-06, "loss": 0.0049, "num_input_tokens_seen": 258653184, "step": 119925 }, { "epoch": 19.564437194127244, "grad_norm": 0.04116985574364662, "learning_rate": 1.4451477576273298e-06, "loss": 0.0016, "num_input_tokens_seen": 258663680, "step": 119930 }, { "epoch": 19.5652528548124, "grad_norm": 0.30253228545188904, "learning_rate": 1.4397449053973888e-06, "loss": 0.0065, "num_input_tokens_seen": 258675264, "step": 119935 }, { "epoch": 19.56606851549755, "grad_norm": 0.03988620266318321, "learning_rate": 1.4343521571235086e-06, "loss": 0.004, "num_input_tokens_seen": 258686400, "step": 119940 }, { "epoch": 19.566884176182707, "grad_norm": 0.044855114072561264, "learning_rate": 1.4289695129149349e-06, "loss": 0.0019, "num_input_tokens_seen": 258696608, "step": 119945 }, { "epoch": 19.567699836867863, "grad_norm": 0.20270061492919922, "learning_rate": 1.423596972880803e-06, "loss": 0.0526, "num_input_tokens_seen": 258706240, "step": 119950 }, { "epoch": 19.56851549755302, "grad_norm": 0.004503197502344847, "learning_rate": 1.4182345371299699e-06, "loss": 0.0167, "num_input_tokens_seen": 258716160, "step": 119955 }, { "epoch": 19.569331158238175, "grad_norm": 0.9061958193778992, "learning_rate": 1.412882205771071e-06, "loss": 0.0949, "num_input_tokens_seen": 258726784, "step": 119960 }, { "epoch": 19.570146818923327, "grad_norm": 0.008481321856379509, "learning_rate": 1.4075399789126308e-06, "loss": 0.0011, "num_input_tokens_seen": 258737856, "step": 119965 }, { "epoch": 19.570962479608482, "grad_norm": 0.014342599548399448, "learning_rate": 1.4022078566629515e-06, "loss": 0.0012, "num_input_tokens_seen": 258748928, "step": 119970 }, { "epoch": 19.571778140293638, "grad_norm": 0.6860421299934387, "learning_rate": 1.396885839130002e-06, "loss": 0.0268, "num_input_tokens_seen": 258759744, "step": 119975 }, { "epoch": 19.572593800978794, "grad_norm": 0.04488009959459305, "learning_rate": 1.3915739264216964e-06, "loss": 0.0026, "num_input_tokens_seen": 258770720, "step": 119980 }, { "epoch": 19.57340946166395, "grad_norm": 0.009662150405347347, "learning_rate": 1.3862721186456706e-06, "loss": 0.0022, "num_input_tokens_seen": 258780608, "step": 119985 }, { "epoch": 19.5742251223491, "grad_norm": 0.008140115067362785, "learning_rate": 1.3809804159093386e-06, "loss": 0.0077, "num_input_tokens_seen": 258791840, "step": 119990 }, { "epoch": 19.575040783034257, "grad_norm": 0.006578588858246803, "learning_rate": 1.3756988183200037e-06, "loss": 0.0038, "num_input_tokens_seen": 258803744, "step": 119995 }, { "epoch": 19.575856443719413, "grad_norm": 0.05764668434858322, "learning_rate": 1.3704273259847467e-06, "loss": 0.004, "num_input_tokens_seen": 258814368, "step": 120000 }, { "epoch": 19.57667210440457, "grad_norm": 0.002946641528978944, "learning_rate": 1.36516593901026e-06, "loss": 0.0019, "num_input_tokens_seen": 258822944, "step": 120005 }, { "epoch": 19.57748776508972, "grad_norm": 0.00768510764464736, "learning_rate": 1.3599146575032363e-06, "loss": 0.0063, "num_input_tokens_seen": 258834432, "step": 120010 }, { "epoch": 19.578303425774877, "grad_norm": 0.0012711272574961185, "learning_rate": 1.3546734815702012e-06, "loss": 0.0178, "num_input_tokens_seen": 258845152, "step": 120015 }, { "epoch": 19.579119086460032, "grad_norm": 0.020287562161684036, "learning_rate": 1.349442411317181e-06, "loss": 0.0035, "num_input_tokens_seen": 258856256, "step": 120020 }, { "epoch": 19.579934747145188, "grad_norm": 1.0393017530441284, "learning_rate": 1.3442214468503688e-06, "loss": 0.0639, "num_input_tokens_seen": 258867424, "step": 120025 }, { "epoch": 19.580750407830344, "grad_norm": 0.016953587532043457, "learning_rate": 1.3390105882754577e-06, "loss": 0.0471, "num_input_tokens_seen": 258877856, "step": 120030 }, { "epoch": 19.581566068515496, "grad_norm": 0.011008658446371555, "learning_rate": 1.333809835698141e-06, "loss": 0.0104, "num_input_tokens_seen": 258889088, "step": 120035 }, { "epoch": 19.58238172920065, "grad_norm": 0.009418210946023464, "learning_rate": 1.3286191892237231e-06, "loss": 0.0013, "num_input_tokens_seen": 258899136, "step": 120040 }, { "epoch": 19.583197389885807, "grad_norm": 0.05028191953897476, "learning_rate": 1.323438648957509e-06, "loss": 0.0063, "num_input_tokens_seen": 258909536, "step": 120045 }, { "epoch": 19.584013050570963, "grad_norm": 0.007948525249958038, "learning_rate": 1.318268215004359e-06, "loss": 0.0051, "num_input_tokens_seen": 258920736, "step": 120050 }, { "epoch": 19.58482871125612, "grad_norm": 0.006144360639154911, "learning_rate": 1.3131078874691337e-06, "loss": 0.0047, "num_input_tokens_seen": 258931552, "step": 120055 }, { "epoch": 19.58564437194127, "grad_norm": 0.46879157423973083, "learning_rate": 1.3079576664564163e-06, "loss": 0.0223, "num_input_tokens_seen": 258942112, "step": 120060 }, { "epoch": 19.586460032626427, "grad_norm": 0.015498192049562931, "learning_rate": 1.302817552070623e-06, "loss": 0.2033, "num_input_tokens_seen": 258954592, "step": 120065 }, { "epoch": 19.587275693311582, "grad_norm": 0.001306193065829575, "learning_rate": 1.297687544415782e-06, "loss": 0.0085, "num_input_tokens_seen": 258964736, "step": 120070 }, { "epoch": 19.588091353996738, "grad_norm": 0.003690555924549699, "learning_rate": 1.292567643596032e-06, "loss": 0.0004, "num_input_tokens_seen": 258975520, "step": 120075 }, { "epoch": 19.588907014681894, "grad_norm": 0.021095123142004013, "learning_rate": 1.2874578497150125e-06, "loss": 0.005, "num_input_tokens_seen": 258986752, "step": 120080 }, { "epoch": 19.589722675367046, "grad_norm": 0.011259655468165874, "learning_rate": 1.282358162876307e-06, "loss": 0.0029, "num_input_tokens_seen": 258998912, "step": 120085 }, { "epoch": 19.5905383360522, "grad_norm": 0.0920916274189949, "learning_rate": 1.277268583183333e-06, "loss": 0.0035, "num_input_tokens_seen": 259010688, "step": 120090 }, { "epoch": 19.591353996737357, "grad_norm": 0.01910473220050335, "learning_rate": 1.2721891107391192e-06, "loss": 0.0022, "num_input_tokens_seen": 259021280, "step": 120095 }, { "epoch": 19.592169657422513, "grad_norm": 0.031802862882614136, "learning_rate": 1.2671197456467497e-06, "loss": 0.002, "num_input_tokens_seen": 259031520, "step": 120100 }, { "epoch": 19.59298531810767, "grad_norm": 0.04007276892662048, "learning_rate": 1.2620604880088093e-06, "loss": 0.0753, "num_input_tokens_seen": 259042144, "step": 120105 }, { "epoch": 19.59380097879282, "grad_norm": 0.003993109799921513, "learning_rate": 1.2570113379279936e-06, "loss": 0.0039, "num_input_tokens_seen": 259052512, "step": 120110 }, { "epoch": 19.594616639477977, "grad_norm": 0.33553823828697205, "learning_rate": 1.2519722955064982e-06, "loss": 0.1263, "num_input_tokens_seen": 259063328, "step": 120115 }, { "epoch": 19.595432300163132, "grad_norm": 0.01577514223754406, "learning_rate": 1.2469433608464642e-06, "loss": 0.0013, "num_input_tokens_seen": 259073728, "step": 120120 }, { "epoch": 19.596247960848288, "grad_norm": 0.004346830770373344, "learning_rate": 1.2419245340498652e-06, "loss": 0.0024, "num_input_tokens_seen": 259085728, "step": 120125 }, { "epoch": 19.597063621533444, "grad_norm": 0.002067071618512273, "learning_rate": 1.236915815218398e-06, "loss": 0.0011, "num_input_tokens_seen": 259096960, "step": 120130 }, { "epoch": 19.597879282218596, "grad_norm": 0.0035990895703434944, "learning_rate": 1.2319172044535365e-06, "loss": 0.0437, "num_input_tokens_seen": 259107680, "step": 120135 }, { "epoch": 19.59869494290375, "grad_norm": 0.0034610715229064226, "learning_rate": 1.2269287018565888e-06, "loss": 0.0016, "num_input_tokens_seen": 259118016, "step": 120140 }, { "epoch": 19.599510603588907, "grad_norm": 0.014905531890690327, "learning_rate": 1.2219503075286963e-06, "loss": 0.0014, "num_input_tokens_seen": 259128960, "step": 120145 }, { "epoch": 19.600326264274063, "grad_norm": 0.04299386963248253, "learning_rate": 1.2169820215707228e-06, "loss": 0.0086, "num_input_tokens_seen": 259140576, "step": 120150 }, { "epoch": 19.601141924959215, "grad_norm": 0.2985369563102722, "learning_rate": 1.2120238440833653e-06, "loss": 0.0311, "num_input_tokens_seen": 259150656, "step": 120155 }, { "epoch": 19.60195758564437, "grad_norm": 0.012994828633964062, "learning_rate": 1.207075775167099e-06, "loss": 0.0006, "num_input_tokens_seen": 259161376, "step": 120160 }, { "epoch": 19.602773246329527, "grad_norm": 0.0008274966385215521, "learning_rate": 1.2021378149221773e-06, "loss": 0.0019, "num_input_tokens_seen": 259170432, "step": 120165 }, { "epoch": 19.603588907014682, "grad_norm": 0.040956612676382065, "learning_rate": 1.1972099634487422e-06, "loss": 0.004, "num_input_tokens_seen": 259181728, "step": 120170 }, { "epoch": 19.604404567699838, "grad_norm": 0.004888230003416538, "learning_rate": 1.1922922208466026e-06, "loss": 0.0017, "num_input_tokens_seen": 259190880, "step": 120175 }, { "epoch": 19.605220228384994, "grad_norm": 0.00583982327952981, "learning_rate": 1.1873845872154565e-06, "loss": 0.0085, "num_input_tokens_seen": 259202656, "step": 120180 }, { "epoch": 19.606035889070146, "grad_norm": 0.0033045648597180843, "learning_rate": 1.1824870626547247e-06, "loss": 0.0021, "num_input_tokens_seen": 259213376, "step": 120185 }, { "epoch": 19.6068515497553, "grad_norm": 0.22127608954906464, "learning_rate": 1.1775996472637163e-06, "loss": 0.0063, "num_input_tokens_seen": 259225024, "step": 120190 }, { "epoch": 19.607667210440457, "grad_norm": 0.1409861445426941, "learning_rate": 1.1727223411414078e-06, "loss": 0.0048, "num_input_tokens_seen": 259236224, "step": 120195 }, { "epoch": 19.608482871125613, "grad_norm": 0.022134289145469666, "learning_rate": 1.1678551443867203e-06, "loss": 0.0027, "num_input_tokens_seen": 259248192, "step": 120200 }, { "epoch": 19.609298531810765, "grad_norm": 0.0017865733243525028, "learning_rate": 1.1629980570982967e-06, "loss": 0.0022, "num_input_tokens_seen": 259259776, "step": 120205 }, { "epoch": 19.61011419249592, "grad_norm": 0.011602630838751793, "learning_rate": 1.1581510793745032e-06, "loss": 0.0045, "num_input_tokens_seen": 259270624, "step": 120210 }, { "epoch": 19.610929853181077, "grad_norm": 0.4227563738822937, "learning_rate": 1.153314211313594e-06, "loss": 0.0081, "num_input_tokens_seen": 259282112, "step": 120215 }, { "epoch": 19.611745513866232, "grad_norm": 0.012841165997087955, "learning_rate": 1.1484874530136025e-06, "loss": 0.0807, "num_input_tokens_seen": 259293504, "step": 120220 }, { "epoch": 19.612561174551388, "grad_norm": 0.0072961910627782345, "learning_rate": 1.1436708045723388e-06, "loss": 0.0855, "num_input_tokens_seen": 259304480, "step": 120225 }, { "epoch": 19.61337683523654, "grad_norm": 0.49936535954475403, "learning_rate": 1.1388642660875025e-06, "loss": 0.0131, "num_input_tokens_seen": 259315712, "step": 120230 }, { "epoch": 19.614192495921696, "grad_norm": 0.0136000607162714, "learning_rate": 1.1340678376563495e-06, "loss": 0.0031, "num_input_tokens_seen": 259325888, "step": 120235 }, { "epoch": 19.61500815660685, "grad_norm": 0.013826275244355202, "learning_rate": 1.1292815193761907e-06, "loss": 0.0023, "num_input_tokens_seen": 259335776, "step": 120240 }, { "epoch": 19.615823817292007, "grad_norm": 0.015942323952913284, "learning_rate": 1.1245053113440596e-06, "loss": 0.0008, "num_input_tokens_seen": 259348352, "step": 120245 }, { "epoch": 19.616639477977163, "grad_norm": 0.015758251771330833, "learning_rate": 1.1197392136566565e-06, "loss": 0.0035, "num_input_tokens_seen": 259359488, "step": 120250 }, { "epoch": 19.617455138662315, "grad_norm": 0.01022917591035366, "learning_rate": 1.114983226410571e-06, "loss": 0.0056, "num_input_tokens_seen": 259369792, "step": 120255 }, { "epoch": 19.61827079934747, "grad_norm": 0.13176175951957703, "learning_rate": 1.110237349702281e-06, "loss": 0.016, "num_input_tokens_seen": 259380416, "step": 120260 }, { "epoch": 19.619086460032626, "grad_norm": 0.03193429857492447, "learning_rate": 1.1055015836279326e-06, "loss": 0.0021, "num_input_tokens_seen": 259391712, "step": 120265 }, { "epoch": 19.619902120717782, "grad_norm": 0.09113612771034241, "learning_rate": 1.1007759282834484e-06, "loss": 0.004, "num_input_tokens_seen": 259402400, "step": 120270 }, { "epoch": 19.620717781402938, "grad_norm": 0.01012646034359932, "learning_rate": 1.096060383764641e-06, "loss": 0.0012, "num_input_tokens_seen": 259412704, "step": 120275 }, { "epoch": 19.62153344208809, "grad_norm": 0.03085959516465664, "learning_rate": 1.0913549501671004e-06, "loss": 0.0096, "num_input_tokens_seen": 259421984, "step": 120280 }, { "epoch": 19.622349102773246, "grad_norm": 0.0038579190149903297, "learning_rate": 1.0866596275861395e-06, "loss": 0.0023, "num_input_tokens_seen": 259433888, "step": 120285 }, { "epoch": 19.6231647634584, "grad_norm": 0.00632438762113452, "learning_rate": 1.0819744161169597e-06, "loss": 0.0007, "num_input_tokens_seen": 259445408, "step": 120290 }, { "epoch": 19.623980424143557, "grad_norm": 0.011171843856573105, "learning_rate": 1.0772993158544297e-06, "loss": 0.02, "num_input_tokens_seen": 259455936, "step": 120295 }, { "epoch": 19.624796084828713, "grad_norm": 0.010120272636413574, "learning_rate": 1.072634326893418e-06, "loss": 0.1189, "num_input_tokens_seen": 259465856, "step": 120300 }, { "epoch": 19.625611745513865, "grad_norm": 0.02943062223494053, "learning_rate": 1.0679794493284045e-06, "loss": 0.0027, "num_input_tokens_seen": 259476384, "step": 120305 }, { "epoch": 19.62642740619902, "grad_norm": 0.0017542147543281317, "learning_rate": 1.0633346832537026e-06, "loss": 0.0015, "num_input_tokens_seen": 259487328, "step": 120310 }, { "epoch": 19.627243066884176, "grad_norm": 0.010939440689980984, "learning_rate": 1.0587000287634596e-06, "loss": 0.0013, "num_input_tokens_seen": 259498208, "step": 120315 }, { "epoch": 19.628058727569332, "grad_norm": 0.0008260926697403193, "learning_rate": 1.0540754859516554e-06, "loss": 0.004, "num_input_tokens_seen": 259509376, "step": 120320 }, { "epoch": 19.628874388254488, "grad_norm": 0.05023251101374626, "learning_rate": 1.0494610549119377e-06, "loss": 0.0075, "num_input_tokens_seen": 259521088, "step": 120325 }, { "epoch": 19.62969004893964, "grad_norm": 1.8395010232925415, "learning_rate": 1.0448567357378424e-06, "loss": 0.0273, "num_input_tokens_seen": 259531968, "step": 120330 }, { "epoch": 19.630505709624796, "grad_norm": 0.003540440695360303, "learning_rate": 1.0402625285227396e-06, "loss": 0.0065, "num_input_tokens_seen": 259542752, "step": 120335 }, { "epoch": 19.63132137030995, "grad_norm": 0.006668029353022575, "learning_rate": 1.0356784333596658e-06, "loss": 0.0249, "num_input_tokens_seen": 259553504, "step": 120340 }, { "epoch": 19.632137030995107, "grad_norm": 0.12457288801670074, "learning_rate": 1.0311044503415468e-06, "loss": 0.1706, "num_input_tokens_seen": 259563488, "step": 120345 }, { "epoch": 19.63295269168026, "grad_norm": 0.007881802506744862, "learning_rate": 1.026540579561086e-06, "loss": 0.0171, "num_input_tokens_seen": 259574208, "step": 120350 }, { "epoch": 19.633768352365415, "grad_norm": 0.1654602289199829, "learning_rate": 1.0219868211108208e-06, "loss": 0.0084, "num_input_tokens_seen": 259585664, "step": 120355 }, { "epoch": 19.63458401305057, "grad_norm": 0.23740610480308533, "learning_rate": 1.0174431750828993e-06, "loss": 0.0049, "num_input_tokens_seen": 259596832, "step": 120360 }, { "epoch": 19.635399673735726, "grad_norm": 0.19849993288516998, "learning_rate": 1.0129096415695816e-06, "loss": 0.0051, "num_input_tokens_seen": 259608736, "step": 120365 }, { "epoch": 19.636215334420882, "grad_norm": 0.005057538393884897, "learning_rate": 1.008386220662627e-06, "loss": 0.0025, "num_input_tokens_seen": 259619232, "step": 120370 }, { "epoch": 19.637030995106034, "grad_norm": 0.020129449665546417, "learning_rate": 1.0038729124537405e-06, "loss": 0.0017, "num_input_tokens_seen": 259630080, "step": 120375 }, { "epoch": 19.63784665579119, "grad_norm": 0.009826862253248692, "learning_rate": 9.993697170343485e-07, "loss": 0.0026, "num_input_tokens_seen": 259641280, "step": 120380 }, { "epoch": 19.638662316476346, "grad_norm": 0.09291274100542068, "learning_rate": 9.948766344958227e-07, "loss": 0.0038, "num_input_tokens_seen": 259651584, "step": 120385 }, { "epoch": 19.6394779771615, "grad_norm": 0.022330310195684433, "learning_rate": 9.9039366492909e-07, "loss": 0.0691, "num_input_tokens_seen": 259661152, "step": 120390 }, { "epoch": 19.640293637846657, "grad_norm": 0.06288199871778488, "learning_rate": 9.859208084251337e-07, "loss": 0.004, "num_input_tokens_seen": 259672576, "step": 120395 }, { "epoch": 19.64110929853181, "grad_norm": 0.21794436872005463, "learning_rate": 9.81458065074492e-07, "loss": 0.0075, "num_input_tokens_seen": 259682400, "step": 120400 }, { "epoch": 19.641924959216965, "grad_norm": 0.02039063721895218, "learning_rate": 9.770054349677037e-07, "loss": 0.0039, "num_input_tokens_seen": 259692896, "step": 120405 }, { "epoch": 19.64274061990212, "grad_norm": 0.0044409967958927155, "learning_rate": 9.725629181949192e-07, "loss": 0.0062, "num_input_tokens_seen": 259704480, "step": 120410 }, { "epoch": 19.643556280587276, "grad_norm": 0.12437718361616135, "learning_rate": 9.681305148462328e-07, "loss": 0.0025, "num_input_tokens_seen": 259715104, "step": 120415 }, { "epoch": 19.644371941272432, "grad_norm": 0.11906149238348007, "learning_rate": 9.63708225011406e-07, "loss": 0.0193, "num_input_tokens_seen": 259724928, "step": 120420 }, { "epoch": 19.645187601957584, "grad_norm": 0.004545453004539013, "learning_rate": 9.59296048780145e-07, "loss": 0.0017, "num_input_tokens_seen": 259736256, "step": 120425 }, { "epoch": 19.64600326264274, "grad_norm": 0.6343613862991333, "learning_rate": 9.54893986241767e-07, "loss": 0.0389, "num_input_tokens_seen": 259748288, "step": 120430 }, { "epoch": 19.646818923327896, "grad_norm": 0.006254831328988075, "learning_rate": 9.505020374855899e-07, "loss": 0.0037, "num_input_tokens_seen": 259758784, "step": 120435 }, { "epoch": 19.64763458401305, "grad_norm": 0.10660555958747864, "learning_rate": 9.461202026005978e-07, "loss": 0.0026, "num_input_tokens_seen": 259769952, "step": 120440 }, { "epoch": 19.648450244698207, "grad_norm": 0.011994830332696438, "learning_rate": 9.417484816755528e-07, "loss": 0.0505, "num_input_tokens_seen": 259780832, "step": 120445 }, { "epoch": 19.64926590538336, "grad_norm": 0.22202511131763458, "learning_rate": 9.37386874799051e-07, "loss": 0.0052, "num_input_tokens_seen": 259792672, "step": 120450 }, { "epoch": 19.650081566068515, "grad_norm": 0.018390163779258728, "learning_rate": 9.330353820595217e-07, "loss": 0.0076, "num_input_tokens_seen": 259804000, "step": 120455 }, { "epoch": 19.65089722675367, "grad_norm": 0.05937052518129349, "learning_rate": 9.286940035451718e-07, "loss": 0.0033, "num_input_tokens_seen": 259815168, "step": 120460 }, { "epoch": 19.651712887438826, "grad_norm": 0.0017838447820395231, "learning_rate": 9.243627393439313e-07, "loss": 0.091, "num_input_tokens_seen": 259824640, "step": 120465 }, { "epoch": 19.652528548123982, "grad_norm": 0.00913760531693697, "learning_rate": 9.200415895436187e-07, "loss": 0.0017, "num_input_tokens_seen": 259834656, "step": 120470 }, { "epoch": 19.653344208809134, "grad_norm": 0.029210327193140984, "learning_rate": 9.157305542317751e-07, "loss": 0.0018, "num_input_tokens_seen": 259846112, "step": 120475 }, { "epoch": 19.65415986949429, "grad_norm": 0.02041507512331009, "learning_rate": 9.11429633495775e-07, "loss": 0.0042, "num_input_tokens_seen": 259857024, "step": 120480 }, { "epoch": 19.654975530179446, "grad_norm": 0.002950589172542095, "learning_rate": 9.071388274228264e-07, "loss": 0.005, "num_input_tokens_seen": 259867104, "step": 120485 }, { "epoch": 19.6557911908646, "grad_norm": 0.046937182545661926, "learning_rate": 9.028581360998045e-07, "loss": 0.0044, "num_input_tokens_seen": 259876576, "step": 120490 }, { "epoch": 19.656606851549757, "grad_norm": 0.08512108027935028, "learning_rate": 8.985875596135285e-07, "loss": 0.004, "num_input_tokens_seen": 259888256, "step": 120495 }, { "epoch": 19.65742251223491, "grad_norm": 0.0016706367023289204, "learning_rate": 8.943270980505957e-07, "loss": 0.0021, "num_input_tokens_seen": 259898912, "step": 120500 }, { "epoch": 19.658238172920065, "grad_norm": 0.010568813420832157, "learning_rate": 8.900767514972152e-07, "loss": 0.0024, "num_input_tokens_seen": 259910816, "step": 120505 }, { "epoch": 19.65905383360522, "grad_norm": 0.00877430196851492, "learning_rate": 8.858365200395957e-07, "loss": 0.0011, "num_input_tokens_seen": 259921056, "step": 120510 }, { "epoch": 19.659869494290376, "grad_norm": 0.002357657067477703, "learning_rate": 8.816064037636684e-07, "loss": 0.004, "num_input_tokens_seen": 259931616, "step": 120515 }, { "epoch": 19.660685154975532, "grad_norm": 0.005972938612103462, "learning_rate": 8.773864027551981e-07, "loss": 0.0011, "num_input_tokens_seen": 259941888, "step": 120520 }, { "epoch": 19.661500815660684, "grad_norm": 0.0357477068901062, "learning_rate": 8.73176517099672e-07, "loss": 0.0085, "num_input_tokens_seen": 259953504, "step": 120525 }, { "epoch": 19.66231647634584, "grad_norm": 0.0067837717942893505, "learning_rate": 8.689767468824105e-07, "loss": 0.0026, "num_input_tokens_seen": 259963488, "step": 120530 }, { "epoch": 19.663132137030995, "grad_norm": 0.0030017246026545763, "learning_rate": 8.647870921885126e-07, "loss": 0.0028, "num_input_tokens_seen": 259973312, "step": 120535 }, { "epoch": 19.66394779771615, "grad_norm": 0.043457675725221634, "learning_rate": 8.606075531029101e-07, "loss": 0.0022, "num_input_tokens_seen": 259984160, "step": 120540 }, { "epoch": 19.664763458401303, "grad_norm": 0.051734670996665955, "learning_rate": 8.564381297102575e-07, "loss": 0.0026, "num_input_tokens_seen": 259995008, "step": 120545 }, { "epoch": 19.66557911908646, "grad_norm": 0.052582815289497375, "learning_rate": 8.522788220951538e-07, "loss": 0.0064, "num_input_tokens_seen": 260006624, "step": 120550 }, { "epoch": 19.666394779771615, "grad_norm": 0.08694297075271606, "learning_rate": 8.481296303418096e-07, "loss": 0.0622, "num_input_tokens_seen": 260016960, "step": 120555 }, { "epoch": 19.66721044045677, "grad_norm": 0.001538576907478273, "learning_rate": 8.439905545343796e-07, "loss": 0.0049, "num_input_tokens_seen": 260026624, "step": 120560 }, { "epoch": 19.668026101141926, "grad_norm": 0.023914417251944542, "learning_rate": 8.398615947566302e-07, "loss": 0.0018, "num_input_tokens_seen": 260038624, "step": 120565 }, { "epoch": 19.66884176182708, "grad_norm": 0.008039684034883976, "learning_rate": 8.357427510923832e-07, "loss": 0.0086, "num_input_tokens_seen": 260049344, "step": 120570 }, { "epoch": 19.669657422512234, "grad_norm": 0.002670666901394725, "learning_rate": 8.316340236249609e-07, "loss": 0.0414, "num_input_tokens_seen": 260060064, "step": 120575 }, { "epoch": 19.67047308319739, "grad_norm": 0.006534434389322996, "learning_rate": 8.275354124377965e-07, "loss": 0.0012, "num_input_tokens_seen": 260070624, "step": 120580 }, { "epoch": 19.671288743882545, "grad_norm": 0.13145004212856293, "learning_rate": 8.234469176138238e-07, "loss": 0.0056, "num_input_tokens_seen": 260080160, "step": 120585 }, { "epoch": 19.6721044045677, "grad_norm": 0.003174137556925416, "learning_rate": 8.193685392359762e-07, "loss": 0.0023, "num_input_tokens_seen": 260090400, "step": 120590 }, { "epoch": 19.672920065252853, "grad_norm": 0.16072995960712433, "learning_rate": 8.153002773868546e-07, "loss": 0.0859, "num_input_tokens_seen": 260101280, "step": 120595 }, { "epoch": 19.67373572593801, "grad_norm": 0.004255970008671284, "learning_rate": 8.112421321489483e-07, "loss": 0.0009, "num_input_tokens_seen": 260112256, "step": 120600 }, { "epoch": 19.674551386623165, "grad_norm": 0.005726212169975042, "learning_rate": 8.07194103604525e-07, "loss": 0.0037, "num_input_tokens_seen": 260122208, "step": 120605 }, { "epoch": 19.67536704730832, "grad_norm": 0.008446725085377693, "learning_rate": 8.03156191835519e-07, "loss": 0.0047, "num_input_tokens_seen": 260133792, "step": 120610 }, { "epoch": 19.676182707993476, "grad_norm": 0.029481690376996994, "learning_rate": 7.99128396923865e-07, "loss": 0.0032, "num_input_tokens_seen": 260143104, "step": 120615 }, { "epoch": 19.67699836867863, "grad_norm": 0.02431483566761017, "learning_rate": 7.951107189511641e-07, "loss": 0.0086, "num_input_tokens_seen": 260154464, "step": 120620 }, { "epoch": 19.677814029363784, "grad_norm": 0.015393667854368687, "learning_rate": 7.91103157998796e-07, "loss": 0.0055, "num_input_tokens_seen": 260164576, "step": 120625 }, { "epoch": 19.67862969004894, "grad_norm": 1.3296020030975342, "learning_rate": 7.871057141480287e-07, "loss": 0.0269, "num_input_tokens_seen": 260174592, "step": 120630 }, { "epoch": 19.679445350734095, "grad_norm": 0.005392282269895077, "learning_rate": 7.831183874798531e-07, "loss": 0.0016, "num_input_tokens_seen": 260184960, "step": 120635 }, { "epoch": 19.68026101141925, "grad_norm": 0.09615886956453323, "learning_rate": 7.791411780750935e-07, "loss": 0.003, "num_input_tokens_seen": 260195360, "step": 120640 }, { "epoch": 19.681076672104403, "grad_norm": 0.0007226698799058795, "learning_rate": 7.751740860143519e-07, "loss": 0.008, "num_input_tokens_seen": 260204768, "step": 120645 }, { "epoch": 19.68189233278956, "grad_norm": 0.0036334313917905092, "learning_rate": 7.712171113780086e-07, "loss": 0.0403, "num_input_tokens_seen": 260215520, "step": 120650 }, { "epoch": 19.682707993474715, "grad_norm": 0.013188259676098824, "learning_rate": 7.672702542462773e-07, "loss": 0.0014, "num_input_tokens_seen": 260227328, "step": 120655 }, { "epoch": 19.68352365415987, "grad_norm": 0.15168829262256622, "learning_rate": 7.633335146991493e-07, "loss": 0.0037, "num_input_tokens_seen": 260237696, "step": 120660 }, { "epoch": 19.684339314845026, "grad_norm": 0.004290735814720392, "learning_rate": 7.594068928163944e-07, "loss": 0.0037, "num_input_tokens_seen": 260248864, "step": 120665 }, { "epoch": 19.68515497553018, "grad_norm": 0.015271148644387722, "learning_rate": 7.554903886775599e-07, "loss": 0.0018, "num_input_tokens_seen": 260259872, "step": 120670 }, { "epoch": 19.685970636215334, "grad_norm": 0.047990549355745316, "learning_rate": 7.515840023620824e-07, "loss": 0.0144, "num_input_tokens_seen": 260269504, "step": 120675 }, { "epoch": 19.68678629690049, "grad_norm": 0.005070829764008522, "learning_rate": 7.476877339490651e-07, "loss": 0.0016, "num_input_tokens_seen": 260280384, "step": 120680 }, { "epoch": 19.687601957585645, "grad_norm": 0.015725646167993546, "learning_rate": 7.438015835175005e-07, "loss": 0.0028, "num_input_tokens_seen": 260292320, "step": 120685 }, { "epoch": 19.6884176182708, "grad_norm": 0.0013184587005525827, "learning_rate": 7.399255511461589e-07, "loss": 0.0048, "num_input_tokens_seen": 260302208, "step": 120690 }, { "epoch": 19.689233278955953, "grad_norm": 0.6393589377403259, "learning_rate": 7.360596369135886e-07, "loss": 0.027, "num_input_tokens_seen": 260312384, "step": 120695 }, { "epoch": 19.69004893964111, "grad_norm": 0.0006050775409676135, "learning_rate": 7.322038408981157e-07, "loss": 0.0012, "num_input_tokens_seen": 260322080, "step": 120700 }, { "epoch": 19.690864600326265, "grad_norm": 0.043836891651153564, "learning_rate": 7.283581631779002e-07, "loss": 0.0017, "num_input_tokens_seen": 260332992, "step": 120705 }, { "epoch": 19.69168026101142, "grad_norm": 0.01632705330848694, "learning_rate": 7.245226038308794e-07, "loss": 0.0044, "num_input_tokens_seen": 260343904, "step": 120710 }, { "epoch": 19.692495921696576, "grad_norm": 0.013385389000177383, "learning_rate": 7.206971629348246e-07, "loss": 0.001, "num_input_tokens_seen": 260354496, "step": 120715 }, { "epoch": 19.693311582381728, "grad_norm": 0.0019467597594484687, "learning_rate": 7.16881840567174e-07, "loss": 0.0014, "num_input_tokens_seen": 260364992, "step": 120720 }, { "epoch": 19.694127243066884, "grad_norm": 0.0036955641116946936, "learning_rate": 7.130766368053099e-07, "loss": 0.0049, "num_input_tokens_seen": 260375072, "step": 120725 }, { "epoch": 19.69494290375204, "grad_norm": 0.01463555358350277, "learning_rate": 7.092815517263373e-07, "loss": 0.0013, "num_input_tokens_seen": 260384928, "step": 120730 }, { "epoch": 19.695758564437195, "grad_norm": 0.0007540292572230101, "learning_rate": 7.054965854071948e-07, "loss": 0.0038, "num_input_tokens_seen": 260395008, "step": 120735 }, { "epoch": 19.696574225122347, "grad_norm": 0.009311623871326447, "learning_rate": 7.017217379245433e-07, "loss": 0.0028, "num_input_tokens_seen": 260405248, "step": 120740 }, { "epoch": 19.697389885807503, "grad_norm": 0.15400007367134094, "learning_rate": 6.979570093548771e-07, "loss": 0.0025, "num_input_tokens_seen": 260416192, "step": 120745 }, { "epoch": 19.69820554649266, "grad_norm": 0.19965264201164246, "learning_rate": 6.942023997745794e-07, "loss": 0.0062, "num_input_tokens_seen": 260427808, "step": 120750 }, { "epoch": 19.699021207177815, "grad_norm": 1.4906607866287231, "learning_rate": 6.904579092596452e-07, "loss": 0.0312, "num_input_tokens_seen": 260438432, "step": 120755 }, { "epoch": 19.69983686786297, "grad_norm": 0.018873140215873718, "learning_rate": 6.867235378860137e-07, "loss": 0.0028, "num_input_tokens_seen": 260449696, "step": 120760 }, { "epoch": 19.700652528548122, "grad_norm": 0.010376965627074242, "learning_rate": 6.829992857293465e-07, "loss": 0.0021, "num_input_tokens_seen": 260460576, "step": 120765 }, { "epoch": 19.701468189233278, "grad_norm": 0.0019120845245197415, "learning_rate": 6.792851528651389e-07, "loss": 0.0108, "num_input_tokens_seen": 260471296, "step": 120770 }, { "epoch": 19.702283849918434, "grad_norm": 0.004104107152670622, "learning_rate": 6.755811393686084e-07, "loss": 0.0021, "num_input_tokens_seen": 260481824, "step": 120775 }, { "epoch": 19.70309951060359, "grad_norm": 0.015224210917949677, "learning_rate": 6.718872453149172e-07, "loss": 0.001, "num_input_tokens_seen": 260491392, "step": 120780 }, { "epoch": 19.703915171288745, "grad_norm": 0.00626148097217083, "learning_rate": 6.682034707788386e-07, "loss": 0.001, "num_input_tokens_seen": 260502272, "step": 120785 }, { "epoch": 19.704730831973897, "grad_norm": 0.011242669075727463, "learning_rate": 6.645298158350909e-07, "loss": 0.0008, "num_input_tokens_seen": 260512256, "step": 120790 }, { "epoch": 19.705546492659053, "grad_norm": 0.0540161207318306, "learning_rate": 6.608662805580589e-07, "loss": 0.0024, "num_input_tokens_seen": 260524288, "step": 120795 }, { "epoch": 19.70636215334421, "grad_norm": 0.07905373722314835, "learning_rate": 6.572128650220721e-07, "loss": 0.0046, "num_input_tokens_seen": 260535328, "step": 120800 }, { "epoch": 19.707177814029365, "grad_norm": 0.0250291358679533, "learning_rate": 6.535695693011268e-07, "loss": 0.0022, "num_input_tokens_seen": 260545824, "step": 120805 }, { "epoch": 19.70799347471452, "grad_norm": 0.23714597523212433, "learning_rate": 6.499363934690528e-07, "loss": 0.0042, "num_input_tokens_seen": 260556096, "step": 120810 }, { "epoch": 19.708809135399672, "grad_norm": 0.01444271206855774, "learning_rate": 6.463133375994579e-07, "loss": 0.0036, "num_input_tokens_seen": 260567520, "step": 120815 }, { "epoch": 19.709624796084828, "grad_norm": 0.0026037017814815044, "learning_rate": 6.427004017658389e-07, "loss": 0.015, "num_input_tokens_seen": 260577792, "step": 120820 }, { "epoch": 19.710440456769984, "grad_norm": 0.010139694437384605, "learning_rate": 6.390975860413594e-07, "loss": 0.005, "num_input_tokens_seen": 260588608, "step": 120825 }, { "epoch": 19.71125611745514, "grad_norm": 0.0346621610224247, "learning_rate": 6.355048904990724e-07, "loss": 0.0023, "num_input_tokens_seen": 260598336, "step": 120830 }, { "epoch": 19.712071778140295, "grad_norm": 0.04312991723418236, "learning_rate": 6.319223152117526e-07, "loss": 0.0029, "num_input_tokens_seen": 260607776, "step": 120835 }, { "epoch": 19.712887438825447, "grad_norm": 0.0265484731644392, "learning_rate": 6.283498602520088e-07, "loss": 0.0014, "num_input_tokens_seen": 260618976, "step": 120840 }, { "epoch": 19.713703099510603, "grad_norm": 0.013334968127310276, "learning_rate": 6.247875256922275e-07, "loss": 0.0052, "num_input_tokens_seen": 260630048, "step": 120845 }, { "epoch": 19.71451876019576, "grad_norm": 0.0007019578479230404, "learning_rate": 6.212353116046843e-07, "loss": 0.0006, "num_input_tokens_seen": 260641088, "step": 120850 }, { "epoch": 19.715334420880914, "grad_norm": 0.011212490499019623, "learning_rate": 6.17693218061266e-07, "loss": 0.0014, "num_input_tokens_seen": 260652032, "step": 120855 }, { "epoch": 19.71615008156607, "grad_norm": 1.414024829864502, "learning_rate": 6.141612451338596e-07, "loss": 0.0671, "num_input_tokens_seen": 260662624, "step": 120860 }, { "epoch": 19.716965742251222, "grad_norm": 0.0022796974517405033, "learning_rate": 6.106393928939635e-07, "loss": 0.0019, "num_input_tokens_seen": 260673504, "step": 120865 }, { "epoch": 19.717781402936378, "grad_norm": 0.0063771712593734264, "learning_rate": 6.07127661412965e-07, "loss": 0.0012, "num_input_tokens_seen": 260683936, "step": 120870 }, { "epoch": 19.718597063621534, "grad_norm": 0.022156668826937675, "learning_rate": 6.036260507620849e-07, "loss": 0.0016, "num_input_tokens_seen": 260694784, "step": 120875 }, { "epoch": 19.71941272430669, "grad_norm": 0.0003603149962145835, "learning_rate": 6.001345610122111e-07, "loss": 0.0038, "num_input_tokens_seen": 260704960, "step": 120880 }, { "epoch": 19.72022838499184, "grad_norm": 0.039632465690374374, "learning_rate": 5.966531922341756e-07, "loss": 0.0022, "num_input_tokens_seen": 260717184, "step": 120885 }, { "epoch": 19.721044045676997, "grad_norm": 0.005430936347693205, "learning_rate": 5.931819444984777e-07, "loss": 0.0953, "num_input_tokens_seen": 260727776, "step": 120890 }, { "epoch": 19.721859706362153, "grad_norm": 0.008867492899298668, "learning_rate": 5.897208178755054e-07, "loss": 0.0054, "num_input_tokens_seen": 260737408, "step": 120895 }, { "epoch": 19.72267536704731, "grad_norm": 0.03744132071733475, "learning_rate": 5.862698124353694e-07, "loss": 0.002, "num_input_tokens_seen": 260746016, "step": 120900 }, { "epoch": 19.723491027732464, "grad_norm": 0.006908580660820007, "learning_rate": 5.828289282480692e-07, "loss": 0.0016, "num_input_tokens_seen": 260757312, "step": 120905 }, { "epoch": 19.724306688417617, "grad_norm": 0.2857799530029297, "learning_rate": 5.793981653832714e-07, "loss": 0.1291, "num_input_tokens_seen": 260768448, "step": 120910 }, { "epoch": 19.725122349102772, "grad_norm": 0.08347934484481812, "learning_rate": 5.759775239105314e-07, "loss": 0.006, "num_input_tokens_seen": 260779904, "step": 120915 }, { "epoch": 19.725938009787928, "grad_norm": 0.0005045973230153322, "learning_rate": 5.72567003899127e-07, "loss": 0.0007, "num_input_tokens_seen": 260791616, "step": 120920 }, { "epoch": 19.726753670473084, "grad_norm": 0.0032052635215222836, "learning_rate": 5.691666054182809e-07, "loss": 0.0017, "num_input_tokens_seen": 260802016, "step": 120925 }, { "epoch": 19.72756933115824, "grad_norm": 0.04603101313114166, "learning_rate": 5.657763285368267e-07, "loss": 0.0639, "num_input_tokens_seen": 260813088, "step": 120930 }, { "epoch": 19.72838499184339, "grad_norm": 0.0020692809484899044, "learning_rate": 5.623961733234873e-07, "loss": 0.0111, "num_input_tokens_seen": 260823840, "step": 120935 }, { "epoch": 19.729200652528547, "grad_norm": 0.025936348363757133, "learning_rate": 5.590261398467633e-07, "loss": 0.0026, "num_input_tokens_seen": 260834912, "step": 120940 }, { "epoch": 19.730016313213703, "grad_norm": 0.9913722276687622, "learning_rate": 5.556662281749891e-07, "loss": 0.0299, "num_input_tokens_seen": 260846400, "step": 120945 }, { "epoch": 19.73083197389886, "grad_norm": 0.017355140298604965, "learning_rate": 5.523164383762213e-07, "loss": 0.0014, "num_input_tokens_seen": 260858080, "step": 120950 }, { "epoch": 19.731647634584014, "grad_norm": 0.04152343422174454, "learning_rate": 5.489767705183501e-07, "loss": 0.0808, "num_input_tokens_seen": 260867744, "step": 120955 }, { "epoch": 19.732463295269167, "grad_norm": 0.06011166423559189, "learning_rate": 5.456472246690436e-07, "loss": 0.0117, "num_input_tokens_seen": 260878912, "step": 120960 }, { "epoch": 19.733278955954322, "grad_norm": 0.34195956587791443, "learning_rate": 5.423278008958032e-07, "loss": 0.0232, "num_input_tokens_seen": 260889888, "step": 120965 }, { "epoch": 19.734094616639478, "grad_norm": 0.30340972542762756, "learning_rate": 5.390184992659641e-07, "loss": 0.0049, "num_input_tokens_seen": 260900608, "step": 120970 }, { "epoch": 19.734910277324634, "grad_norm": 0.0027029630728065968, "learning_rate": 5.357193198464727e-07, "loss": 0.0012, "num_input_tokens_seen": 260911392, "step": 120975 }, { "epoch": 19.73572593800979, "grad_norm": 0.007337834686040878, "learning_rate": 5.324302627042199e-07, "loss": 0.0014, "num_input_tokens_seen": 260922048, "step": 120980 }, { "epoch": 19.73654159869494, "grad_norm": 0.029812682420015335, "learning_rate": 5.291513279059301e-07, "loss": 0.0282, "num_input_tokens_seen": 260933120, "step": 120985 }, { "epoch": 19.737357259380097, "grad_norm": 0.018728526309132576, "learning_rate": 5.258825155179948e-07, "loss": 0.0071, "num_input_tokens_seen": 260943040, "step": 120990 }, { "epoch": 19.738172920065253, "grad_norm": 0.00044426810927689075, "learning_rate": 5.226238256066384e-07, "loss": 0.0007, "num_input_tokens_seen": 260953600, "step": 120995 }, { "epoch": 19.73898858075041, "grad_norm": 0.02940189465880394, "learning_rate": 5.193752582379752e-07, "loss": 0.0026, "num_input_tokens_seen": 260963584, "step": 121000 }, { "epoch": 19.739804241435564, "grad_norm": 0.2353859543800354, "learning_rate": 5.16136813477841e-07, "loss": 0.0019, "num_input_tokens_seen": 260974752, "step": 121005 }, { "epoch": 19.740619902120716, "grad_norm": 0.02618386223912239, "learning_rate": 5.129084913917948e-07, "loss": 0.0012, "num_input_tokens_seen": 260984736, "step": 121010 }, { "epoch": 19.741435562805872, "grad_norm": 0.036600805819034576, "learning_rate": 5.096902920453395e-07, "loss": 0.0194, "num_input_tokens_seen": 260994432, "step": 121015 }, { "epoch": 19.742251223491028, "grad_norm": 0.012462926097214222, "learning_rate": 5.064822155036453e-07, "loss": 0.0955, "num_input_tokens_seen": 261005664, "step": 121020 }, { "epoch": 19.743066884176184, "grad_norm": 0.0015358245000243187, "learning_rate": 5.032842618317157e-07, "loss": 0.0861, "num_input_tokens_seen": 261016224, "step": 121025 }, { "epoch": 19.74388254486134, "grad_norm": 0.025581007823348045, "learning_rate": 5.000964310943878e-07, "loss": 0.0012, "num_input_tokens_seen": 261027936, "step": 121030 }, { "epoch": 19.74469820554649, "grad_norm": 0.21102921664714813, "learning_rate": 4.969187233562767e-07, "loss": 0.0068, "num_input_tokens_seen": 261037856, "step": 121035 }, { "epoch": 19.745513866231647, "grad_norm": 0.005003449507057667, "learning_rate": 4.937511386817751e-07, "loss": 0.023, "num_input_tokens_seen": 261049056, "step": 121040 }, { "epoch": 19.746329526916803, "grad_norm": 0.37298986315727234, "learning_rate": 4.905936771351094e-07, "loss": 0.0093, "num_input_tokens_seen": 261058624, "step": 121045 }, { "epoch": 19.74714518760196, "grad_norm": 0.003840500256046653, "learning_rate": 4.874463387801731e-07, "loss": 0.0025, "num_input_tokens_seen": 261069664, "step": 121050 }, { "epoch": 19.747960848287114, "grad_norm": 0.12661729753017426, "learning_rate": 4.843091236808594e-07, "loss": 0.0021, "num_input_tokens_seen": 261080544, "step": 121055 }, { "epoch": 19.748776508972266, "grad_norm": 0.0003908964281436056, "learning_rate": 4.811820319006732e-07, "loss": 0.0029, "num_input_tokens_seen": 261091296, "step": 121060 }, { "epoch": 19.749592169657422, "grad_norm": 0.001513474271632731, "learning_rate": 4.780650635030081e-07, "loss": 0.0133, "num_input_tokens_seen": 261102400, "step": 121065 }, { "epoch": 19.750407830342578, "grad_norm": 0.03624751418828964, "learning_rate": 4.7495821855109145e-07, "loss": 0.0032, "num_input_tokens_seen": 261112352, "step": 121070 }, { "epoch": 19.751223491027734, "grad_norm": 0.02521163783967495, "learning_rate": 4.718614971078172e-07, "loss": 0.0026, "num_input_tokens_seen": 261123936, "step": 121075 }, { "epoch": 19.752039151712886, "grad_norm": 0.0014399787178263068, "learning_rate": 4.6877489923596863e-07, "loss": 0.0024, "num_input_tokens_seen": 261135552, "step": 121080 }, { "epoch": 19.75285481239804, "grad_norm": 0.003047714475542307, "learning_rate": 4.6569842499805113e-07, "loss": 0.0013, "num_input_tokens_seen": 261146656, "step": 121085 }, { "epoch": 19.753670473083197, "grad_norm": 0.03192577138543129, "learning_rate": 4.626320744565149e-07, "loss": 0.001, "num_input_tokens_seen": 261157600, "step": 121090 }, { "epoch": 19.754486133768353, "grad_norm": 0.006391199771314859, "learning_rate": 4.5957584767342133e-07, "loss": 0.0008, "num_input_tokens_seen": 261168352, "step": 121095 }, { "epoch": 19.75530179445351, "grad_norm": 0.0015015223762020469, "learning_rate": 4.5652974471077637e-07, "loss": 0.0011, "num_input_tokens_seen": 261179008, "step": 121100 }, { "epoch": 19.75611745513866, "grad_norm": 0.028746524825692177, "learning_rate": 4.534937656301974e-07, "loss": 0.0036, "num_input_tokens_seen": 261189568, "step": 121105 }, { "epoch": 19.756933115823816, "grad_norm": 0.0015820281114429235, "learning_rate": 4.5046791049335733e-07, "loss": 0.0018, "num_input_tokens_seen": 261199936, "step": 121110 }, { "epoch": 19.757748776508972, "grad_norm": 0.010042371228337288, "learning_rate": 4.47452179361485e-07, "loss": 0.0018, "num_input_tokens_seen": 261210912, "step": 121115 }, { "epoch": 19.758564437194128, "grad_norm": 0.005510643590241671, "learning_rate": 4.444465722956981e-07, "loss": 0.0016, "num_input_tokens_seen": 261221408, "step": 121120 }, { "epoch": 19.759380097879284, "grad_norm": 0.009353739209473133, "learning_rate": 4.414510893569479e-07, "loss": 0.0062, "num_input_tokens_seen": 261232384, "step": 121125 }, { "epoch": 19.760195758564436, "grad_norm": 0.004087650217115879, "learning_rate": 4.384657306059636e-07, "loss": 0.0152, "num_input_tokens_seen": 261242400, "step": 121130 }, { "epoch": 19.76101141924959, "grad_norm": 0.010991072282195091, "learning_rate": 4.354904961031414e-07, "loss": 0.0194, "num_input_tokens_seen": 261253216, "step": 121135 }, { "epoch": 19.761827079934747, "grad_norm": 0.000554571277461946, "learning_rate": 4.3252538590893285e-07, "loss": 0.0064, "num_input_tokens_seen": 261263616, "step": 121140 }, { "epoch": 19.762642740619903, "grad_norm": 0.004685058258473873, "learning_rate": 4.2957040008323456e-07, "loss": 0.0055, "num_input_tokens_seen": 261275008, "step": 121145 }, { "epoch": 19.76345840130506, "grad_norm": 0.05229879915714264, "learning_rate": 4.266255386861095e-07, "loss": 0.0024, "num_input_tokens_seen": 261284288, "step": 121150 }, { "epoch": 19.76427406199021, "grad_norm": 0.0012258621864020824, "learning_rate": 4.2369080177717676e-07, "loss": 0.0031, "num_input_tokens_seen": 261296192, "step": 121155 }, { "epoch": 19.765089722675366, "grad_norm": 0.16929937899112701, "learning_rate": 4.2076618941588875e-07, "loss": 0.0062, "num_input_tokens_seen": 261306752, "step": 121160 }, { "epoch": 19.765905383360522, "grad_norm": 0.014362063258886337, "learning_rate": 4.178517016615313e-07, "loss": 0.0022, "num_input_tokens_seen": 261317568, "step": 121165 }, { "epoch": 19.766721044045678, "grad_norm": 0.043691907078027725, "learning_rate": 4.1494733857322385e-07, "loss": 0.003, "num_input_tokens_seen": 261329568, "step": 121170 }, { "epoch": 19.767536704730833, "grad_norm": 0.02410140633583069, "learning_rate": 4.120531002096972e-07, "loss": 0.0016, "num_input_tokens_seen": 261340960, "step": 121175 }, { "epoch": 19.768352365415986, "grad_norm": 0.00330495391972363, "learning_rate": 4.091689866297377e-07, "loss": 0.0012, "num_input_tokens_seen": 261351136, "step": 121180 }, { "epoch": 19.76916802610114, "grad_norm": 0.019685426726937294, "learning_rate": 4.0629499789174293e-07, "loss": 0.0028, "num_input_tokens_seen": 261361184, "step": 121185 }, { "epoch": 19.769983686786297, "grad_norm": 0.013756122440099716, "learning_rate": 4.034311340539443e-07, "loss": 0.0037, "num_input_tokens_seen": 261372704, "step": 121190 }, { "epoch": 19.770799347471453, "grad_norm": 0.013881102204322815, "learning_rate": 4.005773951744063e-07, "loss": 0.0048, "num_input_tokens_seen": 261384544, "step": 121195 }, { "epoch": 19.77161500815661, "grad_norm": 0.00964262057095766, "learning_rate": 3.977337813109716e-07, "loss": 0.0015, "num_input_tokens_seen": 261395584, "step": 121200 }, { "epoch": 19.77243066884176, "grad_norm": 0.3615402281284332, "learning_rate": 3.949002925212053e-07, "loss": 0.0027, "num_input_tokens_seen": 261406176, "step": 121205 }, { "epoch": 19.773246329526916, "grad_norm": 0.6459508538246155, "learning_rate": 3.920769288626169e-07, "loss": 0.0163, "num_input_tokens_seen": 261416896, "step": 121210 }, { "epoch": 19.774061990212072, "grad_norm": 0.029573000967502594, "learning_rate": 3.8926369039238295e-07, "loss": 0.0218, "num_input_tokens_seen": 261427968, "step": 121215 }, { "epoch": 19.774877650897228, "grad_norm": 0.01698378287255764, "learning_rate": 3.864605771675134e-07, "loss": 0.0045, "num_input_tokens_seen": 261437696, "step": 121220 }, { "epoch": 19.775693311582383, "grad_norm": 0.002987517509609461, "learning_rate": 3.8366758924479605e-07, "loss": 0.0019, "num_input_tokens_seen": 261448448, "step": 121225 }, { "epoch": 19.776508972267536, "grad_norm": 0.007215553428977728, "learning_rate": 3.808847266809079e-07, "loss": 0.0012, "num_input_tokens_seen": 261457504, "step": 121230 }, { "epoch": 19.77732463295269, "grad_norm": 0.003157370025292039, "learning_rate": 3.781119895321927e-07, "loss": 0.0023, "num_input_tokens_seen": 261468032, "step": 121235 }, { "epoch": 19.778140293637847, "grad_norm": 0.10424255579710007, "learning_rate": 3.753493778548278e-07, "loss": 0.0403, "num_input_tokens_seen": 261479072, "step": 121240 }, { "epoch": 19.778955954323003, "grad_norm": 0.04892542585730553, "learning_rate": 3.725968917048794e-07, "loss": 0.0234, "num_input_tokens_seen": 261488992, "step": 121245 }, { "epoch": 19.77977161500816, "grad_norm": 0.3947793245315552, "learning_rate": 3.6985453113802525e-07, "loss": 0.0088, "num_input_tokens_seen": 261499200, "step": 121250 }, { "epoch": 19.78058727569331, "grad_norm": 0.003126331139355898, "learning_rate": 3.6712229620988744e-07, "loss": 0.0034, "num_input_tokens_seen": 261510688, "step": 121255 }, { "epoch": 19.781402936378466, "grad_norm": 0.3529804050922394, "learning_rate": 3.644001869758662e-07, "loss": 0.0692, "num_input_tokens_seen": 261522368, "step": 121260 }, { "epoch": 19.782218597063622, "grad_norm": 0.00854413490742445, "learning_rate": 3.616882034911395e-07, "loss": 0.003, "num_input_tokens_seen": 261532672, "step": 121265 }, { "epoch": 19.783034257748778, "grad_norm": 0.010441114194691181, "learning_rate": 3.58986345810608e-07, "loss": 0.001, "num_input_tokens_seen": 261542144, "step": 121270 }, { "epoch": 19.78384991843393, "grad_norm": 0.0032706893980503082, "learning_rate": 3.56294613989061e-07, "loss": 0.0069, "num_input_tokens_seen": 261552064, "step": 121275 }, { "epoch": 19.784665579119086, "grad_norm": 0.02453339472413063, "learning_rate": 3.5361300808106625e-07, "loss": 0.0097, "num_input_tokens_seen": 261563808, "step": 121280 }, { "epoch": 19.78548123980424, "grad_norm": 0.01782161369919777, "learning_rate": 3.509415281409134e-07, "loss": 0.0019, "num_input_tokens_seen": 261574272, "step": 121285 }, { "epoch": 19.786296900489397, "grad_norm": 0.03428277745842934, "learning_rate": 3.4828017422278146e-07, "loss": 0.0011, "num_input_tokens_seen": 261586144, "step": 121290 }, { "epoch": 19.787112561174553, "grad_norm": 0.02177782729268074, "learning_rate": 3.4562894638062727e-07, "loss": 0.0073, "num_input_tokens_seen": 261596800, "step": 121295 }, { "epoch": 19.787928221859705, "grad_norm": 0.0021619328763335943, "learning_rate": 3.4298784466818553e-07, "loss": 0.0044, "num_input_tokens_seen": 261606944, "step": 121300 }, { "epoch": 19.78874388254486, "grad_norm": 0.2849423587322235, "learning_rate": 3.403568691389136e-07, "loss": 0.0032, "num_input_tokens_seen": 261617376, "step": 121305 }, { "epoch": 19.789559543230016, "grad_norm": 0.006861614529043436, "learning_rate": 3.3773601984615766e-07, "loss": 0.0215, "num_input_tokens_seen": 261626368, "step": 121310 }, { "epoch": 19.790375203915172, "grad_norm": 0.0016499334014952183, "learning_rate": 3.3512529684309736e-07, "loss": 0.0007, "num_input_tokens_seen": 261637760, "step": 121315 }, { "epoch": 19.791190864600328, "grad_norm": 0.0071779475547373295, "learning_rate": 3.325247001825793e-07, "loss": 0.0011, "num_input_tokens_seen": 261648448, "step": 121320 }, { "epoch": 19.79200652528548, "grad_norm": 0.02867729216814041, "learning_rate": 3.299342299172836e-07, "loss": 0.004, "num_input_tokens_seen": 261658528, "step": 121325 }, { "epoch": 19.792822185970635, "grad_norm": 0.15259157121181488, "learning_rate": 3.2735388609977936e-07, "loss": 0.0021, "num_input_tokens_seen": 261669824, "step": 121330 }, { "epoch": 19.79363784665579, "grad_norm": 0.06815283000469208, "learning_rate": 3.24783668782358e-07, "loss": 0.013, "num_input_tokens_seen": 261679424, "step": 121335 }, { "epoch": 19.794453507340947, "grad_norm": 0.010111456736922264, "learning_rate": 3.222235780170335e-07, "loss": 0.0035, "num_input_tokens_seen": 261690432, "step": 121340 }, { "epoch": 19.795269168026103, "grad_norm": 0.04070668667554855, "learning_rate": 3.196736138557088e-07, "loss": 0.0031, "num_input_tokens_seen": 261700864, "step": 121345 }, { "epoch": 19.796084828711255, "grad_norm": 0.0014786164974793792, "learning_rate": 3.171337763501203e-07, "loss": 0.0095, "num_input_tokens_seen": 261712224, "step": 121350 }, { "epoch": 19.79690048939641, "grad_norm": 0.13004648685455322, "learning_rate": 3.146040655517268e-07, "loss": 0.0069, "num_input_tokens_seen": 261722944, "step": 121355 }, { "epoch": 19.797716150081566, "grad_norm": 0.010681371204555035, "learning_rate": 3.1208448151176516e-07, "loss": 0.0506, "num_input_tokens_seen": 261733728, "step": 121360 }, { "epoch": 19.798531810766722, "grad_norm": 0.014549147337675095, "learning_rate": 3.0957502428130557e-07, "loss": 0.0009, "num_input_tokens_seen": 261744352, "step": 121365 }, { "epoch": 19.799347471451878, "grad_norm": 0.025220857933163643, "learning_rate": 3.070756939111963e-07, "loss": 0.023, "num_input_tokens_seen": 261755264, "step": 121370 }, { "epoch": 19.80016313213703, "grad_norm": 0.04759431257843971, "learning_rate": 3.0458649045211895e-07, "loss": 0.0053, "num_input_tokens_seen": 261766144, "step": 121375 }, { "epoch": 19.800978792822185, "grad_norm": 0.010843619704246521, "learning_rate": 3.021074139545332e-07, "loss": 0.0058, "num_input_tokens_seen": 261777600, "step": 121380 }, { "epoch": 19.80179445350734, "grad_norm": 0.053491175174713135, "learning_rate": 2.996384644686212e-07, "loss": 0.002, "num_input_tokens_seen": 261788576, "step": 121385 }, { "epoch": 19.802610114192497, "grad_norm": 0.0494471937417984, "learning_rate": 2.971796420444539e-07, "loss": 0.0046, "num_input_tokens_seen": 261799584, "step": 121390 }, { "epoch": 19.803425774877653, "grad_norm": 0.06317485123872757, "learning_rate": 2.947309467318804e-07, "loss": 0.0076, "num_input_tokens_seen": 261809856, "step": 121395 }, { "epoch": 19.804241435562805, "grad_norm": 0.05437513440847397, "learning_rate": 2.922923785804721e-07, "loss": 0.0024, "num_input_tokens_seen": 261821024, "step": 121400 }, { "epoch": 19.80505709624796, "grad_norm": 0.018974682316184044, "learning_rate": 2.898639376396894e-07, "loss": 0.0014, "num_input_tokens_seen": 261831520, "step": 121405 }, { "epoch": 19.805872756933116, "grad_norm": 0.04071285203099251, "learning_rate": 2.8744562395877083e-07, "loss": 0.0019, "num_input_tokens_seen": 261841888, "step": 121410 }, { "epoch": 19.806688417618272, "grad_norm": 0.018046526238322258, "learning_rate": 2.850374375866216e-07, "loss": 0.0041, "num_input_tokens_seen": 261850912, "step": 121415 }, { "epoch": 19.807504078303424, "grad_norm": 0.0029810962732881308, "learning_rate": 2.826393785722026e-07, "loss": 0.0004, "num_input_tokens_seen": 261861184, "step": 121420 }, { "epoch": 19.80831973898858, "grad_norm": 0.9538451433181763, "learning_rate": 2.80251446963975e-07, "loss": 0.0721, "num_input_tokens_seen": 261872064, "step": 121425 }, { "epoch": 19.809135399673735, "grad_norm": 0.2573718726634979, "learning_rate": 2.778736428104556e-07, "loss": 0.0025, "num_input_tokens_seen": 261883360, "step": 121430 }, { "epoch": 19.80995106035889, "grad_norm": 0.0019013527780771255, "learning_rate": 2.75505966159717e-07, "loss": 0.0403, "num_input_tokens_seen": 261894112, "step": 121435 }, { "epoch": 19.810766721044047, "grad_norm": 0.005280273500829935, "learning_rate": 2.73148417059832e-07, "loss": 0.0132, "num_input_tokens_seen": 261905824, "step": 121440 }, { "epoch": 19.8115823817292, "grad_norm": 0.004498373717069626, "learning_rate": 2.708009955584845e-07, "loss": 0.0017, "num_input_tokens_seen": 261915872, "step": 121445 }, { "epoch": 19.812398042414355, "grad_norm": 0.031845368444919586, "learning_rate": 2.684637017033587e-07, "loss": 0.0017, "num_input_tokens_seen": 261927424, "step": 121450 }, { "epoch": 19.81321370309951, "grad_norm": 0.00351448031142354, "learning_rate": 2.6613653554175e-07, "loss": 0.0026, "num_input_tokens_seen": 261938112, "step": 121455 }, { "epoch": 19.814029363784666, "grad_norm": 0.0006551856058649719, "learning_rate": 2.6381949712089846e-07, "loss": 0.0064, "num_input_tokens_seen": 261949248, "step": 121460 }, { "epoch": 19.81484502446982, "grad_norm": 0.023522011935710907, "learning_rate": 2.6151258648765553e-07, "loss": 0.0199, "num_input_tokens_seen": 261960672, "step": 121465 }, { "epoch": 19.815660685154974, "grad_norm": 0.36208876967430115, "learning_rate": 2.59215803688817e-07, "loss": 0.0107, "num_input_tokens_seen": 261971584, "step": 121470 }, { "epoch": 19.81647634584013, "grad_norm": 0.3518022298812866, "learning_rate": 2.5692914877090135e-07, "loss": 0.0295, "num_input_tokens_seen": 261982144, "step": 121475 }, { "epoch": 19.817292006525285, "grad_norm": 0.001174090662971139, "learning_rate": 2.546526217803713e-07, "loss": 0.0013, "num_input_tokens_seen": 261992960, "step": 121480 }, { "epoch": 19.81810766721044, "grad_norm": 0.006657520774751902, "learning_rate": 2.5238622276319014e-07, "loss": 0.001, "num_input_tokens_seen": 262004160, "step": 121485 }, { "epoch": 19.818923327895597, "grad_norm": 0.0015887313056737185, "learning_rate": 2.501299517654321e-07, "loss": 0.0041, "num_input_tokens_seen": 262016672, "step": 121490 }, { "epoch": 19.81973898858075, "grad_norm": 0.05156610533595085, "learning_rate": 2.4788380883278285e-07, "loss": 0.0022, "num_input_tokens_seen": 262027936, "step": 121495 }, { "epoch": 19.820554649265905, "grad_norm": 0.06277922540903091, "learning_rate": 2.4564779401070604e-07, "loss": 0.0037, "num_input_tokens_seen": 262038432, "step": 121500 }, { "epoch": 19.82137030995106, "grad_norm": 0.015932805836200714, "learning_rate": 2.434219073445543e-07, "loss": 0.0023, "num_input_tokens_seen": 262049440, "step": 121505 }, { "epoch": 19.822185970636216, "grad_norm": 0.0048410226590931416, "learning_rate": 2.412061488795136e-07, "loss": 0.0032, "num_input_tokens_seen": 262059616, "step": 121510 }, { "epoch": 19.82300163132137, "grad_norm": 0.005701358895748854, "learning_rate": 2.390005186603261e-07, "loss": 0.0013, "num_input_tokens_seen": 262069984, "step": 121515 }, { "epoch": 19.823817292006524, "grad_norm": 0.004888768773525953, "learning_rate": 2.3680501673184474e-07, "loss": 0.0015, "num_input_tokens_seen": 262081632, "step": 121520 }, { "epoch": 19.82463295269168, "grad_norm": 0.002362074563279748, "learning_rate": 2.346196431384784e-07, "loss": 0.0099, "num_input_tokens_seen": 262091552, "step": 121525 }, { "epoch": 19.825448613376835, "grad_norm": 0.07683160156011581, "learning_rate": 2.324443979245805e-07, "loss": 0.039, "num_input_tokens_seen": 262102560, "step": 121530 }, { "epoch": 19.82626427406199, "grad_norm": 0.016223277896642685, "learning_rate": 2.302792811341714e-07, "loss": 0.0703, "num_input_tokens_seen": 262113504, "step": 121535 }, { "epoch": 19.827079934747147, "grad_norm": 1.0312734842300415, "learning_rate": 2.2812429281116043e-07, "loss": 0.3124, "num_input_tokens_seen": 262122976, "step": 121540 }, { "epoch": 19.8278955954323, "grad_norm": 0.0013086225371807814, "learning_rate": 2.2597943299923484e-07, "loss": 0.004, "num_input_tokens_seen": 262133280, "step": 121545 }, { "epoch": 19.828711256117455, "grad_norm": 0.010852545499801636, "learning_rate": 2.2384470174180438e-07, "loss": 0.0021, "num_input_tokens_seen": 262143360, "step": 121550 }, { "epoch": 19.82952691680261, "grad_norm": 0.31105905771255493, "learning_rate": 2.2172009908216772e-07, "loss": 0.0128, "num_input_tokens_seen": 262153312, "step": 121555 }, { "epoch": 19.830342577487766, "grad_norm": 0.009611970745027065, "learning_rate": 2.1960562506340153e-07, "loss": 0.0012, "num_input_tokens_seen": 262163392, "step": 121560 }, { "epoch": 19.83115823817292, "grad_norm": 0.001185477478429675, "learning_rate": 2.1750127972836042e-07, "loss": 0.0016, "num_input_tokens_seen": 262174240, "step": 121565 }, { "epoch": 19.831973898858074, "grad_norm": 0.013203839771449566, "learning_rate": 2.1540706311967695e-07, "loss": 0.0024, "num_input_tokens_seen": 262185056, "step": 121570 }, { "epoch": 19.83278955954323, "grad_norm": 0.017446128651499748, "learning_rate": 2.1332297527976164e-07, "loss": 0.1124, "num_input_tokens_seen": 262195040, "step": 121575 }, { "epoch": 19.833605220228385, "grad_norm": 0.333935022354126, "learning_rate": 2.1124901625091397e-07, "loss": 0.0952, "num_input_tokens_seen": 262205728, "step": 121580 }, { "epoch": 19.83442088091354, "grad_norm": 0.033888496458530426, "learning_rate": 2.091851860751004e-07, "loss": 0.0014, "num_input_tokens_seen": 262216128, "step": 121585 }, { "epoch": 19.835236541598697, "grad_norm": 0.03068959154188633, "learning_rate": 2.071314847941763e-07, "loss": 0.0605, "num_input_tokens_seen": 262224864, "step": 121590 }, { "epoch": 19.83605220228385, "grad_norm": 0.20473067462444305, "learning_rate": 2.050879124498306e-07, "loss": 0.0082, "num_input_tokens_seen": 262236704, "step": 121595 }, { "epoch": 19.836867862969005, "grad_norm": 0.028197892010211945, "learning_rate": 2.0305446908336355e-07, "loss": 0.0024, "num_input_tokens_seen": 262247616, "step": 121600 }, { "epoch": 19.83768352365416, "grad_norm": 0.005834558978676796, "learning_rate": 2.0103115473601996e-07, "loss": 0.0759, "num_input_tokens_seen": 262258944, "step": 121605 }, { "epoch": 19.838499184339316, "grad_norm": 0.0029575175140053034, "learning_rate": 1.9901796944882254e-07, "loss": 0.0021, "num_input_tokens_seen": 262268992, "step": 121610 }, { "epoch": 19.839314845024468, "grad_norm": 0.001949261873960495, "learning_rate": 1.9701491326257203e-07, "loss": 0.0056, "num_input_tokens_seen": 262280320, "step": 121615 }, { "epoch": 19.840130505709624, "grad_norm": 0.8309627771377563, "learning_rate": 1.9502198621790257e-07, "loss": 0.027, "num_input_tokens_seen": 262290368, "step": 121620 }, { "epoch": 19.84094616639478, "grad_norm": 0.0017391116125509143, "learning_rate": 1.9303918835511526e-07, "loss": 0.0021, "num_input_tokens_seen": 262301856, "step": 121625 }, { "epoch": 19.841761827079935, "grad_norm": 0.044609375298023224, "learning_rate": 1.9106651971445564e-07, "loss": 0.0159, "num_input_tokens_seen": 262313536, "step": 121630 }, { "epoch": 19.84257748776509, "grad_norm": 0.005649510305374861, "learning_rate": 1.8910398033589182e-07, "loss": 0.0027, "num_input_tokens_seen": 262324032, "step": 121635 }, { "epoch": 19.843393148450243, "grad_norm": 0.014277572743594646, "learning_rate": 1.8715157025916972e-07, "loss": 0.0043, "num_input_tokens_seen": 262334624, "step": 121640 }, { "epoch": 19.8442088091354, "grad_norm": 0.0010839889291673899, "learning_rate": 1.8520928952386885e-07, "loss": 0.0021, "num_input_tokens_seen": 262345568, "step": 121645 }, { "epoch": 19.845024469820554, "grad_norm": 0.0007540498045273125, "learning_rate": 1.8327713816940207e-07, "loss": 0.0019, "num_input_tokens_seen": 262356896, "step": 121650 }, { "epoch": 19.84584013050571, "grad_norm": 0.0016608246369287372, "learning_rate": 1.8135511623484925e-07, "loss": 0.0013, "num_input_tokens_seen": 262368672, "step": 121655 }, { "epoch": 19.846655791190866, "grad_norm": 0.01017380878329277, "learning_rate": 1.7944322375923472e-07, "loss": 0.0007, "num_input_tokens_seen": 262379360, "step": 121660 }, { "epoch": 19.847471451876018, "grad_norm": 0.02084580436348915, "learning_rate": 1.7754146078124976e-07, "loss": 0.0542, "num_input_tokens_seen": 262390656, "step": 121665 }, { "epoch": 19.848287112561174, "grad_norm": 0.6725381016731262, "learning_rate": 1.7564982733947465e-07, "loss": 0.0256, "num_input_tokens_seen": 262401216, "step": 121670 }, { "epoch": 19.84910277324633, "grad_norm": 0.051715295761823654, "learning_rate": 1.7376832347221206e-07, "loss": 0.003, "num_input_tokens_seen": 262410208, "step": 121675 }, { "epoch": 19.849918433931485, "grad_norm": 0.03860587254166603, "learning_rate": 1.7189694921759813e-07, "loss": 0.0014, "num_input_tokens_seen": 262421376, "step": 121680 }, { "epoch": 19.85073409461664, "grad_norm": 0.018002664670348167, "learning_rate": 1.700357046136025e-07, "loss": 0.0082, "num_input_tokens_seen": 262432800, "step": 121685 }, { "epoch": 19.851549755301793, "grad_norm": 0.0018832444911822677, "learning_rate": 1.6818458969786177e-07, "loss": 0.0027, "num_input_tokens_seen": 262443712, "step": 121690 }, { "epoch": 19.85236541598695, "grad_norm": 0.004878541454672813, "learning_rate": 1.6634360450795694e-07, "loss": 0.0571, "num_input_tokens_seen": 262454048, "step": 121695 }, { "epoch": 19.853181076672104, "grad_norm": 0.08861984312534332, "learning_rate": 1.6451274908124703e-07, "loss": 0.0053, "num_input_tokens_seen": 262463520, "step": 121700 }, { "epoch": 19.85399673735726, "grad_norm": 0.005101449321955442, "learning_rate": 1.6269202345470247e-07, "loss": 0.0038, "num_input_tokens_seen": 262474208, "step": 121705 }, { "epoch": 19.854812398042416, "grad_norm": 0.030919499695301056, "learning_rate": 1.6088142766529367e-07, "loss": 0.0019, "num_input_tokens_seen": 262484896, "step": 121710 }, { "epoch": 19.855628058727568, "grad_norm": 0.008494377136230469, "learning_rate": 1.5908096174976904e-07, "loss": 0.0044, "num_input_tokens_seen": 262496096, "step": 121715 }, { "epoch": 19.856443719412724, "grad_norm": 0.0010764634935185313, "learning_rate": 1.5729062574448838e-07, "loss": 0.0013, "num_input_tokens_seen": 262506304, "step": 121720 }, { "epoch": 19.85725938009788, "grad_norm": 1.449510097503662, "learning_rate": 1.55510419685867e-07, "loss": 0.0931, "num_input_tokens_seen": 262516928, "step": 121725 }, { "epoch": 19.858075040783035, "grad_norm": 0.007794349454343319, "learning_rate": 1.5374034360993162e-07, "loss": 0.0027, "num_input_tokens_seen": 262527712, "step": 121730 }, { "epoch": 19.85889070146819, "grad_norm": 0.028376556932926178, "learning_rate": 1.5198039755248693e-07, "loss": 0.0012, "num_input_tokens_seen": 262539456, "step": 121735 }, { "epoch": 19.859706362153343, "grad_norm": 0.0034696843940764666, "learning_rate": 1.5023058154928216e-07, "loss": 0.0009, "num_input_tokens_seen": 262549024, "step": 121740 }, { "epoch": 19.8605220228385, "grad_norm": 0.003985355142503977, "learning_rate": 1.4849089563578888e-07, "loss": 0.0012, "num_input_tokens_seen": 262562048, "step": 121745 }, { "epoch": 19.861337683523654, "grad_norm": 0.005441877990961075, "learning_rate": 1.467613398472567e-07, "loss": 0.0006, "num_input_tokens_seen": 262573536, "step": 121750 }, { "epoch": 19.86215334420881, "grad_norm": 0.0005511092022061348, "learning_rate": 1.4504191421865765e-07, "loss": 0.0007, "num_input_tokens_seen": 262584288, "step": 121755 }, { "epoch": 19.862969004893966, "grad_norm": 0.04745354503393173, "learning_rate": 1.433326187849082e-07, "loss": 0.0019, "num_input_tokens_seen": 262595520, "step": 121760 }, { "epoch": 19.863784665579118, "grad_norm": 0.200071781873703, "learning_rate": 1.416334535806474e-07, "loss": 0.0055, "num_input_tokens_seen": 262603040, "step": 121765 }, { "epoch": 19.864600326264274, "grad_norm": 0.055561237037181854, "learning_rate": 1.3994441864029206e-07, "loss": 0.0022, "num_input_tokens_seen": 262614272, "step": 121770 }, { "epoch": 19.86541598694943, "grad_norm": 0.024368317797780037, "learning_rate": 1.3826551399809263e-07, "loss": 0.0019, "num_input_tokens_seen": 262625568, "step": 121775 }, { "epoch": 19.866231647634585, "grad_norm": 0.02616938017308712, "learning_rate": 1.3659673968802188e-07, "loss": 0.0027, "num_input_tokens_seen": 262635968, "step": 121780 }, { "epoch": 19.86704730831974, "grad_norm": 0.0171094611287117, "learning_rate": 1.3493809574399717e-07, "loss": 0.0013, "num_input_tokens_seen": 262645536, "step": 121785 }, { "epoch": 19.867862969004893, "grad_norm": 0.005669131875038147, "learning_rate": 1.3328958219954724e-07, "loss": 0.0022, "num_input_tokens_seen": 262657056, "step": 121790 }, { "epoch": 19.86867862969005, "grad_norm": 0.02975994534790516, "learning_rate": 1.3165119908808976e-07, "loss": 0.0035, "num_input_tokens_seen": 262667488, "step": 121795 }, { "epoch": 19.869494290375204, "grad_norm": 0.0011658492730930448, "learning_rate": 1.3002294644287593e-07, "loss": 0.0041, "num_input_tokens_seen": 262677024, "step": 121800 }, { "epoch": 19.87030995106036, "grad_norm": 0.004693070892244577, "learning_rate": 1.284048242968794e-07, "loss": 0.0015, "num_input_tokens_seen": 262687520, "step": 121805 }, { "epoch": 19.871125611745512, "grad_norm": 0.033216074109077454, "learning_rate": 1.267968326829072e-07, "loss": 0.0037, "num_input_tokens_seen": 262699104, "step": 121810 }, { "epoch": 19.871941272430668, "grad_norm": 0.004054656717926264, "learning_rate": 1.2519897163348894e-07, "loss": 0.0019, "num_input_tokens_seen": 262709248, "step": 121815 }, { "epoch": 19.872756933115824, "grad_norm": 0.03737641125917435, "learning_rate": 1.2361124118109856e-07, "loss": 0.0021, "num_input_tokens_seen": 262720320, "step": 121820 }, { "epoch": 19.87357259380098, "grad_norm": 0.05278816074132919, "learning_rate": 1.220336413578216e-07, "loss": 0.0024, "num_input_tokens_seen": 262731552, "step": 121825 }, { "epoch": 19.874388254486135, "grad_norm": 0.005034307949244976, "learning_rate": 1.204661721956879e-07, "loss": 0.0007, "num_input_tokens_seen": 262742912, "step": 121830 }, { "epoch": 19.875203915171287, "grad_norm": 0.09388431161642075, "learning_rate": 1.1890883372644989e-07, "loss": 0.0047, "num_input_tokens_seen": 262755328, "step": 121835 }, { "epoch": 19.876019575856443, "grad_norm": 0.029901105910539627, "learning_rate": 1.1736162598163791e-07, "loss": 0.0087, "num_input_tokens_seen": 262765888, "step": 121840 }, { "epoch": 19.8768352365416, "grad_norm": 0.002862847177311778, "learning_rate": 1.1582454899267126e-07, "loss": 0.0008, "num_input_tokens_seen": 262775104, "step": 121845 }, { "epoch": 19.877650897226754, "grad_norm": 0.40741100907325745, "learning_rate": 1.1429760279069168e-07, "loss": 0.0137, "num_input_tokens_seen": 262786208, "step": 121850 }, { "epoch": 19.87846655791191, "grad_norm": 0.001347400713711977, "learning_rate": 1.1278078740656339e-07, "loss": 0.0048, "num_input_tokens_seen": 262795904, "step": 121855 }, { "epoch": 19.879282218597062, "grad_norm": 0.0035142269916832447, "learning_rate": 1.1127410287115059e-07, "loss": 0.0021, "num_input_tokens_seen": 262806560, "step": 121860 }, { "epoch": 19.880097879282218, "grad_norm": 0.02602490782737732, "learning_rate": 1.0977754921487337e-07, "loss": 0.0464, "num_input_tokens_seen": 262816928, "step": 121865 }, { "epoch": 19.880913539967374, "grad_norm": 0.0005313414731062949, "learning_rate": 1.0829112646809635e-07, "loss": 0.0045, "num_input_tokens_seen": 262827968, "step": 121870 }, { "epoch": 19.88172920065253, "grad_norm": 0.0014291881816461682, "learning_rate": 1.068148346610176e-07, "loss": 0.0016, "num_input_tokens_seen": 262839808, "step": 121875 }, { "epoch": 19.882544861337685, "grad_norm": 0.09791675209999084, "learning_rate": 1.0534867382344659e-07, "loss": 0.0037, "num_input_tokens_seen": 262851520, "step": 121880 }, { "epoch": 19.883360522022837, "grad_norm": 0.04391149803996086, "learning_rate": 1.0389264398519283e-07, "loss": 0.002, "num_input_tokens_seen": 262862720, "step": 121885 }, { "epoch": 19.884176182707993, "grad_norm": 0.004033942706882954, "learning_rate": 1.024467451756772e-07, "loss": 0.0051, "num_input_tokens_seen": 262874400, "step": 121890 }, { "epoch": 19.88499184339315, "grad_norm": 0.022085608914494514, "learning_rate": 1.0101097742426513e-07, "loss": 0.001, "num_input_tokens_seen": 262885344, "step": 121895 }, { "epoch": 19.885807504078304, "grad_norm": 0.0011721554910764098, "learning_rate": 9.958534075998893e-08, "loss": 0.0011, "num_input_tokens_seen": 262896256, "step": 121900 }, { "epoch": 19.88662316476346, "grad_norm": 0.005675388965755701, "learning_rate": 9.816983521182543e-08, "loss": 0.0024, "num_input_tokens_seen": 262906464, "step": 121905 }, { "epoch": 19.887438825448612, "grad_norm": 0.0012360254768282175, "learning_rate": 9.676446080841839e-08, "loss": 0.0117, "num_input_tokens_seen": 262917024, "step": 121910 }, { "epoch": 19.888254486133768, "grad_norm": 0.33061349391937256, "learning_rate": 9.536921757824502e-08, "loss": 0.0096, "num_input_tokens_seen": 262927360, "step": 121915 }, { "epoch": 19.889070146818923, "grad_norm": 0.05313573405146599, "learning_rate": 9.39841055495605e-08, "loss": 0.0037, "num_input_tokens_seen": 262936864, "step": 121920 }, { "epoch": 19.88988580750408, "grad_norm": 0.0019019418396055698, "learning_rate": 9.260912475050898e-08, "loss": 0.0035, "num_input_tokens_seen": 262948448, "step": 121925 }, { "epoch": 19.890701468189235, "grad_norm": 0.02323128655552864, "learning_rate": 9.124427520890155e-08, "loss": 0.0055, "num_input_tokens_seen": 262958528, "step": 121930 }, { "epoch": 19.891517128874387, "grad_norm": 0.03517730161547661, "learning_rate": 8.988955695238277e-08, "loss": 0.0312, "num_input_tokens_seen": 262969120, "step": 121935 }, { "epoch": 19.892332789559543, "grad_norm": 0.07294556498527527, "learning_rate": 8.854497000843065e-08, "loss": 0.0021, "num_input_tokens_seen": 262979968, "step": 121940 }, { "epoch": 19.8931484502447, "grad_norm": 0.08174560964107513, "learning_rate": 8.721051440435668e-08, "loss": 0.0059, "num_input_tokens_seen": 262991488, "step": 121945 }, { "epoch": 19.893964110929854, "grad_norm": 0.02137603797018528, "learning_rate": 8.588619016708377e-08, "loss": 0.055, "num_input_tokens_seen": 263002304, "step": 121950 }, { "epoch": 19.894779771615006, "grad_norm": 0.004290835000574589, "learning_rate": 8.457199732353482e-08, "loss": 0.016, "num_input_tokens_seen": 263012064, "step": 121955 }, { "epoch": 19.895595432300162, "grad_norm": 0.003136908169835806, "learning_rate": 8.32679359003552e-08, "loss": 0.0028, "num_input_tokens_seen": 263023328, "step": 121960 }, { "epoch": 19.896411092985318, "grad_norm": 0.06029631569981575, "learning_rate": 8.197400592391268e-08, "loss": 0.0034, "num_input_tokens_seen": 263035040, "step": 121965 }, { "epoch": 19.897226753670473, "grad_norm": 0.0048327818512916565, "learning_rate": 8.069020742040855e-08, "loss": 0.0017, "num_input_tokens_seen": 263044800, "step": 121970 }, { "epoch": 19.89804241435563, "grad_norm": 0.37062039971351624, "learning_rate": 7.941654041598856e-08, "loss": 0.0123, "num_input_tokens_seen": 263055776, "step": 121975 }, { "epoch": 19.898858075040785, "grad_norm": 0.05953994020819664, "learning_rate": 7.815300493635436e-08, "loss": 0.0087, "num_input_tokens_seen": 263064768, "step": 121980 }, { "epoch": 19.899673735725937, "grad_norm": 0.0030668603722006083, "learning_rate": 7.68996010071521e-08, "loss": 0.0017, "num_input_tokens_seen": 263076000, "step": 121985 }, { "epoch": 19.900489396411093, "grad_norm": 0.005103804636746645, "learning_rate": 7.565632865375039e-08, "loss": 0.0027, "num_input_tokens_seen": 263084672, "step": 121990 }, { "epoch": 19.90130505709625, "grad_norm": 0.005491576623171568, "learning_rate": 7.442318790140679e-08, "loss": 0.0018, "num_input_tokens_seen": 263096320, "step": 121995 }, { "epoch": 19.902120717781404, "grad_norm": 0.005495880264788866, "learning_rate": 7.32001787750458e-08, "loss": 0.0011, "num_input_tokens_seen": 263107552, "step": 122000 }, { "epoch": 19.902936378466556, "grad_norm": 0.0011094525689259171, "learning_rate": 7.198730129948094e-08, "loss": 0.0021, "num_input_tokens_seen": 263118656, "step": 122005 }, { "epoch": 19.903752039151712, "grad_norm": 0.004313451703637838, "learning_rate": 7.078455549935914e-08, "loss": 0.0024, "num_input_tokens_seen": 263129920, "step": 122010 }, { "epoch": 19.904567699836868, "grad_norm": 0.15729361772537231, "learning_rate": 6.959194139893876e-08, "loss": 0.0096, "num_input_tokens_seen": 263141120, "step": 122015 }, { "epoch": 19.905383360522023, "grad_norm": 0.007834318093955517, "learning_rate": 6.840945902242268e-08, "loss": 0.0031, "num_input_tokens_seen": 263151456, "step": 122020 }, { "epoch": 19.90619902120718, "grad_norm": 0.0799918845295906, "learning_rate": 6.723710839384723e-08, "loss": 0.0143, "num_input_tokens_seen": 263162112, "step": 122025 }, { "epoch": 19.90701468189233, "grad_norm": 0.019908614456653595, "learning_rate": 6.607488953691565e-08, "loss": 0.1062, "num_input_tokens_seen": 263172096, "step": 122030 }, { "epoch": 19.907830342577487, "grad_norm": 0.017172912135720253, "learning_rate": 6.492280247516469e-08, "loss": 0.0016, "num_input_tokens_seen": 263183712, "step": 122035 }, { "epoch": 19.908646003262643, "grad_norm": 0.0011959391413256526, "learning_rate": 6.378084723196453e-08, "loss": 0.0059, "num_input_tokens_seen": 263193856, "step": 122040 }, { "epoch": 19.9094616639478, "grad_norm": 0.0022716394159942865, "learning_rate": 6.264902383051885e-08, "loss": 0.0037, "num_input_tokens_seen": 263204800, "step": 122045 }, { "epoch": 19.910277324632954, "grad_norm": 0.0028792903758585453, "learning_rate": 6.152733229364272e-08, "loss": 0.0013, "num_input_tokens_seen": 263215712, "step": 122050 }, { "epoch": 19.911092985318106, "grad_norm": 0.009064840152859688, "learning_rate": 6.041577264415122e-08, "loss": 0.0022, "num_input_tokens_seen": 263227488, "step": 122055 }, { "epoch": 19.911908646003262, "grad_norm": 0.18377956748008728, "learning_rate": 5.9314344904581876e-08, "loss": 0.0162, "num_input_tokens_seen": 263237696, "step": 122060 }, { "epoch": 19.912724306688418, "grad_norm": 0.013362240046262741, "learning_rate": 5.822304909719467e-08, "loss": 0.0029, "num_input_tokens_seen": 263248928, "step": 122065 }, { "epoch": 19.913539967373573, "grad_norm": 0.015376216731965542, "learning_rate": 5.714188524413855e-08, "loss": 0.0011, "num_input_tokens_seen": 263259392, "step": 122070 }, { "epoch": 19.91435562805873, "grad_norm": 0.0024088555946946144, "learning_rate": 5.6070853367284903e-08, "loss": 0.0011, "num_input_tokens_seen": 263271008, "step": 122075 }, { "epoch": 19.91517128874388, "grad_norm": 0.0372304730117321, "learning_rate": 5.500995348844962e-08, "loss": 0.0022, "num_input_tokens_seen": 263281632, "step": 122080 }, { "epoch": 19.915986949429037, "grad_norm": 0.00933782011270523, "learning_rate": 5.395918562900448e-08, "loss": 0.0011, "num_input_tokens_seen": 263293408, "step": 122085 }, { "epoch": 19.916802610114193, "grad_norm": 0.015330552123486996, "learning_rate": 5.2918549810376806e-08, "loss": 0.0046, "num_input_tokens_seen": 263304320, "step": 122090 }, { "epoch": 19.91761827079935, "grad_norm": 0.2565518915653229, "learning_rate": 5.188804605349429e-08, "loss": 0.0118, "num_input_tokens_seen": 263314496, "step": 122095 }, { "epoch": 19.918433931484504, "grad_norm": 0.005954446271061897, "learning_rate": 5.086767437939566e-08, "loss": 0.0011, "num_input_tokens_seen": 263325280, "step": 122100 }, { "epoch": 19.919249592169656, "grad_norm": 0.015750613063573837, "learning_rate": 4.985743480867555e-08, "loss": 0.0015, "num_input_tokens_seen": 263336288, "step": 122105 }, { "epoch": 19.920065252854812, "grad_norm": 0.010615048930048943, "learning_rate": 4.885732736181758e-08, "loss": 0.0108, "num_input_tokens_seen": 263346752, "step": 122110 }, { "epoch": 19.920880913539968, "grad_norm": 0.0038388934917747974, "learning_rate": 4.7867352059138835e-08, "loss": 0.0035, "num_input_tokens_seen": 263357024, "step": 122115 }, { "epoch": 19.921696574225123, "grad_norm": 0.004520430229604244, "learning_rate": 4.688750892062332e-08, "loss": 0.0013, "num_input_tokens_seen": 263367904, "step": 122120 }, { "epoch": 19.92251223491028, "grad_norm": 0.011684359982609749, "learning_rate": 4.5917797966144037e-08, "loss": 0.0388, "num_input_tokens_seen": 263377088, "step": 122125 }, { "epoch": 19.92332789559543, "grad_norm": 0.04097156599164009, "learning_rate": 4.495821921540744e-08, "loss": 0.0024, "num_input_tokens_seen": 263387840, "step": 122130 }, { "epoch": 19.924143556280587, "grad_norm": 0.06774638593196869, "learning_rate": 4.400877268784242e-08, "loss": 0.002, "num_input_tokens_seen": 263398464, "step": 122135 }, { "epoch": 19.924959216965743, "grad_norm": 0.8693785071372986, "learning_rate": 4.306945840265586e-08, "loss": 0.1056, "num_input_tokens_seen": 263408800, "step": 122140 }, { "epoch": 19.9257748776509, "grad_norm": 0.016805754974484444, "learning_rate": 4.2140276378943576e-08, "loss": 0.0028, "num_input_tokens_seen": 263419200, "step": 122145 }, { "epoch": 19.92659053833605, "grad_norm": 0.01759279891848564, "learning_rate": 4.1221226635468345e-08, "loss": 0.0022, "num_input_tokens_seen": 263429184, "step": 122150 }, { "epoch": 19.927406199021206, "grad_norm": 0.03988860547542572, "learning_rate": 4.031230919088191e-08, "loss": 0.0033, "num_input_tokens_seen": 263439168, "step": 122155 }, { "epoch": 19.928221859706362, "grad_norm": 0.0012105383211746812, "learning_rate": 3.941352406361398e-08, "loss": 0.0048, "num_input_tokens_seen": 263450816, "step": 122160 }, { "epoch": 19.929037520391518, "grad_norm": 0.022667454555630684, "learning_rate": 3.852487127187221e-08, "loss": 0.0016, "num_input_tokens_seen": 263460736, "step": 122165 }, { "epoch": 19.929853181076673, "grad_norm": 0.0005834506009705365, "learning_rate": 3.7646350833697715e-08, "loss": 0.0012, "num_input_tokens_seen": 263472640, "step": 122170 }, { "epoch": 19.930668841761825, "grad_norm": 0.03975340351462364, "learning_rate": 3.677796276685408e-08, "loss": 0.0019, "num_input_tokens_seen": 263484000, "step": 122175 }, { "epoch": 19.93148450244698, "grad_norm": 0.06642568856477737, "learning_rate": 3.591970708893832e-08, "loss": 0.0349, "num_input_tokens_seen": 263496288, "step": 122180 }, { "epoch": 19.932300163132137, "grad_norm": 0.09686208516359329, "learning_rate": 3.507158381738096e-08, "loss": 0.0051, "num_input_tokens_seen": 263507168, "step": 122185 }, { "epoch": 19.933115823817293, "grad_norm": 0.019192678853869438, "learning_rate": 3.4233592969334926e-08, "loss": 0.0013, "num_input_tokens_seen": 263517952, "step": 122190 }, { "epoch": 19.93393148450245, "grad_norm": 0.048663631081581116, "learning_rate": 3.340573456184215e-08, "loss": 0.0386, "num_input_tokens_seen": 263528000, "step": 122195 }, { "epoch": 19.9347471451876, "grad_norm": 0.1015453040599823, "learning_rate": 3.258800861155598e-08, "loss": 0.0028, "num_input_tokens_seen": 263538976, "step": 122200 }, { "epoch": 19.935562805872756, "grad_norm": 0.003058068221434951, "learning_rate": 3.178041513518526e-08, "loss": 0.0024, "num_input_tokens_seen": 263549024, "step": 122205 }, { "epoch": 19.936378466557912, "grad_norm": 0.11473993211984634, "learning_rate": 3.098295414899477e-08, "loss": 0.0184, "num_input_tokens_seen": 263560384, "step": 122210 }, { "epoch": 19.937194127243067, "grad_norm": 0.029628217220306396, "learning_rate": 3.019562566924927e-08, "loss": 0.0031, "num_input_tokens_seen": 263572192, "step": 122215 }, { "epoch": 19.938009787928223, "grad_norm": 0.02515897899866104, "learning_rate": 2.9418429711769445e-08, "loss": 0.0023, "num_input_tokens_seen": 263582624, "step": 122220 }, { "epoch": 19.938825448613375, "grad_norm": 0.0011146754259243608, "learning_rate": 2.865136629243148e-08, "loss": 0.001, "num_input_tokens_seen": 263593280, "step": 122225 }, { "epoch": 19.93964110929853, "grad_norm": 0.04408663511276245, "learning_rate": 2.7894435426722988e-08, "loss": 0.0016, "num_input_tokens_seen": 263604384, "step": 122230 }, { "epoch": 19.940456769983687, "grad_norm": 0.00825695414096117, "learning_rate": 2.7147637130020553e-08, "loss": 0.005, "num_input_tokens_seen": 263613920, "step": 122235 }, { "epoch": 19.941272430668842, "grad_norm": 0.0893784612417221, "learning_rate": 2.6410971417423214e-08, "loss": 0.0026, "num_input_tokens_seen": 263623904, "step": 122240 }, { "epoch": 19.942088091353998, "grad_norm": 0.3288726806640625, "learning_rate": 2.5684438303807955e-08, "loss": 0.0077, "num_input_tokens_seen": 263636384, "step": 122245 }, { "epoch": 19.94290375203915, "grad_norm": 0.025374773889780045, "learning_rate": 2.496803780405177e-08, "loss": 0.0012, "num_input_tokens_seen": 263647808, "step": 122250 }, { "epoch": 19.943719412724306, "grad_norm": 0.0013619724195450544, "learning_rate": 2.426176993253204e-08, "loss": 0.0031, "num_input_tokens_seen": 263658208, "step": 122255 }, { "epoch": 19.94453507340946, "grad_norm": 0.10222205519676208, "learning_rate": 2.356563470357065e-08, "loss": 0.0078, "num_input_tokens_seen": 263669408, "step": 122260 }, { "epoch": 19.945350734094617, "grad_norm": 0.050279200077056885, "learning_rate": 2.287963213137845e-08, "loss": 0.003, "num_input_tokens_seen": 263680832, "step": 122265 }, { "epoch": 19.946166394779773, "grad_norm": 0.030106032267212868, "learning_rate": 2.2203762229777713e-08, "loss": 0.0019, "num_input_tokens_seen": 263691200, "step": 122270 }, { "epoch": 19.946982055464925, "grad_norm": 0.02318952977657318, "learning_rate": 2.15380250124797e-08, "loss": 0.0223, "num_input_tokens_seen": 263702560, "step": 122275 }, { "epoch": 19.94779771615008, "grad_norm": 0.0021224466618150473, "learning_rate": 2.0882420493029132e-08, "loss": 0.0041, "num_input_tokens_seen": 263713600, "step": 122280 }, { "epoch": 19.948613376835237, "grad_norm": 0.4169272482395172, "learning_rate": 2.0236948684582147e-08, "loss": 0.0048, "num_input_tokens_seen": 263723712, "step": 122285 }, { "epoch": 19.949429037520392, "grad_norm": 0.6185654401779175, "learning_rate": 1.96016096003504e-08, "loss": 0.0168, "num_input_tokens_seen": 263734080, "step": 122290 }, { "epoch": 19.950244698205548, "grad_norm": 0.31107261776924133, "learning_rate": 1.8976403253156972e-08, "loss": 0.0288, "num_input_tokens_seen": 263744608, "step": 122295 }, { "epoch": 19.9510603588907, "grad_norm": 0.04477408528327942, "learning_rate": 1.836132965571391e-08, "loss": 0.0255, "num_input_tokens_seen": 263756256, "step": 122300 }, { "epoch": 19.951876019575856, "grad_norm": 0.009929417632520199, "learning_rate": 1.7756388820400205e-08, "loss": 0.0011, "num_input_tokens_seen": 263767232, "step": 122305 }, { "epoch": 19.95269168026101, "grad_norm": 0.16747593879699707, "learning_rate": 1.716158075953933e-08, "loss": 0.0035, "num_input_tokens_seen": 263778368, "step": 122310 }, { "epoch": 19.953507340946167, "grad_norm": 0.008718746714293957, "learning_rate": 1.6576905485177206e-08, "loss": 0.0011, "num_input_tokens_seen": 263788992, "step": 122315 }, { "epoch": 19.954323001631323, "grad_norm": 0.0024858666583895683, "learning_rate": 1.6002363009137712e-08, "loss": 0.003, "num_input_tokens_seen": 263798592, "step": 122320 }, { "epoch": 19.955138662316475, "grad_norm": 0.0036268436815589666, "learning_rate": 1.5437953343078182e-08, "loss": 0.011, "num_input_tokens_seen": 263808544, "step": 122325 }, { "epoch": 19.95595432300163, "grad_norm": 0.45842769742012024, "learning_rate": 1.488367649848943e-08, "loss": 0.0958, "num_input_tokens_seen": 263819616, "step": 122330 }, { "epoch": 19.956769983686787, "grad_norm": 0.004889588803052902, "learning_rate": 1.4339532486529195e-08, "loss": 0.0021, "num_input_tokens_seen": 263828704, "step": 122335 }, { "epoch": 19.957585644371942, "grad_norm": 0.2464093416929245, "learning_rate": 1.3805521318244196e-08, "loss": 0.0018, "num_input_tokens_seen": 263840032, "step": 122340 }, { "epoch": 19.958401305057095, "grad_norm": 0.0126004284247756, "learning_rate": 1.3281643004514621e-08, "loss": 0.0017, "num_input_tokens_seen": 263850560, "step": 122345 }, { "epoch": 19.95921696574225, "grad_norm": 0.02858857810497284, "learning_rate": 1.2767897555887587e-08, "loss": 0.0019, "num_input_tokens_seen": 263861824, "step": 122350 }, { "epoch": 19.960032626427406, "grad_norm": 0.005978753790259361, "learning_rate": 1.2264284982743679e-08, "loss": 0.0021, "num_input_tokens_seen": 263873248, "step": 122355 }, { "epoch": 19.96084828711256, "grad_norm": 0.2550622522830963, "learning_rate": 1.1770805295407972e-08, "loss": 0.005, "num_input_tokens_seen": 263884672, "step": 122360 }, { "epoch": 19.961663947797717, "grad_norm": 0.013911999762058258, "learning_rate": 1.1287458503816961e-08, "loss": 0.0032, "num_input_tokens_seen": 263896416, "step": 122365 }, { "epoch": 19.96247960848287, "grad_norm": 0.019946133717894554, "learning_rate": 1.0814244617740609e-08, "loss": 0.001, "num_input_tokens_seen": 263906688, "step": 122370 }, { "epoch": 19.963295269168025, "grad_norm": 0.7437427043914795, "learning_rate": 1.0351163646782346e-08, "loss": 0.138, "num_input_tokens_seen": 263918368, "step": 122375 }, { "epoch": 19.96411092985318, "grad_norm": 0.0035043100360780954, "learning_rate": 9.898215600379068e-09, "loss": 0.0011, "num_input_tokens_seen": 263928864, "step": 122380 }, { "epoch": 19.964926590538337, "grad_norm": 0.007763623725622892, "learning_rate": 9.455400487634602e-09, "loss": 0.0066, "num_input_tokens_seen": 263939616, "step": 122385 }, { "epoch": 19.965742251223492, "grad_norm": 0.011000387370586395, "learning_rate": 9.022718317597267e-09, "loss": 0.0218, "num_input_tokens_seen": 263950816, "step": 122390 }, { "epoch": 19.966557911908644, "grad_norm": 0.001615022774785757, "learning_rate": 8.600169098982313e-09, "loss": 0.0075, "num_input_tokens_seen": 263962624, "step": 122395 }, { "epoch": 19.9673735725938, "grad_norm": 0.01570058986544609, "learning_rate": 8.187752840338458e-09, "loss": 0.007, "num_input_tokens_seen": 263973376, "step": 122400 }, { "epoch": 19.968189233278956, "grad_norm": 0.001268867519684136, "learning_rate": 7.785469550103397e-09, "loss": 0.0043, "num_input_tokens_seen": 263984224, "step": 122405 }, { "epoch": 19.96900489396411, "grad_norm": 0.04272850230336189, "learning_rate": 7.393319236326246e-09, "loss": 0.0079, "num_input_tokens_seen": 263994720, "step": 122410 }, { "epoch": 19.969820554649267, "grad_norm": 0.01077689416706562, "learning_rate": 7.011301907056122e-09, "loss": 0.0063, "num_input_tokens_seen": 264004928, "step": 122415 }, { "epoch": 19.97063621533442, "grad_norm": 0.00306382542476058, "learning_rate": 6.639417570009076e-09, "loss": 0.0753, "num_input_tokens_seen": 264016576, "step": 122420 }, { "epoch": 19.971451876019575, "grad_norm": 0.007194845005869865, "learning_rate": 6.2776662326236025e-09, "loss": 0.0017, "num_input_tokens_seen": 264027744, "step": 122425 }, { "epoch": 19.97226753670473, "grad_norm": 0.022416984662413597, "learning_rate": 5.926047902393705e-09, "loss": 0.0046, "num_input_tokens_seen": 264038400, "step": 122430 }, { "epoch": 19.973083197389887, "grad_norm": 0.012382768094539642, "learning_rate": 5.584562586313791e-09, "loss": 0.0027, "num_input_tokens_seen": 264049664, "step": 122435 }, { "epoch": 19.973898858075042, "grad_norm": 0.015328571200370789, "learning_rate": 5.253210291322752e-09, "loss": 0.0015, "num_input_tokens_seen": 264060960, "step": 122440 }, { "epoch": 19.974714518760194, "grad_norm": 0.06325375288724899, "learning_rate": 4.93199102419295e-09, "loss": 0.0964, "num_input_tokens_seen": 264070016, "step": 122445 }, { "epoch": 19.97553017944535, "grad_norm": 0.004639847669750452, "learning_rate": 4.620904791419189e-09, "loss": 0.0009, "num_input_tokens_seen": 264081440, "step": 122450 }, { "epoch": 19.976345840130506, "grad_norm": 0.013378245756030083, "learning_rate": 4.31995159927423e-09, "loss": 0.0025, "num_input_tokens_seen": 264092544, "step": 122455 }, { "epoch": 19.97716150081566, "grad_norm": 0.09576146304607391, "learning_rate": 4.029131453864299e-09, "loss": 0.0033, "num_input_tokens_seen": 264103808, "step": 122460 }, { "epoch": 19.977977161500817, "grad_norm": 0.006098780781030655, "learning_rate": 3.748444361129088e-09, "loss": 0.0068, "num_input_tokens_seen": 264115168, "step": 122465 }, { "epoch": 19.97879282218597, "grad_norm": 0.0043076807633042336, "learning_rate": 3.477890326675226e-09, "loss": 0.0025, "num_input_tokens_seen": 264125792, "step": 122470 }, { "epoch": 19.979608482871125, "grad_norm": 0.027919109910726547, "learning_rate": 3.217469356053826e-09, "loss": 0.0278, "num_input_tokens_seen": 264136704, "step": 122475 }, { "epoch": 19.98042414355628, "grad_norm": 0.011680357158184052, "learning_rate": 2.9671814545384477e-09, "loss": 0.0013, "num_input_tokens_seen": 264147232, "step": 122480 }, { "epoch": 19.981239804241437, "grad_norm": 0.13557949662208557, "learning_rate": 2.7270266271806065e-09, "loss": 0.0064, "num_input_tokens_seen": 264159040, "step": 122485 }, { "epoch": 19.982055464926592, "grad_norm": 0.003572451416403055, "learning_rate": 2.4970048788652833e-09, "loss": 0.0114, "num_input_tokens_seen": 264169376, "step": 122490 }, { "epoch": 19.982871125611744, "grad_norm": 0.005502818152308464, "learning_rate": 2.2771162141999036e-09, "loss": 0.0033, "num_input_tokens_seen": 264179392, "step": 122495 }, { "epoch": 19.9836867862969, "grad_norm": 0.006612642668187618, "learning_rate": 2.0673606376808707e-09, "loss": 0.0041, "num_input_tokens_seen": 264189216, "step": 122500 }, { "epoch": 19.984502446982056, "grad_norm": 0.0007050768472254276, "learning_rate": 1.8677381535825435e-09, "loss": 0.0066, "num_input_tokens_seen": 264200416, "step": 122505 }, { "epoch": 19.98531810766721, "grad_norm": 0.010658063925802708, "learning_rate": 1.6782487659572354e-09, "loss": 0.0031, "num_input_tokens_seen": 264210880, "step": 122510 }, { "epoch": 19.986133768352367, "grad_norm": 0.14432092010974884, "learning_rate": 1.4988924785797053e-09, "loss": 0.0079, "num_input_tokens_seen": 264222400, "step": 122515 }, { "epoch": 19.98694942903752, "grad_norm": 0.017014559358358383, "learning_rate": 1.329669295113689e-09, "loss": 0.0516, "num_input_tokens_seen": 264231936, "step": 122520 }, { "epoch": 19.987765089722675, "grad_norm": 0.0276069026440382, "learning_rate": 1.1705792190008778e-09, "loss": 0.011, "num_input_tokens_seen": 264241856, "step": 122525 }, { "epoch": 19.98858075040783, "grad_norm": 0.0118412459269166, "learning_rate": 1.0216222534609189e-09, "loss": 0.0028, "num_input_tokens_seen": 264252064, "step": 122530 }, { "epoch": 19.989396411092986, "grad_norm": 0.321582168340683, "learning_rate": 8.827984014914137e-10, "loss": 0.0047, "num_input_tokens_seen": 264261920, "step": 122535 }, { "epoch": 19.99021207177814, "grad_norm": 0.0031751336064189672, "learning_rate": 7.541076659234314e-10, "loss": 0.0027, "num_input_tokens_seen": 264272640, "step": 122540 }, { "epoch": 19.991027732463294, "grad_norm": 0.012154137715697289, "learning_rate": 6.355500494215072e-10, "loss": 0.0032, "num_input_tokens_seen": 264283296, "step": 122545 }, { "epoch": 19.99184339314845, "grad_norm": 0.006077760364860296, "learning_rate": 5.271255543171094e-10, "loss": 0.0029, "num_input_tokens_seen": 264293568, "step": 122550 }, { "epoch": 19.992659053833606, "grad_norm": 0.0028902848716825247, "learning_rate": 4.2883418277517293e-10, "loss": 0.0008, "num_input_tokens_seen": 264306208, "step": 122555 }, { "epoch": 19.99347471451876, "grad_norm": 0.002067724009975791, "learning_rate": 3.4067593690512154e-10, "loss": 0.0012, "num_input_tokens_seen": 264316096, "step": 122560 }, { "epoch": 19.994290375203914, "grad_norm": 0.05612458661198616, "learning_rate": 2.6265081837228976e-10, "loss": 0.1068, "num_input_tokens_seen": 264324128, "step": 122565 }, { "epoch": 19.99510603588907, "grad_norm": 1.1449226140975952, "learning_rate": 1.9475882884201212e-10, "loss": 0.0291, "num_input_tokens_seen": 264334400, "step": 122570 }, { "epoch": 19.995921696574225, "grad_norm": 0.004436062183231115, "learning_rate": 1.3699996964655626e-10, "loss": 0.004, "num_input_tokens_seen": 264343840, "step": 122575 }, { "epoch": 19.99673735725938, "grad_norm": 0.0005828479770570993, "learning_rate": 8.937424195165634e-11, "loss": 0.0016, "num_input_tokens_seen": 264354336, "step": 122580 }, { "epoch": 19.997553017944536, "grad_norm": 0.11285004019737244, "learning_rate": 5.188164675651308e-11, "loss": 0.0572, "num_input_tokens_seen": 264364064, "step": 122585 }, { "epoch": 19.99836867862969, "grad_norm": 0.134333997964859, "learning_rate": 2.4522184838282614e-11, "loss": 0.0049, "num_input_tokens_seen": 264374048, "step": 122590 }, { "epoch": 19.999184339314844, "grad_norm": 0.22424104809761047, "learning_rate": 7.295856696565295e-12, "loss": 0.003, "num_input_tokens_seen": 264385568, "step": 122595 }, { "epoch": 20.0, "grad_norm": 0.00044118051300756633, "learning_rate": 2.0266266442803271e-13, "loss": 0.0801, "num_input_tokens_seen": 264395536, "step": 122600 }, { "epoch": 20.0, "eval_loss": 0.26890021562576294, "eval_runtime": 104.7244, "eval_samples_per_second": 26.021, "eval_steps_per_second": 6.512, "num_input_tokens_seen": 264395536, "step": 122600 }, { "epoch": 20.0, "num_input_tokens_seen": 264395536, "step": 122600, "total_flos": 1.1905611487758385e+19, "train_loss": 0.07372644951130448, "train_runtime": 45564.9824, "train_samples_per_second": 10.762, "train_steps_per_second": 2.691 } ], "logging_steps": 5, "max_steps": 122600, "num_input_tokens_seen": 264395536, "num_train_epochs": 20, "save_steps": 6130, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1905611487758385e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }