{ "best_global_step": 34338, "best_metric": 0.15029733, "best_model_checkpoint": "/mnt/weka/home/yongxin.wang/workspace/lark/swift-pipeline/ckpt/align/Qwen2.5-Math-7B-16k-think-Openr1-Math-46k-S-Aligned-bs8-le1e-5/v0-20251214-053552/checkpoint-34338", "epoch": 3.0, "eval_steps": 500.0, "global_step": 34338, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.736676568233443e-05, "grad_norm": 3.975479566780245, "learning_rate": 2.912055911473501e-09, "loss": 0.6705262660980225, "step": 1 }, { "epoch": 0.0004368338284116722, "grad_norm": 3.6742687472096556, "learning_rate": 1.4560279557367502e-08, "loss": 0.706816554069519, "step": 5 }, { "epoch": 0.0008736676568233444, "grad_norm": 3.193643770044312, "learning_rate": 2.9120559114735005e-08, "loss": 0.718019437789917, "step": 10 }, { "epoch": 0.0013105014852350166, "grad_norm": 4.626192213547573, "learning_rate": 4.368083867210251e-08, "loss": 0.7199454307556152, "step": 15 }, { "epoch": 0.0017473353136466887, "grad_norm": 2.6317521735974547, "learning_rate": 5.824111822947001e-08, "loss": 0.6992023944854736, "step": 20 }, { "epoch": 0.002184169142058361, "grad_norm": 4.650440129234844, "learning_rate": 7.280139778683752e-08, "loss": 0.7208074569702149, "step": 25 }, { "epoch": 0.002621002970470033, "grad_norm": 3.293643716326898, "learning_rate": 8.736167734420502e-08, "loss": 0.7275213718414306, "step": 30 }, { "epoch": 0.0030578367988817056, "grad_norm": 3.8045939218802642, "learning_rate": 1.0192195690157252e-07, "loss": 0.6678548812866211, "step": 35 }, { "epoch": 0.0034946706272933775, "grad_norm": 3.1508073826737246, "learning_rate": 1.1648223645894002e-07, "loss": 0.7069518089294433, "step": 40 }, { "epoch": 0.00393150445570505, "grad_norm": 3.0628181926919225, "learning_rate": 1.310425160163075e-07, "loss": 0.7044812202453613, "step": 45 }, { "epoch": 0.004368338284116722, "grad_norm": 3.2494753644095384, "learning_rate": 1.4560279557367504e-07, "loss": 0.6705468654632568, "step": 50 }, { "epoch": 0.0048051721125283945, "grad_norm": 2.64643367301376, "learning_rate": 1.6016307513104252e-07, "loss": 0.6952574729919434, "step": 55 }, { "epoch": 0.005242005940940066, "grad_norm": 2.8997398655560187, "learning_rate": 1.7472335468841005e-07, "loss": 0.6685723304748535, "step": 60 }, { "epoch": 0.005678839769351738, "grad_norm": 26.731469861536375, "learning_rate": 1.8928363424577753e-07, "loss": 0.7050783157348632, "step": 65 }, { "epoch": 0.006115673597763411, "grad_norm": 4.600639761287295, "learning_rate": 2.0384391380314503e-07, "loss": 0.6876073360443116, "step": 70 }, { "epoch": 0.006552507426175083, "grad_norm": 6.863952667055098, "learning_rate": 2.1840419336051254e-07, "loss": 0.6601121902465821, "step": 75 }, { "epoch": 0.006989341254586755, "grad_norm": 6.524341264867431, "learning_rate": 2.3296447291788004e-07, "loss": 0.6891211986541748, "step": 80 }, { "epoch": 0.007426175082998428, "grad_norm": 5.926213105730085, "learning_rate": 2.4752475247524754e-07, "loss": 0.7047179222106934, "step": 85 }, { "epoch": 0.0078630089114101, "grad_norm": 3.492989204039253, "learning_rate": 2.62085032032615e-07, "loss": 0.6772768497467041, "step": 90 }, { "epoch": 0.008299842739821772, "grad_norm": 9.148067372109034, "learning_rate": 2.7664531158998255e-07, "loss": 0.6597463607788085, "step": 95 }, { "epoch": 0.008736676568233443, "grad_norm": 7.553842940605921, "learning_rate": 2.912055911473501e-07, "loss": 0.6633012294769287, "step": 100 }, { "epoch": 0.009173510396645115, "grad_norm": 3.642476978471778, "learning_rate": 3.0576587070471756e-07, "loss": 0.6524866580963135, "step": 105 }, { "epoch": 0.009610344225056789, "grad_norm": 10.853480787774188, "learning_rate": 3.2032615026208504e-07, "loss": 0.6682044982910156, "step": 110 }, { "epoch": 0.010047178053468461, "grad_norm": 2.6271457296136234, "learning_rate": 3.3488642981945257e-07, "loss": 0.6531826019287109, "step": 115 }, { "epoch": 0.010484011881880133, "grad_norm": 2.1952649606386023, "learning_rate": 3.494467093768201e-07, "loss": 0.6535624980926513, "step": 120 }, { "epoch": 0.010920845710291805, "grad_norm": 2.1048723765719637, "learning_rate": 3.640069889341875e-07, "loss": 0.6740177154541016, "step": 125 }, { "epoch": 0.011357679538703477, "grad_norm": 6.272017751690699, "learning_rate": 3.7856726849155505e-07, "loss": 0.6389056205749511, "step": 130 }, { "epoch": 0.011794513367115149, "grad_norm": 2.277163384293089, "learning_rate": 3.931275480489226e-07, "loss": 0.6129697799682617, "step": 135 }, { "epoch": 0.012231347195526822, "grad_norm": 2.2767169966851126, "learning_rate": 4.0768782760629006e-07, "loss": 0.6738988876342773, "step": 140 }, { "epoch": 0.012668181023938494, "grad_norm": 4.185977790710515, "learning_rate": 4.2224810716365754e-07, "loss": 0.6444270133972168, "step": 145 }, { "epoch": 0.013105014852350166, "grad_norm": 2.4429000679617503, "learning_rate": 4.3680838672102507e-07, "loss": 0.6426665306091308, "step": 150 }, { "epoch": 0.013541848680761838, "grad_norm": 1.9145377206332963, "learning_rate": 4.513686662783926e-07, "loss": 0.6186606407165527, "step": 155 }, { "epoch": 0.01397868250917351, "grad_norm": 2.1179823329634737, "learning_rate": 4.659289458357601e-07, "loss": 0.598472785949707, "step": 160 }, { "epoch": 0.014415516337585182, "grad_norm": 1.770574611291961, "learning_rate": 4.804892253931276e-07, "loss": 0.6322140693664551, "step": 165 }, { "epoch": 0.014852350165996855, "grad_norm": 1.7462077717013986, "learning_rate": 4.950495049504951e-07, "loss": 0.6225698471069336, "step": 170 }, { "epoch": 0.015289183994408527, "grad_norm": 3.116795425586836, "learning_rate": 5.096097845078626e-07, "loss": 0.6271866798400879, "step": 175 }, { "epoch": 0.0157260178228202, "grad_norm": 10.614286614992482, "learning_rate": 5.2417006406523e-07, "loss": 0.5877030372619629, "step": 180 }, { "epoch": 0.016162851651231873, "grad_norm": 2.656976874861882, "learning_rate": 5.387303436225976e-07, "loss": 0.6147327423095703, "step": 185 }, { "epoch": 0.016599685479643543, "grad_norm": 2.8992547385350473, "learning_rate": 5.532906231799651e-07, "loss": 0.5589334487915039, "step": 190 }, { "epoch": 0.017036519308055217, "grad_norm": 2.8554474835573487, "learning_rate": 5.678509027373326e-07, "loss": 0.5784102439880371, "step": 195 }, { "epoch": 0.017473353136466887, "grad_norm": 2.3964017047988126, "learning_rate": 5.824111822947002e-07, "loss": 0.5146402359008789, "step": 200 }, { "epoch": 0.01791018696487856, "grad_norm": 2.076814598335486, "learning_rate": 5.969714618520676e-07, "loss": 0.5853977203369141, "step": 205 }, { "epoch": 0.01834702079329023, "grad_norm": 1.5574276372845763, "learning_rate": 6.115317414094351e-07, "loss": 0.5807666778564453, "step": 210 }, { "epoch": 0.018783854621701904, "grad_norm": 1.6351227814052998, "learning_rate": 6.260920209668025e-07, "loss": 0.5892491340637207, "step": 215 }, { "epoch": 0.019220688450113578, "grad_norm": 2.8258362175235017, "learning_rate": 6.406523005241701e-07, "loss": 0.5704989910125733, "step": 220 }, { "epoch": 0.019657522278525248, "grad_norm": 3.8094274991335486, "learning_rate": 6.552125800815376e-07, "loss": 0.5399591445922851, "step": 225 }, { "epoch": 0.020094356106936922, "grad_norm": 1.2940881575054326, "learning_rate": 6.697728596389051e-07, "loss": 0.559626579284668, "step": 230 }, { "epoch": 0.020531189935348592, "grad_norm": 12.169158417445361, "learning_rate": 6.843331391962727e-07, "loss": 0.5341886520385742, "step": 235 }, { "epoch": 0.020968023763760266, "grad_norm": 3.9336289237789726, "learning_rate": 6.988934187536402e-07, "loss": 0.5439260482788086, "step": 240 }, { "epoch": 0.02140485759217194, "grad_norm": 1.2300020108665588, "learning_rate": 7.134536983110077e-07, "loss": 0.5278302192687988, "step": 245 }, { "epoch": 0.02184169142058361, "grad_norm": 1.982326801510687, "learning_rate": 7.28013977868375e-07, "loss": 0.5121757507324218, "step": 250 }, { "epoch": 0.022278525248995283, "grad_norm": 1.4526132256337456, "learning_rate": 7.425742574257426e-07, "loss": 0.5395015716552735, "step": 255 }, { "epoch": 0.022715359077406953, "grad_norm": 1.3387828103294492, "learning_rate": 7.571345369831101e-07, "loss": 0.5584309577941895, "step": 260 }, { "epoch": 0.023152192905818627, "grad_norm": 0.9084558803213519, "learning_rate": 7.716948165404776e-07, "loss": 0.4968903064727783, "step": 265 }, { "epoch": 0.023589026734230297, "grad_norm": 2.200427007786737, "learning_rate": 7.862550960978452e-07, "loss": 0.5154207229614258, "step": 270 }, { "epoch": 0.02402586056264197, "grad_norm": 1.4520307518150066, "learning_rate": 8.008153756552127e-07, "loss": 0.49453105926513674, "step": 275 }, { "epoch": 0.024462694391053644, "grad_norm": 1.1462529967434751, "learning_rate": 8.153756552125801e-07, "loss": 0.4645058631896973, "step": 280 }, { "epoch": 0.024899528219465315, "grad_norm": 1.0246535422556438, "learning_rate": 8.299359347699477e-07, "loss": 0.5070699214935303, "step": 285 }, { "epoch": 0.025336362047876988, "grad_norm": 1.1584303975990424, "learning_rate": 8.444962143273151e-07, "loss": 0.5024004936218261, "step": 290 }, { "epoch": 0.02577319587628866, "grad_norm": 2.310346158537539, "learning_rate": 8.590564938846826e-07, "loss": 0.481526517868042, "step": 295 }, { "epoch": 0.026210029704700332, "grad_norm": 1.1744336767741916, "learning_rate": 8.736167734420501e-07, "loss": 0.49884324073791503, "step": 300 }, { "epoch": 0.026646863533112006, "grad_norm": 1.521023912557041, "learning_rate": 8.881770529994177e-07, "loss": 0.4882166862487793, "step": 305 }, { "epoch": 0.027083697361523676, "grad_norm": 1.1606861778621638, "learning_rate": 9.027373325567852e-07, "loss": 0.5142670631408691, "step": 310 }, { "epoch": 0.02752053118993535, "grad_norm": 2.2259216507041413, "learning_rate": 9.172976121141526e-07, "loss": 0.47433271408081057, "step": 315 }, { "epoch": 0.02795736501834702, "grad_norm": 1.124738836974987, "learning_rate": 9.318578916715202e-07, "loss": 0.5077671051025391, "step": 320 }, { "epoch": 0.028394198846758693, "grad_norm": 1.241012360366211, "learning_rate": 9.464181712288877e-07, "loss": 0.4562734603881836, "step": 325 }, { "epoch": 0.028831032675170364, "grad_norm": 1.3050626430169976, "learning_rate": 9.609784507862551e-07, "loss": 0.46339073181152346, "step": 330 }, { "epoch": 0.029267866503582037, "grad_norm": 0.9378619782362168, "learning_rate": 9.755387303436227e-07, "loss": 0.5061975479125976, "step": 335 }, { "epoch": 0.02970470033199371, "grad_norm": 1.7385690778137963, "learning_rate": 9.900990099009902e-07, "loss": 0.4722420692443848, "step": 340 }, { "epoch": 0.03014153416040538, "grad_norm": 1.275038331580121, "learning_rate": 1.0046592894583578e-06, "loss": 0.4742225170135498, "step": 345 }, { "epoch": 0.030578367988817055, "grad_norm": 1.8772869795901639, "learning_rate": 1.0192195690157252e-06, "loss": 0.4406951904296875, "step": 350 }, { "epoch": 0.031015201817228725, "grad_norm": 1.2576596432237026, "learning_rate": 1.0337798485730927e-06, "loss": 0.4890933036804199, "step": 355 }, { "epoch": 0.0314520356456404, "grad_norm": 0.9149054372613675, "learning_rate": 1.04834012813046e-06, "loss": 0.45314803123474123, "step": 360 }, { "epoch": 0.03188886947405207, "grad_norm": 1.73946553735941, "learning_rate": 1.0629004076878277e-06, "loss": 0.4562821388244629, "step": 365 }, { "epoch": 0.032325703302463746, "grad_norm": 1.1063726418318252, "learning_rate": 1.0774606872451951e-06, "loss": 0.4598984241485596, "step": 370 }, { "epoch": 0.032762537130875416, "grad_norm": 1.3541247673588452, "learning_rate": 1.0920209668025628e-06, "loss": 0.4538322925567627, "step": 375 }, { "epoch": 0.033199370959287086, "grad_norm": 1.0881282405106767, "learning_rate": 1.1065812463599302e-06, "loss": 0.46449718475341795, "step": 380 }, { "epoch": 0.033636204787698756, "grad_norm": 1.7446444872407596, "learning_rate": 1.1211415259172976e-06, "loss": 0.438394832611084, "step": 385 }, { "epoch": 0.03407303861611043, "grad_norm": 1.109251676807073, "learning_rate": 1.1357018054746653e-06, "loss": 0.45175848007202146, "step": 390 }, { "epoch": 0.034509872444522104, "grad_norm": 1.4845314029601926, "learning_rate": 1.1502620850320327e-06, "loss": 0.4400350093841553, "step": 395 }, { "epoch": 0.034946706272933774, "grad_norm": 1.1990851755373293, "learning_rate": 1.1648223645894003e-06, "loss": 0.42598562240600585, "step": 400 }, { "epoch": 0.03538354010134545, "grad_norm": 1.0591607180616762, "learning_rate": 1.1793826441467678e-06, "loss": 0.4903105735778809, "step": 405 }, { "epoch": 0.03582037392975712, "grad_norm": 1.624363197350525, "learning_rate": 1.1939429237041352e-06, "loss": 0.45419626235961913, "step": 410 }, { "epoch": 0.03625720775816879, "grad_norm": 1.1516169297544518, "learning_rate": 1.2085032032615026e-06, "loss": 0.4483180046081543, "step": 415 }, { "epoch": 0.03669404158658046, "grad_norm": 0.9528830531008237, "learning_rate": 1.2230634828188702e-06, "loss": 0.4451145172119141, "step": 420 }, { "epoch": 0.03713087541499214, "grad_norm": 1.2103825280767155, "learning_rate": 1.2376237623762377e-06, "loss": 0.45114431381225584, "step": 425 }, { "epoch": 0.03756770924340381, "grad_norm": 1.1604504961307947, "learning_rate": 1.252184041933605e-06, "loss": 0.45836262702941893, "step": 430 }, { "epoch": 0.03800454307181548, "grad_norm": 1.0334499291134545, "learning_rate": 1.2667443214909727e-06, "loss": 0.45593061447143557, "step": 435 }, { "epoch": 0.038441376900227156, "grad_norm": 1.2294884028917412, "learning_rate": 1.2813046010483402e-06, "loss": 0.4559175968170166, "step": 440 }, { "epoch": 0.038878210728638826, "grad_norm": 0.853877478731358, "learning_rate": 1.2958648806057078e-06, "loss": 0.44067864418029784, "step": 445 }, { "epoch": 0.039315044557050496, "grad_norm": 0.9696533738916524, "learning_rate": 1.3104251601630752e-06, "loss": 0.45251121520996096, "step": 450 }, { "epoch": 0.039751878385462174, "grad_norm": 1.3047166465176199, "learning_rate": 1.3249854397204428e-06, "loss": 0.4615504264831543, "step": 455 }, { "epoch": 0.040188712213873844, "grad_norm": 0.8835085223350773, "learning_rate": 1.3395457192778103e-06, "loss": 0.45216054916381837, "step": 460 }, { "epoch": 0.040625546042285514, "grad_norm": 0.9534209604496774, "learning_rate": 1.3541059988351777e-06, "loss": 0.47389841079711914, "step": 465 }, { "epoch": 0.041062379870697184, "grad_norm": 0.9800015038048207, "learning_rate": 1.3686662783925453e-06, "loss": 0.4080831527709961, "step": 470 }, { "epoch": 0.04149921369910886, "grad_norm": 2.254744069691929, "learning_rate": 1.3832265579499128e-06, "loss": 0.4482013702392578, "step": 475 }, { "epoch": 0.04193604752752053, "grad_norm": 1.1180537688187666, "learning_rate": 1.3977868375072804e-06, "loss": 0.42386798858642577, "step": 480 }, { "epoch": 0.0423728813559322, "grad_norm": 1.0342495829742113, "learning_rate": 1.4123471170646478e-06, "loss": 0.43498730659484863, "step": 485 }, { "epoch": 0.04280971518434388, "grad_norm": 1.1589563644223133, "learning_rate": 1.4269073966220155e-06, "loss": 0.4615750312805176, "step": 490 }, { "epoch": 0.04324654901275555, "grad_norm": 1.066744426936682, "learning_rate": 1.4414676761793827e-06, "loss": 0.4746453285217285, "step": 495 }, { "epoch": 0.04368338284116722, "grad_norm": 0.9755445829616193, "learning_rate": 1.45602795573675e-06, "loss": 0.4576003074645996, "step": 500 }, { "epoch": 0.04412021666957889, "grad_norm": 0.9255071283878527, "learning_rate": 1.4705882352941177e-06, "loss": 0.45267958641052247, "step": 505 }, { "epoch": 0.044557050497990566, "grad_norm": 0.9585942325611988, "learning_rate": 1.4851485148514852e-06, "loss": 0.4329216003417969, "step": 510 }, { "epoch": 0.044993884326402236, "grad_norm": 1.1735879750313472, "learning_rate": 1.4997087944088528e-06, "loss": 0.41016664505004885, "step": 515 }, { "epoch": 0.04543071815481391, "grad_norm": 0.9514532921641408, "learning_rate": 1.5142690739662202e-06, "loss": 0.42246761322021487, "step": 520 }, { "epoch": 0.045867551983225584, "grad_norm": 1.1940995367004141, "learning_rate": 1.5288293535235879e-06, "loss": 0.42496209144592284, "step": 525 }, { "epoch": 0.046304385811637254, "grad_norm": 0.8665320819691279, "learning_rate": 1.5433896330809553e-06, "loss": 0.4362898349761963, "step": 530 }, { "epoch": 0.046741219640048924, "grad_norm": 1.129825973110193, "learning_rate": 1.5579499126383227e-06, "loss": 0.4151638031005859, "step": 535 }, { "epoch": 0.047178053468460594, "grad_norm": 1.0859493665233535, "learning_rate": 1.5725101921956903e-06, "loss": 0.4453767776489258, "step": 540 }, { "epoch": 0.04761488729687227, "grad_norm": 1.131637523740547, "learning_rate": 1.5870704717530578e-06, "loss": 0.47969441413879393, "step": 545 }, { "epoch": 0.04805172112528394, "grad_norm": 1.0554933990023139, "learning_rate": 1.6016307513104254e-06, "loss": 0.4292738914489746, "step": 550 }, { "epoch": 0.04848855495369561, "grad_norm": 1.0331395562422856, "learning_rate": 1.6161910308677928e-06, "loss": 0.41269731521606445, "step": 555 }, { "epoch": 0.04892538878210729, "grad_norm": 1.2455669286340862, "learning_rate": 1.6307513104251602e-06, "loss": 0.4486018180847168, "step": 560 }, { "epoch": 0.04936222261051896, "grad_norm": 0.9973330871047006, "learning_rate": 1.6453115899825279e-06, "loss": 0.4090826988220215, "step": 565 }, { "epoch": 0.04979905643893063, "grad_norm": 0.8707636365178172, "learning_rate": 1.6598718695398953e-06, "loss": 0.4320216655731201, "step": 570 }, { "epoch": 0.050235890267342306, "grad_norm": 0.9732046278340021, "learning_rate": 1.674432149097263e-06, "loss": 0.3874271154403687, "step": 575 }, { "epoch": 0.050672724095753977, "grad_norm": 1.1162742884211783, "learning_rate": 1.6889924286546302e-06, "loss": 0.430413818359375, "step": 580 }, { "epoch": 0.05110955792416565, "grad_norm": 1.2203024554291064, "learning_rate": 1.703552708211998e-06, "loss": 0.4321478843688965, "step": 585 }, { "epoch": 0.05154639175257732, "grad_norm": 1.0788046891355758, "learning_rate": 1.7181129877693652e-06, "loss": 0.42530183792114257, "step": 590 }, { "epoch": 0.051983225580988994, "grad_norm": 1.0907013732254747, "learning_rate": 1.7326732673267326e-06, "loss": 0.44817113876342773, "step": 595 }, { "epoch": 0.052420059409400664, "grad_norm": 0.9009695564184137, "learning_rate": 1.7472335468841003e-06, "loss": 0.4532756805419922, "step": 600 }, { "epoch": 0.052856893237812334, "grad_norm": 1.8594829209644852, "learning_rate": 1.7617938264414677e-06, "loss": 0.40618324279785156, "step": 605 }, { "epoch": 0.05329372706622401, "grad_norm": 1.2836845639534549, "learning_rate": 1.7763541059988353e-06, "loss": 0.4093473434448242, "step": 610 }, { "epoch": 0.05373056089463568, "grad_norm": 1.3723068988776488, "learning_rate": 1.7909143855562028e-06, "loss": 0.45548386573791505, "step": 615 }, { "epoch": 0.05416739472304735, "grad_norm": 1.1483560162036661, "learning_rate": 1.8054746651135704e-06, "loss": 0.43339195251464846, "step": 620 }, { "epoch": 0.05460422855145902, "grad_norm": 0.9569460601746488, "learning_rate": 1.8200349446709378e-06, "loss": 0.44464902877807616, "step": 625 }, { "epoch": 0.0550410623798707, "grad_norm": 1.2128255826480225, "learning_rate": 1.8345952242283053e-06, "loss": 0.4320832252502441, "step": 630 }, { "epoch": 0.05547789620828237, "grad_norm": 1.222052233539631, "learning_rate": 1.8491555037856729e-06, "loss": 0.45357069969177244, "step": 635 }, { "epoch": 0.05591473003669404, "grad_norm": 1.1045192531818164, "learning_rate": 1.8637157833430403e-06, "loss": 0.41315631866455077, "step": 640 }, { "epoch": 0.05635156386510572, "grad_norm": 1.2540843118325313, "learning_rate": 1.878276062900408e-06, "loss": 0.41021575927734377, "step": 645 }, { "epoch": 0.05678839769351739, "grad_norm": 0.787551916436897, "learning_rate": 1.8928363424577754e-06, "loss": 0.39406623840332033, "step": 650 }, { "epoch": 0.05722523152192906, "grad_norm": 1.1179390516699246, "learning_rate": 1.907396622015143e-06, "loss": 0.47342281341552733, "step": 655 }, { "epoch": 0.05766206535034073, "grad_norm": 0.9552117269535308, "learning_rate": 1.9219569015725102e-06, "loss": 0.4208021640777588, "step": 660 }, { "epoch": 0.058098899178752404, "grad_norm": 1.0359498710037804, "learning_rate": 1.9365171811298776e-06, "loss": 0.435499382019043, "step": 665 }, { "epoch": 0.058535733007164074, "grad_norm": 1.7109511691357582, "learning_rate": 1.9510774606872455e-06, "loss": 0.4656370162963867, "step": 670 }, { "epoch": 0.058972566835575745, "grad_norm": 0.9869257645613418, "learning_rate": 1.965637740244613e-06, "loss": 0.407666540145874, "step": 675 }, { "epoch": 0.05940940066398742, "grad_norm": 1.0338026889650977, "learning_rate": 1.9801980198019803e-06, "loss": 0.42275257110595704, "step": 680 }, { "epoch": 0.05984623449239909, "grad_norm": 1.4026952489995645, "learning_rate": 1.9947582993593478e-06, "loss": 0.45141091346740725, "step": 685 }, { "epoch": 0.06028306832081076, "grad_norm": 1.023118463884253, "learning_rate": 2.0093185789167156e-06, "loss": 0.44068541526794436, "step": 690 }, { "epoch": 0.06071990214922244, "grad_norm": 1.0464072486690836, "learning_rate": 2.023878858474083e-06, "loss": 0.41353540420532225, "step": 695 }, { "epoch": 0.06115673597763411, "grad_norm": 0.9464397021390852, "learning_rate": 2.0384391380314505e-06, "loss": 0.4515063285827637, "step": 700 }, { "epoch": 0.06159356980604578, "grad_norm": 0.9464945863257002, "learning_rate": 2.052999417588818e-06, "loss": 0.42475042343139646, "step": 705 }, { "epoch": 0.06203040363445745, "grad_norm": 0.9627986100335428, "learning_rate": 2.0675596971461853e-06, "loss": 0.4062337398529053, "step": 710 }, { "epoch": 0.06246723746286913, "grad_norm": 1.067137787775245, "learning_rate": 2.082119976703553e-06, "loss": 0.418302583694458, "step": 715 }, { "epoch": 0.0629040712912808, "grad_norm": 0.9305479743502025, "learning_rate": 2.09668025626092e-06, "loss": 0.4386444568634033, "step": 720 }, { "epoch": 0.06334090511969247, "grad_norm": 0.9800607220700576, "learning_rate": 2.1112405358182876e-06, "loss": 0.40645565986633303, "step": 725 }, { "epoch": 0.06377773894810414, "grad_norm": 0.9289772809282909, "learning_rate": 2.1258008153756554e-06, "loss": 0.40067195892333984, "step": 730 }, { "epoch": 0.06421457277651581, "grad_norm": 1.0246864851528354, "learning_rate": 2.140361094933023e-06, "loss": 0.44303159713745116, "step": 735 }, { "epoch": 0.06465140660492749, "grad_norm": 1.065011103594921, "learning_rate": 2.1549213744903903e-06, "loss": 0.40886592864990234, "step": 740 }, { "epoch": 0.06508824043333916, "grad_norm": 1.1215664742633276, "learning_rate": 2.1694816540477577e-06, "loss": 0.43088350296020506, "step": 745 }, { "epoch": 0.06552507426175083, "grad_norm": 0.9910625802170413, "learning_rate": 2.1840419336051256e-06, "loss": 0.39481592178344727, "step": 750 }, { "epoch": 0.0659619080901625, "grad_norm": 1.0559168009431625, "learning_rate": 2.198602213162493e-06, "loss": 0.44126081466674805, "step": 755 }, { "epoch": 0.06639874191857417, "grad_norm": 1.3528205960504056, "learning_rate": 2.2131624927198604e-06, "loss": 0.43095035552978517, "step": 760 }, { "epoch": 0.06683557574698584, "grad_norm": 0.8751381322412706, "learning_rate": 2.227722772277228e-06, "loss": 0.389419412612915, "step": 765 }, { "epoch": 0.06727240957539751, "grad_norm": 0.7729375012171142, "learning_rate": 2.2422830518345953e-06, "loss": 0.39824628829956055, "step": 770 }, { "epoch": 0.0677092434038092, "grad_norm": 0.9946088446439157, "learning_rate": 2.256843331391963e-06, "loss": 0.44208364486694335, "step": 775 }, { "epoch": 0.06814607723222087, "grad_norm": 0.9145127691327722, "learning_rate": 2.2714036109493305e-06, "loss": 0.3961998224258423, "step": 780 }, { "epoch": 0.06858291106063254, "grad_norm": 0.9795669323162994, "learning_rate": 2.285963890506698e-06, "loss": 0.42339324951171875, "step": 785 }, { "epoch": 0.06901974488904421, "grad_norm": 1.0672545292250049, "learning_rate": 2.3005241700640654e-06, "loss": 0.40024499893188475, "step": 790 }, { "epoch": 0.06945657871745588, "grad_norm": 1.0279452369318958, "learning_rate": 2.315084449621433e-06, "loss": 0.3740841865539551, "step": 795 }, { "epoch": 0.06989341254586755, "grad_norm": 0.970784478078267, "learning_rate": 2.3296447291788007e-06, "loss": 0.3844287395477295, "step": 800 }, { "epoch": 0.07033024637427922, "grad_norm": 0.959593025308813, "learning_rate": 2.3442050087361677e-06, "loss": 0.4055701732635498, "step": 805 }, { "epoch": 0.0707670802026909, "grad_norm": 1.0050914737221297, "learning_rate": 2.3587652882935355e-06, "loss": 0.4114830017089844, "step": 810 }, { "epoch": 0.07120391403110257, "grad_norm": 0.8424971730307537, "learning_rate": 2.373325567850903e-06, "loss": 0.4302999019622803, "step": 815 }, { "epoch": 0.07164074785951424, "grad_norm": 0.9601435971392517, "learning_rate": 2.3878858474082704e-06, "loss": 0.4233075141906738, "step": 820 }, { "epoch": 0.07207758168792591, "grad_norm": 0.8522531396006059, "learning_rate": 2.4024461269656378e-06, "loss": 0.3776766300201416, "step": 825 }, { "epoch": 0.07251441551633758, "grad_norm": 0.7204278162743054, "learning_rate": 2.417006406523005e-06, "loss": 0.40434846878051756, "step": 830 }, { "epoch": 0.07295124934474925, "grad_norm": 1.06569339945484, "learning_rate": 2.431566686080373e-06, "loss": 0.40108938217163087, "step": 835 }, { "epoch": 0.07338808317316092, "grad_norm": 0.8924600128211588, "learning_rate": 2.4461269656377405e-06, "loss": 0.3697865724563599, "step": 840 }, { "epoch": 0.07382491700157261, "grad_norm": 0.9429599031922732, "learning_rate": 2.460687245195108e-06, "loss": 0.38335742950439455, "step": 845 }, { "epoch": 0.07426175082998428, "grad_norm": 0.8043747965473925, "learning_rate": 2.4752475247524753e-06, "loss": 0.42061529159545896, "step": 850 }, { "epoch": 0.07469858465839595, "grad_norm": 0.9920685062246233, "learning_rate": 2.489807804309843e-06, "loss": 0.4016770362854004, "step": 855 }, { "epoch": 0.07513541848680762, "grad_norm": 0.940122974440222, "learning_rate": 2.50436808386721e-06, "loss": 0.4421844482421875, "step": 860 }, { "epoch": 0.07557225231521929, "grad_norm": 0.908733419105678, "learning_rate": 2.518928363424578e-06, "loss": 0.40953493118286133, "step": 865 }, { "epoch": 0.07600908614363096, "grad_norm": 0.9436321535443659, "learning_rate": 2.5334886429819455e-06, "loss": 0.4050749778747559, "step": 870 }, { "epoch": 0.07644591997204264, "grad_norm": 0.9813386389032577, "learning_rate": 2.548048922539313e-06, "loss": 0.4562321662902832, "step": 875 }, { "epoch": 0.07688275380045431, "grad_norm": 0.9524143917377973, "learning_rate": 2.5626092020966803e-06, "loss": 0.3997232437133789, "step": 880 }, { "epoch": 0.07731958762886598, "grad_norm": 0.9796219166006452, "learning_rate": 2.577169481654048e-06, "loss": 0.37527999877929685, "step": 885 }, { "epoch": 0.07775642145727765, "grad_norm": 1.0998235995678516, "learning_rate": 2.5917297612114156e-06, "loss": 0.39909660816192627, "step": 890 }, { "epoch": 0.07819325528568932, "grad_norm": 0.9351065423488055, "learning_rate": 2.606290040768783e-06, "loss": 0.4092076301574707, "step": 895 }, { "epoch": 0.07863008911410099, "grad_norm": 0.8027637047331371, "learning_rate": 2.6208503203261504e-06, "loss": 0.40480942726135255, "step": 900 }, { "epoch": 0.07906692294251266, "grad_norm": 0.876544514602195, "learning_rate": 2.635410599883518e-06, "loss": 0.4266118049621582, "step": 905 }, { "epoch": 0.07950375677092435, "grad_norm": 0.9768355077504229, "learning_rate": 2.6499708794408857e-06, "loss": 0.40882291793823244, "step": 910 }, { "epoch": 0.07994059059933602, "grad_norm": 0.9473012730719912, "learning_rate": 2.664531158998253e-06, "loss": 0.4156352043151855, "step": 915 }, { "epoch": 0.08037742442774769, "grad_norm": 0.9851292075735277, "learning_rate": 2.6790914385556205e-06, "loss": 0.4019881248474121, "step": 920 }, { "epoch": 0.08081425825615936, "grad_norm": 0.9438600588945922, "learning_rate": 2.693651718112988e-06, "loss": 0.3860332489013672, "step": 925 }, { "epoch": 0.08125109208457103, "grad_norm": 0.7700113254100555, "learning_rate": 2.7082119976703554e-06, "loss": 0.3913813591003418, "step": 930 }, { "epoch": 0.0816879259129827, "grad_norm": 0.8936406048993514, "learning_rate": 2.7227722772277232e-06, "loss": 0.39567389488220217, "step": 935 }, { "epoch": 0.08212475974139437, "grad_norm": 1.024927453231714, "learning_rate": 2.7373325567850907e-06, "loss": 0.43407230377197265, "step": 940 }, { "epoch": 0.08256159356980605, "grad_norm": 0.751659358417385, "learning_rate": 2.751892836342458e-06, "loss": 0.35830535888671877, "step": 945 }, { "epoch": 0.08299842739821772, "grad_norm": 0.9986283495733431, "learning_rate": 2.7664531158998255e-06, "loss": 0.4120110034942627, "step": 950 }, { "epoch": 0.08343526122662939, "grad_norm": 0.8973910189443934, "learning_rate": 2.781013395457193e-06, "loss": 0.39009599685668944, "step": 955 }, { "epoch": 0.08387209505504106, "grad_norm": 0.9802165814396784, "learning_rate": 2.795573675014561e-06, "loss": 0.40192532539367676, "step": 960 }, { "epoch": 0.08430892888345273, "grad_norm": 0.9373648678582178, "learning_rate": 2.8101339545719282e-06, "loss": 0.3880904197692871, "step": 965 }, { "epoch": 0.0847457627118644, "grad_norm": 0.919124594988104, "learning_rate": 2.8246942341292956e-06, "loss": 0.4005584239959717, "step": 970 }, { "epoch": 0.08518259654027607, "grad_norm": 1.1987243268217778, "learning_rate": 2.8392545136866626e-06, "loss": 0.42265024185180666, "step": 975 }, { "epoch": 0.08561943036868776, "grad_norm": 0.7264427842910276, "learning_rate": 2.853814793244031e-06, "loss": 0.36936030387878416, "step": 980 }, { "epoch": 0.08605626419709943, "grad_norm": 0.8951342902971221, "learning_rate": 2.8683750728013983e-06, "loss": 0.41772122383117677, "step": 985 }, { "epoch": 0.0864930980255111, "grad_norm": 0.9958272005111631, "learning_rate": 2.8829353523587653e-06, "loss": 0.39186117649078367, "step": 990 }, { "epoch": 0.08692993185392277, "grad_norm": 0.8936717641833317, "learning_rate": 2.8974956319161328e-06, "loss": 0.3792526960372925, "step": 995 }, { "epoch": 0.08736676568233444, "grad_norm": 0.9915996550210322, "learning_rate": 2.9120559114735e-06, "loss": 0.38324549198150637, "step": 1000 }, { "epoch": 0.08780359951074611, "grad_norm": 0.8922666332833927, "learning_rate": 2.9266161910308685e-06, "loss": 0.37809147834777834, "step": 1005 }, { "epoch": 0.08824043333915778, "grad_norm": 0.7700313876639927, "learning_rate": 2.9411764705882355e-06, "loss": 0.42400007247924804, "step": 1010 }, { "epoch": 0.08867726716756946, "grad_norm": 0.9018233693721245, "learning_rate": 2.955736750145603e-06, "loss": 0.421420955657959, "step": 1015 }, { "epoch": 0.08911410099598113, "grad_norm": 0.832264438027464, "learning_rate": 2.9702970297029703e-06, "loss": 0.3824253797531128, "step": 1020 }, { "epoch": 0.0895509348243928, "grad_norm": 0.9158161486041414, "learning_rate": 2.9848573092603377e-06, "loss": 0.41114158630371095, "step": 1025 }, { "epoch": 0.08998776865280447, "grad_norm": 0.9130159386541529, "learning_rate": 2.9994175888177056e-06, "loss": 0.39345602989196776, "step": 1030 }, { "epoch": 0.09042460248121614, "grad_norm": 0.8471190191212521, "learning_rate": 3.013977868375073e-06, "loss": 0.3706113338470459, "step": 1035 }, { "epoch": 0.09086143630962781, "grad_norm": 1.2714114123485865, "learning_rate": 3.0285381479324404e-06, "loss": 0.41997332572937013, "step": 1040 }, { "epoch": 0.09129827013803948, "grad_norm": 0.799130386621188, "learning_rate": 3.043098427489808e-06, "loss": 0.4015970230102539, "step": 1045 }, { "epoch": 0.09173510396645117, "grad_norm": 0.9886560202925558, "learning_rate": 3.0576587070471757e-06, "loss": 0.41883268356323244, "step": 1050 }, { "epoch": 0.09217193779486284, "grad_norm": 1.3153708156480861, "learning_rate": 3.072218986604543e-06, "loss": 0.4066831111907959, "step": 1055 }, { "epoch": 0.09260877162327451, "grad_norm": 0.7171036226767377, "learning_rate": 3.0867792661619106e-06, "loss": 0.40769329071044924, "step": 1060 }, { "epoch": 0.09304560545168618, "grad_norm": 0.8625739998798555, "learning_rate": 3.101339545719278e-06, "loss": 0.3895176887512207, "step": 1065 }, { "epoch": 0.09348243928009785, "grad_norm": 0.9844243537736669, "learning_rate": 3.1158998252766454e-06, "loss": 0.4487447738647461, "step": 1070 }, { "epoch": 0.09391927310850952, "grad_norm": 0.9098887573477697, "learning_rate": 3.1304601048340133e-06, "loss": 0.3743014335632324, "step": 1075 }, { "epoch": 0.09435610693692119, "grad_norm": 0.8245866207908586, "learning_rate": 3.1450203843913807e-06, "loss": 0.3620908737182617, "step": 1080 }, { "epoch": 0.09479294076533287, "grad_norm": 0.9988951615138834, "learning_rate": 3.159580663948748e-06, "loss": 0.3649896144866943, "step": 1085 }, { "epoch": 0.09522977459374454, "grad_norm": 0.9955518670458222, "learning_rate": 3.1741409435061155e-06, "loss": 0.40137529373168945, "step": 1090 }, { "epoch": 0.09566660842215621, "grad_norm": 0.8606120860677478, "learning_rate": 3.188701223063483e-06, "loss": 0.4203033447265625, "step": 1095 }, { "epoch": 0.09610344225056788, "grad_norm": 0.8572259886686586, "learning_rate": 3.203261502620851e-06, "loss": 0.3994074821472168, "step": 1100 }, { "epoch": 0.09654027607897955, "grad_norm": 0.7192345575857022, "learning_rate": 3.2178217821782182e-06, "loss": 0.4009498119354248, "step": 1105 }, { "epoch": 0.09697710990739122, "grad_norm": 0.8318695114875259, "learning_rate": 3.2323820617355856e-06, "loss": 0.41753196716308594, "step": 1110 }, { "epoch": 0.0974139437358029, "grad_norm": 0.9196784593446674, "learning_rate": 3.246942341292953e-06, "loss": 0.418365478515625, "step": 1115 }, { "epoch": 0.09785077756421458, "grad_norm": 0.9513818691252155, "learning_rate": 3.2615026208503205e-06, "loss": 0.3749547481536865, "step": 1120 }, { "epoch": 0.09828761139262625, "grad_norm": 0.7780770844119732, "learning_rate": 3.2760629004076883e-06, "loss": 0.3863163709640503, "step": 1125 }, { "epoch": 0.09872444522103792, "grad_norm": 0.9631724902433072, "learning_rate": 3.2906231799650558e-06, "loss": 0.38781008720397947, "step": 1130 }, { "epoch": 0.09916127904944959, "grad_norm": 1.0931487889810223, "learning_rate": 3.305183459522423e-06, "loss": 0.39806137084960935, "step": 1135 }, { "epoch": 0.09959811287786126, "grad_norm": 1.077345201459613, "learning_rate": 3.3197437390797906e-06, "loss": 0.3871628284454346, "step": 1140 }, { "epoch": 0.10003494670627293, "grad_norm": 0.7295226948939167, "learning_rate": 3.3343040186371585e-06, "loss": 0.3628930807113647, "step": 1145 }, { "epoch": 0.10047178053468461, "grad_norm": 0.915490279125611, "learning_rate": 3.348864298194526e-06, "loss": 0.4046034812927246, "step": 1150 }, { "epoch": 0.10090861436309628, "grad_norm": 0.8461967253824549, "learning_rate": 3.3634245777518933e-06, "loss": 0.36959998607635497, "step": 1155 }, { "epoch": 0.10134544819150795, "grad_norm": 0.8754911726230198, "learning_rate": 3.3779848573092603e-06, "loss": 0.3727055549621582, "step": 1160 }, { "epoch": 0.10178228201991962, "grad_norm": 0.940002668178776, "learning_rate": 3.3925451368666277e-06, "loss": 0.39271543025970457, "step": 1165 }, { "epoch": 0.1022191158483313, "grad_norm": 0.8895427084600904, "learning_rate": 3.407105416423996e-06, "loss": 0.38036441802978516, "step": 1170 }, { "epoch": 0.10265594967674296, "grad_norm": 0.9406147591843335, "learning_rate": 3.421665695981363e-06, "loss": 0.3946018934249878, "step": 1175 }, { "epoch": 0.10309278350515463, "grad_norm": 0.912856504763363, "learning_rate": 3.4362259755387304e-06, "loss": 0.4039170265197754, "step": 1180 }, { "epoch": 0.10352961733356632, "grad_norm": 0.9687985229130971, "learning_rate": 3.450786255096098e-06, "loss": 0.40448732376098634, "step": 1185 }, { "epoch": 0.10396645116197799, "grad_norm": 0.9546841320177516, "learning_rate": 3.4653465346534653e-06, "loss": 0.4015976428985596, "step": 1190 }, { "epoch": 0.10440328499038966, "grad_norm": 0.797080359725853, "learning_rate": 3.479906814210833e-06, "loss": 0.38635199069976806, "step": 1195 }, { "epoch": 0.10484011881880133, "grad_norm": 0.9654644861246179, "learning_rate": 3.4944670937682006e-06, "loss": 0.3657480001449585, "step": 1200 }, { "epoch": 0.105276952647213, "grad_norm": 0.8163226499282966, "learning_rate": 3.509027373325568e-06, "loss": 0.3614840507507324, "step": 1205 }, { "epoch": 0.10571378647562467, "grad_norm": 0.9526723154339195, "learning_rate": 3.5235876528829354e-06, "loss": 0.40645685195922854, "step": 1210 }, { "epoch": 0.10615062030403634, "grad_norm": 0.9026210651923327, "learning_rate": 3.5381479324403033e-06, "loss": 0.40541372299194334, "step": 1215 }, { "epoch": 0.10658745413244802, "grad_norm": 0.8786068322278772, "learning_rate": 3.5527082119976707e-06, "loss": 0.38485538959503174, "step": 1220 }, { "epoch": 0.1070242879608597, "grad_norm": 0.8772503632774397, "learning_rate": 3.567268491555038e-06, "loss": 0.36857101917266843, "step": 1225 }, { "epoch": 0.10746112178927136, "grad_norm": 0.8111761646036195, "learning_rate": 3.5818287711124055e-06, "loss": 0.38311042785644533, "step": 1230 }, { "epoch": 0.10789795561768303, "grad_norm": 0.9474094587835888, "learning_rate": 3.596389050669773e-06, "loss": 0.40342283248901367, "step": 1235 }, { "epoch": 0.1083347894460947, "grad_norm": 0.8622335780055052, "learning_rate": 3.610949330227141e-06, "loss": 0.370561408996582, "step": 1240 }, { "epoch": 0.10877162327450637, "grad_norm": 0.7187446997906333, "learning_rate": 3.6255096097845082e-06, "loss": 0.36717925071716306, "step": 1245 }, { "epoch": 0.10920845710291804, "grad_norm": 0.7025275999831392, "learning_rate": 3.6400698893418757e-06, "loss": 0.35100393295288085, "step": 1250 }, { "epoch": 0.10964529093132973, "grad_norm": 0.9433175512414738, "learning_rate": 3.654630168899243e-06, "loss": 0.36043825149536135, "step": 1255 }, { "epoch": 0.1100821247597414, "grad_norm": 0.7952681318497952, "learning_rate": 3.6691904484566105e-06, "loss": 0.4097262382507324, "step": 1260 }, { "epoch": 0.11051895858815307, "grad_norm": 0.8471913167414931, "learning_rate": 3.6837507280139784e-06, "loss": 0.39373595714569093, "step": 1265 }, { "epoch": 0.11095579241656474, "grad_norm": 0.7634760708816889, "learning_rate": 3.6983110075713458e-06, "loss": 0.3884089231491089, "step": 1270 }, { "epoch": 0.11139262624497641, "grad_norm": 0.8520039425794123, "learning_rate": 3.712871287128713e-06, "loss": 0.38507957458496095, "step": 1275 }, { "epoch": 0.11182946007338808, "grad_norm": 0.9811457773883936, "learning_rate": 3.7274315666860806e-06, "loss": 0.3833726644515991, "step": 1280 }, { "epoch": 0.11226629390179975, "grad_norm": 0.8672788085144404, "learning_rate": 3.741991846243448e-06, "loss": 0.3658846139907837, "step": 1285 }, { "epoch": 0.11270312773021143, "grad_norm": 0.8353554411278085, "learning_rate": 3.756552125800816e-06, "loss": 0.3648726463317871, "step": 1290 }, { "epoch": 0.1131399615586231, "grad_norm": 0.7959589181266485, "learning_rate": 3.7711124053581833e-06, "loss": 0.3924074649810791, "step": 1295 }, { "epoch": 0.11357679538703477, "grad_norm": 0.900361051230697, "learning_rate": 3.7856726849155508e-06, "loss": 0.41378259658813477, "step": 1300 }, { "epoch": 0.11401362921544644, "grad_norm": 0.760821222245913, "learning_rate": 3.800232964472918e-06, "loss": 0.39209396839141847, "step": 1305 }, { "epoch": 0.11445046304385811, "grad_norm": 0.7719796125605197, "learning_rate": 3.814793244030286e-06, "loss": 0.37587001323699953, "step": 1310 }, { "epoch": 0.11488729687226978, "grad_norm": 0.961455326143638, "learning_rate": 3.829353523587653e-06, "loss": 0.3735897779464722, "step": 1315 }, { "epoch": 0.11532413070068145, "grad_norm": 0.7997725600381814, "learning_rate": 3.8439138031450205e-06, "loss": 0.3748273134231567, "step": 1320 }, { "epoch": 0.11576096452909314, "grad_norm": 0.8543947903243532, "learning_rate": 3.858474082702388e-06, "loss": 0.36738100051879885, "step": 1325 }, { "epoch": 0.11619779835750481, "grad_norm": 0.8326306764541747, "learning_rate": 3.873034362259755e-06, "loss": 0.40550875663757324, "step": 1330 }, { "epoch": 0.11663463218591648, "grad_norm": 0.7964844196602231, "learning_rate": 3.8875946418171236e-06, "loss": 0.349837589263916, "step": 1335 }, { "epoch": 0.11707146601432815, "grad_norm": 0.8013745713142078, "learning_rate": 3.902154921374491e-06, "loss": 0.38518643379211426, "step": 1340 }, { "epoch": 0.11750829984273982, "grad_norm": 0.687932625721458, "learning_rate": 3.916715200931858e-06, "loss": 0.387529993057251, "step": 1345 }, { "epoch": 0.11794513367115149, "grad_norm": 0.9608474214446459, "learning_rate": 3.931275480489226e-06, "loss": 0.39855408668518066, "step": 1350 }, { "epoch": 0.11838196749956316, "grad_norm": 0.9567743526493881, "learning_rate": 3.945835760046593e-06, "loss": 0.3914192676544189, "step": 1355 }, { "epoch": 0.11881880132797484, "grad_norm": 0.7344194394053274, "learning_rate": 3.960396039603961e-06, "loss": 0.3559435844421387, "step": 1360 }, { "epoch": 0.11925563515638651, "grad_norm": 0.8173408273700246, "learning_rate": 3.974956319161328e-06, "loss": 0.3713005304336548, "step": 1365 }, { "epoch": 0.11969246898479818, "grad_norm": 0.9116407386718485, "learning_rate": 3.9895165987186955e-06, "loss": 0.3889994859695435, "step": 1370 }, { "epoch": 0.12012930281320985, "grad_norm": 0.6933290011572547, "learning_rate": 4.004076878276063e-06, "loss": 0.3770449161529541, "step": 1375 }, { "epoch": 0.12056613664162152, "grad_norm": 0.76790033471191, "learning_rate": 4.018637157833431e-06, "loss": 0.3656313419342041, "step": 1380 }, { "epoch": 0.1210029704700332, "grad_norm": 0.8763612470592987, "learning_rate": 4.033197437390799e-06, "loss": 0.3745976209640503, "step": 1385 }, { "epoch": 0.12143980429844488, "grad_norm": 0.7874145132286048, "learning_rate": 4.047757716948166e-06, "loss": 0.3917360305786133, "step": 1390 }, { "epoch": 0.12187663812685655, "grad_norm": 0.9704953177507092, "learning_rate": 4.0623179965055335e-06, "loss": 0.39340810775756835, "step": 1395 }, { "epoch": 0.12231347195526822, "grad_norm": 0.7401424387071077, "learning_rate": 4.076878276062901e-06, "loss": 0.3670994281768799, "step": 1400 }, { "epoch": 0.12275030578367989, "grad_norm": 0.6713892161196747, "learning_rate": 4.091438555620268e-06, "loss": 0.36070680618286133, "step": 1405 }, { "epoch": 0.12318713961209156, "grad_norm": 0.707820492094242, "learning_rate": 4.105998835177636e-06, "loss": 0.36820206642150877, "step": 1410 }, { "epoch": 0.12362397344050323, "grad_norm": 0.8379421662112464, "learning_rate": 4.120559114735003e-06, "loss": 0.3633321523666382, "step": 1415 }, { "epoch": 0.1240608072689149, "grad_norm": 0.973759853132289, "learning_rate": 4.135119394292371e-06, "loss": 0.35839114189147947, "step": 1420 }, { "epoch": 0.12449764109732658, "grad_norm": 0.8099578927450184, "learning_rate": 4.149679673849738e-06, "loss": 0.3524632930755615, "step": 1425 }, { "epoch": 0.12493447492573825, "grad_norm": 1.034275379300658, "learning_rate": 4.164239953407106e-06, "loss": 0.39142775535583496, "step": 1430 }, { "epoch": 0.12537130875414992, "grad_norm": 0.871682784300044, "learning_rate": 4.178800232964474e-06, "loss": 0.36816749572753904, "step": 1435 }, { "epoch": 0.1258081425825616, "grad_norm": 0.878696157868754, "learning_rate": 4.19336051252184e-06, "loss": 0.37375805377960203, "step": 1440 }, { "epoch": 0.12624497641097326, "grad_norm": 0.7618969278465632, "learning_rate": 4.207920792079208e-06, "loss": 0.38312735557556155, "step": 1445 }, { "epoch": 0.12668181023938493, "grad_norm": 0.8085408264715693, "learning_rate": 4.222481071636575e-06, "loss": 0.36228113174438475, "step": 1450 }, { "epoch": 0.1271186440677966, "grad_norm": 0.8047753687988014, "learning_rate": 4.2370413511939435e-06, "loss": 0.3994114637374878, "step": 1455 }, { "epoch": 0.12755547789620827, "grad_norm": 0.8065854924170535, "learning_rate": 4.251601630751311e-06, "loss": 0.38635711669921874, "step": 1460 }, { "epoch": 0.12799231172461994, "grad_norm": 0.8473155612608699, "learning_rate": 4.266161910308678e-06, "loss": 0.36580069065093995, "step": 1465 }, { "epoch": 0.12842914555303162, "grad_norm": 0.7898844648407913, "learning_rate": 4.280722189866046e-06, "loss": 0.37668647766113283, "step": 1470 }, { "epoch": 0.12886597938144329, "grad_norm": 0.7973391370193389, "learning_rate": 4.295282469423413e-06, "loss": 0.3558746337890625, "step": 1475 }, { "epoch": 0.12930281320985498, "grad_norm": 0.913762139298237, "learning_rate": 4.309842748980781e-06, "loss": 0.4005859851837158, "step": 1480 }, { "epoch": 0.12973964703826665, "grad_norm": 0.9578914311635995, "learning_rate": 4.324403028538148e-06, "loss": 0.37859764099121096, "step": 1485 }, { "epoch": 0.13017648086667832, "grad_norm": 0.6799691966723094, "learning_rate": 4.3389633080955154e-06, "loss": 0.3660974740982056, "step": 1490 }, { "epoch": 0.13061331469509, "grad_norm": 0.8196255182924063, "learning_rate": 4.353523587652883e-06, "loss": 0.37839195728302, "step": 1495 }, { "epoch": 0.13105014852350166, "grad_norm": 0.7177250574155208, "learning_rate": 4.368083867210251e-06, "loss": 0.34493446350097656, "step": 1500 }, { "epoch": 0.13148698235191333, "grad_norm": 0.7833741534646865, "learning_rate": 4.3826441467676186e-06, "loss": 0.3491415739059448, "step": 1505 }, { "epoch": 0.131923816180325, "grad_norm": 0.6993822467403821, "learning_rate": 4.397204426324986e-06, "loss": 0.35277044773101807, "step": 1510 }, { "epoch": 0.13236065000873667, "grad_norm": 0.8613647480193876, "learning_rate": 4.411764705882353e-06, "loss": 0.3975247383117676, "step": 1515 }, { "epoch": 0.13279748383714834, "grad_norm": 0.8283055662460596, "learning_rate": 4.426324985439721e-06, "loss": 0.3930662155151367, "step": 1520 }, { "epoch": 0.13323431766556001, "grad_norm": 0.9365273224684068, "learning_rate": 4.440885264997088e-06, "loss": 0.359273099899292, "step": 1525 }, { "epoch": 0.13367115149397168, "grad_norm": 0.8894773977140368, "learning_rate": 4.455445544554456e-06, "loss": 0.38182787895202636, "step": 1530 }, { "epoch": 0.13410798532238336, "grad_norm": 0.7113109161081373, "learning_rate": 4.470005824111823e-06, "loss": 0.3783560276031494, "step": 1535 }, { "epoch": 0.13454481915079503, "grad_norm": 0.6783064423187151, "learning_rate": 4.4845661036691905e-06, "loss": 0.38660976886749265, "step": 1540 }, { "epoch": 0.13498165297920672, "grad_norm": 0.7729841428316008, "learning_rate": 4.499126383226559e-06, "loss": 0.38667755126953124, "step": 1545 }, { "epoch": 0.1354184868076184, "grad_norm": 0.911159139252004, "learning_rate": 4.513686662783926e-06, "loss": 0.3854829788208008, "step": 1550 }, { "epoch": 0.13585532063603006, "grad_norm": 0.8840099495575476, "learning_rate": 4.528246942341294e-06, "loss": 0.380039644241333, "step": 1555 }, { "epoch": 0.13629215446444173, "grad_norm": 0.7335824876646415, "learning_rate": 4.542807221898661e-06, "loss": 0.41298680305480956, "step": 1560 }, { "epoch": 0.1367289882928534, "grad_norm": 0.8578603423342869, "learning_rate": 4.5573675014560285e-06, "loss": 0.3736377000808716, "step": 1565 }, { "epoch": 0.13716582212126507, "grad_norm": 1.0112424949748455, "learning_rate": 4.571927781013396e-06, "loss": 0.35106544494628905, "step": 1570 }, { "epoch": 0.13760265594967674, "grad_norm": 0.9112482235950221, "learning_rate": 4.586488060570763e-06, "loss": 0.3599255561828613, "step": 1575 }, { "epoch": 0.13803948977808841, "grad_norm": 0.7336130455725317, "learning_rate": 4.601048340128131e-06, "loss": 0.4323733806610107, "step": 1580 }, { "epoch": 0.13847632360650008, "grad_norm": 0.7842375812006549, "learning_rate": 4.615608619685498e-06, "loss": 0.33960418701171874, "step": 1585 }, { "epoch": 0.13891315743491175, "grad_norm": 0.8756063928048322, "learning_rate": 4.630168899242866e-06, "loss": 0.38304474353790285, "step": 1590 }, { "epoch": 0.13934999126332343, "grad_norm": 0.7579826680262721, "learning_rate": 4.644729178800234e-06, "loss": 0.36175737380981443, "step": 1595 }, { "epoch": 0.1397868250917351, "grad_norm": 0.7297588607761094, "learning_rate": 4.659289458357601e-06, "loss": 0.40027427673339844, "step": 1600 }, { "epoch": 0.14022365892014677, "grad_norm": 0.819307119884272, "learning_rate": 4.673849737914969e-06, "loss": 0.38551011085510256, "step": 1605 }, { "epoch": 0.14066049274855844, "grad_norm": 0.9098381388537502, "learning_rate": 4.688410017472335e-06, "loss": 0.3788482904434204, "step": 1610 }, { "epoch": 0.14109732657697013, "grad_norm": 1.0029830054739886, "learning_rate": 4.702970297029703e-06, "loss": 0.4077587604522705, "step": 1615 }, { "epoch": 0.1415341604053818, "grad_norm": 0.7647159274350731, "learning_rate": 4.717530576587071e-06, "loss": 0.3409064531326294, "step": 1620 }, { "epoch": 0.14197099423379347, "grad_norm": 0.8133879471919547, "learning_rate": 4.7320908561444384e-06, "loss": 0.3655757665634155, "step": 1625 }, { "epoch": 0.14240782806220514, "grad_norm": 0.750850929230406, "learning_rate": 4.746651135701806e-06, "loss": 0.3793982744216919, "step": 1630 }, { "epoch": 0.14284466189061681, "grad_norm": 0.7651526892040493, "learning_rate": 4.761211415259173e-06, "loss": 0.4080184459686279, "step": 1635 }, { "epoch": 0.14328149571902848, "grad_norm": 1.229291245546349, "learning_rate": 4.775771694816541e-06, "loss": 0.3763369083404541, "step": 1640 }, { "epoch": 0.14371832954744015, "grad_norm": 0.7472021580293979, "learning_rate": 4.790331974373908e-06, "loss": 0.39942498207092286, "step": 1645 }, { "epoch": 0.14415516337585182, "grad_norm": 0.6922758578995823, "learning_rate": 4.8048922539312756e-06, "loss": 0.3637522220611572, "step": 1650 }, { "epoch": 0.1445919972042635, "grad_norm": 0.8987201214316145, "learning_rate": 4.819452533488643e-06, "loss": 0.3553933143615723, "step": 1655 }, { "epoch": 0.14502883103267517, "grad_norm": 0.7620188049806683, "learning_rate": 4.83401281304601e-06, "loss": 0.348878812789917, "step": 1660 }, { "epoch": 0.14546566486108684, "grad_norm": 0.9013261373194598, "learning_rate": 4.848573092603379e-06, "loss": 0.36460227966308595, "step": 1665 }, { "epoch": 0.1459024986894985, "grad_norm": 0.7500274575843179, "learning_rate": 4.863133372160746e-06, "loss": 0.3500811576843262, "step": 1670 }, { "epoch": 0.14633933251791018, "grad_norm": 0.8246931334141387, "learning_rate": 4.8776936517181135e-06, "loss": 0.38417055606842043, "step": 1675 }, { "epoch": 0.14677616634632185, "grad_norm": 0.7889428092748558, "learning_rate": 4.892253931275481e-06, "loss": 0.37981112003326417, "step": 1680 }, { "epoch": 0.14721300017473354, "grad_norm": 0.6926209535965214, "learning_rate": 4.906814210832848e-06, "loss": 0.36802101135253906, "step": 1685 }, { "epoch": 0.14764983400314521, "grad_norm": 0.7773571442276949, "learning_rate": 4.921374490390216e-06, "loss": 0.3927339553833008, "step": 1690 }, { "epoch": 0.14808666783155688, "grad_norm": 0.7393023061847095, "learning_rate": 4.935934769947583e-06, "loss": 0.390496826171875, "step": 1695 }, { "epoch": 0.14852350165996855, "grad_norm": 0.8785739450045229, "learning_rate": 4.950495049504951e-06, "loss": 0.34932169914245603, "step": 1700 }, { "epoch": 0.14896033548838022, "grad_norm": 0.6743734219157163, "learning_rate": 4.965055329062318e-06, "loss": 0.3444859027862549, "step": 1705 }, { "epoch": 0.1493971693167919, "grad_norm": 0.8394342635478469, "learning_rate": 4.979615608619686e-06, "loss": 0.35922081470489503, "step": 1710 }, { "epoch": 0.14983400314520356, "grad_norm": 0.8992205122410677, "learning_rate": 4.994175888177054e-06, "loss": 0.3389040470123291, "step": 1715 }, { "epoch": 0.15027083697361523, "grad_norm": 0.7039529429193082, "learning_rate": 5.00873616773442e-06, "loss": 0.3863561153411865, "step": 1720 }, { "epoch": 0.1507076708020269, "grad_norm": 0.7550778982862967, "learning_rate": 5.023296447291789e-06, "loss": 0.3811116456985474, "step": 1725 }, { "epoch": 0.15114450463043858, "grad_norm": 1.0521524301814011, "learning_rate": 5.037856726849156e-06, "loss": 0.3783811092376709, "step": 1730 }, { "epoch": 0.15158133845885025, "grad_norm": 0.6858792912581981, "learning_rate": 5.0524170064065235e-06, "loss": 0.3415175437927246, "step": 1735 }, { "epoch": 0.15201817228726192, "grad_norm": 0.9046779185560356, "learning_rate": 5.066977285963891e-06, "loss": 0.3812889099121094, "step": 1740 }, { "epoch": 0.15245500611567359, "grad_norm": 0.7290527224569108, "learning_rate": 5.081537565521258e-06, "loss": 0.35198519229888914, "step": 1745 }, { "epoch": 0.15289183994408528, "grad_norm": 0.620045883576383, "learning_rate": 5.096097845078626e-06, "loss": 0.37622573375701907, "step": 1750 }, { "epoch": 0.15332867377249695, "grad_norm": 0.6577274121254444, "learning_rate": 5.110658124635993e-06, "loss": 0.3807244777679443, "step": 1755 }, { "epoch": 0.15376550760090862, "grad_norm": 0.9381524374745799, "learning_rate": 5.125218404193361e-06, "loss": 0.36281282901763917, "step": 1760 }, { "epoch": 0.1542023414293203, "grad_norm": 0.6953562538799625, "learning_rate": 5.139778683750728e-06, "loss": 0.36776821613311766, "step": 1765 }, { "epoch": 0.15463917525773196, "grad_norm": 0.760356609994804, "learning_rate": 5.154338963308096e-06, "loss": 0.3613171100616455, "step": 1770 }, { "epoch": 0.15507600908614363, "grad_norm": 0.8537857074066705, "learning_rate": 5.168899242865464e-06, "loss": 0.3771476745605469, "step": 1775 }, { "epoch": 0.1555128429145553, "grad_norm": 0.7848408529261254, "learning_rate": 5.183459522422831e-06, "loss": 0.36820244789123535, "step": 1780 }, { "epoch": 0.15594967674296698, "grad_norm": 0.8134242494236945, "learning_rate": 5.1980198019801986e-06, "loss": 0.36753096580505373, "step": 1785 }, { "epoch": 0.15638651057137865, "grad_norm": 0.9366364283384035, "learning_rate": 5.212580081537566e-06, "loss": 0.40273056030273435, "step": 1790 }, { "epoch": 0.15682334439979032, "grad_norm": 0.7231174891035997, "learning_rate": 5.227140361094933e-06, "loss": 0.37033724784851074, "step": 1795 }, { "epoch": 0.15726017822820199, "grad_norm": 0.8307825960943914, "learning_rate": 5.241700640652301e-06, "loss": 0.3844906806945801, "step": 1800 }, { "epoch": 0.15769701205661366, "grad_norm": 0.6447341755019413, "learning_rate": 5.256260920209668e-06, "loss": 0.34872477054595946, "step": 1805 }, { "epoch": 0.15813384588502533, "grad_norm": 0.6827800677887306, "learning_rate": 5.270821199767036e-06, "loss": 0.3447998046875, "step": 1810 }, { "epoch": 0.158570679713437, "grad_norm": 0.7173763430272918, "learning_rate": 5.285381479324403e-06, "loss": 0.34498395919799807, "step": 1815 }, { "epoch": 0.1590075135418487, "grad_norm": 0.7194861068942845, "learning_rate": 5.299941758881771e-06, "loss": 0.3853462219238281, "step": 1820 }, { "epoch": 0.15944434737026036, "grad_norm": 0.8639417435669082, "learning_rate": 5.314502038439139e-06, "loss": 0.382833456993103, "step": 1825 }, { "epoch": 0.15988118119867203, "grad_norm": 0.7946860416279812, "learning_rate": 5.329062317996506e-06, "loss": 0.37894136905670167, "step": 1830 }, { "epoch": 0.1603180150270837, "grad_norm": 0.788333256784854, "learning_rate": 5.343622597553874e-06, "loss": 0.3733766317367554, "step": 1835 }, { "epoch": 0.16075484885549537, "grad_norm": 0.7583131527837863, "learning_rate": 5.358182877111241e-06, "loss": 0.37811639308929446, "step": 1840 }, { "epoch": 0.16119168268390704, "grad_norm": 0.8921478688051208, "learning_rate": 5.3727431566686085e-06, "loss": 0.35371246337890627, "step": 1845 }, { "epoch": 0.16162851651231872, "grad_norm": 0.7965572437521629, "learning_rate": 5.387303436225976e-06, "loss": 0.3803999900817871, "step": 1850 }, { "epoch": 0.16206535034073039, "grad_norm": 0.7560370549584807, "learning_rate": 5.401863715783343e-06, "loss": 0.3691378593444824, "step": 1855 }, { "epoch": 0.16250218416914206, "grad_norm": 0.7463420197761998, "learning_rate": 5.416423995340711e-06, "loss": 0.35692758560180665, "step": 1860 }, { "epoch": 0.16293901799755373, "grad_norm": 0.6982029226228245, "learning_rate": 5.430984274898079e-06, "loss": 0.38045496940612794, "step": 1865 }, { "epoch": 0.1633758518259654, "grad_norm": 0.7352527178823566, "learning_rate": 5.4455445544554465e-06, "loss": 0.3505851745605469, "step": 1870 }, { "epoch": 0.16381268565437707, "grad_norm": 0.9087370863760763, "learning_rate": 5.460104834012814e-06, "loss": 0.3918295860290527, "step": 1875 }, { "epoch": 0.16424951948278874, "grad_norm": 0.9545917171587983, "learning_rate": 5.474665113570181e-06, "loss": 0.36276931762695314, "step": 1880 }, { "epoch": 0.1646863533112004, "grad_norm": 0.7991548504087621, "learning_rate": 5.489225393127549e-06, "loss": 0.34326982498168945, "step": 1885 }, { "epoch": 0.1651231871396121, "grad_norm": 0.760159396544314, "learning_rate": 5.503785672684916e-06, "loss": 0.370395565032959, "step": 1890 }, { "epoch": 0.16556002096802377, "grad_norm": 0.7252784321478695, "learning_rate": 5.518345952242284e-06, "loss": 0.3649036407470703, "step": 1895 }, { "epoch": 0.16599685479643544, "grad_norm": 0.7496653371483559, "learning_rate": 5.532906231799651e-06, "loss": 0.3804527759552002, "step": 1900 }, { "epoch": 0.16643368862484711, "grad_norm": 0.9932630883721518, "learning_rate": 5.5474665113570185e-06, "loss": 0.35057411193847654, "step": 1905 }, { "epoch": 0.16687052245325878, "grad_norm": 0.6774026644084494, "learning_rate": 5.562026790914386e-06, "loss": 0.3792510986328125, "step": 1910 }, { "epoch": 0.16730735628167046, "grad_norm": 0.6426905497524633, "learning_rate": 5.576587070471754e-06, "loss": 0.38052997589111326, "step": 1915 }, { "epoch": 0.16774419011008213, "grad_norm": 0.8180089795499472, "learning_rate": 5.591147350029122e-06, "loss": 0.3424232959747314, "step": 1920 }, { "epoch": 0.1681810239384938, "grad_norm": 0.8526399900702871, "learning_rate": 5.605707629586489e-06, "loss": 0.371944522857666, "step": 1925 }, { "epoch": 0.16861785776690547, "grad_norm": 0.7728263972346984, "learning_rate": 5.6202679091438564e-06, "loss": 0.37664480209350587, "step": 1930 }, { "epoch": 0.16905469159531714, "grad_norm": 0.7058879598238367, "learning_rate": 5.634828188701224e-06, "loss": 0.359332799911499, "step": 1935 }, { "epoch": 0.1694915254237288, "grad_norm": 0.6197526251940858, "learning_rate": 5.649388468258591e-06, "loss": 0.3253277063369751, "step": 1940 }, { "epoch": 0.16992835925214048, "grad_norm": 0.7375092309689214, "learning_rate": 5.663948747815958e-06, "loss": 0.38425264358520506, "step": 1945 }, { "epoch": 0.17036519308055215, "grad_norm": 0.7691648051467216, "learning_rate": 5.678509027373325e-06, "loss": 0.37702155113220215, "step": 1950 }, { "epoch": 0.17080202690896382, "grad_norm": 0.8407686132530678, "learning_rate": 5.693069306930693e-06, "loss": 0.39754087924957277, "step": 1955 }, { "epoch": 0.17123886073737551, "grad_norm": 0.7877458735069779, "learning_rate": 5.707629586488062e-06, "loss": 0.365682578086853, "step": 1960 }, { "epoch": 0.17167569456578718, "grad_norm": 0.7536395064016652, "learning_rate": 5.722189866045429e-06, "loss": 0.35552325248718264, "step": 1965 }, { "epoch": 0.17211252839419885, "grad_norm": 0.6987536868640521, "learning_rate": 5.736750145602797e-06, "loss": 0.3581237316131592, "step": 1970 }, { "epoch": 0.17254936222261053, "grad_norm": 0.7687473360739675, "learning_rate": 5.751310425160164e-06, "loss": 0.3544020652770996, "step": 1975 }, { "epoch": 0.1729861960510222, "grad_norm": 0.6619224948414256, "learning_rate": 5.765870704717531e-06, "loss": 0.33031885623931884, "step": 1980 }, { "epoch": 0.17342302987943387, "grad_norm": 0.832529292453274, "learning_rate": 5.780430984274898e-06, "loss": 0.37040176391601565, "step": 1985 }, { "epoch": 0.17385986370784554, "grad_norm": 0.7906895322146575, "learning_rate": 5.7949912638322655e-06, "loss": 0.35463082790374756, "step": 1990 }, { "epoch": 0.1742966975362572, "grad_norm": 0.6616647714833541, "learning_rate": 5.809551543389633e-06, "loss": 0.32804319858551023, "step": 1995 }, { "epoch": 0.17473353136466888, "grad_norm": 0.7463595399139504, "learning_rate": 5.824111822947e-06, "loss": 0.35709404945373535, "step": 2000 }, { "epoch": 0.17517036519308055, "grad_norm": 0.6608290122513042, "learning_rate": 5.8386721025043695e-06, "loss": 0.3498566150665283, "step": 2005 }, { "epoch": 0.17560719902149222, "grad_norm": 0.8521732201421058, "learning_rate": 5.853232382061737e-06, "loss": 0.3927572250366211, "step": 2010 }, { "epoch": 0.1760440328499039, "grad_norm": 0.6298083188712531, "learning_rate": 5.8677926616191035e-06, "loss": 0.3990782737731934, "step": 2015 }, { "epoch": 0.17648086667831556, "grad_norm": 0.7198295540165622, "learning_rate": 5.882352941176471e-06, "loss": 0.3563860893249512, "step": 2020 }, { "epoch": 0.17691770050672725, "grad_norm": 0.5803783353298309, "learning_rate": 5.896913220733838e-06, "loss": 0.3125549077987671, "step": 2025 }, { "epoch": 0.17735453433513892, "grad_norm": 0.7513082043888356, "learning_rate": 5.911473500291206e-06, "loss": 0.35771932601928713, "step": 2030 }, { "epoch": 0.1777913681635506, "grad_norm": 0.7549436491209205, "learning_rate": 5.926033779848573e-06, "loss": 0.34246652126312255, "step": 2035 }, { "epoch": 0.17822820199196227, "grad_norm": 0.9019426137851565, "learning_rate": 5.940594059405941e-06, "loss": 0.36249828338623047, "step": 2040 }, { "epoch": 0.17866503582037394, "grad_norm": 0.7939079279280432, "learning_rate": 5.955154338963308e-06, "loss": 0.37720465660095215, "step": 2045 }, { "epoch": 0.1791018696487856, "grad_norm": 0.8874503256677041, "learning_rate": 5.9697146185206755e-06, "loss": 0.3650406837463379, "step": 2050 }, { "epoch": 0.17953870347719728, "grad_norm": 0.7409483095650105, "learning_rate": 5.984274898078044e-06, "loss": 0.35630369186401367, "step": 2055 }, { "epoch": 0.17997553730560895, "grad_norm": 0.9190890912460696, "learning_rate": 5.998835177635411e-06, "loss": 0.37529075145721436, "step": 2060 }, { "epoch": 0.18041237113402062, "grad_norm": 0.6373792825494502, "learning_rate": 6.013395457192779e-06, "loss": 0.3452745914459229, "step": 2065 }, { "epoch": 0.1808492049624323, "grad_norm": 0.6916453507136423, "learning_rate": 6.027955736750146e-06, "loss": 0.3578845977783203, "step": 2070 }, { "epoch": 0.18128603879084396, "grad_norm": 0.7999258653730792, "learning_rate": 6.0425160163075134e-06, "loss": 0.36371240615844724, "step": 2075 }, { "epoch": 0.18172287261925563, "grad_norm": 0.9226833297391754, "learning_rate": 6.057076295864881e-06, "loss": 0.38952820301055907, "step": 2080 }, { "epoch": 0.1821597064476673, "grad_norm": 0.8464371345577496, "learning_rate": 6.071636575422248e-06, "loss": 0.36534938812255857, "step": 2085 }, { "epoch": 0.18259654027607897, "grad_norm": 0.6862221147896709, "learning_rate": 6.086196854979616e-06, "loss": 0.3370864152908325, "step": 2090 }, { "epoch": 0.18303337410449066, "grad_norm": 0.6789773812833962, "learning_rate": 6.100757134536983e-06, "loss": 0.3392080068588257, "step": 2095 }, { "epoch": 0.18347020793290233, "grad_norm": 0.6758610915657155, "learning_rate": 6.115317414094351e-06, "loss": 0.3410805225372314, "step": 2100 }, { "epoch": 0.183907041761314, "grad_norm": 0.7192952460181388, "learning_rate": 6.129877693651719e-06, "loss": 0.36872215270996095, "step": 2105 }, { "epoch": 0.18434387558972568, "grad_norm": 0.8386391353577117, "learning_rate": 6.144437973209086e-06, "loss": 0.35224626064300535, "step": 2110 }, { "epoch": 0.18478070941813735, "grad_norm": 0.7729884991791154, "learning_rate": 6.158998252766454e-06, "loss": 0.36676316261291503, "step": 2115 }, { "epoch": 0.18521754324654902, "grad_norm": 0.6359856249425488, "learning_rate": 6.173558532323821e-06, "loss": 0.35394997596740724, "step": 2120 }, { "epoch": 0.18565437707496069, "grad_norm": 0.6741530001628504, "learning_rate": 6.1881188118811885e-06, "loss": 0.3262760639190674, "step": 2125 }, { "epoch": 0.18609121090337236, "grad_norm": 0.7345774230250943, "learning_rate": 6.202679091438556e-06, "loss": 0.37610774040222167, "step": 2130 }, { "epoch": 0.18652804473178403, "grad_norm": 0.8839888836450984, "learning_rate": 6.217239370995923e-06, "loss": 0.3589134454727173, "step": 2135 }, { "epoch": 0.1869648785601957, "grad_norm": 0.6135334890580738, "learning_rate": 6.231799650553291e-06, "loss": 0.3722238540649414, "step": 2140 }, { "epoch": 0.18740171238860737, "grad_norm": 0.7045246187420917, "learning_rate": 6.246359930110658e-06, "loss": 0.3711501359939575, "step": 2145 }, { "epoch": 0.18783854621701904, "grad_norm": 0.8368189919502889, "learning_rate": 6.2609202096680265e-06, "loss": 0.39473321437835696, "step": 2150 }, { "epoch": 0.1882753800454307, "grad_norm": 0.6635969313986924, "learning_rate": 6.275480489225394e-06, "loss": 0.3724366664886475, "step": 2155 }, { "epoch": 0.18871221387384238, "grad_norm": 0.7193694138269635, "learning_rate": 6.290040768782761e-06, "loss": 0.3742445230484009, "step": 2160 }, { "epoch": 0.18914904770225408, "grad_norm": 0.7510529169883257, "learning_rate": 6.304601048340129e-06, "loss": 0.3872281551361084, "step": 2165 }, { "epoch": 0.18958588153066575, "grad_norm": 0.6738437582640726, "learning_rate": 6.319161327897496e-06, "loss": 0.33948256969451907, "step": 2170 }, { "epoch": 0.19002271535907742, "grad_norm": 0.7373481920138374, "learning_rate": 6.333721607454864e-06, "loss": 0.3356840372085571, "step": 2175 }, { "epoch": 0.19045954918748909, "grad_norm": 0.8118329270256512, "learning_rate": 6.348281887012231e-06, "loss": 0.35519957542419434, "step": 2180 }, { "epoch": 0.19089638301590076, "grad_norm": 0.672259344790839, "learning_rate": 6.3628421665695985e-06, "loss": 0.38954055309295654, "step": 2185 }, { "epoch": 0.19133321684431243, "grad_norm": 0.7730875571325356, "learning_rate": 6.377402446126966e-06, "loss": 0.3310117244720459, "step": 2190 }, { "epoch": 0.1917700506727241, "grad_norm": 0.7017747562485861, "learning_rate": 6.391962725684334e-06, "loss": 0.36613945960998534, "step": 2195 }, { "epoch": 0.19220688450113577, "grad_norm": 0.6573879968284417, "learning_rate": 6.406523005241702e-06, "loss": 0.3740427494049072, "step": 2200 }, { "epoch": 0.19264371832954744, "grad_norm": 0.6399519286127671, "learning_rate": 6.421083284799069e-06, "loss": 0.3882983446121216, "step": 2205 }, { "epoch": 0.1930805521579591, "grad_norm": 0.6944514483702614, "learning_rate": 6.4356435643564364e-06, "loss": 0.35132148265838625, "step": 2210 }, { "epoch": 0.19351738598637078, "grad_norm": 0.7011916381073251, "learning_rate": 6.450203843913804e-06, "loss": 0.37237226963043213, "step": 2215 }, { "epoch": 0.19395421981478245, "grad_norm": 0.7116030252012401, "learning_rate": 6.464764123471171e-06, "loss": 0.373442554473877, "step": 2220 }, { "epoch": 0.19439105364319412, "grad_norm": 0.7471297267478001, "learning_rate": 6.479324403028539e-06, "loss": 0.3334825038909912, "step": 2225 }, { "epoch": 0.1948278874716058, "grad_norm": 0.6964634031004786, "learning_rate": 6.493884682585906e-06, "loss": 0.35432770252227785, "step": 2230 }, { "epoch": 0.19526472130001749, "grad_norm": 0.7419683763062287, "learning_rate": 6.5084449621432736e-06, "loss": 0.37869112491607665, "step": 2235 }, { "epoch": 0.19570155512842916, "grad_norm": 0.6842054665206206, "learning_rate": 6.523005241700641e-06, "loss": 0.38485901355743407, "step": 2240 }, { "epoch": 0.19613838895684083, "grad_norm": 0.6835352566618841, "learning_rate": 6.537565521258009e-06, "loss": 0.37896811962127686, "step": 2245 }, { "epoch": 0.1965752227852525, "grad_norm": 0.6489383810590562, "learning_rate": 6.552125800815377e-06, "loss": 0.367059326171875, "step": 2250 }, { "epoch": 0.19701205661366417, "grad_norm": 0.5991469050961686, "learning_rate": 6.566686080372744e-06, "loss": 0.3751519680023193, "step": 2255 }, { "epoch": 0.19744889044207584, "grad_norm": 0.551484504702985, "learning_rate": 6.5812463599301115e-06, "loss": 0.35507237911224365, "step": 2260 }, { "epoch": 0.1978857242704875, "grad_norm": 0.7397460576162918, "learning_rate": 6.595806639487479e-06, "loss": 0.3635953187942505, "step": 2265 }, { "epoch": 0.19832255809889918, "grad_norm": 0.6793788088893861, "learning_rate": 6.610366919044846e-06, "loss": 0.3598971366882324, "step": 2270 }, { "epoch": 0.19875939192731085, "grad_norm": 0.7737843272039838, "learning_rate": 6.624927198602214e-06, "loss": 0.3749223232269287, "step": 2275 }, { "epoch": 0.19919622575572252, "grad_norm": 0.6753863702954462, "learning_rate": 6.639487478159581e-06, "loss": 0.3710052967071533, "step": 2280 }, { "epoch": 0.1996330595841342, "grad_norm": 0.7012284717330851, "learning_rate": 6.654047757716948e-06, "loss": 0.3706115961074829, "step": 2285 }, { "epoch": 0.20006989341254586, "grad_norm": 0.696232191641763, "learning_rate": 6.668608037274317e-06, "loss": 0.3566436290740967, "step": 2290 }, { "epoch": 0.20050672724095753, "grad_norm": 0.6210601348106474, "learning_rate": 6.683168316831684e-06, "loss": 0.3609482288360596, "step": 2295 }, { "epoch": 0.20094356106936923, "grad_norm": 0.6435289815879744, "learning_rate": 6.697728596389052e-06, "loss": 0.3563279628753662, "step": 2300 }, { "epoch": 0.2013803948977809, "grad_norm": 0.6110965517666337, "learning_rate": 6.712288875946419e-06, "loss": 0.3644765853881836, "step": 2305 }, { "epoch": 0.20181722872619257, "grad_norm": 0.9771436412560148, "learning_rate": 6.726849155503787e-06, "loss": 0.38610928058624266, "step": 2310 }, { "epoch": 0.20225406255460424, "grad_norm": 0.6243658048290334, "learning_rate": 6.741409435061154e-06, "loss": 0.33592543601989744, "step": 2315 }, { "epoch": 0.2026908963830159, "grad_norm": 0.7025451377326813, "learning_rate": 6.755969714618521e-06, "loss": 0.37249302864074707, "step": 2320 }, { "epoch": 0.20312773021142758, "grad_norm": 0.7040466799509926, "learning_rate": 6.770529994175888e-06, "loss": 0.355014705657959, "step": 2325 }, { "epoch": 0.20356456403983925, "grad_norm": 0.6439750507504143, "learning_rate": 6.7850902737332555e-06, "loss": 0.35883183479309083, "step": 2330 }, { "epoch": 0.20400139786825092, "grad_norm": 0.7278449161937304, "learning_rate": 6.799650553290625e-06, "loss": 0.3916857481002808, "step": 2335 }, { "epoch": 0.2044382316966626, "grad_norm": 0.7412730057245419, "learning_rate": 6.814210832847992e-06, "loss": 0.3681692123413086, "step": 2340 }, { "epoch": 0.20487506552507426, "grad_norm": 0.7411856654712123, "learning_rate": 6.8287711124053595e-06, "loss": 0.3906771898269653, "step": 2345 }, { "epoch": 0.20531189935348593, "grad_norm": 0.7446311654937512, "learning_rate": 6.843331391962726e-06, "loss": 0.36449105739593507, "step": 2350 }, { "epoch": 0.2057487331818976, "grad_norm": 0.6230922683737181, "learning_rate": 6.8578916715200935e-06, "loss": 0.3462799310684204, "step": 2355 }, { "epoch": 0.20618556701030927, "grad_norm": 0.6108369364080214, "learning_rate": 6.872451951077461e-06, "loss": 0.3657135725021362, "step": 2360 }, { "epoch": 0.20662240083872094, "grad_norm": 0.7420954686035872, "learning_rate": 6.887012230634828e-06, "loss": 0.35817790031433105, "step": 2365 }, { "epoch": 0.20705923466713264, "grad_norm": 0.7304578189202768, "learning_rate": 6.901572510192196e-06, "loss": 0.36753096580505373, "step": 2370 }, { "epoch": 0.2074960684955443, "grad_norm": 0.7634021363205672, "learning_rate": 6.916132789749563e-06, "loss": 0.39130487442016604, "step": 2375 }, { "epoch": 0.20793290232395598, "grad_norm": 0.6293955791974712, "learning_rate": 6.930693069306931e-06, "loss": 0.3759514808654785, "step": 2380 }, { "epoch": 0.20836973615236765, "grad_norm": 0.9078169789690107, "learning_rate": 6.945253348864299e-06, "loss": 0.398149299621582, "step": 2385 }, { "epoch": 0.20880656998077932, "grad_norm": 0.7482801944391753, "learning_rate": 6.959813628421666e-06, "loss": 0.34279069900512693, "step": 2390 }, { "epoch": 0.209243403809191, "grad_norm": 0.725819275958313, "learning_rate": 6.974373907979034e-06, "loss": 0.3716492414474487, "step": 2395 }, { "epoch": 0.20968023763760266, "grad_norm": 0.6721560845652398, "learning_rate": 6.988934187536401e-06, "loss": 0.3816210269927979, "step": 2400 }, { "epoch": 0.21011707146601433, "grad_norm": 0.7924115839442565, "learning_rate": 7.0034944670937686e-06, "loss": 0.3685163974761963, "step": 2405 }, { "epoch": 0.210553905294426, "grad_norm": 0.6036524369750478, "learning_rate": 7.018054746651136e-06, "loss": 0.36073827743530273, "step": 2410 }, { "epoch": 0.21099073912283767, "grad_norm": 0.6896498081005216, "learning_rate": 7.032615026208503e-06, "loss": 0.34104845523834226, "step": 2415 }, { "epoch": 0.21142757295124934, "grad_norm": 0.6370003856381051, "learning_rate": 7.047175305765871e-06, "loss": 0.3398436546325684, "step": 2420 }, { "epoch": 0.211864406779661, "grad_norm": 0.6668281989573499, "learning_rate": 7.061735585323238e-06, "loss": 0.35391199588775635, "step": 2425 }, { "epoch": 0.21230124060807268, "grad_norm": 0.7242116680054788, "learning_rate": 7.0762958648806065e-06, "loss": 0.3586705446243286, "step": 2430 }, { "epoch": 0.21273807443648435, "grad_norm": 0.6712382501508666, "learning_rate": 7.090856144437974e-06, "loss": 0.31951251029968264, "step": 2435 }, { "epoch": 0.21317490826489605, "grad_norm": 0.6082637110601334, "learning_rate": 7.105416423995341e-06, "loss": 0.35065836906433107, "step": 2440 }, { "epoch": 0.21361174209330772, "grad_norm": 0.6872818772760146, "learning_rate": 7.119976703552709e-06, "loss": 0.3460221290588379, "step": 2445 }, { "epoch": 0.2140485759217194, "grad_norm": 0.5859810765836417, "learning_rate": 7.134536983110076e-06, "loss": 0.34882321357727053, "step": 2450 }, { "epoch": 0.21448540975013106, "grad_norm": 0.8245795422876853, "learning_rate": 7.149097262667444e-06, "loss": 0.3540318012237549, "step": 2455 }, { "epoch": 0.21492224357854273, "grad_norm": 0.7539016851510868, "learning_rate": 7.163657542224811e-06, "loss": 0.3894538879394531, "step": 2460 }, { "epoch": 0.2153590774069544, "grad_norm": 0.6141302044955014, "learning_rate": 7.1782178217821785e-06, "loss": 0.3736457824707031, "step": 2465 }, { "epoch": 0.21579591123536607, "grad_norm": 0.6633501902662218, "learning_rate": 7.192778101339546e-06, "loss": 0.33775105476379397, "step": 2470 }, { "epoch": 0.21623274506377774, "grad_norm": 0.5476149054654407, "learning_rate": 7.207338380896913e-06, "loss": 0.34150807857513427, "step": 2475 }, { "epoch": 0.2166695788921894, "grad_norm": 0.6170415362590457, "learning_rate": 7.221898660454282e-06, "loss": 0.3818201541900635, "step": 2480 }, { "epoch": 0.21710641272060108, "grad_norm": 0.7385230246716775, "learning_rate": 7.236458940011649e-06, "loss": 0.3487027883529663, "step": 2485 }, { "epoch": 0.21754324654901275, "grad_norm": 0.7244035965662572, "learning_rate": 7.2510192195690165e-06, "loss": 0.36789641380310056, "step": 2490 }, { "epoch": 0.21798008037742442, "grad_norm": 0.6956419954518192, "learning_rate": 7.265579499126384e-06, "loss": 0.36410861015319823, "step": 2495 }, { "epoch": 0.2184169142058361, "grad_norm": 0.7185675008646544, "learning_rate": 7.280139778683751e-06, "loss": 0.3578779935836792, "step": 2500 }, { "epoch": 0.21885374803424779, "grad_norm": 0.7445024604079077, "learning_rate": 7.294700058241119e-06, "loss": 0.35570406913757324, "step": 2505 }, { "epoch": 0.21929058186265946, "grad_norm": 0.7538691266586436, "learning_rate": 7.309260337798486e-06, "loss": 0.37264862060546877, "step": 2510 }, { "epoch": 0.21972741569107113, "grad_norm": 0.7767364818593314, "learning_rate": 7.323820617355854e-06, "loss": 0.39140844345092773, "step": 2515 }, { "epoch": 0.2201642495194828, "grad_norm": 0.7828011713516126, "learning_rate": 7.338380896913221e-06, "loss": 0.3441926956176758, "step": 2520 }, { "epoch": 0.22060108334789447, "grad_norm": 0.8091094117276278, "learning_rate": 7.352941176470589e-06, "loss": 0.37132773399353025, "step": 2525 }, { "epoch": 0.22103791717630614, "grad_norm": 0.5871203139619662, "learning_rate": 7.367501456027957e-06, "loss": 0.33880462646484377, "step": 2530 }, { "epoch": 0.2214747510047178, "grad_norm": 0.5742776491980183, "learning_rate": 7.382061735585324e-06, "loss": 0.34047799110412597, "step": 2535 }, { "epoch": 0.22191158483312948, "grad_norm": 0.7921037255590461, "learning_rate": 7.3966220151426916e-06, "loss": 0.3577542304992676, "step": 2540 }, { "epoch": 0.22234841866154115, "grad_norm": 0.6956534053986854, "learning_rate": 7.411182294700059e-06, "loss": 0.3407714605331421, "step": 2545 }, { "epoch": 0.22278525248995282, "grad_norm": 0.715258615094935, "learning_rate": 7.425742574257426e-06, "loss": 0.358453106880188, "step": 2550 }, { "epoch": 0.2232220863183645, "grad_norm": 0.7395784796774002, "learning_rate": 7.440302853814794e-06, "loss": 0.38074631690979005, "step": 2555 }, { "epoch": 0.22365892014677616, "grad_norm": 0.6720167076817063, "learning_rate": 7.454863133372161e-06, "loss": 0.34781956672668457, "step": 2560 }, { "epoch": 0.22409575397518783, "grad_norm": 0.5986012934773063, "learning_rate": 7.469423412929529e-06, "loss": 0.3527717590332031, "step": 2565 }, { "epoch": 0.2245325878035995, "grad_norm": 0.8384489035927538, "learning_rate": 7.483983692486896e-06, "loss": 0.3532160997390747, "step": 2570 }, { "epoch": 0.2249694216320112, "grad_norm": 0.6116869148581083, "learning_rate": 7.498543972044264e-06, "loss": 0.38284156322479246, "step": 2575 }, { "epoch": 0.22540625546042287, "grad_norm": 0.6579890938122118, "learning_rate": 7.513104251601632e-06, "loss": 0.3430779457092285, "step": 2580 }, { "epoch": 0.22584308928883454, "grad_norm": 0.6698098350052173, "learning_rate": 7.527664531158999e-06, "loss": 0.37479660511016843, "step": 2585 }, { "epoch": 0.2262799231172462, "grad_norm": 0.6946081759648081, "learning_rate": 7.542224810716367e-06, "loss": 0.34972782135009767, "step": 2590 }, { "epoch": 0.22671675694565788, "grad_norm": 0.6530770061255425, "learning_rate": 7.556785090273734e-06, "loss": 0.33446154594421384, "step": 2595 }, { "epoch": 0.22715359077406955, "grad_norm": 0.6424919799018514, "learning_rate": 7.5713453698311015e-06, "loss": 0.3423313140869141, "step": 2600 }, { "epoch": 0.22759042460248122, "grad_norm": 0.5923249755574431, "learning_rate": 7.585905649388469e-06, "loss": 0.3549938201904297, "step": 2605 }, { "epoch": 0.2280272584308929, "grad_norm": 0.6687795890013838, "learning_rate": 7.600465928945836e-06, "loss": 0.35867795944213865, "step": 2610 }, { "epoch": 0.22846409225930456, "grad_norm": 0.9511773513357894, "learning_rate": 7.615026208503204e-06, "loss": 0.33241615295410154, "step": 2615 }, { "epoch": 0.22890092608771623, "grad_norm": 0.7456762099738559, "learning_rate": 7.629586488060571e-06, "loss": 0.37729582786560056, "step": 2620 }, { "epoch": 0.2293377599161279, "grad_norm": 0.6802306537218604, "learning_rate": 7.64414676761794e-06, "loss": 0.3412250280380249, "step": 2625 }, { "epoch": 0.22977459374453957, "grad_norm": 0.5336063412174575, "learning_rate": 7.658707047175306e-06, "loss": 0.326814603805542, "step": 2630 }, { "epoch": 0.23021142757295124, "grad_norm": 0.6132142388218147, "learning_rate": 7.673267326732674e-06, "loss": 0.3603346586227417, "step": 2635 }, { "epoch": 0.2306482614013629, "grad_norm": 0.6326357668292028, "learning_rate": 7.687827606290041e-06, "loss": 0.3631857395172119, "step": 2640 }, { "epoch": 0.2310850952297746, "grad_norm": 0.5833996012837498, "learning_rate": 7.70238788584741e-06, "loss": 0.3587920665740967, "step": 2645 }, { "epoch": 0.23152192905818628, "grad_norm": 0.7686187697572396, "learning_rate": 7.716948165404776e-06, "loss": 0.33124604225158694, "step": 2650 }, { "epoch": 0.23195876288659795, "grad_norm": 0.6006884899352561, "learning_rate": 7.731508444962144e-06, "loss": 0.33424460887908936, "step": 2655 }, { "epoch": 0.23239559671500962, "grad_norm": 0.6389869635877703, "learning_rate": 7.74606872451951e-06, "loss": 0.3553281307220459, "step": 2660 }, { "epoch": 0.2328324305434213, "grad_norm": 0.7293694754027187, "learning_rate": 7.760629004076879e-06, "loss": 0.3521750450134277, "step": 2665 }, { "epoch": 0.23326926437183296, "grad_norm": 0.8000105974417093, "learning_rate": 7.775189283634247e-06, "loss": 0.38339910507202146, "step": 2670 }, { "epoch": 0.23370609820024463, "grad_norm": 0.9893949027894577, "learning_rate": 7.789749563191614e-06, "loss": 0.3701789855957031, "step": 2675 }, { "epoch": 0.2341429320286563, "grad_norm": 0.5746660896505187, "learning_rate": 7.804309842748982e-06, "loss": 0.3364317178726196, "step": 2680 }, { "epoch": 0.23457976585706797, "grad_norm": 0.5972292394583488, "learning_rate": 7.818870122306349e-06, "loss": 0.34050884246826174, "step": 2685 }, { "epoch": 0.23501659968547964, "grad_norm": 0.5959781886311387, "learning_rate": 7.833430401863717e-06, "loss": 0.36385459899902345, "step": 2690 }, { "epoch": 0.2354534335138913, "grad_norm": 0.5614249925223384, "learning_rate": 7.847990681421083e-06, "loss": 0.34111318588256834, "step": 2695 }, { "epoch": 0.23589026734230298, "grad_norm": 0.7079072974607035, "learning_rate": 7.862550960978452e-06, "loss": 0.3567982196807861, "step": 2700 }, { "epoch": 0.23632710117071465, "grad_norm": 0.6785711806949156, "learning_rate": 7.877111240535818e-06, "loss": 0.3454697370529175, "step": 2705 }, { "epoch": 0.23676393499912632, "grad_norm": 0.5609600266120207, "learning_rate": 7.891671520093187e-06, "loss": 0.34027848243713377, "step": 2710 }, { "epoch": 0.23720076882753802, "grad_norm": 0.6331748319467065, "learning_rate": 7.906231799650555e-06, "loss": 0.3191443681716919, "step": 2715 }, { "epoch": 0.2376376026559497, "grad_norm": 0.5951955328722218, "learning_rate": 7.920792079207921e-06, "loss": 0.34945082664489746, "step": 2720 }, { "epoch": 0.23807443648436136, "grad_norm": 0.8931417180433463, "learning_rate": 7.93535235876529e-06, "loss": 0.37967064380645754, "step": 2725 }, { "epoch": 0.23851127031277303, "grad_norm": 0.5943602625827098, "learning_rate": 7.949912638322656e-06, "loss": 0.3123589038848877, "step": 2730 }, { "epoch": 0.2389481041411847, "grad_norm": 0.6130124963386756, "learning_rate": 7.964472917880025e-06, "loss": 0.37939136028289794, "step": 2735 }, { "epoch": 0.23938493796959637, "grad_norm": 0.5718294938952129, "learning_rate": 7.979033197437391e-06, "loss": 0.33417224884033203, "step": 2740 }, { "epoch": 0.23982177179800804, "grad_norm": 0.7008645161536309, "learning_rate": 7.99359347699476e-06, "loss": 0.3567851781845093, "step": 2745 }, { "epoch": 0.2402586056264197, "grad_norm": 4.48071299069076, "learning_rate": 8.008153756552126e-06, "loss": 0.34499688148498536, "step": 2750 }, { "epoch": 0.24069543945483138, "grad_norm": 0.6725396294383557, "learning_rate": 8.022714036109494e-06, "loss": 0.3514946460723877, "step": 2755 }, { "epoch": 0.24113227328324305, "grad_norm": 0.6294894125952248, "learning_rate": 8.037274315666862e-06, "loss": 0.3455192565917969, "step": 2760 }, { "epoch": 0.24156910711165472, "grad_norm": 0.6674680748774342, "learning_rate": 8.051834595224229e-06, "loss": 0.32422804832458496, "step": 2765 }, { "epoch": 0.2420059409400664, "grad_norm": 0.7492812424716967, "learning_rate": 8.066394874781597e-06, "loss": 0.36422090530395507, "step": 2770 }, { "epoch": 0.24244277476847806, "grad_norm": 0.6313832849507158, "learning_rate": 8.080955154338964e-06, "loss": 0.3622490882873535, "step": 2775 }, { "epoch": 0.24287960859688976, "grad_norm": 0.5921940416638252, "learning_rate": 8.095515433896332e-06, "loss": 0.3365023612976074, "step": 2780 }, { "epoch": 0.24331644242530143, "grad_norm": 0.7061397249914456, "learning_rate": 8.110075713453699e-06, "loss": 0.3612786293029785, "step": 2785 }, { "epoch": 0.2437532762537131, "grad_norm": 0.5833298373260588, "learning_rate": 8.124635993011067e-06, "loss": 0.34108405113220214, "step": 2790 }, { "epoch": 0.24419011008212477, "grad_norm": 0.5560381057722051, "learning_rate": 8.139196272568434e-06, "loss": 0.3732252597808838, "step": 2795 }, { "epoch": 0.24462694391053644, "grad_norm": 0.6110124343776221, "learning_rate": 8.153756552125802e-06, "loss": 0.3552409648895264, "step": 2800 }, { "epoch": 0.2450637777389481, "grad_norm": 0.6681637942201936, "learning_rate": 8.168316831683168e-06, "loss": 0.3455709934234619, "step": 2805 }, { "epoch": 0.24550061156735978, "grad_norm": 0.6450299268475965, "learning_rate": 8.182877111240537e-06, "loss": 0.3921844482421875, "step": 2810 }, { "epoch": 0.24593744539577145, "grad_norm": 0.7074235992949233, "learning_rate": 8.197437390797905e-06, "loss": 0.3537395477294922, "step": 2815 }, { "epoch": 0.24637427922418312, "grad_norm": 0.6319652762278665, "learning_rate": 8.211997670355272e-06, "loss": 0.3694887638092041, "step": 2820 }, { "epoch": 0.2468111130525948, "grad_norm": 0.6465725793396181, "learning_rate": 8.22655794991264e-06, "loss": 0.40778274536132814, "step": 2825 }, { "epoch": 0.24724794688100646, "grad_norm": 0.5938138445514334, "learning_rate": 8.241118229470006e-06, "loss": 0.335831880569458, "step": 2830 }, { "epoch": 0.24768478070941813, "grad_norm": 0.8005389013132556, "learning_rate": 8.255678509027375e-06, "loss": 0.3703723907470703, "step": 2835 }, { "epoch": 0.2481216145378298, "grad_norm": 0.5637168748301561, "learning_rate": 8.270238788584741e-06, "loss": 0.3274531364440918, "step": 2840 }, { "epoch": 0.24855844836624147, "grad_norm": 0.5839850004645893, "learning_rate": 8.284799068142108e-06, "loss": 0.3429368495941162, "step": 2845 }, { "epoch": 0.24899528219465317, "grad_norm": 0.5646336356146116, "learning_rate": 8.299359347699476e-06, "loss": 0.36389889717102053, "step": 2850 }, { "epoch": 0.24943211602306484, "grad_norm": 0.6982796255799228, "learning_rate": 8.313919627256844e-06, "loss": 0.3348679542541504, "step": 2855 }, { "epoch": 0.2498689498514765, "grad_norm": 0.7450408084966539, "learning_rate": 8.328479906814213e-06, "loss": 0.33834590911865237, "step": 2860 }, { "epoch": 0.25030578367988815, "grad_norm": 0.6561416741983753, "learning_rate": 8.34304018637158e-06, "loss": 0.365637993812561, "step": 2865 }, { "epoch": 0.25074261750829985, "grad_norm": 0.6193488254129494, "learning_rate": 8.357600465928948e-06, "loss": 0.3378806829452515, "step": 2870 }, { "epoch": 0.2511794513367115, "grad_norm": 0.6106913601939201, "learning_rate": 8.372160745486314e-06, "loss": 0.35423710346221926, "step": 2875 }, { "epoch": 0.2516162851651232, "grad_norm": 0.615965233123171, "learning_rate": 8.38672102504368e-06, "loss": 0.34273619651794435, "step": 2880 }, { "epoch": 0.2520531189935349, "grad_norm": 0.7806559625966712, "learning_rate": 8.401281304601049e-06, "loss": 0.3473399877548218, "step": 2885 }, { "epoch": 0.25248995282194653, "grad_norm": 0.7696283513697042, "learning_rate": 8.415841584158416e-06, "loss": 0.34609322547912597, "step": 2890 }, { "epoch": 0.2529267866503582, "grad_norm": 0.6174805778390828, "learning_rate": 8.430401863715784e-06, "loss": 0.33936114311218263, "step": 2895 }, { "epoch": 0.25336362047876987, "grad_norm": 0.6286421744102961, "learning_rate": 8.44496214327315e-06, "loss": 0.3573204755783081, "step": 2900 }, { "epoch": 0.25380045430718157, "grad_norm": 0.5912411441375532, "learning_rate": 8.45952242283052e-06, "loss": 0.33123579025268557, "step": 2905 }, { "epoch": 0.2542372881355932, "grad_norm": 0.6384785604960356, "learning_rate": 8.474082702387887e-06, "loss": 0.3722397327423096, "step": 2910 }, { "epoch": 0.2546741219640049, "grad_norm": 0.6689686136532637, "learning_rate": 8.488642981945253e-06, "loss": 0.34297640323638917, "step": 2915 }, { "epoch": 0.25511095579241655, "grad_norm": 0.7200901078715937, "learning_rate": 8.503203261502622e-06, "loss": 0.3251065731048584, "step": 2920 }, { "epoch": 0.25554778962082825, "grad_norm": 0.8274363213459873, "learning_rate": 8.517763541059988e-06, "loss": 0.37982821464538574, "step": 2925 }, { "epoch": 0.2559846234492399, "grad_norm": 0.5922469445487722, "learning_rate": 8.532323820617357e-06, "loss": 0.3461365461349487, "step": 2930 }, { "epoch": 0.2564214572776516, "grad_norm": 0.6527761240530171, "learning_rate": 8.546884100174723e-06, "loss": 0.3473654747009277, "step": 2935 }, { "epoch": 0.25685829110606323, "grad_norm": 0.7135137539376302, "learning_rate": 8.561444379732091e-06, "loss": 0.37824287414550783, "step": 2940 }, { "epoch": 0.25729512493447493, "grad_norm": 0.5761369784358351, "learning_rate": 8.576004659289458e-06, "loss": 0.35984115600585936, "step": 2945 }, { "epoch": 0.25773195876288657, "grad_norm": 0.5316090124580366, "learning_rate": 8.590564938846826e-06, "loss": 0.3476438522338867, "step": 2950 }, { "epoch": 0.25816879259129827, "grad_norm": 0.5626430534476188, "learning_rate": 8.605125218404195e-06, "loss": 0.3362871170043945, "step": 2955 }, { "epoch": 0.25860562641970997, "grad_norm": 0.6457535983778553, "learning_rate": 8.619685497961561e-06, "loss": 0.3506113052368164, "step": 2960 }, { "epoch": 0.2590424602481216, "grad_norm": 0.5897106102483982, "learning_rate": 8.63424577751893e-06, "loss": 0.35953707695007325, "step": 2965 }, { "epoch": 0.2594792940765333, "grad_norm": 0.5773360768410302, "learning_rate": 8.648806057076296e-06, "loss": 0.3630875587463379, "step": 2970 }, { "epoch": 0.25991612790494495, "grad_norm": 0.7297055663009355, "learning_rate": 8.663366336633664e-06, "loss": 0.3426224231719971, "step": 2975 }, { "epoch": 0.26035296173335665, "grad_norm": 0.6311266637185832, "learning_rate": 8.677926616191031e-06, "loss": 0.3473088264465332, "step": 2980 }, { "epoch": 0.2607897955617683, "grad_norm": 0.7542317626880402, "learning_rate": 8.692486895748399e-06, "loss": 0.37248835563659666, "step": 2985 }, { "epoch": 0.26122662939018, "grad_norm": 0.6052071027067953, "learning_rate": 8.707047175305766e-06, "loss": 0.36325273513793943, "step": 2990 }, { "epoch": 0.26166346321859163, "grad_norm": 0.6131593510409735, "learning_rate": 8.721607454863134e-06, "loss": 0.3283590793609619, "step": 2995 }, { "epoch": 0.2621002970470033, "grad_norm": 0.8019379046357707, "learning_rate": 8.736167734420502e-06, "loss": 0.36461195945739744, "step": 3000 }, { "epoch": 0.26253713087541497, "grad_norm": 0.5779803706010727, "learning_rate": 8.750728013977869e-06, "loss": 0.3741759777069092, "step": 3005 }, { "epoch": 0.26297396470382667, "grad_norm": 0.6985016354189488, "learning_rate": 8.765288293535237e-06, "loss": 0.3428728342056274, "step": 3010 }, { "epoch": 0.2634107985322383, "grad_norm": 0.6794853412087116, "learning_rate": 8.779848573092604e-06, "loss": 0.3515950679779053, "step": 3015 }, { "epoch": 0.26384763236065, "grad_norm": 0.6221319579409401, "learning_rate": 8.794408852649972e-06, "loss": 0.37505407333374025, "step": 3020 }, { "epoch": 0.2642844661890617, "grad_norm": 0.6301269956750803, "learning_rate": 8.808969132207339e-06, "loss": 0.3343632698059082, "step": 3025 }, { "epoch": 0.26472130001747335, "grad_norm": 0.6110733788641329, "learning_rate": 8.823529411764707e-06, "loss": 0.3490022897720337, "step": 3030 }, { "epoch": 0.26515813384588505, "grad_norm": 0.566007679538451, "learning_rate": 8.838089691322073e-06, "loss": 0.3505785703659058, "step": 3035 }, { "epoch": 0.2655949676742967, "grad_norm": 0.6292946374485585, "learning_rate": 8.852649970879442e-06, "loss": 0.3534836769104004, "step": 3040 }, { "epoch": 0.2660318015027084, "grad_norm": 0.614931426121352, "learning_rate": 8.86721025043681e-06, "loss": 0.39344425201416017, "step": 3045 }, { "epoch": 0.26646863533112003, "grad_norm": 0.6560072884124627, "learning_rate": 8.881770529994177e-06, "loss": 0.38312792778015137, "step": 3050 }, { "epoch": 0.2669054691595317, "grad_norm": 0.6540228832766212, "learning_rate": 8.896330809551545e-06, "loss": 0.35973668098449707, "step": 3055 }, { "epoch": 0.26734230298794337, "grad_norm": 0.6130365316428389, "learning_rate": 8.910891089108911e-06, "loss": 0.3490328788757324, "step": 3060 }, { "epoch": 0.26777913681635507, "grad_norm": 0.5656707114062376, "learning_rate": 8.92545136866628e-06, "loss": 0.3258897542953491, "step": 3065 }, { "epoch": 0.2682159706447667, "grad_norm": 0.729051480216173, "learning_rate": 8.940011648223646e-06, "loss": 0.36944868564605715, "step": 3070 }, { "epoch": 0.2686528044731784, "grad_norm": 0.5112842438153381, "learning_rate": 8.954571927781014e-06, "loss": 0.3353513479232788, "step": 3075 }, { "epoch": 0.26908963830159005, "grad_norm": 0.5157518133128104, "learning_rate": 8.969132207338381e-06, "loss": 0.32668964862823485, "step": 3080 }, { "epoch": 0.26952647213000175, "grad_norm": 0.5445487590158462, "learning_rate": 8.98369248689575e-06, "loss": 0.3300473690032959, "step": 3085 }, { "epoch": 0.26996330595841345, "grad_norm": 0.642364940781929, "learning_rate": 8.998252766453118e-06, "loss": 0.33866994380950927, "step": 3090 }, { "epoch": 0.2704001397868251, "grad_norm": 0.628960172362706, "learning_rate": 9.012813046010484e-06, "loss": 0.38263626098632814, "step": 3095 }, { "epoch": 0.2708369736152368, "grad_norm": 0.6342503581806233, "learning_rate": 9.027373325567852e-06, "loss": 0.348248815536499, "step": 3100 }, { "epoch": 0.27127380744364843, "grad_norm": 0.6030375774660479, "learning_rate": 9.041933605125219e-06, "loss": 0.329938006401062, "step": 3105 }, { "epoch": 0.2717106412720601, "grad_norm": 0.5049550830441312, "learning_rate": 9.056493884682587e-06, "loss": 0.33180263042449953, "step": 3110 }, { "epoch": 0.27214747510047177, "grad_norm": 0.6913243919677137, "learning_rate": 9.071054164239954e-06, "loss": 0.3547125101089478, "step": 3115 }, { "epoch": 0.27258430892888347, "grad_norm": 0.6895292626772479, "learning_rate": 9.085614443797322e-06, "loss": 0.3357377052307129, "step": 3120 }, { "epoch": 0.2730211427572951, "grad_norm": 0.5559399709249255, "learning_rate": 9.100174723354689e-06, "loss": 0.30981276035308836, "step": 3125 }, { "epoch": 0.2734579765857068, "grad_norm": 0.629642678510808, "learning_rate": 9.114735002912057e-06, "loss": 0.36365437507629395, "step": 3130 }, { "epoch": 0.27389481041411845, "grad_norm": 0.6309521348375098, "learning_rate": 9.129295282469424e-06, "loss": 0.3742026090621948, "step": 3135 }, { "epoch": 0.27433164424253015, "grad_norm": 0.6435008501771671, "learning_rate": 9.143855562026792e-06, "loss": 0.3562572956085205, "step": 3140 }, { "epoch": 0.2747684780709418, "grad_norm": 0.5461243861977948, "learning_rate": 9.15841584158416e-06, "loss": 0.36630394458770754, "step": 3145 }, { "epoch": 0.2752053118993535, "grad_norm": 0.5396448288605697, "learning_rate": 9.172976121141527e-06, "loss": 0.3351914882659912, "step": 3150 }, { "epoch": 0.27564214572776513, "grad_norm": 0.5053995354263803, "learning_rate": 9.187536400698895e-06, "loss": 0.32382593154907224, "step": 3155 }, { "epoch": 0.27607897955617683, "grad_norm": 0.5999671329931299, "learning_rate": 9.202096680256262e-06, "loss": 0.35402612686157225, "step": 3160 }, { "epoch": 0.2765158133845885, "grad_norm": 0.6794344793452549, "learning_rate": 9.21665695981363e-06, "loss": 0.3287058353424072, "step": 3165 }, { "epoch": 0.27695264721300017, "grad_norm": 0.7005864009204126, "learning_rate": 9.231217239370996e-06, "loss": 0.33144330978393555, "step": 3170 }, { "epoch": 0.27738948104141187, "grad_norm": 0.6232599087658748, "learning_rate": 9.245777518928365e-06, "loss": 0.33385262489318845, "step": 3175 }, { "epoch": 0.2778263148698235, "grad_norm": 0.5362876267647254, "learning_rate": 9.260337798485731e-06, "loss": 0.3321423292160034, "step": 3180 }, { "epoch": 0.2782631486982352, "grad_norm": 0.6664294429628411, "learning_rate": 9.2748980780431e-06, "loss": 0.3577437400817871, "step": 3185 }, { "epoch": 0.27869998252664685, "grad_norm": 0.6167873523473651, "learning_rate": 9.289458357600468e-06, "loss": 0.359033203125, "step": 3190 }, { "epoch": 0.27913681635505855, "grad_norm": 0.530274414313717, "learning_rate": 9.304018637157834e-06, "loss": 0.3416207075119019, "step": 3195 }, { "epoch": 0.2795736501834702, "grad_norm": 0.5232347306476782, "learning_rate": 9.318578916715203e-06, "loss": 0.3430491924285889, "step": 3200 }, { "epoch": 0.2800104840118819, "grad_norm": 0.6166961637973709, "learning_rate": 9.33313919627257e-06, "loss": 0.36217589378356935, "step": 3205 }, { "epoch": 0.28044731784029353, "grad_norm": 0.5735295873602057, "learning_rate": 9.347699475829937e-06, "loss": 0.3288098335266113, "step": 3210 }, { "epoch": 0.28088415166870523, "grad_norm": 0.5250622469572356, "learning_rate": 9.362259755387304e-06, "loss": 0.347118616104126, "step": 3215 }, { "epoch": 0.28132098549711687, "grad_norm": 0.5599284167423482, "learning_rate": 9.37682003494467e-06, "loss": 0.32776508331298826, "step": 3220 }, { "epoch": 0.28175781932552857, "grad_norm": 0.5737673144590885, "learning_rate": 9.391380314502039e-06, "loss": 0.36972222328186033, "step": 3225 }, { "epoch": 0.28219465315394027, "grad_norm": 0.4615359809386262, "learning_rate": 9.405940594059405e-06, "loss": 0.3241116523742676, "step": 3230 }, { "epoch": 0.2826314869823519, "grad_norm": 0.5855234395381287, "learning_rate": 9.420500873616775e-06, "loss": 0.3685986757278442, "step": 3235 }, { "epoch": 0.2830683208107636, "grad_norm": 0.5433604107951295, "learning_rate": 9.435061153174142e-06, "loss": 0.3263537883758545, "step": 3240 }, { "epoch": 0.28350515463917525, "grad_norm": 0.532240966361247, "learning_rate": 9.449621432731509e-06, "loss": 0.3350705623626709, "step": 3245 }, { "epoch": 0.28394198846758695, "grad_norm": 0.6735718205004403, "learning_rate": 9.464181712288877e-06, "loss": 0.3720221757888794, "step": 3250 }, { "epoch": 0.2843788222959986, "grad_norm": 0.5810690459180403, "learning_rate": 9.478741991846243e-06, "loss": 0.3453375816345215, "step": 3255 }, { "epoch": 0.2848156561244103, "grad_norm": 0.5169946953924621, "learning_rate": 9.493302271403612e-06, "loss": 0.34659316539764407, "step": 3260 }, { "epoch": 0.28525248995282193, "grad_norm": 0.6814628060349859, "learning_rate": 9.507862550960978e-06, "loss": 0.34301741123199464, "step": 3265 }, { "epoch": 0.28568932378123363, "grad_norm": 0.6069577135541603, "learning_rate": 9.522422830518347e-06, "loss": 0.34044928550720216, "step": 3270 }, { "epoch": 0.28612615760964527, "grad_norm": 0.58000712254612, "learning_rate": 9.536983110075713e-06, "loss": 0.342789363861084, "step": 3275 }, { "epoch": 0.28656299143805697, "grad_norm": 0.6397611508633622, "learning_rate": 9.551543389633081e-06, "loss": 0.34837913513183594, "step": 3280 }, { "epoch": 0.2869998252664686, "grad_norm": 0.5667402336408418, "learning_rate": 9.56610366919045e-06, "loss": 0.3431486845016479, "step": 3285 }, { "epoch": 0.2874366590948803, "grad_norm": 0.5186711623551278, "learning_rate": 9.580663948747816e-06, "loss": 0.2991778373718262, "step": 3290 }, { "epoch": 0.287873492923292, "grad_norm": 0.5360200133353058, "learning_rate": 9.595224228305185e-06, "loss": 0.32862653732299807, "step": 3295 }, { "epoch": 0.28831032675170365, "grad_norm": 0.5133554091172486, "learning_rate": 9.609784507862551e-06, "loss": 0.3309582233428955, "step": 3300 }, { "epoch": 0.28874716058011535, "grad_norm": 0.5885578820940391, "learning_rate": 9.62434478741992e-06, "loss": 0.338640022277832, "step": 3305 }, { "epoch": 0.289183994408527, "grad_norm": 0.6168550317885726, "learning_rate": 9.638905066977286e-06, "loss": 0.34025726318359373, "step": 3310 }, { "epoch": 0.2896208282369387, "grad_norm": 0.6969537711017001, "learning_rate": 9.653465346534654e-06, "loss": 0.32862062454223634, "step": 3315 }, { "epoch": 0.29005766206535033, "grad_norm": 0.6389828858361036, "learning_rate": 9.66802562609202e-06, "loss": 0.37732508182525637, "step": 3320 }, { "epoch": 0.29049449589376203, "grad_norm": 0.5414481269502052, "learning_rate": 9.682585905649389e-06, "loss": 0.3341397762298584, "step": 3325 }, { "epoch": 0.29093132972217367, "grad_norm": 0.5654745521342324, "learning_rate": 9.697146185206757e-06, "loss": 0.33045201301574706, "step": 3330 }, { "epoch": 0.29136816355058537, "grad_norm": 0.6433286433946673, "learning_rate": 9.711706464764124e-06, "loss": 0.33776943683624266, "step": 3335 }, { "epoch": 0.291804997378997, "grad_norm": 0.6777451665319612, "learning_rate": 9.726266744321492e-06, "loss": 0.3636423826217651, "step": 3340 }, { "epoch": 0.2922418312074087, "grad_norm": 0.5938351762313062, "learning_rate": 9.740827023878859e-06, "loss": 0.3761787176132202, "step": 3345 }, { "epoch": 0.29267866503582035, "grad_norm": 0.5348518408493217, "learning_rate": 9.755387303436227e-06, "loss": 0.3406930923461914, "step": 3350 }, { "epoch": 0.29311549886423205, "grad_norm": 0.5639888988531601, "learning_rate": 9.769947582993594e-06, "loss": 0.362371826171875, "step": 3355 }, { "epoch": 0.2935523326926437, "grad_norm": 0.6337760772001743, "learning_rate": 9.784507862550962e-06, "loss": 0.3420462131500244, "step": 3360 }, { "epoch": 0.2939891665210554, "grad_norm": 0.5275941053692648, "learning_rate": 9.799068142108328e-06, "loss": 0.3368542671203613, "step": 3365 }, { "epoch": 0.2944260003494671, "grad_norm": 0.5376745890646693, "learning_rate": 9.813628421665697e-06, "loss": 0.34477503299713136, "step": 3370 }, { "epoch": 0.29486283417787873, "grad_norm": 0.5372952133459695, "learning_rate": 9.828188701223065e-06, "loss": 0.3433236598968506, "step": 3375 }, { "epoch": 0.29529966800629043, "grad_norm": 0.5308349770502306, "learning_rate": 9.842748980780432e-06, "loss": 0.33595824241638184, "step": 3380 }, { "epoch": 0.29573650183470207, "grad_norm": 0.45778601752179543, "learning_rate": 9.8573092603378e-06, "loss": 0.3558748722076416, "step": 3385 }, { "epoch": 0.29617333566311377, "grad_norm": 0.4918672393832209, "learning_rate": 9.871869539895166e-06, "loss": 0.3471201419830322, "step": 3390 }, { "epoch": 0.2966101694915254, "grad_norm": 0.7681913370200908, "learning_rate": 9.886429819452535e-06, "loss": 0.3439859628677368, "step": 3395 }, { "epoch": 0.2970470033199371, "grad_norm": 0.5872477870842095, "learning_rate": 9.900990099009901e-06, "loss": 0.3560892105102539, "step": 3400 }, { "epoch": 0.29748383714834875, "grad_norm": 0.564303548207149, "learning_rate": 9.91555037856727e-06, "loss": 0.3323225975036621, "step": 3405 }, { "epoch": 0.29792067097676045, "grad_norm": 0.5480027112902309, "learning_rate": 9.930110658124636e-06, "loss": 0.36754460334777833, "step": 3410 }, { "epoch": 0.2983575048051721, "grad_norm": 0.6423012081066648, "learning_rate": 9.944670937682004e-06, "loss": 0.3325392961502075, "step": 3415 }, { "epoch": 0.2987943386335838, "grad_norm": 0.5960784467769327, "learning_rate": 9.959231217239373e-06, "loss": 0.3291003704071045, "step": 3420 }, { "epoch": 0.29923117246199543, "grad_norm": 0.659500812829209, "learning_rate": 9.97379149679674e-06, "loss": 0.3334655284881592, "step": 3425 }, { "epoch": 0.29966800629040713, "grad_norm": 0.5863699836983044, "learning_rate": 9.988351776354108e-06, "loss": 0.3612937927246094, "step": 3430 }, { "epoch": 0.3001048401188188, "grad_norm": 0.5594792118726679, "learning_rate": 9.999999974164888e-06, "loss": 0.35433106422424315, "step": 3435 }, { "epoch": 0.30054167394723047, "grad_norm": 0.7487008059277841, "learning_rate": 9.999999069935965e-06, "loss": 0.33095455169677734, "step": 3440 }, { "epoch": 0.30097850777564217, "grad_norm": 0.6889697982063486, "learning_rate": 9.999996873951664e-06, "loss": 0.34226393699645996, "step": 3445 }, { "epoch": 0.3014153416040538, "grad_norm": 0.5433967718170212, "learning_rate": 9.999993386212556e-06, "loss": 0.35340704917907717, "step": 3450 }, { "epoch": 0.3018521754324655, "grad_norm": 0.5788357720621126, "learning_rate": 9.999988606719539e-06, "loss": 0.34591214656829833, "step": 3455 }, { "epoch": 0.30228900926087715, "grad_norm": 0.5633499747371017, "learning_rate": 9.999982535473846e-06, "loss": 0.34605319499969484, "step": 3460 }, { "epoch": 0.30272584308928885, "grad_norm": 0.5621933700415165, "learning_rate": 9.99997517247705e-06, "loss": 0.3308484315872192, "step": 3465 }, { "epoch": 0.3031626769177005, "grad_norm": 0.6918708512342303, "learning_rate": 9.99996651773105e-06, "loss": 0.3334173679351807, "step": 3470 }, { "epoch": 0.3035995107461122, "grad_norm": 0.6515398186503727, "learning_rate": 9.999956571238082e-06, "loss": 0.37396860122680664, "step": 3475 }, { "epoch": 0.30403634457452383, "grad_norm": 0.6281963245194561, "learning_rate": 9.999945333000717e-06, "loss": 0.33119902610778806, "step": 3480 }, { "epoch": 0.30447317840293553, "grad_norm": 0.5746606879126449, "learning_rate": 9.999932803021859e-06, "loss": 0.35440816879272463, "step": 3485 }, { "epoch": 0.30491001223134717, "grad_norm": 0.5676596060909838, "learning_rate": 9.999918981304744e-06, "loss": 0.34382286071777346, "step": 3490 }, { "epoch": 0.30534684605975887, "grad_norm": 0.4886105451684132, "learning_rate": 9.999903867852943e-06, "loss": 0.30050790309906006, "step": 3495 }, { "epoch": 0.30578367988817057, "grad_norm": 0.5245779485358549, "learning_rate": 9.999887462670358e-06, "loss": 0.3284953832626343, "step": 3500 }, { "epoch": 0.3062205137165822, "grad_norm": 0.6290840556032227, "learning_rate": 9.999869765761235e-06, "loss": 0.33315269947052, "step": 3505 }, { "epoch": 0.3066573475449939, "grad_norm": 0.5551995610006394, "learning_rate": 9.999850777130138e-06, "loss": 0.3134757518768311, "step": 3510 }, { "epoch": 0.30709418137340555, "grad_norm": 0.6096087080939202, "learning_rate": 9.999830496781975e-06, "loss": 0.3396044492721558, "step": 3515 }, { "epoch": 0.30753101520181725, "grad_norm": 0.53012285446325, "learning_rate": 9.999808924721985e-06, "loss": 0.30172152519226075, "step": 3520 }, { "epoch": 0.3079678490302289, "grad_norm": 0.5786903034659566, "learning_rate": 9.999786060955746e-06, "loss": 0.3176602602005005, "step": 3525 }, { "epoch": 0.3084046828586406, "grad_norm": 0.9626249834630849, "learning_rate": 9.999761905489158e-06, "loss": 0.3334537744522095, "step": 3530 }, { "epoch": 0.30884151668705223, "grad_norm": 0.6680062030912826, "learning_rate": 9.999736458328466e-06, "loss": 0.35321803092956544, "step": 3535 }, { "epoch": 0.30927835051546393, "grad_norm": 0.5608893751005556, "learning_rate": 9.999709719480243e-06, "loss": 0.3613155364990234, "step": 3540 }, { "epoch": 0.30971518434387557, "grad_norm": 0.5937600202473894, "learning_rate": 9.999681688951397e-06, "loss": 0.3400393009185791, "step": 3545 }, { "epoch": 0.31015201817228727, "grad_norm": 0.6007213017558252, "learning_rate": 9.999652366749168e-06, "loss": 0.33361454010009767, "step": 3550 }, { "epoch": 0.3105888520006989, "grad_norm": 0.5873736571072606, "learning_rate": 9.999621752881136e-06, "loss": 0.33024890422821046, "step": 3555 }, { "epoch": 0.3110256858291106, "grad_norm": 0.5598987603203539, "learning_rate": 9.999589847355206e-06, "loss": 0.34737112522125246, "step": 3560 }, { "epoch": 0.31146251965752225, "grad_norm": 0.5063203273477954, "learning_rate": 9.999556650179622e-06, "loss": 0.3237443447113037, "step": 3565 }, { "epoch": 0.31189935348593395, "grad_norm": 0.6809378215677045, "learning_rate": 9.999522161362961e-06, "loss": 0.35674300193786623, "step": 3570 }, { "epoch": 0.31233618731434565, "grad_norm": 0.5862911663820876, "learning_rate": 9.999486380914133e-06, "loss": 0.34760470390319825, "step": 3575 }, { "epoch": 0.3127730211427573, "grad_norm": 0.809330759151176, "learning_rate": 9.999449308842382e-06, "loss": 0.3459350109100342, "step": 3580 }, { "epoch": 0.313209854971169, "grad_norm": 0.6922683647479789, "learning_rate": 9.999410945157284e-06, "loss": 0.36099565029144287, "step": 3585 }, { "epoch": 0.31364668879958063, "grad_norm": 0.6248022481989117, "learning_rate": 9.999371289868754e-06, "loss": 0.3536688804626465, "step": 3590 }, { "epoch": 0.31408352262799233, "grad_norm": 0.5835763854590661, "learning_rate": 9.999330342987032e-06, "loss": 0.3511929750442505, "step": 3595 }, { "epoch": 0.31452035645640397, "grad_norm": 0.6569511250693325, "learning_rate": 9.999288104522702e-06, "loss": 0.34888591766357424, "step": 3600 }, { "epoch": 0.31495719028481567, "grad_norm": 0.5533938771454902, "learning_rate": 9.999244574486672e-06, "loss": 0.32205514907836913, "step": 3605 }, { "epoch": 0.3153940241132273, "grad_norm": 0.4920784836578133, "learning_rate": 9.99919975289019e-06, "loss": 0.3283698081970215, "step": 3610 }, { "epoch": 0.315830857941639, "grad_norm": 0.6225649559303593, "learning_rate": 9.999153639744836e-06, "loss": 0.36293940544128417, "step": 3615 }, { "epoch": 0.31626769177005065, "grad_norm": 0.5607375147100654, "learning_rate": 9.999106235062524e-06, "loss": 0.35121970176696776, "step": 3620 }, { "epoch": 0.31670452559846235, "grad_norm": 0.6131870863323025, "learning_rate": 9.999057538855498e-06, "loss": 0.35702059268951414, "step": 3625 }, { "epoch": 0.317141359426874, "grad_norm": 0.5905017304839705, "learning_rate": 9.999007551136342e-06, "loss": 0.3222618579864502, "step": 3630 }, { "epoch": 0.3175781932552857, "grad_norm": 0.4853816570088634, "learning_rate": 9.998956271917967e-06, "loss": 0.36825387477874755, "step": 3635 }, { "epoch": 0.3180150270836974, "grad_norm": 0.5501088091410675, "learning_rate": 9.998903701213625e-06, "loss": 0.33483109474182127, "step": 3640 }, { "epoch": 0.31845186091210903, "grad_norm": 0.5346556826883206, "learning_rate": 9.998849839036894e-06, "loss": 0.33582425117492676, "step": 3645 }, { "epoch": 0.31888869474052073, "grad_norm": 0.5286342756304232, "learning_rate": 9.998794685401692e-06, "loss": 0.33624677658081054, "step": 3650 }, { "epoch": 0.31932552856893237, "grad_norm": 0.5215681699553211, "learning_rate": 9.998738240322268e-06, "loss": 0.35508022308349607, "step": 3655 }, { "epoch": 0.31976236239734407, "grad_norm": 0.5756856526985958, "learning_rate": 9.998680503813203e-06, "loss": 0.3514007329940796, "step": 3660 }, { "epoch": 0.3201991962257557, "grad_norm": 0.6240936301979021, "learning_rate": 9.998621475889414e-06, "loss": 0.3604942798614502, "step": 3665 }, { "epoch": 0.3206360300541674, "grad_norm": 0.5465017820135674, "learning_rate": 9.99856115656615e-06, "loss": 0.34111671447753905, "step": 3670 }, { "epoch": 0.32107286388257905, "grad_norm": 0.7540926683926632, "learning_rate": 9.998499545858997e-06, "loss": 0.3658783435821533, "step": 3675 }, { "epoch": 0.32150969771099075, "grad_norm": 0.5942121061336512, "learning_rate": 9.99843664378387e-06, "loss": 0.33240911960601804, "step": 3680 }, { "epoch": 0.3219465315394024, "grad_norm": 0.5721181787937818, "learning_rate": 9.99837245035702e-06, "loss": 0.32027199268341067, "step": 3685 }, { "epoch": 0.3223833653678141, "grad_norm": 0.5704262177939364, "learning_rate": 9.998306965595034e-06, "loss": 0.34150137901306155, "step": 3690 }, { "epoch": 0.32282019919622573, "grad_norm": 0.6295804841752792, "learning_rate": 9.998240189514828e-06, "loss": 0.3422281503677368, "step": 3695 }, { "epoch": 0.32325703302463743, "grad_norm": 0.7202425391263707, "learning_rate": 9.998172122133652e-06, "loss": 0.3752723455429077, "step": 3700 }, { "epoch": 0.3236938668530491, "grad_norm": 0.5162652151812662, "learning_rate": 9.998102763469095e-06, "loss": 0.3221442699432373, "step": 3705 }, { "epoch": 0.32413070068146077, "grad_norm": 0.5049078481356187, "learning_rate": 9.998032113539072e-06, "loss": 0.34982030391693114, "step": 3710 }, { "epoch": 0.32456753450987247, "grad_norm": 0.5949570151635927, "learning_rate": 9.997960172361839e-06, "loss": 0.33191652297973634, "step": 3715 }, { "epoch": 0.3250043683382841, "grad_norm": 0.5976195119377936, "learning_rate": 9.997886939955978e-06, "loss": 0.3759481430053711, "step": 3720 }, { "epoch": 0.3254412021666958, "grad_norm": 0.6628873283534249, "learning_rate": 9.997812416340413e-06, "loss": 0.34987030029296873, "step": 3725 }, { "epoch": 0.32587803599510745, "grad_norm": 0.6060608860519475, "learning_rate": 9.997736601534396e-06, "loss": 0.3552370548248291, "step": 3730 }, { "epoch": 0.32631486982351915, "grad_norm": 0.5660453707819055, "learning_rate": 9.997659495557512e-06, "loss": 0.372175407409668, "step": 3735 }, { "epoch": 0.3267517036519308, "grad_norm": 0.6105791108258458, "learning_rate": 9.997581098429684e-06, "loss": 0.3639263153076172, "step": 3740 }, { "epoch": 0.3271885374803425, "grad_norm": 0.5866217219927191, "learning_rate": 9.997501410171164e-06, "loss": 0.35164766311645507, "step": 3745 }, { "epoch": 0.32762537130875413, "grad_norm": 0.6222614906893651, "learning_rate": 9.99742043080254e-06, "loss": 0.34939796924591066, "step": 3750 }, { "epoch": 0.32806220513716583, "grad_norm": 0.48082485766670224, "learning_rate": 9.997338160344733e-06, "loss": 0.32866268157958983, "step": 3755 }, { "epoch": 0.3284990389655775, "grad_norm": 0.5127763524429508, "learning_rate": 9.997254598819e-06, "loss": 0.3696180820465088, "step": 3760 }, { "epoch": 0.32893587279398917, "grad_norm": 0.5417457866742396, "learning_rate": 9.997169746246924e-06, "loss": 0.32765674591064453, "step": 3765 }, { "epoch": 0.3293727066224008, "grad_norm": 0.6195554545724894, "learning_rate": 9.997083602650433e-06, "loss": 0.34647607803344727, "step": 3770 }, { "epoch": 0.3298095404508125, "grad_norm": 0.5934395290450944, "learning_rate": 9.996996168051777e-06, "loss": 0.35483560562133787, "step": 3775 }, { "epoch": 0.3302463742792242, "grad_norm": 0.7078564789800039, "learning_rate": 9.996907442473549e-06, "loss": 0.333709192276001, "step": 3780 }, { "epoch": 0.33068320810763585, "grad_norm": 0.5209504569928001, "learning_rate": 9.996817425938668e-06, "loss": 0.3382713794708252, "step": 3785 }, { "epoch": 0.33112004193604755, "grad_norm": 0.4804376580710687, "learning_rate": 9.996726118470392e-06, "loss": 0.31196002960205077, "step": 3790 }, { "epoch": 0.3315568757644592, "grad_norm": 0.6166608403832131, "learning_rate": 9.99663352009231e-06, "loss": 0.3202012538909912, "step": 3795 }, { "epoch": 0.3319937095928709, "grad_norm": 0.8132733791503978, "learning_rate": 9.996539630828343e-06, "loss": 0.35005755424499513, "step": 3800 }, { "epoch": 0.33243054342128253, "grad_norm": 0.5516064498588313, "learning_rate": 9.996444450702752e-06, "loss": 0.3380916595458984, "step": 3805 }, { "epoch": 0.33286737724969423, "grad_norm": 0.5139306536058709, "learning_rate": 9.996347979740121e-06, "loss": 0.3570961236953735, "step": 3810 }, { "epoch": 0.33330421107810587, "grad_norm": 0.5271778240737177, "learning_rate": 9.996250217965378e-06, "loss": 0.35366652011871336, "step": 3815 }, { "epoch": 0.33374104490651757, "grad_norm": 0.6065150434390956, "learning_rate": 9.996151165403776e-06, "loss": 0.337571907043457, "step": 3820 }, { "epoch": 0.3341778787349292, "grad_norm": 0.5841303214722324, "learning_rate": 9.996050822080909e-06, "loss": 0.3599510192871094, "step": 3825 }, { "epoch": 0.3346147125633409, "grad_norm": 0.5481740756128481, "learning_rate": 9.9959491880227e-06, "loss": 0.34564547538757323, "step": 3830 }, { "epoch": 0.33505154639175255, "grad_norm": 0.5379020541751001, "learning_rate": 9.995846263255405e-06, "loss": 0.3252537727355957, "step": 3835 }, { "epoch": 0.33548838022016425, "grad_norm": 0.46705759802050034, "learning_rate": 9.995742047805614e-06, "loss": 0.3534907341003418, "step": 3840 }, { "epoch": 0.33592521404857595, "grad_norm": 0.5963972831770793, "learning_rate": 9.995636541700253e-06, "loss": 0.33924002647399903, "step": 3845 }, { "epoch": 0.3363620478769876, "grad_norm": 0.5974939689320844, "learning_rate": 9.99552974496658e-06, "loss": 0.36642544269561766, "step": 3850 }, { "epoch": 0.3367988817053993, "grad_norm": 0.5726435744805374, "learning_rate": 9.995421657632186e-06, "loss": 0.3652334690093994, "step": 3855 }, { "epoch": 0.33723571553381093, "grad_norm": 0.6343781816110489, "learning_rate": 9.995312279724994e-06, "loss": 0.32996439933776855, "step": 3860 }, { "epoch": 0.33767254936222263, "grad_norm": 0.6254511549185928, "learning_rate": 9.995201611273261e-06, "loss": 0.29564332962036133, "step": 3865 }, { "epoch": 0.33810938319063427, "grad_norm": 0.5020402268531194, "learning_rate": 9.99508965230558e-06, "loss": 0.3401199817657471, "step": 3870 }, { "epoch": 0.33854621701904597, "grad_norm": 0.6972183806839776, "learning_rate": 9.994976402850877e-06, "loss": 0.34812164306640625, "step": 3875 }, { "epoch": 0.3389830508474576, "grad_norm": 1.0168992103281382, "learning_rate": 9.994861862938408e-06, "loss": 0.3606101036071777, "step": 3880 }, { "epoch": 0.3394198846758693, "grad_norm": 0.5291146285077201, "learning_rate": 9.994746032597765e-06, "loss": 0.35053138732910155, "step": 3885 }, { "epoch": 0.33985671850428095, "grad_norm": 0.6775234079652379, "learning_rate": 9.994628911858873e-06, "loss": 0.3588568687438965, "step": 3890 }, { "epoch": 0.34029355233269265, "grad_norm": 0.5779546764212846, "learning_rate": 9.994510500751992e-06, "loss": 0.389392614364624, "step": 3895 }, { "epoch": 0.3407303861611043, "grad_norm": 0.5439815922476725, "learning_rate": 9.99439079930771e-06, "loss": 0.3462557792663574, "step": 3900 }, { "epoch": 0.341167219989516, "grad_norm": 0.5459966762985216, "learning_rate": 9.994269807556955e-06, "loss": 0.3620922565460205, "step": 3905 }, { "epoch": 0.34160405381792763, "grad_norm": 0.5329187563730898, "learning_rate": 9.994147525530984e-06, "loss": 0.32199907302856445, "step": 3910 }, { "epoch": 0.34204088764633933, "grad_norm": 0.543365109537265, "learning_rate": 9.99402395326139e-06, "loss": 0.3729154109954834, "step": 3915 }, { "epoch": 0.34247772147475103, "grad_norm": 0.5401147348529384, "learning_rate": 9.993899090780097e-06, "loss": 0.34712605476379393, "step": 3920 }, { "epoch": 0.34291455530316267, "grad_norm": 0.46169727693632, "learning_rate": 9.993772938119361e-06, "loss": 0.32599949836730957, "step": 3925 }, { "epoch": 0.34335138913157437, "grad_norm": 0.46062049737560273, "learning_rate": 9.99364549531178e-06, "loss": 0.3395646095275879, "step": 3930 }, { "epoch": 0.343788222959986, "grad_norm": 0.5282142842312441, "learning_rate": 9.993516762390274e-06, "loss": 0.34210758209228515, "step": 3935 }, { "epoch": 0.3442250567883977, "grad_norm": 0.5872057181502185, "learning_rate": 9.9933867393881e-06, "loss": 0.34928174018859864, "step": 3940 }, { "epoch": 0.34466189061680935, "grad_norm": 0.5781924602915532, "learning_rate": 9.993255426338855e-06, "loss": 0.37265424728393554, "step": 3945 }, { "epoch": 0.34509872444522105, "grad_norm": 0.680069391687783, "learning_rate": 9.99312282327646e-06, "loss": 0.3468410730361938, "step": 3950 }, { "epoch": 0.3455355582736327, "grad_norm": 0.7024425106215488, "learning_rate": 9.992988930235172e-06, "loss": 0.3588718414306641, "step": 3955 }, { "epoch": 0.3459723921020444, "grad_norm": 0.5550570192542412, "learning_rate": 9.992853747249586e-06, "loss": 0.3402172803878784, "step": 3960 }, { "epoch": 0.34640922593045603, "grad_norm": 0.4993769333121397, "learning_rate": 9.992717274354627e-06, "loss": 0.3549471378326416, "step": 3965 }, { "epoch": 0.34684605975886773, "grad_norm": 0.5169542473737402, "learning_rate": 9.99257951158555e-06, "loss": 0.3538684844970703, "step": 3970 }, { "epoch": 0.3472828935872794, "grad_norm": 0.4484988127066007, "learning_rate": 9.992440458977946e-06, "loss": 0.3253441333770752, "step": 3975 }, { "epoch": 0.34771972741569107, "grad_norm": 0.5913122455194141, "learning_rate": 9.992300116567742e-06, "loss": 0.37661654949188234, "step": 3980 }, { "epoch": 0.34815656124410277, "grad_norm": 0.54166024227485, "learning_rate": 9.992158484391192e-06, "loss": 0.3331989049911499, "step": 3985 }, { "epoch": 0.3485933950725144, "grad_norm": 0.607501753202694, "learning_rate": 9.992015562484891e-06, "loss": 0.3439152479171753, "step": 3990 }, { "epoch": 0.3490302289009261, "grad_norm": 0.5389297014445471, "learning_rate": 9.991871350885762e-06, "loss": 0.34252099990844725, "step": 3995 }, { "epoch": 0.34946706272933775, "grad_norm": 0.4795670232370366, "learning_rate": 9.99172584963106e-06, "loss": 0.32248497009277344, "step": 4000 }, { "epoch": 0.34990389655774945, "grad_norm": 0.5294543286622018, "learning_rate": 9.991579058758378e-06, "loss": 0.3739789485931396, "step": 4005 }, { "epoch": 0.3503407303861611, "grad_norm": 0.6467210896446793, "learning_rate": 9.991430978305637e-06, "loss": 0.3427543878555298, "step": 4010 }, { "epoch": 0.3507775642145728, "grad_norm": 0.6438624654114512, "learning_rate": 9.991281608311095e-06, "loss": 0.3542837619781494, "step": 4015 }, { "epoch": 0.35121439804298443, "grad_norm": 0.5491061385379095, "learning_rate": 9.991130948813343e-06, "loss": 0.31438169479370115, "step": 4020 }, { "epoch": 0.35165123187139613, "grad_norm": 0.5626692053843121, "learning_rate": 9.990978999851302e-06, "loss": 0.32274479866027833, "step": 4025 }, { "epoch": 0.3520880656998078, "grad_norm": 0.5100494631244181, "learning_rate": 9.990825761464232e-06, "loss": 0.3127216100692749, "step": 4030 }, { "epoch": 0.35252489952821947, "grad_norm": 0.5607532931194944, "learning_rate": 9.990671233691716e-06, "loss": 0.33539552688598634, "step": 4035 }, { "epoch": 0.3529617333566311, "grad_norm": 0.5099431258094206, "learning_rate": 9.990515416573681e-06, "loss": 0.32053816318511963, "step": 4040 }, { "epoch": 0.3533985671850428, "grad_norm": 0.5818437518544165, "learning_rate": 9.990358310150384e-06, "loss": 0.32146401405334474, "step": 4045 }, { "epoch": 0.3538354010134545, "grad_norm": 0.6279195932301858, "learning_rate": 9.990199914462408e-06, "loss": 0.32848362922668456, "step": 4050 }, { "epoch": 0.35427223484186615, "grad_norm": 0.7441340391321356, "learning_rate": 9.99004022955068e-06, "loss": 0.32823934555053713, "step": 4055 }, { "epoch": 0.35470906867027785, "grad_norm": 0.48228379670959676, "learning_rate": 9.989879255456451e-06, "loss": 0.34150242805480957, "step": 4060 }, { "epoch": 0.3551459024986895, "grad_norm": 0.5689974048463352, "learning_rate": 9.98971699222131e-06, "loss": 0.3340949535369873, "step": 4065 }, { "epoch": 0.3555827363271012, "grad_norm": 0.483141952647845, "learning_rate": 9.98955343988718e-06, "loss": 0.3190625190734863, "step": 4070 }, { "epoch": 0.35601957015551283, "grad_norm": 0.7205631743256455, "learning_rate": 9.989388598496313e-06, "loss": 0.3394859075546265, "step": 4075 }, { "epoch": 0.35645640398392453, "grad_norm": 0.584179464708575, "learning_rate": 9.989222468091296e-06, "loss": 0.3659473180770874, "step": 4080 }, { "epoch": 0.3568932378123362, "grad_norm": 0.6565809024017419, "learning_rate": 9.989055048715048e-06, "loss": 0.34237287044525144, "step": 4085 }, { "epoch": 0.35733007164074787, "grad_norm": 0.528447853112012, "learning_rate": 9.988886340410826e-06, "loss": 0.34848599433898925, "step": 4090 }, { "epoch": 0.3577669054691595, "grad_norm": 0.5989185383187807, "learning_rate": 9.988716343222211e-06, "loss": 0.35111513137817385, "step": 4095 }, { "epoch": 0.3582037392975712, "grad_norm": 0.5968003023504799, "learning_rate": 9.988545057193124e-06, "loss": 0.34605357646942136, "step": 4100 }, { "epoch": 0.35864057312598285, "grad_norm": 0.4893225115614653, "learning_rate": 9.988372482367819e-06, "loss": 0.338465142250061, "step": 4105 }, { "epoch": 0.35907740695439455, "grad_norm": 0.6085944013447616, "learning_rate": 9.988198618790877e-06, "loss": 0.3710038661956787, "step": 4110 }, { "epoch": 0.3595142407828062, "grad_norm": 0.6597341336394176, "learning_rate": 9.988023466507219e-06, "loss": 0.31553092002868655, "step": 4115 }, { "epoch": 0.3599510746112179, "grad_norm": 0.6314708019846391, "learning_rate": 9.987847025562094e-06, "loss": 0.32213051319122316, "step": 4120 }, { "epoch": 0.3603879084396296, "grad_norm": 0.47039349372312, "learning_rate": 9.987669296001086e-06, "loss": 0.3353367567062378, "step": 4125 }, { "epoch": 0.36082474226804123, "grad_norm": 0.5702556059656285, "learning_rate": 9.987490277870114e-06, "loss": 0.33417959213256837, "step": 4130 }, { "epoch": 0.36126157609645293, "grad_norm": 0.5494956617422662, "learning_rate": 9.987309971215422e-06, "loss": 0.35245819091796876, "step": 4135 }, { "epoch": 0.3616984099248646, "grad_norm": 0.635254728201455, "learning_rate": 9.987128376083599e-06, "loss": 0.36660969257354736, "step": 4140 }, { "epoch": 0.36213524375327627, "grad_norm": 0.5904304230853592, "learning_rate": 9.986945492521557e-06, "loss": 0.33929083347320554, "step": 4145 }, { "epoch": 0.3625720775816879, "grad_norm": 0.6394086199263221, "learning_rate": 9.986761320576543e-06, "loss": 0.3341410160064697, "step": 4150 }, { "epoch": 0.3630089114100996, "grad_norm": 0.6192798782679082, "learning_rate": 9.98657586029614e-06, "loss": 0.3389934539794922, "step": 4155 }, { "epoch": 0.36344574523851125, "grad_norm": 0.49680587736569454, "learning_rate": 9.98638911172826e-06, "loss": 0.31475238800048827, "step": 4160 }, { "epoch": 0.36388257906692295, "grad_norm": 0.6062889593930164, "learning_rate": 9.986201074921154e-06, "loss": 0.32101075649261473, "step": 4165 }, { "epoch": 0.3643194128953346, "grad_norm": 0.5791716893219252, "learning_rate": 9.986011749923397e-06, "loss": 0.35816068649291993, "step": 4170 }, { "epoch": 0.3647562467237463, "grad_norm": 0.5947824053290099, "learning_rate": 9.985821136783903e-06, "loss": 0.3450965404510498, "step": 4175 }, { "epoch": 0.36519308055215793, "grad_norm": 0.48336327858408296, "learning_rate": 9.985629235551917e-06, "loss": 0.32272977828979493, "step": 4180 }, { "epoch": 0.36562991438056963, "grad_norm": 0.5402844377982847, "learning_rate": 9.985436046277014e-06, "loss": 0.3359605550765991, "step": 4185 }, { "epoch": 0.36606674820898133, "grad_norm": 0.5431535482074327, "learning_rate": 9.985241569009112e-06, "loss": 0.3452897071838379, "step": 4190 }, { "epoch": 0.36650358203739297, "grad_norm": 1.1288548583230498, "learning_rate": 9.985045803798447e-06, "loss": 0.3249474048614502, "step": 4195 }, { "epoch": 0.36694041586580467, "grad_norm": 0.5178764628148406, "learning_rate": 9.9848487506956e-06, "loss": 0.3324175596237183, "step": 4200 }, { "epoch": 0.3673772496942163, "grad_norm": 0.608516630709615, "learning_rate": 9.984650409751478e-06, "loss": 0.3573195457458496, "step": 4205 }, { "epoch": 0.367814083522628, "grad_norm": 0.49360415877053143, "learning_rate": 9.984450781017321e-06, "loss": 0.3433647155761719, "step": 4210 }, { "epoch": 0.36825091735103965, "grad_norm": 0.5993811103674888, "learning_rate": 9.984249864544705e-06, "loss": 0.3432304859161377, "step": 4215 }, { "epoch": 0.36868775117945135, "grad_norm": 0.5557647141906228, "learning_rate": 9.984047660385539e-06, "loss": 0.33267648220062257, "step": 4220 }, { "epoch": 0.369124585007863, "grad_norm": 0.586223424279316, "learning_rate": 9.983844168592058e-06, "loss": 0.291914701461792, "step": 4225 }, { "epoch": 0.3695614188362747, "grad_norm": 0.5371762522259704, "learning_rate": 9.983639389216839e-06, "loss": 0.3316781997680664, "step": 4230 }, { "epoch": 0.36999825266468633, "grad_norm": 0.4686815703721863, "learning_rate": 9.983433322312784e-06, "loss": 0.33297247886657716, "step": 4235 }, { "epoch": 0.37043508649309803, "grad_norm": 0.4707168527683475, "learning_rate": 9.983225967933132e-06, "loss": 0.3351769924163818, "step": 4240 }, { "epoch": 0.3708719203215097, "grad_norm": 0.5310908551187993, "learning_rate": 9.983017326131453e-06, "loss": 0.35129404067993164, "step": 4245 }, { "epoch": 0.37130875414992137, "grad_norm": 0.6726619506614133, "learning_rate": 9.98280739696165e-06, "loss": 0.3633124828338623, "step": 4250 }, { "epoch": 0.37174558797833307, "grad_norm": 0.5805033621975036, "learning_rate": 9.982596180477956e-06, "loss": 0.3032383918762207, "step": 4255 }, { "epoch": 0.3721824218067447, "grad_norm": 0.507986143616931, "learning_rate": 9.982383676734942e-06, "loss": 0.33097639083862307, "step": 4260 }, { "epoch": 0.3726192556351564, "grad_norm": 0.6108545883988623, "learning_rate": 9.982169885787508e-06, "loss": 0.36245572566986084, "step": 4265 }, { "epoch": 0.37305608946356805, "grad_norm": 0.5903354605885391, "learning_rate": 9.981954807690887e-06, "loss": 0.34878926277160643, "step": 4270 }, { "epoch": 0.37349292329197975, "grad_norm": 0.5726815798258972, "learning_rate": 9.981738442500644e-06, "loss": 0.31586179733276365, "step": 4275 }, { "epoch": 0.3739297571203914, "grad_norm": 0.49665783898371607, "learning_rate": 9.981520790272677e-06, "loss": 0.3381922245025635, "step": 4280 }, { "epoch": 0.3743665909488031, "grad_norm": 0.5401105329523266, "learning_rate": 9.981301851063218e-06, "loss": 0.33162569999694824, "step": 4285 }, { "epoch": 0.37480342477721473, "grad_norm": 0.538756819915678, "learning_rate": 9.98108162492883e-06, "loss": 0.3227358341217041, "step": 4290 }, { "epoch": 0.37524025860562643, "grad_norm": 0.585974473628033, "learning_rate": 9.980860111926407e-06, "loss": 0.3376508951187134, "step": 4295 }, { "epoch": 0.3756770924340381, "grad_norm": 0.5508617625062006, "learning_rate": 9.98063731211318e-06, "loss": 0.3188109159469604, "step": 4300 }, { "epoch": 0.37611392626244977, "grad_norm": 0.5269016110651907, "learning_rate": 9.980413225546707e-06, "loss": 0.3585697650909424, "step": 4305 }, { "epoch": 0.3765507600908614, "grad_norm": 0.4370325088337087, "learning_rate": 9.980187852284881e-06, "loss": 0.34775140285491946, "step": 4310 }, { "epoch": 0.3769875939192731, "grad_norm": 0.4863093530962389, "learning_rate": 9.97996119238593e-06, "loss": 0.36190783977508545, "step": 4315 }, { "epoch": 0.37742442774768475, "grad_norm": 0.5126198495816587, "learning_rate": 9.97973324590841e-06, "loss": 0.3465893030166626, "step": 4320 }, { "epoch": 0.37786126157609645, "grad_norm": 0.538967199901898, "learning_rate": 9.97950401291121e-06, "loss": 0.32648863792419436, "step": 4325 }, { "epoch": 0.37829809540450815, "grad_norm": 0.5072651980991817, "learning_rate": 9.979273493453555e-06, "loss": 0.3377454996109009, "step": 4330 }, { "epoch": 0.3787349292329198, "grad_norm": 0.5303197935250133, "learning_rate": 9.979041687595e-06, "loss": 0.31086087226867676, "step": 4335 }, { "epoch": 0.3791717630613315, "grad_norm": 0.6649778987370185, "learning_rate": 9.978808595395429e-06, "loss": 0.35870392322540284, "step": 4340 }, { "epoch": 0.37960859688974313, "grad_norm": 0.7409601923734983, "learning_rate": 9.978574216915065e-06, "loss": 0.3263205051422119, "step": 4345 }, { "epoch": 0.38004543071815483, "grad_norm": 0.5936800240157415, "learning_rate": 9.97833855221446e-06, "loss": 0.3406821250915527, "step": 4350 }, { "epoch": 0.3804822645465665, "grad_norm": 0.6559715314091071, "learning_rate": 9.978101601354495e-06, "loss": 0.3319076061248779, "step": 4355 }, { "epoch": 0.38091909837497817, "grad_norm": 0.6463197790328199, "learning_rate": 9.97786336439639e-06, "loss": 0.340219783782959, "step": 4360 }, { "epoch": 0.3813559322033898, "grad_norm": 0.6425584715832207, "learning_rate": 9.977623841401691e-06, "loss": 0.3452609539031982, "step": 4365 }, { "epoch": 0.3817927660318015, "grad_norm": 0.4829349756071021, "learning_rate": 9.977383032432282e-06, "loss": 0.3299814462661743, "step": 4370 }, { "epoch": 0.38222959986021315, "grad_norm": 0.6121755517412555, "learning_rate": 9.977140937550375e-06, "loss": 0.3316166400909424, "step": 4375 }, { "epoch": 0.38266643368862485, "grad_norm": 0.6350114800734686, "learning_rate": 9.976897556818515e-06, "loss": 0.38245725631713867, "step": 4380 }, { "epoch": 0.3831032675170365, "grad_norm": 0.5079106029780895, "learning_rate": 9.97665289029958e-06, "loss": 0.35636637210845945, "step": 4385 }, { "epoch": 0.3835401013454482, "grad_norm": 0.5609078817416905, "learning_rate": 9.976406938056778e-06, "loss": 0.3226120710372925, "step": 4390 }, { "epoch": 0.3839769351738599, "grad_norm": 0.517624733159076, "learning_rate": 9.976159700153654e-06, "loss": 0.35279364585876466, "step": 4395 }, { "epoch": 0.38441376900227153, "grad_norm": 0.7026557029380686, "learning_rate": 9.97591117665408e-06, "loss": 0.34121150970458985, "step": 4400 }, { "epoch": 0.38485060283068323, "grad_norm": 0.535439650962722, "learning_rate": 9.975661367622265e-06, "loss": 0.34800128936767577, "step": 4405 }, { "epoch": 0.3852874366590949, "grad_norm": 0.44525156322400844, "learning_rate": 9.975410273122745e-06, "loss": 0.3288179874420166, "step": 4410 }, { "epoch": 0.38572427048750657, "grad_norm": 0.47647389284428604, "learning_rate": 9.975157893220391e-06, "loss": 0.347149658203125, "step": 4415 }, { "epoch": 0.3861611043159182, "grad_norm": 0.515725609193162, "learning_rate": 9.974904227980406e-06, "loss": 0.3630523681640625, "step": 4420 }, { "epoch": 0.3865979381443299, "grad_norm": 0.5268794020702627, "learning_rate": 9.974649277468324e-06, "loss": 0.3194692373275757, "step": 4425 }, { "epoch": 0.38703477197274155, "grad_norm": 0.6154735269419608, "learning_rate": 9.974393041750015e-06, "loss": 0.33717525005340576, "step": 4430 }, { "epoch": 0.38747160580115325, "grad_norm": 0.53642552842897, "learning_rate": 9.974135520891672e-06, "loss": 0.3294651031494141, "step": 4435 }, { "epoch": 0.3879084396295649, "grad_norm": 0.6161351100893262, "learning_rate": 9.973876714959832e-06, "loss": 0.32815454006195066, "step": 4440 }, { "epoch": 0.3883452734579766, "grad_norm": 0.49201226983818974, "learning_rate": 9.973616624021352e-06, "loss": 0.33446440696716306, "step": 4445 }, { "epoch": 0.38878210728638823, "grad_norm": 0.52611735048427, "learning_rate": 9.97335524814343e-06, "loss": 0.3372478485107422, "step": 4450 }, { "epoch": 0.38921894111479993, "grad_norm": 0.6267634116944307, "learning_rate": 9.973092587393594e-06, "loss": 0.3435975074768066, "step": 4455 }, { "epoch": 0.3896557749432116, "grad_norm": 0.5210303404530389, "learning_rate": 9.9728286418397e-06, "loss": 0.336124324798584, "step": 4460 }, { "epoch": 0.3900926087716233, "grad_norm": 0.5553318471731826, "learning_rate": 9.97256341154994e-06, "loss": 0.31918673515319823, "step": 4465 }, { "epoch": 0.39052944260003497, "grad_norm": 0.5312161205682147, "learning_rate": 9.972296896592835e-06, "loss": 0.3526679277420044, "step": 4470 }, { "epoch": 0.3909662764284466, "grad_norm": 0.5344608470029446, "learning_rate": 9.972029097037243e-06, "loss": 0.3214511156082153, "step": 4475 }, { "epoch": 0.3914031102568583, "grad_norm": 0.5085464006159718, "learning_rate": 9.971760012952346e-06, "loss": 0.3390002727508545, "step": 4480 }, { "epoch": 0.39183994408526995, "grad_norm": 0.53500972015578, "learning_rate": 9.971489644407663e-06, "loss": 0.34739174842834475, "step": 4485 }, { "epoch": 0.39227677791368165, "grad_norm": 0.546577189634888, "learning_rate": 9.971217991473048e-06, "loss": 0.37390570640563964, "step": 4490 }, { "epoch": 0.3927136117420933, "grad_norm": 0.5222202772920361, "learning_rate": 9.970945054218678e-06, "loss": 0.3111907958984375, "step": 4495 }, { "epoch": 0.393150445570505, "grad_norm": 0.5709837806676934, "learning_rate": 9.97067083271507e-06, "loss": 0.32748961448669434, "step": 4500 }, { "epoch": 0.39358727939891663, "grad_norm": 0.7194271218898687, "learning_rate": 9.970395327033066e-06, "loss": 0.33382296562194824, "step": 4505 }, { "epoch": 0.39402411322732833, "grad_norm": 0.581883315454521, "learning_rate": 9.970118537243848e-06, "loss": 0.3412910461425781, "step": 4510 }, { "epoch": 0.39446094705574, "grad_norm": 0.5619497745433872, "learning_rate": 9.96984046341892e-06, "loss": 0.3332595586776733, "step": 4515 }, { "epoch": 0.3948977808841517, "grad_norm": 0.5843074733474294, "learning_rate": 9.969561105630125e-06, "loss": 0.3640787601470947, "step": 4520 }, { "epoch": 0.3953346147125633, "grad_norm": 0.6360018209153037, "learning_rate": 9.969280463949637e-06, "loss": 0.3320883274078369, "step": 4525 }, { "epoch": 0.395771448540975, "grad_norm": 0.5643037126995156, "learning_rate": 9.968998538449958e-06, "loss": 0.34174883365631104, "step": 4530 }, { "epoch": 0.3962082823693867, "grad_norm": 0.5632567606884628, "learning_rate": 9.968715329203925e-06, "loss": 0.32503314018249513, "step": 4535 }, { "epoch": 0.39664511619779835, "grad_norm": 0.4754267296563731, "learning_rate": 9.968430836284702e-06, "loss": 0.33356804847717286, "step": 4540 }, { "epoch": 0.39708195002621005, "grad_norm": 0.560300107110216, "learning_rate": 9.968145059765791e-06, "loss": 0.3701903820037842, "step": 4545 }, { "epoch": 0.3975187838546217, "grad_norm": 0.650781781190046, "learning_rate": 9.967857999721023e-06, "loss": 0.31531057357788084, "step": 4550 }, { "epoch": 0.3979556176830334, "grad_norm": 0.4890013223124384, "learning_rate": 9.96756965622456e-06, "loss": 0.33075857162475586, "step": 4555 }, { "epoch": 0.39839245151144503, "grad_norm": 0.46425039126866857, "learning_rate": 9.967280029350895e-06, "loss": 0.3271761894226074, "step": 4560 }, { "epoch": 0.39882928533985673, "grad_norm": 0.5523396255128273, "learning_rate": 9.966989119174855e-06, "loss": 0.33322296142578123, "step": 4565 }, { "epoch": 0.3992661191682684, "grad_norm": 0.6638078999137089, "learning_rate": 9.966696925771595e-06, "loss": 0.34601435661315916, "step": 4570 }, { "epoch": 0.39970295299668007, "grad_norm": 0.7122904213962602, "learning_rate": 9.966403449216603e-06, "loss": 0.32684874534606934, "step": 4575 }, { "epoch": 0.4001397868250917, "grad_norm": 0.5388842926512505, "learning_rate": 9.966108689585704e-06, "loss": 0.3368037223815918, "step": 4580 }, { "epoch": 0.4005766206535034, "grad_norm": 0.5826443169620974, "learning_rate": 9.965812646955042e-06, "loss": 0.3482532024383545, "step": 4585 }, { "epoch": 0.40101345448191505, "grad_norm": 0.6029058266935425, "learning_rate": 9.965515321401106e-06, "loss": 0.35750226974487304, "step": 4590 }, { "epoch": 0.40145028831032675, "grad_norm": 0.6160500982369438, "learning_rate": 9.965216713000708e-06, "loss": 0.3358476638793945, "step": 4595 }, { "epoch": 0.40188712213873845, "grad_norm": 1.1111688488077427, "learning_rate": 9.964916821830992e-06, "loss": 0.32058300971984866, "step": 4600 }, { "epoch": 0.4023239559671501, "grad_norm": 0.4900415321329034, "learning_rate": 9.96461564796944e-06, "loss": 0.32277233600616456, "step": 4605 }, { "epoch": 0.4027607897955618, "grad_norm": 0.48648167169495166, "learning_rate": 9.964313191493856e-06, "loss": 0.33329153060913086, "step": 4610 }, { "epoch": 0.40319762362397343, "grad_norm": 0.4977167283130842, "learning_rate": 9.964009452482383e-06, "loss": 0.28298096656799315, "step": 4615 }, { "epoch": 0.40363445745238513, "grad_norm": 0.5547104035060831, "learning_rate": 9.96370443101349e-06, "loss": 0.33664746284484864, "step": 4620 }, { "epoch": 0.4040712912807968, "grad_norm": 0.4540317557053332, "learning_rate": 9.963398127165981e-06, "loss": 0.3147326946258545, "step": 4625 }, { "epoch": 0.40450812510920847, "grad_norm": 0.504044909181397, "learning_rate": 9.963090541018992e-06, "loss": 0.3360127449035645, "step": 4630 }, { "epoch": 0.4049449589376201, "grad_norm": 0.6249612748645516, "learning_rate": 9.962781672651983e-06, "loss": 0.3105597496032715, "step": 4635 }, { "epoch": 0.4053817927660318, "grad_norm": 0.5158877573232626, "learning_rate": 9.962471522144755e-06, "loss": 0.3329577922821045, "step": 4640 }, { "epoch": 0.40581862659444345, "grad_norm": 0.557110414255339, "learning_rate": 9.962160089577432e-06, "loss": 0.3542471408843994, "step": 4645 }, { "epoch": 0.40625546042285515, "grad_norm": 0.48366460638963255, "learning_rate": 9.961847375030478e-06, "loss": 0.32396554946899414, "step": 4650 }, { "epoch": 0.4066922942512668, "grad_norm": 0.6149210861993442, "learning_rate": 9.961533378584677e-06, "loss": 0.3291712522506714, "step": 4655 }, { "epoch": 0.4071291280796785, "grad_norm": 0.48059618578449226, "learning_rate": 9.961218100321157e-06, "loss": 0.3241724491119385, "step": 4660 }, { "epoch": 0.40756596190809014, "grad_norm": 0.5427119670976179, "learning_rate": 9.960901540321366e-06, "loss": 0.3336660385131836, "step": 4665 }, { "epoch": 0.40800279573650183, "grad_norm": 0.5435128278626344, "learning_rate": 9.960583698667087e-06, "loss": 0.341505765914917, "step": 4670 }, { "epoch": 0.40843962956491353, "grad_norm": 0.4705912465747617, "learning_rate": 9.960264575440439e-06, "loss": 0.3631165027618408, "step": 4675 }, { "epoch": 0.4088764633933252, "grad_norm": 0.44131701595690775, "learning_rate": 9.959944170723864e-06, "loss": 0.31310467720031737, "step": 4680 }, { "epoch": 0.40931329722173687, "grad_norm": 0.47937468783471215, "learning_rate": 9.95962248460014e-06, "loss": 0.33187673091888426, "step": 4685 }, { "epoch": 0.4097501310501485, "grad_norm": 0.5314309875769766, "learning_rate": 9.959299517152376e-06, "loss": 0.33267483711242674, "step": 4690 }, { "epoch": 0.4101869648785602, "grad_norm": 0.5055374100225548, "learning_rate": 9.95897526846401e-06, "loss": 0.33191747665405275, "step": 4695 }, { "epoch": 0.41062379870697185, "grad_norm": 0.5383019732747397, "learning_rate": 9.958649738618812e-06, "loss": 0.3712820053100586, "step": 4700 }, { "epoch": 0.41106063253538355, "grad_norm": 0.7629532965671996, "learning_rate": 9.958322927700883e-06, "loss": 0.3558624505996704, "step": 4705 }, { "epoch": 0.4114974663637952, "grad_norm": 0.5550708430425194, "learning_rate": 9.957994835794655e-06, "loss": 0.3401182174682617, "step": 4710 }, { "epoch": 0.4119343001922069, "grad_norm": 0.5312989885568105, "learning_rate": 9.957665462984892e-06, "loss": 0.3356866598129272, "step": 4715 }, { "epoch": 0.41237113402061853, "grad_norm": 0.5469552553575487, "learning_rate": 9.957334809356686e-06, "loss": 0.33791294097900393, "step": 4720 }, { "epoch": 0.41280796784903023, "grad_norm": 0.5398665287087739, "learning_rate": 9.957002874995463e-06, "loss": 0.3150274038314819, "step": 4725 }, { "epoch": 0.4132448016774419, "grad_norm": 0.5960486094062625, "learning_rate": 9.956669659986979e-06, "loss": 0.3612999439239502, "step": 4730 }, { "epoch": 0.4136816355058536, "grad_norm": 0.41352336563712255, "learning_rate": 9.956335164417319e-06, "loss": 0.3398073673248291, "step": 4735 }, { "epoch": 0.41411846933426527, "grad_norm": 0.6008062426330335, "learning_rate": 9.955999388372902e-06, "loss": 0.34183168411254883, "step": 4740 }, { "epoch": 0.4145553031626769, "grad_norm": 0.49719272286692195, "learning_rate": 9.955662331940475e-06, "loss": 0.34546241760253904, "step": 4745 }, { "epoch": 0.4149921369910886, "grad_norm": 0.531164992019649, "learning_rate": 9.955323995207115e-06, "loss": 0.3377117395401001, "step": 4750 }, { "epoch": 0.41542897081950025, "grad_norm": 0.5179866635226975, "learning_rate": 9.954984378260235e-06, "loss": 0.334679913520813, "step": 4755 }, { "epoch": 0.41586580464791195, "grad_norm": 0.546513928379957, "learning_rate": 9.954643481187576e-06, "loss": 0.35289156436920166, "step": 4760 }, { "epoch": 0.4163026384763236, "grad_norm": 0.7558039167516638, "learning_rate": 9.954301304077205e-06, "loss": 0.36086835861206057, "step": 4765 }, { "epoch": 0.4167394723047353, "grad_norm": 0.5462650204602715, "learning_rate": 9.953957847017527e-06, "loss": 0.38542938232421875, "step": 4770 }, { "epoch": 0.41717630613314693, "grad_norm": 0.5702378465995019, "learning_rate": 9.953613110097273e-06, "loss": 0.32401766777038576, "step": 4775 }, { "epoch": 0.41761313996155863, "grad_norm": 0.5607679065994176, "learning_rate": 9.953267093405508e-06, "loss": 0.34375605583190916, "step": 4780 }, { "epoch": 0.4180499737899703, "grad_norm": 0.5102467268090091, "learning_rate": 9.952919797031625e-06, "loss": 0.3431102991104126, "step": 4785 }, { "epoch": 0.418486807618382, "grad_norm": 0.5369363323909843, "learning_rate": 9.952571221065346e-06, "loss": 0.3103039741516113, "step": 4790 }, { "epoch": 0.4189236414467936, "grad_norm": 0.5874525204306895, "learning_rate": 9.95222136559673e-06, "loss": 0.3177194356918335, "step": 4795 }, { "epoch": 0.4193604752752053, "grad_norm": 0.4575758853563607, "learning_rate": 9.951870230716159e-06, "loss": 0.32567012310028076, "step": 4800 }, { "epoch": 0.419797309103617, "grad_norm": 0.5602685127256224, "learning_rate": 9.951517816514353e-06, "loss": 0.3416574001312256, "step": 4805 }, { "epoch": 0.42023414293202865, "grad_norm": 0.536931391792598, "learning_rate": 9.951164123082355e-06, "loss": 0.3420892238616943, "step": 4810 }, { "epoch": 0.42067097676044035, "grad_norm": 0.5583779403000887, "learning_rate": 9.950809150511542e-06, "loss": 0.326077938079834, "step": 4815 }, { "epoch": 0.421107810588852, "grad_norm": 0.5546936606775961, "learning_rate": 9.950452898893625e-06, "loss": 0.3207207918167114, "step": 4820 }, { "epoch": 0.4215446444172637, "grad_norm": 0.5417949638199555, "learning_rate": 9.950095368320638e-06, "loss": 0.3226727247238159, "step": 4825 }, { "epoch": 0.42198147824567533, "grad_norm": 0.5344004743645147, "learning_rate": 9.949736558884953e-06, "loss": 0.30492348670959474, "step": 4830 }, { "epoch": 0.42241831207408703, "grad_norm": 0.500744422483878, "learning_rate": 9.949376470679269e-06, "loss": 0.3122705459594727, "step": 4835 }, { "epoch": 0.4228551459024987, "grad_norm": 0.47636419078941866, "learning_rate": 9.94901510379661e-06, "loss": 0.3361629009246826, "step": 4840 }, { "epoch": 0.4232919797309104, "grad_norm": 0.6166200549234147, "learning_rate": 9.94865245833034e-06, "loss": 0.3302281379699707, "step": 4845 }, { "epoch": 0.423728813559322, "grad_norm": 0.4435462976749806, "learning_rate": 9.948288534374148e-06, "loss": 0.3468549013137817, "step": 4850 }, { "epoch": 0.4241656473877337, "grad_norm": 0.47826270850047936, "learning_rate": 9.947923332022053e-06, "loss": 0.34860763549804685, "step": 4855 }, { "epoch": 0.42460248121614536, "grad_norm": 0.646384648788541, "learning_rate": 9.947556851368408e-06, "loss": 0.34458811283111573, "step": 4860 }, { "epoch": 0.42503931504455705, "grad_norm": 0.5679321527902843, "learning_rate": 9.947189092507892e-06, "loss": 0.3324551582336426, "step": 4865 }, { "epoch": 0.4254761488729687, "grad_norm": 0.4778761226274191, "learning_rate": 9.946820055535516e-06, "loss": 0.3341715335845947, "step": 4870 }, { "epoch": 0.4259129827013804, "grad_norm": 0.5123995110703422, "learning_rate": 9.94644974054662e-06, "loss": 0.3428332328796387, "step": 4875 }, { "epoch": 0.4263498165297921, "grad_norm": 0.45934882278227357, "learning_rate": 9.946078147636878e-06, "loss": 0.30244951248168944, "step": 4880 }, { "epoch": 0.42678665035820373, "grad_norm": 0.518017418463324, "learning_rate": 9.94570527690229e-06, "loss": 0.34114856719970704, "step": 4885 }, { "epoch": 0.42722348418661543, "grad_norm": 0.569467475786937, "learning_rate": 9.945331128439185e-06, "loss": 0.30938358306884767, "step": 4890 }, { "epoch": 0.4276603180150271, "grad_norm": 0.5026713842085754, "learning_rate": 9.944955702344228e-06, "loss": 0.32950329780578613, "step": 4895 }, { "epoch": 0.4280971518434388, "grad_norm": 0.41201458078884756, "learning_rate": 9.944578998714411e-06, "loss": 0.3275744915008545, "step": 4900 }, { "epoch": 0.4285339856718504, "grad_norm": 0.4865717782666368, "learning_rate": 9.944201017647054e-06, "loss": 0.30189130306243894, "step": 4905 }, { "epoch": 0.4289708195002621, "grad_norm": 0.5289679124693751, "learning_rate": 9.943821759239809e-06, "loss": 0.3292719841003418, "step": 4910 }, { "epoch": 0.42940765332867376, "grad_norm": 0.5296276519740881, "learning_rate": 9.94344122359066e-06, "loss": 0.34888086318969724, "step": 4915 }, { "epoch": 0.42984448715708545, "grad_norm": 0.4808128145530095, "learning_rate": 9.943059410797916e-06, "loss": 0.3360834360122681, "step": 4920 }, { "epoch": 0.4302813209854971, "grad_norm": 0.5646460197856231, "learning_rate": 9.942676320960221e-06, "loss": 0.3034728765487671, "step": 4925 }, { "epoch": 0.4307181548139088, "grad_norm": 0.5135531458098921, "learning_rate": 9.942291954176544e-06, "loss": 0.3540319919586182, "step": 4930 }, { "epoch": 0.43115498864232044, "grad_norm": 0.47329237097641097, "learning_rate": 9.941906310546187e-06, "loss": 0.31737337112426756, "step": 4935 }, { "epoch": 0.43159182247073213, "grad_norm": 0.5650513115984322, "learning_rate": 9.941519390168786e-06, "loss": 0.3505756378173828, "step": 4940 }, { "epoch": 0.43202865629914383, "grad_norm": 0.48182774880195683, "learning_rate": 9.941131193144298e-06, "loss": 0.3426870822906494, "step": 4945 }, { "epoch": 0.4324654901275555, "grad_norm": 0.511963283911251, "learning_rate": 9.940741719573015e-06, "loss": 0.33955214023590086, "step": 4950 }, { "epoch": 0.43290232395596717, "grad_norm": 0.5782881203385011, "learning_rate": 9.940350969555557e-06, "loss": 0.2804577350616455, "step": 4955 }, { "epoch": 0.4333391577843788, "grad_norm": 0.6574056000937176, "learning_rate": 9.939958943192877e-06, "loss": 0.34012413024902344, "step": 4960 }, { "epoch": 0.4337759916127905, "grad_norm": 0.5642645186406269, "learning_rate": 9.939565640586252e-06, "loss": 0.34596073627471924, "step": 4965 }, { "epoch": 0.43421282544120215, "grad_norm": 0.4834929095201293, "learning_rate": 9.939171061837297e-06, "loss": 0.33093810081481934, "step": 4970 }, { "epoch": 0.43464965926961385, "grad_norm": 0.5088121476863411, "learning_rate": 9.938775207047948e-06, "loss": 0.35358595848083496, "step": 4975 }, { "epoch": 0.4350864930980255, "grad_norm": 0.5321189349930331, "learning_rate": 9.938378076320477e-06, "loss": 0.3208459854125977, "step": 4980 }, { "epoch": 0.4355233269264372, "grad_norm": 0.6047701682120487, "learning_rate": 9.93797966975748e-06, "loss": 0.34274346828460694, "step": 4985 }, { "epoch": 0.43596016075484884, "grad_norm": 0.600285506024837, "learning_rate": 9.93757998746189e-06, "loss": 0.3446365833282471, "step": 4990 }, { "epoch": 0.43639699458326053, "grad_norm": 0.5203249842705618, "learning_rate": 9.937179029536962e-06, "loss": 0.3168343544006348, "step": 4995 }, { "epoch": 0.4368338284116722, "grad_norm": 0.44672570050612603, "learning_rate": 9.936776796086284e-06, "loss": 0.3339958667755127, "step": 5000 }, { "epoch": 0.4372706622400839, "grad_norm": 0.4802339365046641, "learning_rate": 9.936373287213776e-06, "loss": 0.31447525024414064, "step": 5005 }, { "epoch": 0.43770749606849557, "grad_norm": 0.45323437244038584, "learning_rate": 9.935968503023682e-06, "loss": 0.30912299156188966, "step": 5010 }, { "epoch": 0.4381443298969072, "grad_norm": 0.6585542535578025, "learning_rate": 9.935562443620582e-06, "loss": 0.31055262088775637, "step": 5015 }, { "epoch": 0.4385811637253189, "grad_norm": 0.558906668258936, "learning_rate": 9.935155109109378e-06, "loss": 0.3425605773925781, "step": 5020 }, { "epoch": 0.43901799755373055, "grad_norm": 0.5292065900923183, "learning_rate": 9.934746499595308e-06, "loss": 0.3523892879486084, "step": 5025 }, { "epoch": 0.43945483138214225, "grad_norm": 0.5563461956037404, "learning_rate": 9.934336615183935e-06, "loss": 0.3333237409591675, "step": 5030 }, { "epoch": 0.4398916652105539, "grad_norm": 0.5131102048641656, "learning_rate": 9.933925455981154e-06, "loss": 0.3226830005645752, "step": 5035 }, { "epoch": 0.4403284990389656, "grad_norm": 0.5708554720370302, "learning_rate": 9.93351302209319e-06, "loss": 0.3208452701568604, "step": 5040 }, { "epoch": 0.44076533286737724, "grad_norm": 0.4764555835411299, "learning_rate": 9.933099313626594e-06, "loss": 0.2973538160324097, "step": 5045 }, { "epoch": 0.44120216669578893, "grad_norm": 0.5694252477796004, "learning_rate": 9.932684330688248e-06, "loss": 0.3501061201095581, "step": 5050 }, { "epoch": 0.4416390005242006, "grad_norm": 0.5742818498306427, "learning_rate": 9.932268073385363e-06, "loss": 0.3263805389404297, "step": 5055 }, { "epoch": 0.4420758343526123, "grad_norm": 0.5829991641149715, "learning_rate": 9.931850541825479e-06, "loss": 0.3353442668914795, "step": 5060 }, { "epoch": 0.4425126681810239, "grad_norm": 0.7168135732097997, "learning_rate": 9.931431736116467e-06, "loss": 0.33800640106201174, "step": 5065 }, { "epoch": 0.4429495020094356, "grad_norm": 0.5516323510743147, "learning_rate": 9.931011656366527e-06, "loss": 0.33952405452728274, "step": 5070 }, { "epoch": 0.44338633583784726, "grad_norm": 0.5361556718661785, "learning_rate": 9.930590302684187e-06, "loss": 0.3402872562408447, "step": 5075 }, { "epoch": 0.44382316966625895, "grad_norm": 0.48739382377577756, "learning_rate": 9.930167675178302e-06, "loss": 0.2892477989196777, "step": 5080 }, { "epoch": 0.44426000349467065, "grad_norm": 0.49953765790777, "learning_rate": 9.929743773958058e-06, "loss": 0.30461983680725097, "step": 5085 }, { "epoch": 0.4446968373230823, "grad_norm": 0.5174985998897849, "learning_rate": 9.929318599132973e-06, "loss": 0.3193185567855835, "step": 5090 }, { "epoch": 0.445133671151494, "grad_norm": 0.5451884270282599, "learning_rate": 9.928892150812889e-06, "loss": 0.3266033172607422, "step": 5095 }, { "epoch": 0.44557050497990563, "grad_norm": 0.9480249942489882, "learning_rate": 9.92846442910798e-06, "loss": 0.3392582654953003, "step": 5100 }, { "epoch": 0.44600733880831733, "grad_norm": 0.632383148487906, "learning_rate": 9.92803543412875e-06, "loss": 0.33571553230285645, "step": 5105 }, { "epoch": 0.446444172636729, "grad_norm": 0.5452488037214519, "learning_rate": 9.92760516598603e-06, "loss": 0.3335491895675659, "step": 5110 }, { "epoch": 0.4468810064651407, "grad_norm": 0.5299998735939679, "learning_rate": 9.927173624790978e-06, "loss": 0.3597539186477661, "step": 5115 }, { "epoch": 0.4473178402935523, "grad_norm": 0.6066269244647186, "learning_rate": 9.926740810655085e-06, "loss": 0.3463149070739746, "step": 5120 }, { "epoch": 0.447754674121964, "grad_norm": 0.5143994205010547, "learning_rate": 9.92630672369017e-06, "loss": 0.34765920639038084, "step": 5125 }, { "epoch": 0.44819150795037566, "grad_norm": 0.5851143775657036, "learning_rate": 9.925871364008375e-06, "loss": 0.35279800891876223, "step": 5130 }, { "epoch": 0.44862834177878735, "grad_norm": 0.5028711248572606, "learning_rate": 9.925434731722181e-06, "loss": 0.3424410343170166, "step": 5135 }, { "epoch": 0.449065175607199, "grad_norm": 0.5294284386173258, "learning_rate": 9.92499682694439e-06, "loss": 0.29635028839111327, "step": 5140 }, { "epoch": 0.4495020094356107, "grad_norm": 0.7842610769255716, "learning_rate": 9.924557649788138e-06, "loss": 0.3331594467163086, "step": 5145 }, { "epoch": 0.4499388432640224, "grad_norm": 0.5091756183954348, "learning_rate": 9.924117200366882e-06, "loss": 0.33837261199951174, "step": 5150 }, { "epoch": 0.45037567709243403, "grad_norm": 0.5912038313291221, "learning_rate": 9.923675478794418e-06, "loss": 0.34342122077941895, "step": 5155 }, { "epoch": 0.45081251092084573, "grad_norm": 0.4984038779385137, "learning_rate": 9.92323248518486e-06, "loss": 0.3055710315704346, "step": 5160 }, { "epoch": 0.4512493447492574, "grad_norm": 0.4438827025486079, "learning_rate": 9.92278821965266e-06, "loss": 0.2986997127532959, "step": 5165 }, { "epoch": 0.4516861785776691, "grad_norm": 0.4191707445812578, "learning_rate": 9.922342682312593e-06, "loss": 0.3569056749343872, "step": 5170 }, { "epoch": 0.4521230124060807, "grad_norm": 0.4725391420380101, "learning_rate": 9.921895873279763e-06, "loss": 0.31669819355010986, "step": 5175 }, { "epoch": 0.4525598462344924, "grad_norm": 0.5088782683478656, "learning_rate": 9.921447792669605e-06, "loss": 0.3176229000091553, "step": 5180 }, { "epoch": 0.45299668006290406, "grad_norm": 0.7684327752894894, "learning_rate": 9.92099844059788e-06, "loss": 0.36287355422973633, "step": 5185 }, { "epoch": 0.45343351389131575, "grad_norm": 0.547974635328606, "learning_rate": 9.920547817180682e-06, "loss": 0.32699971199035643, "step": 5190 }, { "epoch": 0.4538703477197274, "grad_norm": 0.5388295707679696, "learning_rate": 9.920095922534425e-06, "loss": 0.31832902431488036, "step": 5195 }, { "epoch": 0.4543071815481391, "grad_norm": 0.4816296882392909, "learning_rate": 9.91964275677586e-06, "loss": 0.32994661331176756, "step": 5200 }, { "epoch": 0.45474401537655074, "grad_norm": 0.5423977212640091, "learning_rate": 9.91918832002206e-06, "loss": 0.3255728006362915, "step": 5205 }, { "epoch": 0.45518084920496243, "grad_norm": 0.42430165595502567, "learning_rate": 9.918732612390433e-06, "loss": 0.31226935386657717, "step": 5210 }, { "epoch": 0.4556176830333741, "grad_norm": 0.5324642809114652, "learning_rate": 9.918275633998709e-06, "loss": 0.3210742950439453, "step": 5215 }, { "epoch": 0.4560545168617858, "grad_norm": 0.4981682344350558, "learning_rate": 9.91781738496495e-06, "loss": 0.37302706241607664, "step": 5220 }, { "epoch": 0.4564913506901975, "grad_norm": 0.587518488334888, "learning_rate": 9.917357865407543e-06, "loss": 0.3032832622528076, "step": 5225 }, { "epoch": 0.4569281845186091, "grad_norm": 0.5465606781216025, "learning_rate": 9.91689707544521e-06, "loss": 0.3330334424972534, "step": 5230 }, { "epoch": 0.4573650183470208, "grad_norm": 0.45272133789266444, "learning_rate": 9.91643501519699e-06, "loss": 0.31917407512664797, "step": 5235 }, { "epoch": 0.45780185217543246, "grad_norm": 0.5443278708898184, "learning_rate": 9.915971684782262e-06, "loss": 0.3298666954040527, "step": 5240 }, { "epoch": 0.45823868600384415, "grad_norm": 0.5376326027932062, "learning_rate": 9.915507084320727e-06, "loss": 0.32689456939697265, "step": 5245 }, { "epoch": 0.4586755198322558, "grad_norm": 0.46824247902090704, "learning_rate": 9.915041213932414e-06, "loss": 0.321742582321167, "step": 5250 }, { "epoch": 0.4591123536606675, "grad_norm": 0.46154329777416536, "learning_rate": 9.914574073737681e-06, "loss": 0.3252159595489502, "step": 5255 }, { "epoch": 0.45954918748907914, "grad_norm": 0.6689987979968725, "learning_rate": 9.914105663857216e-06, "loss": 0.3151599884033203, "step": 5260 }, { "epoch": 0.45998602131749083, "grad_norm": 0.5702228772146533, "learning_rate": 9.913635984412032e-06, "loss": 0.3402653455734253, "step": 5265 }, { "epoch": 0.4604228551459025, "grad_norm": 0.61535481111629, "learning_rate": 9.91316503552347e-06, "loss": 0.3210702419281006, "step": 5270 }, { "epoch": 0.4608596889743142, "grad_norm": 0.48061811761605994, "learning_rate": 9.912692817313203e-06, "loss": 0.3265460252761841, "step": 5275 }, { "epoch": 0.4612965228027258, "grad_norm": 0.5304270838240356, "learning_rate": 9.912219329903226e-06, "loss": 0.318938684463501, "step": 5280 }, { "epoch": 0.4617333566311375, "grad_norm": 0.6160338568028466, "learning_rate": 9.911744573415866e-06, "loss": 0.31750950813293455, "step": 5285 }, { "epoch": 0.4621701904595492, "grad_norm": 0.5443465086789571, "learning_rate": 9.911268547973778e-06, "loss": 0.32354211807250977, "step": 5290 }, { "epoch": 0.46260702428796086, "grad_norm": 0.5039144438920573, "learning_rate": 9.910791253699943e-06, "loss": 0.3124234199523926, "step": 5295 }, { "epoch": 0.46304385811637255, "grad_norm": 0.6082105823718892, "learning_rate": 9.910312690717672e-06, "loss": 0.35210237503051756, "step": 5300 }, { "epoch": 0.4634806919447842, "grad_norm": 0.5041455048497532, "learning_rate": 9.9098328591506e-06, "loss": 0.33781673908233645, "step": 5305 }, { "epoch": 0.4639175257731959, "grad_norm": 0.49068764552700694, "learning_rate": 9.909351759122692e-06, "loss": 0.32319297790527346, "step": 5310 }, { "epoch": 0.46435435960160754, "grad_norm": 0.6874007501308677, "learning_rate": 9.908869390758243e-06, "loss": 0.35238304138183596, "step": 5315 }, { "epoch": 0.46479119343001923, "grad_norm": 0.4650961197479209, "learning_rate": 9.90838575418187e-06, "loss": 0.317523193359375, "step": 5320 }, { "epoch": 0.4652280272584309, "grad_norm": 0.4829160968688746, "learning_rate": 9.907900849518524e-06, "loss": 0.3210891246795654, "step": 5325 }, { "epoch": 0.4656648610868426, "grad_norm": 0.47578717418453903, "learning_rate": 9.90741467689348e-06, "loss": 0.3355602741241455, "step": 5330 }, { "epoch": 0.4661016949152542, "grad_norm": 0.5106459708968105, "learning_rate": 9.906927236432341e-06, "loss": 0.3043470859527588, "step": 5335 }, { "epoch": 0.4665385287436659, "grad_norm": 0.5447222825410604, "learning_rate": 9.906438528261038e-06, "loss": 0.3416615962982178, "step": 5340 }, { "epoch": 0.46697536257207756, "grad_norm": 0.45710719632615254, "learning_rate": 9.905948552505828e-06, "loss": 0.3227440357208252, "step": 5345 }, { "epoch": 0.46741219640048925, "grad_norm": 0.5314798163237865, "learning_rate": 9.905457309293299e-06, "loss": 0.3420973777770996, "step": 5350 }, { "epoch": 0.46784903022890095, "grad_norm": 0.46058640980973103, "learning_rate": 9.904964798750362e-06, "loss": 0.34356396198272704, "step": 5355 }, { "epoch": 0.4682858640573126, "grad_norm": 0.4499142970910697, "learning_rate": 9.904471021004261e-06, "loss": 0.3016827583312988, "step": 5360 }, { "epoch": 0.4687226978857243, "grad_norm": 0.441537168014208, "learning_rate": 9.90397597618256e-06, "loss": 0.3146039009094238, "step": 5365 }, { "epoch": 0.46915953171413594, "grad_norm": 0.4508600798644635, "learning_rate": 9.903479664413156e-06, "loss": 0.3412508249282837, "step": 5370 }, { "epoch": 0.46959636554254763, "grad_norm": 0.5115309548928317, "learning_rate": 9.902982085824272e-06, "loss": 0.32626967430114745, "step": 5375 }, { "epoch": 0.4700331993709593, "grad_norm": 0.4987729978866421, "learning_rate": 9.902483240544457e-06, "loss": 0.33916919231414794, "step": 5380 }, { "epoch": 0.470470033199371, "grad_norm": 0.48661815004196457, "learning_rate": 9.901983128702592e-06, "loss": 0.3127013683319092, "step": 5385 }, { "epoch": 0.4709068670277826, "grad_norm": 0.5957574725436445, "learning_rate": 9.901481750427877e-06, "loss": 0.3277576446533203, "step": 5390 }, { "epoch": 0.4713437008561943, "grad_norm": 0.5484544892885215, "learning_rate": 9.900979105849845e-06, "loss": 0.3238211631774902, "step": 5395 }, { "epoch": 0.47178053468460596, "grad_norm": 0.6201529890151558, "learning_rate": 9.900475195098355e-06, "loss": 0.325926399230957, "step": 5400 }, { "epoch": 0.47221736851301765, "grad_norm": 0.5315527592315215, "learning_rate": 9.899970018303592e-06, "loss": 0.34867048263549805, "step": 5405 }, { "epoch": 0.4726542023414293, "grad_norm": 0.522212369769082, "learning_rate": 9.899463575596072e-06, "loss": 0.341522741317749, "step": 5410 }, { "epoch": 0.473091036169841, "grad_norm": 0.607607400944683, "learning_rate": 9.898955867106632e-06, "loss": 0.32478766441345214, "step": 5415 }, { "epoch": 0.47352786999825264, "grad_norm": 0.5339617468967972, "learning_rate": 9.898446892966439e-06, "loss": 0.3317256927490234, "step": 5420 }, { "epoch": 0.47396470382666434, "grad_norm": 0.44064938557423483, "learning_rate": 9.897936653306989e-06, "loss": 0.3475985050201416, "step": 5425 }, { "epoch": 0.47440153765507603, "grad_norm": 0.5154137179856991, "learning_rate": 9.897425148260103e-06, "loss": 0.34240806102752686, "step": 5430 }, { "epoch": 0.4748383714834877, "grad_norm": 0.47476332786996256, "learning_rate": 9.896912377957925e-06, "loss": 0.3130682945251465, "step": 5435 }, { "epoch": 0.4752752053118994, "grad_norm": 0.5163678738790325, "learning_rate": 9.896398342532934e-06, "loss": 0.32221484184265137, "step": 5440 }, { "epoch": 0.475712039140311, "grad_norm": 0.5967418025263158, "learning_rate": 9.89588304211793e-06, "loss": 0.32371225357055666, "step": 5445 }, { "epoch": 0.4761488729687227, "grad_norm": 0.48014835646824205, "learning_rate": 9.895366476846045e-06, "loss": 0.34739041328430176, "step": 5450 }, { "epoch": 0.47658570679713436, "grad_norm": 0.5344271320410389, "learning_rate": 9.894848646850727e-06, "loss": 0.36365323066711425, "step": 5455 }, { "epoch": 0.47702254062554605, "grad_norm": 0.4661256649838104, "learning_rate": 9.894329552265765e-06, "loss": 0.3198976993560791, "step": 5460 }, { "epoch": 0.4774593744539577, "grad_norm": 0.5264946228031931, "learning_rate": 9.893809193225262e-06, "loss": 0.3271039962768555, "step": 5465 }, { "epoch": 0.4778962082823694, "grad_norm": 0.43535878448211773, "learning_rate": 9.893287569863657e-06, "loss": 0.33368768692016604, "step": 5470 }, { "epoch": 0.47833304211078104, "grad_norm": 0.6446619853942471, "learning_rate": 9.892764682315711e-06, "loss": 0.33176627159118655, "step": 5475 }, { "epoch": 0.47876987593919274, "grad_norm": 0.40655924111930936, "learning_rate": 9.892240530716512e-06, "loss": 0.3060999155044556, "step": 5480 }, { "epoch": 0.4792067097676044, "grad_norm": 0.5183205055045831, "learning_rate": 9.891715115201476e-06, "loss": 0.3510743618011475, "step": 5485 }, { "epoch": 0.4796435435960161, "grad_norm": 0.5525207403038014, "learning_rate": 9.891188435906345e-06, "loss": 0.3257258892059326, "step": 5490 }, { "epoch": 0.4800803774244278, "grad_norm": 0.5443851590265213, "learning_rate": 9.890660492967187e-06, "loss": 0.3035292625427246, "step": 5495 }, { "epoch": 0.4805172112528394, "grad_norm": 0.6489974873864307, "learning_rate": 9.890131286520394e-06, "loss": 0.34661900997161865, "step": 5500 }, { "epoch": 0.4809540450812511, "grad_norm": 0.5457829165040509, "learning_rate": 9.88960081670269e-06, "loss": 0.3167874813079834, "step": 5505 }, { "epoch": 0.48139087890966276, "grad_norm": 0.46767562210743135, "learning_rate": 9.889069083651124e-06, "loss": 0.3272775888442993, "step": 5510 }, { "epoch": 0.48182771273807445, "grad_norm": 0.48879935418679643, "learning_rate": 9.888536087503066e-06, "loss": 0.3458139896392822, "step": 5515 }, { "epoch": 0.4822645465664861, "grad_norm": 0.5674585581074058, "learning_rate": 9.888001828396217e-06, "loss": 0.32593817710876466, "step": 5520 }, { "epoch": 0.4827013803948978, "grad_norm": 0.4768612051523965, "learning_rate": 9.887466306468606e-06, "loss": 0.2958309888839722, "step": 5525 }, { "epoch": 0.48313821422330944, "grad_norm": 0.46157088715953903, "learning_rate": 9.886929521858582e-06, "loss": 0.30621066093444826, "step": 5530 }, { "epoch": 0.48357504805172113, "grad_norm": 0.5133911879376328, "learning_rate": 9.886391474704827e-06, "loss": 0.35221998691558837, "step": 5535 }, { "epoch": 0.4840118818801328, "grad_norm": 0.4919219928863231, "learning_rate": 9.885852165146346e-06, "loss": 0.31473355293273925, "step": 5540 }, { "epoch": 0.4844487157085445, "grad_norm": 0.43422003823672795, "learning_rate": 9.885311593322466e-06, "loss": 0.3037498950958252, "step": 5545 }, { "epoch": 0.4848855495369561, "grad_norm": 0.5132920819168033, "learning_rate": 9.88476975937285e-06, "loss": 0.3657618761062622, "step": 5550 }, { "epoch": 0.4853223833653678, "grad_norm": 0.5617782989719976, "learning_rate": 9.884226663437477e-06, "loss": 0.3153830528259277, "step": 5555 }, { "epoch": 0.4857592171937795, "grad_norm": 0.5568539376945908, "learning_rate": 9.88368230565666e-06, "loss": 0.3126667499542236, "step": 5560 }, { "epoch": 0.48619605102219116, "grad_norm": 0.5095155212130992, "learning_rate": 9.883136686171032e-06, "loss": 0.31332173347473147, "step": 5565 }, { "epoch": 0.48663288485060285, "grad_norm": 0.5668767555853361, "learning_rate": 9.882589805121555e-06, "loss": 0.36366822719573977, "step": 5570 }, { "epoch": 0.4870697186790145, "grad_norm": 0.5082941477707292, "learning_rate": 9.882041662649518e-06, "loss": 0.3744348526000977, "step": 5575 }, { "epoch": 0.4875065525074262, "grad_norm": 0.4973350649085914, "learning_rate": 9.88149225889653e-06, "loss": 0.32933406829833983, "step": 5580 }, { "epoch": 0.48794338633583784, "grad_norm": 0.4414364104333076, "learning_rate": 9.880941594004534e-06, "loss": 0.3000446319580078, "step": 5585 }, { "epoch": 0.48838022016424953, "grad_norm": 0.5335610750685739, "learning_rate": 9.880389668115795e-06, "loss": 0.33817205429077146, "step": 5590 }, { "epoch": 0.4888170539926612, "grad_norm": 0.6112670250857765, "learning_rate": 9.8798364813729e-06, "loss": 0.3301222801208496, "step": 5595 }, { "epoch": 0.4892538878210729, "grad_norm": 0.5747139024095628, "learning_rate": 9.879282033918768e-06, "loss": 0.3180300235748291, "step": 5600 }, { "epoch": 0.4896907216494845, "grad_norm": 0.6187978379192777, "learning_rate": 9.878726325896643e-06, "loss": 0.35770263671875, "step": 5605 }, { "epoch": 0.4901275554778962, "grad_norm": 0.6354337468996507, "learning_rate": 9.878169357450089e-06, "loss": 0.31451263427734377, "step": 5610 }, { "epoch": 0.49056438930630786, "grad_norm": 0.5195501491822351, "learning_rate": 9.877611128723003e-06, "loss": 0.30617642402648926, "step": 5615 }, { "epoch": 0.49100122313471956, "grad_norm": 0.7642180006225092, "learning_rate": 9.8770516398596e-06, "loss": 0.3381989002227783, "step": 5620 }, { "epoch": 0.4914380569631312, "grad_norm": 0.6009758759899715, "learning_rate": 9.876490891004427e-06, "loss": 0.3068371772766113, "step": 5625 }, { "epoch": 0.4918748907915429, "grad_norm": 0.508458615645658, "learning_rate": 9.875928882302355e-06, "loss": 0.3267204284667969, "step": 5630 }, { "epoch": 0.4923117246199546, "grad_norm": 0.5117099551303362, "learning_rate": 9.875365613898578e-06, "loss": 0.30417208671569823, "step": 5635 }, { "epoch": 0.49274855844836624, "grad_norm": 0.5403372828239124, "learning_rate": 9.874801085938616e-06, "loss": 0.3332077980041504, "step": 5640 }, { "epoch": 0.49318539227677793, "grad_norm": 0.572042881839274, "learning_rate": 9.87423529856832e-06, "loss": 0.29231762886047363, "step": 5645 }, { "epoch": 0.4936222261051896, "grad_norm": 0.6312964270220759, "learning_rate": 9.873668251933856e-06, "loss": 0.3443809986114502, "step": 5650 }, { "epoch": 0.4940590599336013, "grad_norm": 0.502528183072636, "learning_rate": 9.873099946181724e-06, "loss": 0.3258388042449951, "step": 5655 }, { "epoch": 0.4944958937620129, "grad_norm": 0.5049157042407626, "learning_rate": 9.872530381458746e-06, "loss": 0.3135204553604126, "step": 5660 }, { "epoch": 0.4949327275904246, "grad_norm": 0.6797516022913443, "learning_rate": 9.87195955791207e-06, "loss": 0.3215497016906738, "step": 5665 }, { "epoch": 0.49536956141883626, "grad_norm": 0.4813219110091631, "learning_rate": 9.871387475689171e-06, "loss": 0.3269141674041748, "step": 5670 }, { "epoch": 0.49580639524724796, "grad_norm": 0.4686485410042782, "learning_rate": 9.870814134937843e-06, "loss": 0.3300326824188232, "step": 5675 }, { "epoch": 0.4962432290756596, "grad_norm": 0.4712180919302581, "learning_rate": 9.870239535806211e-06, "loss": 0.3275312423706055, "step": 5680 }, { "epoch": 0.4966800629040713, "grad_norm": 0.6553236713853297, "learning_rate": 9.869663678442723e-06, "loss": 0.3369849681854248, "step": 5685 }, { "epoch": 0.49711689673248294, "grad_norm": 0.5243693203828563, "learning_rate": 9.869086562996154e-06, "loss": 0.32958030700683594, "step": 5690 }, { "epoch": 0.49755373056089464, "grad_norm": 0.6877072105514404, "learning_rate": 9.8685081896156e-06, "loss": 0.33879454135894777, "step": 5695 }, { "epoch": 0.49799056438930633, "grad_norm": 0.49038903685535035, "learning_rate": 9.867928558450488e-06, "loss": 0.3287527322769165, "step": 5700 }, { "epoch": 0.498427398217718, "grad_norm": 0.45373335110171265, "learning_rate": 9.867347669650562e-06, "loss": 0.3115555286407471, "step": 5705 }, { "epoch": 0.4988642320461297, "grad_norm": 0.5470003763464621, "learning_rate": 9.866765523365898e-06, "loss": 0.3227699279785156, "step": 5710 }, { "epoch": 0.4993010658745413, "grad_norm": 0.49850617559003163, "learning_rate": 9.866182119746892e-06, "loss": 0.3367525577545166, "step": 5715 }, { "epoch": 0.499737899702953, "grad_norm": 0.529563826292789, "learning_rate": 9.86559745894427e-06, "loss": 0.35074028968811033, "step": 5720 }, { "epoch": 0.5001747335313647, "grad_norm": 0.4703636292679197, "learning_rate": 9.865011541109078e-06, "loss": 0.3199999094009399, "step": 5725 }, { "epoch": 0.5006115673597763, "grad_norm": 0.5199727023519025, "learning_rate": 9.86442436639269e-06, "loss": 0.3641315698623657, "step": 5730 }, { "epoch": 0.501048401188188, "grad_norm": 0.4815688868069579, "learning_rate": 9.8638359349468e-06, "loss": 0.30329203605651855, "step": 5735 }, { "epoch": 0.5014852350165997, "grad_norm": 0.5138070984405967, "learning_rate": 9.863246246923433e-06, "loss": 0.34125123023986814, "step": 5740 }, { "epoch": 0.5019220688450113, "grad_norm": 0.5346428394057816, "learning_rate": 9.862655302474933e-06, "loss": 0.32217481136322024, "step": 5745 }, { "epoch": 0.502358902673423, "grad_norm": 0.5045557864648987, "learning_rate": 9.862063101753974e-06, "loss": 0.3241204977035522, "step": 5750 }, { "epoch": 0.5027957365018347, "grad_norm": 0.4817781733611689, "learning_rate": 9.861469644913551e-06, "loss": 0.315904426574707, "step": 5755 }, { "epoch": 0.5032325703302464, "grad_norm": 0.4882635645191746, "learning_rate": 9.860874932106981e-06, "loss": 0.3378006935119629, "step": 5760 }, { "epoch": 0.503669404158658, "grad_norm": 0.5840722401989953, "learning_rate": 9.860278963487913e-06, "loss": 0.34776475429534914, "step": 5765 }, { "epoch": 0.5041062379870698, "grad_norm": 0.46745900619280517, "learning_rate": 9.859681739210316e-06, "loss": 0.32327837944030763, "step": 5770 }, { "epoch": 0.5045430718154814, "grad_norm": 0.55870525404029, "learning_rate": 9.85908325942848e-06, "loss": 0.33731167316436766, "step": 5775 }, { "epoch": 0.5049799056438931, "grad_norm": 0.5415740414485304, "learning_rate": 9.858483524297025e-06, "loss": 0.3009993076324463, "step": 5780 }, { "epoch": 0.5054167394723047, "grad_norm": 0.488676813243234, "learning_rate": 9.857882533970895e-06, "loss": 0.30797429084777833, "step": 5785 }, { "epoch": 0.5058535733007165, "grad_norm": 0.4956320793920231, "learning_rate": 9.857280288605353e-06, "loss": 0.3044158697128296, "step": 5790 }, { "epoch": 0.5062904071291281, "grad_norm": 0.5343590324770674, "learning_rate": 9.856676788355993e-06, "loss": 0.32959957122802735, "step": 5795 }, { "epoch": 0.5067272409575397, "grad_norm": 0.5340736097222598, "learning_rate": 9.856072033378726e-06, "loss": 0.33607561588287355, "step": 5800 }, { "epoch": 0.5071640747859514, "grad_norm": 0.6244796123508815, "learning_rate": 9.855466023829796e-06, "loss": 0.3197298526763916, "step": 5805 }, { "epoch": 0.5076009086143631, "grad_norm": 0.4778835152786922, "learning_rate": 9.854858759865762e-06, "loss": 0.3236112356185913, "step": 5810 }, { "epoch": 0.5080377424427748, "grad_norm": 0.5579097059548191, "learning_rate": 9.854250241643516e-06, "loss": 0.28863677978515623, "step": 5815 }, { "epoch": 0.5084745762711864, "grad_norm": 0.5449046551995483, "learning_rate": 9.853640469320262e-06, "loss": 0.31138954162597654, "step": 5820 }, { "epoch": 0.5089114100995981, "grad_norm": 0.47640041316213727, "learning_rate": 9.853029443053544e-06, "loss": 0.2992840766906738, "step": 5825 }, { "epoch": 0.5093482439280098, "grad_norm": 0.5282450800119268, "learning_rate": 9.852417163001215e-06, "loss": 0.28574934005737307, "step": 5830 }, { "epoch": 0.5097850777564215, "grad_norm": 0.6161913136765631, "learning_rate": 9.85180362932146e-06, "loss": 0.3114015102386475, "step": 5835 }, { "epoch": 0.5102219115848331, "grad_norm": 0.5267857310212061, "learning_rate": 9.85118884217279e-06, "loss": 0.33678698539733887, "step": 5840 }, { "epoch": 0.5106587454132449, "grad_norm": 0.5060834323388204, "learning_rate": 9.85057280171403e-06, "loss": 0.25972809791564944, "step": 5845 }, { "epoch": 0.5110955792416565, "grad_norm": 0.5739018740096166, "learning_rate": 9.849955508104339e-06, "loss": 0.32025918960571287, "step": 5850 }, { "epoch": 0.5115324130700681, "grad_norm": 0.5101716043531942, "learning_rate": 9.849336961503191e-06, "loss": 0.3279205322265625, "step": 5855 }, { "epoch": 0.5119692468984798, "grad_norm": 0.4688255238745501, "learning_rate": 9.848717162070394e-06, "loss": 0.31790053844451904, "step": 5860 }, { "epoch": 0.5124060807268915, "grad_norm": 0.6183309617603872, "learning_rate": 9.848096109966068e-06, "loss": 0.33344852924346924, "step": 5865 }, { "epoch": 0.5128429145553032, "grad_norm": 0.6151359809478509, "learning_rate": 9.847473805350667e-06, "loss": 0.33341307640075685, "step": 5870 }, { "epoch": 0.5132797483837148, "grad_norm": 0.4830760313309701, "learning_rate": 9.846850248384963e-06, "loss": 0.32609970569610597, "step": 5875 }, { "epoch": 0.5137165822121265, "grad_norm": 0.809458390087954, "learning_rate": 9.84622543923005e-06, "loss": 0.37216849327087403, "step": 5880 }, { "epoch": 0.5141534160405382, "grad_norm": 0.5188541843934723, "learning_rate": 9.845599378047352e-06, "loss": 0.3411808252334595, "step": 5885 }, { "epoch": 0.5145902498689499, "grad_norm": 0.5290152655972973, "learning_rate": 9.844972064998611e-06, "loss": 0.32850592136383056, "step": 5890 }, { "epoch": 0.5150270836973615, "grad_norm": 0.4799122728779307, "learning_rate": 9.844343500245894e-06, "loss": 0.3197658538818359, "step": 5895 }, { "epoch": 0.5154639175257731, "grad_norm": 0.60096701816678, "learning_rate": 9.84371368395159e-06, "loss": 0.3495452880859375, "step": 5900 }, { "epoch": 0.5159007513541849, "grad_norm": 0.501892308342563, "learning_rate": 9.843082616278416e-06, "loss": 0.3592367649078369, "step": 5905 }, { "epoch": 0.5163375851825965, "grad_norm": 0.49150476956222877, "learning_rate": 9.842450297389405e-06, "loss": 0.3204598903656006, "step": 5910 }, { "epoch": 0.5167744190110082, "grad_norm": 0.5199851939506315, "learning_rate": 9.841816727447921e-06, "loss": 0.3123659610748291, "step": 5915 }, { "epoch": 0.5172112528394199, "grad_norm": 0.49670036534429507, "learning_rate": 9.841181906617646e-06, "loss": 0.3229984283447266, "step": 5920 }, { "epoch": 0.5176480866678316, "grad_norm": 0.5180133898772802, "learning_rate": 9.840545835062584e-06, "loss": 0.310563325881958, "step": 5925 }, { "epoch": 0.5180849204962432, "grad_norm": 0.5499742982788771, "learning_rate": 9.83990851294707e-06, "loss": 0.30941162109375, "step": 5930 }, { "epoch": 0.5185217543246549, "grad_norm": 0.42388307761680466, "learning_rate": 9.839269940435754e-06, "loss": 0.3375090599060059, "step": 5935 }, { "epoch": 0.5189585881530666, "grad_norm": 0.4611118949501949, "learning_rate": 9.838630117693612e-06, "loss": 0.2954240322113037, "step": 5940 }, { "epoch": 0.5193954219814783, "grad_norm": 0.5130400788932415, "learning_rate": 9.837989044885942e-06, "loss": 0.33081927299499514, "step": 5945 }, { "epoch": 0.5198322558098899, "grad_norm": 0.5326742192139431, "learning_rate": 9.837346722178369e-06, "loss": 0.32750937938690183, "step": 5950 }, { "epoch": 0.5202690896383015, "grad_norm": 0.48681559031836424, "learning_rate": 9.836703149736834e-06, "loss": 0.31734437942504884, "step": 5955 }, { "epoch": 0.5207059234667133, "grad_norm": 0.5144794962298275, "learning_rate": 9.836058327727607e-06, "loss": 0.3363353252410889, "step": 5960 }, { "epoch": 0.5211427572951249, "grad_norm": 0.5423446899687253, "learning_rate": 9.835412256317278e-06, "loss": 0.3270412445068359, "step": 5965 }, { "epoch": 0.5215795911235366, "grad_norm": 0.46679558159857265, "learning_rate": 9.83476493567276e-06, "loss": 0.31939024925231935, "step": 5970 }, { "epoch": 0.5220164249519483, "grad_norm": 0.6104304016438789, "learning_rate": 9.83411636596129e-06, "loss": 0.30699617862701417, "step": 5975 }, { "epoch": 0.52245325878036, "grad_norm": 0.6992596751698433, "learning_rate": 9.833466547350426e-06, "loss": 0.33792614936828613, "step": 5980 }, { "epoch": 0.5228900926087716, "grad_norm": 0.6128706504530955, "learning_rate": 9.832815480008048e-06, "loss": 0.3215433359146118, "step": 5985 }, { "epoch": 0.5233269264371833, "grad_norm": 0.4668823943557716, "learning_rate": 9.832163164102362e-06, "loss": 0.28201205730438234, "step": 5990 }, { "epoch": 0.523763760265595, "grad_norm": 0.5964113356131845, "learning_rate": 9.831509599801894e-06, "loss": 0.29841573238372804, "step": 5995 }, { "epoch": 0.5242005940940067, "grad_norm": 0.5468134389952048, "learning_rate": 9.830854787275493e-06, "loss": 0.3065089464187622, "step": 6000 }, { "epoch": 0.5246374279224183, "grad_norm": 0.4852722129499418, "learning_rate": 9.830198726692331e-06, "loss": 0.34604525566101074, "step": 6005 }, { "epoch": 0.5250742617508299, "grad_norm": 0.4891456332726376, "learning_rate": 9.8295414182219e-06, "loss": 0.3111282825469971, "step": 6010 }, { "epoch": 0.5255110955792417, "grad_norm": 0.43979440637177475, "learning_rate": 9.82888286203402e-06, "loss": 0.31007728576660154, "step": 6015 }, { "epoch": 0.5259479294076533, "grad_norm": 0.45490984825855046, "learning_rate": 9.828223058298825e-06, "loss": 0.3356476306915283, "step": 6020 }, { "epoch": 0.526384763236065, "grad_norm": 0.4398792235875625, "learning_rate": 9.82756200718678e-06, "loss": 0.3205873489379883, "step": 6025 }, { "epoch": 0.5268215970644766, "grad_norm": 0.45846744603805556, "learning_rate": 9.826899708868666e-06, "loss": 0.32191243171691897, "step": 6030 }, { "epoch": 0.5272584308928884, "grad_norm": 0.6190510120563656, "learning_rate": 9.82623616351559e-06, "loss": 0.3534689903259277, "step": 6035 }, { "epoch": 0.5276952647213, "grad_norm": 0.4939255688082058, "learning_rate": 9.82557137129898e-06, "loss": 0.3323659420013428, "step": 6040 }, { "epoch": 0.5281320985497117, "grad_norm": 0.5903542368989865, "learning_rate": 9.824905332390584e-06, "loss": 0.3217034339904785, "step": 6045 }, { "epoch": 0.5285689323781234, "grad_norm": 0.5367738354970761, "learning_rate": 9.824238046962475e-06, "loss": 0.3236216068267822, "step": 6050 }, { "epoch": 0.529005766206535, "grad_norm": 0.5588922910564244, "learning_rate": 9.823569515187047e-06, "loss": 0.30690879821777345, "step": 6055 }, { "epoch": 0.5294426000349467, "grad_norm": 0.5232296002596644, "learning_rate": 9.822899737237016e-06, "loss": 0.29073290824890136, "step": 6060 }, { "epoch": 0.5298794338633583, "grad_norm": 0.47251598375147863, "learning_rate": 9.822228713285417e-06, "loss": 0.33078651428222655, "step": 6065 }, { "epoch": 0.5303162676917701, "grad_norm": 0.6577684743675342, "learning_rate": 9.821556443505615e-06, "loss": 0.31955173015594485, "step": 6070 }, { "epoch": 0.5307531015201817, "grad_norm": 0.5263181738866646, "learning_rate": 9.82088292807129e-06, "loss": 0.3468482494354248, "step": 6075 }, { "epoch": 0.5311899353485934, "grad_norm": 0.5839832763450767, "learning_rate": 9.820208167156441e-06, "loss": 0.32794525623321535, "step": 6080 }, { "epoch": 0.531626769177005, "grad_norm": 0.5469220416629879, "learning_rate": 9.8195321609354e-06, "loss": 0.29346046447753904, "step": 6085 }, { "epoch": 0.5320636030054168, "grad_norm": 0.5350686216691241, "learning_rate": 9.81885490958281e-06, "loss": 0.3137685298919678, "step": 6090 }, { "epoch": 0.5325004368338284, "grad_norm": 0.5554314857296461, "learning_rate": 9.81817641327364e-06, "loss": 0.3166851282119751, "step": 6095 }, { "epoch": 0.5329372706622401, "grad_norm": 0.525535427550923, "learning_rate": 9.81749667218318e-06, "loss": 0.31662912368774415, "step": 6100 }, { "epoch": 0.5333741044906517, "grad_norm": 0.4719496246664382, "learning_rate": 9.816815686487042e-06, "loss": 0.29541969299316406, "step": 6105 }, { "epoch": 0.5338109383190635, "grad_norm": 0.4584709494045525, "learning_rate": 9.816133456361161e-06, "loss": 0.30390331745147703, "step": 6110 }, { "epoch": 0.5342477721474751, "grad_norm": 0.41551649614716646, "learning_rate": 9.81544998198179e-06, "loss": 0.32845301628112794, "step": 6115 }, { "epoch": 0.5346846059758867, "grad_norm": 0.4800671132948581, "learning_rate": 9.814765263525508e-06, "loss": 0.29924798011779785, "step": 6120 }, { "epoch": 0.5351214398042985, "grad_norm": 0.5099176106224638, "learning_rate": 9.814079301169211e-06, "loss": 0.3187701225280762, "step": 6125 }, { "epoch": 0.5355582736327101, "grad_norm": 0.4901952491497636, "learning_rate": 9.813392095090116e-06, "loss": 0.32009937763214114, "step": 6130 }, { "epoch": 0.5359951074611218, "grad_norm": 0.46010078087547385, "learning_rate": 9.812703645465767e-06, "loss": 0.3063355445861816, "step": 6135 }, { "epoch": 0.5364319412895334, "grad_norm": 0.5339024155679849, "learning_rate": 9.812013952474023e-06, "loss": 0.29492034912109377, "step": 6140 }, { "epoch": 0.5368687751179452, "grad_norm": 0.5301224694014138, "learning_rate": 9.81132301629307e-06, "loss": 0.3193953990936279, "step": 6145 }, { "epoch": 0.5373056089463568, "grad_norm": 0.48285024419906336, "learning_rate": 9.810630837101409e-06, "loss": 0.30362935066223146, "step": 6150 }, { "epoch": 0.5377424427747685, "grad_norm": 0.5641193887672764, "learning_rate": 9.809937415077867e-06, "loss": 0.3049565553665161, "step": 6155 }, { "epoch": 0.5381792766031801, "grad_norm": 0.5412846262669979, "learning_rate": 9.809242750401591e-06, "loss": 0.3135908842086792, "step": 6160 }, { "epoch": 0.5386161104315919, "grad_norm": 0.5530614671172678, "learning_rate": 9.808546843252046e-06, "loss": 0.2992690086364746, "step": 6165 }, { "epoch": 0.5390529442600035, "grad_norm": 0.5359029819361658, "learning_rate": 9.807849693809024e-06, "loss": 0.31996927261352537, "step": 6170 }, { "epoch": 0.5394897780884151, "grad_norm": 0.5469567347367944, "learning_rate": 9.807151302252632e-06, "loss": 0.3361514091491699, "step": 6175 }, { "epoch": 0.5399266119168269, "grad_norm": 0.49793438502596693, "learning_rate": 9.806451668763299e-06, "loss": 0.3240628242492676, "step": 6180 }, { "epoch": 0.5403634457452385, "grad_norm": 0.5634081428421762, "learning_rate": 9.805750793521778e-06, "loss": 0.3341865539550781, "step": 6185 }, { "epoch": 0.5408002795736502, "grad_norm": 0.5766307100593722, "learning_rate": 9.805048676709141e-06, "loss": 0.3340183734893799, "step": 6190 }, { "epoch": 0.5412371134020618, "grad_norm": 0.5162816247679765, "learning_rate": 9.80434531850678e-06, "loss": 0.31493215560913085, "step": 6195 }, { "epoch": 0.5416739472304736, "grad_norm": 0.5054814791906779, "learning_rate": 9.803640719096408e-06, "loss": 0.30943660736083983, "step": 6200 }, { "epoch": 0.5421107810588852, "grad_norm": 0.4978172437663457, "learning_rate": 9.80293487866006e-06, "loss": 0.2935888051986694, "step": 6205 }, { "epoch": 0.5425476148872969, "grad_norm": 0.6546418704382887, "learning_rate": 9.80222779738009e-06, "loss": 0.3052821636199951, "step": 6210 }, { "epoch": 0.5429844487157085, "grad_norm": 0.5154479788612225, "learning_rate": 9.801519475439174e-06, "loss": 0.34078636169433596, "step": 6215 }, { "epoch": 0.5434212825441203, "grad_norm": 0.5871205244684918, "learning_rate": 9.800809913020306e-06, "loss": 0.32512507438659666, "step": 6220 }, { "epoch": 0.5438581163725319, "grad_norm": 0.49852372410596224, "learning_rate": 9.800099110306804e-06, "loss": 0.30539608001708984, "step": 6225 }, { "epoch": 0.5442949502009435, "grad_norm": 0.8534766811495588, "learning_rate": 9.799387067482306e-06, "loss": 0.3105482578277588, "step": 6230 }, { "epoch": 0.5447317840293552, "grad_norm": 0.4231437988486588, "learning_rate": 9.798673784730766e-06, "loss": 0.33448209762573244, "step": 6235 }, { "epoch": 0.5451686178577669, "grad_norm": 0.7781772544076543, "learning_rate": 9.79795926223646e-06, "loss": 0.3298498153686523, "step": 6240 }, { "epoch": 0.5456054516861786, "grad_norm": 0.4992357499100324, "learning_rate": 9.797243500183991e-06, "loss": 0.2835962772369385, "step": 6245 }, { "epoch": 0.5460422855145902, "grad_norm": 0.5396391359340978, "learning_rate": 9.796526498758274e-06, "loss": 0.31627306938171384, "step": 6250 }, { "epoch": 0.546479119343002, "grad_norm": 0.5235258985745219, "learning_rate": 9.795808258144548e-06, "loss": 0.3288753032684326, "step": 6255 }, { "epoch": 0.5469159531714136, "grad_norm": 0.5005495311062432, "learning_rate": 9.79508877852837e-06, "loss": 0.3247203826904297, "step": 6260 }, { "epoch": 0.5473527869998253, "grad_norm": 0.45017545907021317, "learning_rate": 9.794368060095619e-06, "loss": 0.3019089937210083, "step": 6265 }, { "epoch": 0.5477896208282369, "grad_norm": 0.5263897234499284, "learning_rate": 9.793646103032493e-06, "loss": 0.31860828399658203, "step": 6270 }, { "epoch": 0.5482264546566487, "grad_norm": 0.4880258612181439, "learning_rate": 9.79292290752551e-06, "loss": 0.3363968849182129, "step": 6275 }, { "epoch": 0.5486632884850603, "grad_norm": 0.5294365770259739, "learning_rate": 9.792198473761511e-06, "loss": 0.3438690662384033, "step": 6280 }, { "epoch": 0.5491001223134719, "grad_norm": 0.4525949066853338, "learning_rate": 9.791472801927653e-06, "loss": 0.306095027923584, "step": 6285 }, { "epoch": 0.5495369561418836, "grad_norm": 0.5546378493500422, "learning_rate": 9.790745892211412e-06, "loss": 0.31283888816833494, "step": 6290 }, { "epoch": 0.5499737899702953, "grad_norm": 0.4909230327674068, "learning_rate": 9.790017744800588e-06, "loss": 0.31217069625854493, "step": 6295 }, { "epoch": 0.550410623798707, "grad_norm": 0.48985039470884817, "learning_rate": 9.789288359883298e-06, "loss": 0.3187845706939697, "step": 6300 }, { "epoch": 0.5508474576271186, "grad_norm": 0.4851837585519672, "learning_rate": 9.788557737647982e-06, "loss": 0.306842303276062, "step": 6305 }, { "epoch": 0.5512842914555303, "grad_norm": 0.5453279719661425, "learning_rate": 9.78782587828339e-06, "loss": 0.3402463436126709, "step": 6310 }, { "epoch": 0.551721125283942, "grad_norm": 0.6563210650722594, "learning_rate": 9.787092781978607e-06, "loss": 0.33607163429260256, "step": 6315 }, { "epoch": 0.5521579591123537, "grad_norm": 0.47549957667844217, "learning_rate": 9.786358448923025e-06, "loss": 0.3364156723022461, "step": 6320 }, { "epoch": 0.5525947929407653, "grad_norm": 0.5083461230516589, "learning_rate": 9.78562287930636e-06, "loss": 0.3054628372192383, "step": 6325 }, { "epoch": 0.553031626769177, "grad_norm": 0.41813006414347426, "learning_rate": 9.784886073318649e-06, "loss": 0.3313840389251709, "step": 6330 }, { "epoch": 0.5534684605975887, "grad_norm": 0.4732217361065534, "learning_rate": 9.784148031150244e-06, "loss": 0.30528435707092283, "step": 6335 }, { "epoch": 0.5539052944260003, "grad_norm": 0.5285942474943575, "learning_rate": 9.783408752991821e-06, "loss": 0.30856914520263673, "step": 6340 }, { "epoch": 0.554342128254412, "grad_norm": 0.3982928215200378, "learning_rate": 9.782668239034373e-06, "loss": 0.3137691497802734, "step": 6345 }, { "epoch": 0.5547789620828237, "grad_norm": 0.4649913593008793, "learning_rate": 9.781926489469212e-06, "loss": 0.31018290519714353, "step": 6350 }, { "epoch": 0.5552157959112354, "grad_norm": 0.5208249845726253, "learning_rate": 9.78118350448797e-06, "loss": 0.3424390316009521, "step": 6355 }, { "epoch": 0.555652629739647, "grad_norm": 0.4278862691608086, "learning_rate": 9.7804392842826e-06, "loss": 0.3068122863769531, "step": 6360 }, { "epoch": 0.5560894635680587, "grad_norm": 0.5982757211190115, "learning_rate": 9.779693829045367e-06, "loss": 0.3339836120605469, "step": 6365 }, { "epoch": 0.5565262973964704, "grad_norm": 0.4223640267836373, "learning_rate": 9.778947138968866e-06, "loss": 0.2918074131011963, "step": 6370 }, { "epoch": 0.5569631312248821, "grad_norm": 0.5947708149088689, "learning_rate": 9.778199214246001e-06, "loss": 0.33418500423431396, "step": 6375 }, { "epoch": 0.5573999650532937, "grad_norm": 0.5510882709152802, "learning_rate": 9.777450055070002e-06, "loss": 0.2829581260681152, "step": 6380 }, { "epoch": 0.5578367988817055, "grad_norm": 0.5296985489551717, "learning_rate": 9.776699661634414e-06, "loss": 0.27202162742614744, "step": 6385 }, { "epoch": 0.5582736327101171, "grad_norm": 0.4932561754645821, "learning_rate": 9.775948034133102e-06, "loss": 0.3494239091873169, "step": 6390 }, { "epoch": 0.5587104665385287, "grad_norm": 0.4441033915679764, "learning_rate": 9.775195172760251e-06, "loss": 0.3282155990600586, "step": 6395 }, { "epoch": 0.5591473003669404, "grad_norm": 0.4881572473402294, "learning_rate": 9.77444107771036e-06, "loss": 0.31967720985412595, "step": 6400 }, { "epoch": 0.5595841341953521, "grad_norm": 0.5953496378263087, "learning_rate": 9.773685749178253e-06, "loss": 0.3294121742248535, "step": 6405 }, { "epoch": 0.5600209680237638, "grad_norm": 0.5361378206179315, "learning_rate": 9.77292918735907e-06, "loss": 0.3127251863479614, "step": 6410 }, { "epoch": 0.5604578018521754, "grad_norm": 0.48544215987633715, "learning_rate": 9.77217139244827e-06, "loss": 0.32505991458892824, "step": 6415 }, { "epoch": 0.5608946356805871, "grad_norm": 0.6015747644849578, "learning_rate": 9.77141236464163e-06, "loss": 0.33009748458862304, "step": 6420 }, { "epoch": 0.5613314695089988, "grad_norm": 0.5272362128195205, "learning_rate": 9.770652104135243e-06, "loss": 0.31910669803619385, "step": 6425 }, { "epoch": 0.5617683033374105, "grad_norm": 0.4684392146677298, "learning_rate": 9.769890611125527e-06, "loss": 0.3084476709365845, "step": 6430 }, { "epoch": 0.5622051371658221, "grad_norm": 0.5413005686796064, "learning_rate": 9.769127885809213e-06, "loss": 0.3303098201751709, "step": 6435 }, { "epoch": 0.5626419709942337, "grad_norm": 0.530865979939572, "learning_rate": 9.768363928383352e-06, "loss": 0.31355652809143064, "step": 6440 }, { "epoch": 0.5630788048226455, "grad_norm": 0.6290806204238848, "learning_rate": 9.767598739045312e-06, "loss": 0.3354077100753784, "step": 6445 }, { "epoch": 0.5635156386510571, "grad_norm": 0.4980622653742548, "learning_rate": 9.766832317992782e-06, "loss": 0.3053667783737183, "step": 6450 }, { "epoch": 0.5639524724794688, "grad_norm": 0.4895597862816491, "learning_rate": 9.766064665423768e-06, "loss": 0.30860466957092286, "step": 6455 }, { "epoch": 0.5643893063078805, "grad_norm": 0.4527303697575985, "learning_rate": 9.765295781536593e-06, "loss": 0.32692546844482423, "step": 6460 }, { "epoch": 0.5648261401362922, "grad_norm": 0.4537422482532381, "learning_rate": 9.764525666529899e-06, "loss": 0.3055605173110962, "step": 6465 }, { "epoch": 0.5652629739647038, "grad_norm": 0.5644500312840487, "learning_rate": 9.763754320602645e-06, "loss": 0.34294915199279785, "step": 6470 }, { "epoch": 0.5656998077931155, "grad_norm": 0.4401508805564259, "learning_rate": 9.762981743954112e-06, "loss": 0.2794306755065918, "step": 6475 }, { "epoch": 0.5661366416215272, "grad_norm": 0.46689024759933545, "learning_rate": 9.762207936783895e-06, "loss": 0.32485647201538087, "step": 6480 }, { "epoch": 0.5665734754499389, "grad_norm": 0.6168381225304478, "learning_rate": 9.761432899291906e-06, "loss": 0.2996094226837158, "step": 6485 }, { "epoch": 0.5670103092783505, "grad_norm": 0.5380850528023575, "learning_rate": 9.760656631678377e-06, "loss": 0.3229713439941406, "step": 6490 }, { "epoch": 0.5674471431067621, "grad_norm": 0.4863896784398112, "learning_rate": 9.759879134143861e-06, "loss": 0.31436939239501954, "step": 6495 }, { "epoch": 0.5678839769351739, "grad_norm": 0.4785929795995449, "learning_rate": 9.759100406889223e-06, "loss": 0.31489109992980957, "step": 6500 }, { "epoch": 0.5683208107635855, "grad_norm": 0.545516667116671, "learning_rate": 9.758320450115648e-06, "loss": 0.3252610683441162, "step": 6505 }, { "epoch": 0.5687576445919972, "grad_norm": 0.47487856192373995, "learning_rate": 9.75753926402464e-06, "loss": 0.30904879570007326, "step": 6510 }, { "epoch": 0.5691944784204088, "grad_norm": 0.539192254881329, "learning_rate": 9.756756848818016e-06, "loss": 0.3162715435028076, "step": 6515 }, { "epoch": 0.5696313122488206, "grad_norm": 0.4437069503490946, "learning_rate": 9.755973204697918e-06, "loss": 0.33615703582763673, "step": 6520 }, { "epoch": 0.5700681460772322, "grad_norm": 0.5160245560502997, "learning_rate": 9.755188331866799e-06, "loss": 0.3319102764129639, "step": 6525 }, { "epoch": 0.5705049799056439, "grad_norm": 0.43148089884735674, "learning_rate": 9.754402230527433e-06, "loss": 0.3324231386184692, "step": 6530 }, { "epoch": 0.5709418137340556, "grad_norm": 0.5087778121998668, "learning_rate": 9.753614900882907e-06, "loss": 0.31247615814208984, "step": 6535 }, { "epoch": 0.5713786475624673, "grad_norm": 0.5112267577199514, "learning_rate": 9.752826343136632e-06, "loss": 0.3096801280975342, "step": 6540 }, { "epoch": 0.5718154813908789, "grad_norm": 0.524734604006507, "learning_rate": 9.752036557492333e-06, "loss": 0.3329927921295166, "step": 6545 }, { "epoch": 0.5722523152192905, "grad_norm": 0.6041556716557854, "learning_rate": 9.751245544154049e-06, "loss": 0.33375000953674316, "step": 6550 }, { "epoch": 0.5726891490477023, "grad_norm": 0.5420988440483534, "learning_rate": 9.750453303326141e-06, "loss": 0.34026174545288085, "step": 6555 }, { "epoch": 0.5731259828761139, "grad_norm": 0.5344761742870932, "learning_rate": 9.749659835213287e-06, "loss": 0.31345558166503906, "step": 6560 }, { "epoch": 0.5735628167045256, "grad_norm": 0.498834922760029, "learning_rate": 9.748865140020477e-06, "loss": 0.33810153007507326, "step": 6565 }, { "epoch": 0.5739996505329372, "grad_norm": 0.5626070372920341, "learning_rate": 9.748069217953023e-06, "loss": 0.321174168586731, "step": 6570 }, { "epoch": 0.574436484361349, "grad_norm": 0.5773558739373102, "learning_rate": 9.747272069216553e-06, "loss": 0.3077446937561035, "step": 6575 }, { "epoch": 0.5748733181897606, "grad_norm": 0.5406481574800369, "learning_rate": 9.74647369401701e-06, "loss": 0.3222812175750732, "step": 6580 }, { "epoch": 0.5753101520181723, "grad_norm": 0.4342514974485511, "learning_rate": 9.745674092560655e-06, "loss": 0.33725414276123045, "step": 6585 }, { "epoch": 0.575746985846584, "grad_norm": 0.5486797364427679, "learning_rate": 9.744873265054068e-06, "loss": 0.3163029432296753, "step": 6590 }, { "epoch": 0.5761838196749957, "grad_norm": 0.517138841746569, "learning_rate": 9.744071211704143e-06, "loss": 0.31975903511047366, "step": 6595 }, { "epoch": 0.5766206535034073, "grad_norm": 0.6272247862160041, "learning_rate": 9.743267932718087e-06, "loss": 0.2851548671722412, "step": 6600 }, { "epoch": 0.5770574873318189, "grad_norm": 0.5065992368324438, "learning_rate": 9.742463428303435e-06, "loss": 0.32284746170043943, "step": 6605 }, { "epoch": 0.5774943211602307, "grad_norm": 0.512961659650958, "learning_rate": 9.74165769866803e-06, "loss": 0.31704487800598147, "step": 6610 }, { "epoch": 0.5779311549886423, "grad_norm": 0.4796171828398788, "learning_rate": 9.740850744020027e-06, "loss": 0.3094925403594971, "step": 6615 }, { "epoch": 0.578367988817054, "grad_norm": 0.5909231858279781, "learning_rate": 9.740042564567912e-06, "loss": 0.32551429271697996, "step": 6620 }, { "epoch": 0.5788048226454656, "grad_norm": 0.5464179064107088, "learning_rate": 9.739233160520472e-06, "loss": 0.33297321796417234, "step": 6625 }, { "epoch": 0.5792416564738774, "grad_norm": 0.5041371975882666, "learning_rate": 9.738422532086822e-06, "loss": 0.2849516630172729, "step": 6630 }, { "epoch": 0.579678490302289, "grad_norm": 0.5278193710196407, "learning_rate": 9.737610679476388e-06, "loss": 0.2993858814239502, "step": 6635 }, { "epoch": 0.5801153241307007, "grad_norm": 0.5442264803053684, "learning_rate": 9.736797602898914e-06, "loss": 0.32997727394104004, "step": 6640 }, { "epoch": 0.5805521579591123, "grad_norm": 0.5192823301048414, "learning_rate": 9.735983302564455e-06, "loss": 0.3211833000183105, "step": 6645 }, { "epoch": 0.5809889917875241, "grad_norm": 0.4196994178003978, "learning_rate": 9.73516777868339e-06, "loss": 0.34179446697235105, "step": 6650 }, { "epoch": 0.5814258256159357, "grad_norm": 0.5326961436538507, "learning_rate": 9.734351031466409e-06, "loss": 0.31445937156677245, "step": 6655 }, { "epoch": 0.5818626594443473, "grad_norm": 0.5485334903983746, "learning_rate": 9.733533061124521e-06, "loss": 0.2993583679199219, "step": 6660 }, { "epoch": 0.5822994932727591, "grad_norm": 0.4989233195514557, "learning_rate": 9.73271386786905e-06, "loss": 0.3097491979598999, "step": 6665 }, { "epoch": 0.5827363271011707, "grad_norm": 0.4832969477297964, "learning_rate": 9.731893451911632e-06, "loss": 0.31270763874053953, "step": 6670 }, { "epoch": 0.5831731609295824, "grad_norm": 0.5360738915730199, "learning_rate": 9.731071813464225e-06, "loss": 0.34092252254486083, "step": 6675 }, { "epoch": 0.583609994757994, "grad_norm": 0.4638445393124289, "learning_rate": 9.730248952739102e-06, "loss": 0.3004140377044678, "step": 6680 }, { "epoch": 0.5840468285864058, "grad_norm": 0.5033128029600158, "learning_rate": 9.729424869948845e-06, "loss": 0.31487646102905276, "step": 6685 }, { "epoch": 0.5844836624148174, "grad_norm": 0.5271667929222138, "learning_rate": 9.72859956530636e-06, "loss": 0.3152409791946411, "step": 6690 }, { "epoch": 0.5849204962432291, "grad_norm": 0.4899450838391336, "learning_rate": 9.727773039024866e-06, "loss": 0.31670122146606444, "step": 6695 }, { "epoch": 0.5853573300716407, "grad_norm": 0.48489121193871815, "learning_rate": 9.726945291317897e-06, "loss": 0.3489399433135986, "step": 6700 }, { "epoch": 0.5857941639000525, "grad_norm": 0.5655678586117054, "learning_rate": 9.726116322399299e-06, "loss": 0.27698872089385984, "step": 6705 }, { "epoch": 0.5862309977284641, "grad_norm": 0.5092752299077548, "learning_rate": 9.72528613248324e-06, "loss": 0.31446127891540526, "step": 6710 }, { "epoch": 0.5866678315568757, "grad_norm": 0.5394754930710773, "learning_rate": 9.724454721784202e-06, "loss": 0.30352780818939207, "step": 6715 }, { "epoch": 0.5871046653852874, "grad_norm": 0.5606450473013448, "learning_rate": 9.723622090516978e-06, "loss": 0.2998064994812012, "step": 6720 }, { "epoch": 0.5875414992136991, "grad_norm": 0.4623164567287061, "learning_rate": 9.72278823889668e-06, "loss": 0.29731733798980714, "step": 6725 }, { "epoch": 0.5879783330421108, "grad_norm": 0.5850190155661937, "learning_rate": 9.721953167138734e-06, "loss": 0.3113441467285156, "step": 6730 }, { "epoch": 0.5884151668705224, "grad_norm": 0.43908062183516045, "learning_rate": 9.721116875458883e-06, "loss": 0.3287813663482666, "step": 6735 }, { "epoch": 0.5888520006989342, "grad_norm": 0.5689099877031701, "learning_rate": 9.720279364073184e-06, "loss": 0.34227685928344725, "step": 6740 }, { "epoch": 0.5892888345273458, "grad_norm": 0.5329709372032192, "learning_rate": 9.719440633198006e-06, "loss": 0.3214404582977295, "step": 6745 }, { "epoch": 0.5897256683557575, "grad_norm": 0.4485927398592606, "learning_rate": 9.71860068305004e-06, "loss": 0.31965181827545164, "step": 6750 }, { "epoch": 0.5901625021841691, "grad_norm": 0.49259783314080413, "learning_rate": 9.717759513846286e-06, "loss": 0.3204825401306152, "step": 6755 }, { "epoch": 0.5905993360125809, "grad_norm": 0.5020025766292225, "learning_rate": 9.716917125804062e-06, "loss": 0.31321010589599607, "step": 6760 }, { "epoch": 0.5910361698409925, "grad_norm": 0.5316866093419601, "learning_rate": 9.716073519140999e-06, "loss": 0.3356682777404785, "step": 6765 }, { "epoch": 0.5914730036694041, "grad_norm": 0.5504021359483336, "learning_rate": 9.715228694075043e-06, "loss": 0.3148735523223877, "step": 6770 }, { "epoch": 0.5919098374978158, "grad_norm": 0.5029865471903693, "learning_rate": 9.714382650824459e-06, "loss": 0.3066816806793213, "step": 6775 }, { "epoch": 0.5923466713262275, "grad_norm": 0.4794445733133148, "learning_rate": 9.713535389607817e-06, "loss": 0.29470186233520507, "step": 6780 }, { "epoch": 0.5927835051546392, "grad_norm": 0.4597502693763415, "learning_rate": 9.712686910644016e-06, "loss": 0.3164568662643433, "step": 6785 }, { "epoch": 0.5932203389830508, "grad_norm": 0.5109377924204458, "learning_rate": 9.711837214152254e-06, "loss": 0.3332784175872803, "step": 6790 }, { "epoch": 0.5936571728114626, "grad_norm": 0.5005396419562618, "learning_rate": 9.710986300352056e-06, "loss": 0.3254432678222656, "step": 6795 }, { "epoch": 0.5940940066398742, "grad_norm": 0.5441463489149413, "learning_rate": 9.710134169463253e-06, "loss": 0.35677623748779297, "step": 6800 }, { "epoch": 0.5945308404682859, "grad_norm": 0.44055702537624397, "learning_rate": 9.709280821705997e-06, "loss": 0.32401695251464846, "step": 6805 }, { "epoch": 0.5949676742966975, "grad_norm": 0.5534819686100231, "learning_rate": 9.70842625730075e-06, "loss": 0.29012014865875246, "step": 6810 }, { "epoch": 0.5954045081251093, "grad_norm": 0.5191798249786866, "learning_rate": 9.70757047646829e-06, "loss": 0.31830406188964844, "step": 6815 }, { "epoch": 0.5958413419535209, "grad_norm": 0.5303834575948941, "learning_rate": 9.706713479429708e-06, "loss": 0.35505080223083496, "step": 6820 }, { "epoch": 0.5962781757819325, "grad_norm": 0.5714095174279239, "learning_rate": 9.70585526640641e-06, "loss": 0.30403957366943357, "step": 6825 }, { "epoch": 0.5967150096103442, "grad_norm": 0.5749705246536395, "learning_rate": 9.70499583762012e-06, "loss": 0.3200355529785156, "step": 6830 }, { "epoch": 0.5971518434387559, "grad_norm": 0.43929624892484864, "learning_rate": 9.704135193292868e-06, "loss": 0.32276365756988523, "step": 6835 }, { "epoch": 0.5975886772671676, "grad_norm": 0.4517252776722219, "learning_rate": 9.703273333647002e-06, "loss": 0.2612508058547974, "step": 6840 }, { "epoch": 0.5980255110955792, "grad_norm": 0.48686460973401346, "learning_rate": 9.702410258905188e-06, "loss": 0.3117987155914307, "step": 6845 }, { "epoch": 0.5984623449239909, "grad_norm": 0.5200254964484917, "learning_rate": 9.7015459692904e-06, "loss": 0.322119402885437, "step": 6850 }, { "epoch": 0.5988991787524026, "grad_norm": 0.5023322959280236, "learning_rate": 9.70068046502593e-06, "loss": 0.3299184083938599, "step": 6855 }, { "epoch": 0.5993360125808143, "grad_norm": 0.48981754410616, "learning_rate": 9.69981374633538e-06, "loss": 0.31357483863830565, "step": 6860 }, { "epoch": 0.5997728464092259, "grad_norm": 0.5272642984119638, "learning_rate": 9.69894581344267e-06, "loss": 0.32370235919952395, "step": 6865 }, { "epoch": 0.6002096802376377, "grad_norm": 0.4621498704599589, "learning_rate": 9.698076666572028e-06, "loss": 0.3116750955581665, "step": 6870 }, { "epoch": 0.6006465140660493, "grad_norm": 0.6186665241445605, "learning_rate": 9.697206305948003e-06, "loss": 0.30641300678253175, "step": 6875 }, { "epoch": 0.6010833478944609, "grad_norm": 0.5090994376729726, "learning_rate": 9.69633473179545e-06, "loss": 0.3289961576461792, "step": 6880 }, { "epoch": 0.6015201817228726, "grad_norm": 0.5717826067902803, "learning_rate": 9.695461944339544e-06, "loss": 0.31000990867614747, "step": 6885 }, { "epoch": 0.6019570155512843, "grad_norm": 0.6717553716522291, "learning_rate": 9.69458794380577e-06, "loss": 0.3099811553955078, "step": 6890 }, { "epoch": 0.602393849379696, "grad_norm": 0.5047850803448942, "learning_rate": 9.693712730419927e-06, "loss": 0.33166751861572263, "step": 6895 }, { "epoch": 0.6028306832081076, "grad_norm": 0.5120789851901213, "learning_rate": 9.692836304408124e-06, "loss": 0.3105798244476318, "step": 6900 }, { "epoch": 0.6032675170365193, "grad_norm": 0.5349938831926453, "learning_rate": 9.691958665996792e-06, "loss": 0.3286198616027832, "step": 6905 }, { "epoch": 0.603704350864931, "grad_norm": 0.5036942570007846, "learning_rate": 9.691079815412667e-06, "loss": 0.29644999504089353, "step": 6910 }, { "epoch": 0.6041411846933427, "grad_norm": 0.5156433579092098, "learning_rate": 9.6901997528828e-06, "loss": 0.3591575860977173, "step": 6915 }, { "epoch": 0.6045780185217543, "grad_norm": 0.5425562115519748, "learning_rate": 9.68931847863456e-06, "loss": 0.31996798515319824, "step": 6920 }, { "epoch": 0.6050148523501659, "grad_norm": 0.5224539459028358, "learning_rate": 9.688435992895622e-06, "loss": 0.3152348041534424, "step": 6925 }, { "epoch": 0.6054516861785777, "grad_norm": 0.44068070127877484, "learning_rate": 9.687552295893978e-06, "loss": 0.30735299587249754, "step": 6930 }, { "epoch": 0.6058885200069893, "grad_norm": 0.4542632246345442, "learning_rate": 9.686667387857931e-06, "loss": 0.3216227054595947, "step": 6935 }, { "epoch": 0.606325353835401, "grad_norm": 0.5003721092011315, "learning_rate": 9.685781269016101e-06, "loss": 0.31608607769012453, "step": 6940 }, { "epoch": 0.6067621876638127, "grad_norm": 0.6102272571868325, "learning_rate": 9.684893939597414e-06, "loss": 0.31104512214660646, "step": 6945 }, { "epoch": 0.6071990214922244, "grad_norm": 0.5985962755050708, "learning_rate": 9.684005399831116e-06, "loss": 0.312805700302124, "step": 6950 }, { "epoch": 0.607635855320636, "grad_norm": 0.5006009603387046, "learning_rate": 9.683115649946761e-06, "loss": 0.28683815002441404, "step": 6955 }, { "epoch": 0.6080726891490477, "grad_norm": 0.48011125464252946, "learning_rate": 9.682224690174215e-06, "loss": 0.27176265716552733, "step": 6960 }, { "epoch": 0.6085095229774594, "grad_norm": 0.5565293889036487, "learning_rate": 9.68133252074366e-06, "loss": 0.3041905641555786, "step": 6965 }, { "epoch": 0.6089463568058711, "grad_norm": 0.46374399440103403, "learning_rate": 9.68043914188559e-06, "loss": 0.3157806396484375, "step": 6970 }, { "epoch": 0.6093831906342827, "grad_norm": 0.5400291440792652, "learning_rate": 9.67954455383081e-06, "loss": 0.32120504379272463, "step": 6975 }, { "epoch": 0.6098200244626943, "grad_norm": 0.5563736302781244, "learning_rate": 9.678648756810435e-06, "loss": 0.306745719909668, "step": 6980 }, { "epoch": 0.6102568582911061, "grad_norm": 0.5045411256224356, "learning_rate": 9.6777517510559e-06, "loss": 0.3307461977005005, "step": 6985 }, { "epoch": 0.6106936921195177, "grad_norm": 0.576880862109804, "learning_rate": 9.676853536798943e-06, "loss": 0.3284144401550293, "step": 6990 }, { "epoch": 0.6111305259479294, "grad_norm": 0.5339038208917259, "learning_rate": 9.675954114271622e-06, "loss": 0.30486068725585935, "step": 6995 }, { "epoch": 0.6115673597763411, "grad_norm": 0.4961090385195257, "learning_rate": 9.675053483706302e-06, "loss": 0.3061419725418091, "step": 7000 }, { "epoch": 0.6120041936047528, "grad_norm": 0.4129123200158861, "learning_rate": 9.674151645335662e-06, "loss": 0.33312206268310546, "step": 7005 }, { "epoch": 0.6124410274331644, "grad_norm": 0.5171782686149263, "learning_rate": 9.673248599392693e-06, "loss": 0.31880717277526854, "step": 7010 }, { "epoch": 0.6128778612615761, "grad_norm": 0.5910928889267394, "learning_rate": 9.672344346110697e-06, "loss": 0.30821690559387205, "step": 7015 }, { "epoch": 0.6133146950899878, "grad_norm": 0.521177581182026, "learning_rate": 9.671438885723292e-06, "loss": 0.29243614673614504, "step": 7020 }, { "epoch": 0.6137515289183995, "grad_norm": 0.592950252960173, "learning_rate": 9.670532218464404e-06, "loss": 0.3534694194793701, "step": 7025 }, { "epoch": 0.6141883627468111, "grad_norm": 0.4860500306705665, "learning_rate": 9.669624344568267e-06, "loss": 0.3364858627319336, "step": 7030 }, { "epoch": 0.6146251965752227, "grad_norm": 0.5663471601415966, "learning_rate": 9.668715264269437e-06, "loss": 0.3196817636489868, "step": 7035 }, { "epoch": 0.6150620304036345, "grad_norm": 0.6016402386034523, "learning_rate": 9.667804977802771e-06, "loss": 0.2959843635559082, "step": 7040 }, { "epoch": 0.6154988642320461, "grad_norm": 0.46267754310841713, "learning_rate": 9.666893485403447e-06, "loss": 0.3384548187255859, "step": 7045 }, { "epoch": 0.6159356980604578, "grad_norm": 0.5582961688798866, "learning_rate": 9.665980787306947e-06, "loss": 0.32454309463500974, "step": 7050 }, { "epoch": 0.6163725318888694, "grad_norm": 0.5130139563840147, "learning_rate": 9.665066883749068e-06, "loss": 0.3059628963470459, "step": 7055 }, { "epoch": 0.6168093657172812, "grad_norm": 0.5060946516097268, "learning_rate": 9.66415177496592e-06, "loss": 0.3147700548171997, "step": 7060 }, { "epoch": 0.6172461995456928, "grad_norm": 0.5438170371337333, "learning_rate": 9.66323546119392e-06, "loss": 0.2874962329864502, "step": 7065 }, { "epoch": 0.6176830333741045, "grad_norm": 0.49873364423190725, "learning_rate": 9.6623179426698e-06, "loss": 0.27376766204833985, "step": 7070 }, { "epoch": 0.6181198672025162, "grad_norm": 0.4216864579132382, "learning_rate": 9.661399219630601e-06, "loss": 0.36361069679260255, "step": 7075 }, { "epoch": 0.6185567010309279, "grad_norm": 0.5360419263688083, "learning_rate": 9.660479292313678e-06, "loss": 0.3192591190338135, "step": 7080 }, { "epoch": 0.6189935348593395, "grad_norm": 0.5480888211421704, "learning_rate": 9.659558160956694e-06, "loss": 0.29141409397125245, "step": 7085 }, { "epoch": 0.6194303686877511, "grad_norm": 0.39752325271556277, "learning_rate": 9.658635825797623e-06, "loss": 0.27186031341552735, "step": 7090 }, { "epoch": 0.6198672025161629, "grad_norm": 0.38754872468770246, "learning_rate": 9.657712287074755e-06, "loss": 0.32732970714569093, "step": 7095 }, { "epoch": 0.6203040363445745, "grad_norm": 0.37453777183244075, "learning_rate": 9.656787545026682e-06, "loss": 0.27546727657318115, "step": 7100 }, { "epoch": 0.6207408701729862, "grad_norm": 0.476162470668941, "learning_rate": 9.655861599892315e-06, "loss": 0.325670051574707, "step": 7105 }, { "epoch": 0.6211777040013978, "grad_norm": 0.48354832742430687, "learning_rate": 9.654934451910876e-06, "loss": 0.336107063293457, "step": 7110 }, { "epoch": 0.6216145378298096, "grad_norm": 0.5621713615029904, "learning_rate": 9.654006101321889e-06, "loss": 0.3155368328094482, "step": 7115 }, { "epoch": 0.6220513716582212, "grad_norm": 0.5617227873765701, "learning_rate": 9.653076548365198e-06, "loss": 0.3090078353881836, "step": 7120 }, { "epoch": 0.6224882054866329, "grad_norm": 0.41039591070727355, "learning_rate": 9.652145793280953e-06, "loss": 0.30134124755859376, "step": 7125 }, { "epoch": 0.6229250393150445, "grad_norm": 0.5202865366260258, "learning_rate": 9.651213836309617e-06, "loss": 0.30742678642272947, "step": 7130 }, { "epoch": 0.6233618731434563, "grad_norm": 0.5269176249906685, "learning_rate": 9.65028067769196e-06, "loss": 0.291595458984375, "step": 7135 }, { "epoch": 0.6237987069718679, "grad_norm": 0.49367058153831733, "learning_rate": 9.649346317669067e-06, "loss": 0.30565178394317627, "step": 7140 }, { "epoch": 0.6242355408002795, "grad_norm": 0.47661175334641376, "learning_rate": 9.648410756482328e-06, "loss": 0.3152168273925781, "step": 7145 }, { "epoch": 0.6246723746286913, "grad_norm": 0.5634337782671525, "learning_rate": 9.647473994373449e-06, "loss": 0.32381525039672854, "step": 7150 }, { "epoch": 0.6251092084571029, "grad_norm": 0.5055497276070191, "learning_rate": 9.64653603158444e-06, "loss": 0.3295446395874023, "step": 7155 }, { "epoch": 0.6255460422855146, "grad_norm": 0.4206466181364352, "learning_rate": 9.645596868357629e-06, "loss": 0.31116509437561035, "step": 7160 }, { "epoch": 0.6259828761139262, "grad_norm": 0.45809372716752905, "learning_rate": 9.644656504935648e-06, "loss": 0.2741153001785278, "step": 7165 }, { "epoch": 0.626419709942338, "grad_norm": 0.6021026511935027, "learning_rate": 9.64371494156144e-06, "loss": 0.30208144187927244, "step": 7170 }, { "epoch": 0.6268565437707496, "grad_norm": 0.5324417763289111, "learning_rate": 9.642772178478262e-06, "loss": 0.3199042797088623, "step": 7175 }, { "epoch": 0.6272933775991613, "grad_norm": 0.4747249826862053, "learning_rate": 9.641828215929672e-06, "loss": 0.30195913314819334, "step": 7180 }, { "epoch": 0.6277302114275729, "grad_norm": 0.5222326504924192, "learning_rate": 9.64088305415955e-06, "loss": 0.33397009372711184, "step": 7185 }, { "epoch": 0.6281670452559847, "grad_norm": 0.59263429832748, "learning_rate": 9.639936693412078e-06, "loss": 0.32103400230407714, "step": 7190 }, { "epoch": 0.6286038790843963, "grad_norm": 0.5179554289911723, "learning_rate": 9.638989133931747e-06, "loss": 0.28336358070373535, "step": 7195 }, { "epoch": 0.6290407129128079, "grad_norm": 0.57750264128704, "learning_rate": 9.63804037596336e-06, "loss": 0.29765801429748534, "step": 7200 }, { "epoch": 0.6294775467412196, "grad_norm": 0.5388976353126222, "learning_rate": 9.637090419752034e-06, "loss": 0.29700684547424316, "step": 7205 }, { "epoch": 0.6299143805696313, "grad_norm": 0.5254312573906528, "learning_rate": 9.636139265543187e-06, "loss": 0.3358144998550415, "step": 7210 }, { "epoch": 0.630351214398043, "grad_norm": 0.5093863430164712, "learning_rate": 9.635186913582554e-06, "loss": 0.32656311988830566, "step": 7215 }, { "epoch": 0.6307880482264546, "grad_norm": 0.4945322409156616, "learning_rate": 9.634233364116173e-06, "loss": 0.29951746463775636, "step": 7220 }, { "epoch": 0.6312248820548664, "grad_norm": 0.5592369471204162, "learning_rate": 9.633278617390395e-06, "loss": 0.3340898036956787, "step": 7225 }, { "epoch": 0.631661715883278, "grad_norm": 0.5987532771656223, "learning_rate": 9.632322673651884e-06, "loss": 0.3293590068817139, "step": 7230 }, { "epoch": 0.6320985497116897, "grad_norm": 0.45997983697514727, "learning_rate": 9.631365533147602e-06, "loss": 0.33641643524169923, "step": 7235 }, { "epoch": 0.6325353835401013, "grad_norm": 0.570452697894375, "learning_rate": 9.630407196124835e-06, "loss": 0.32082476615905764, "step": 7240 }, { "epoch": 0.6329722173685131, "grad_norm": 0.48416361535549746, "learning_rate": 9.629447662831165e-06, "loss": 0.3146545886993408, "step": 7245 }, { "epoch": 0.6334090511969247, "grad_norm": 0.49767932681254495, "learning_rate": 9.628486933514492e-06, "loss": 0.32555398941040037, "step": 7250 }, { "epoch": 0.6338458850253363, "grad_norm": 0.49844395924841556, "learning_rate": 9.627525008423018e-06, "loss": 0.3228600025177002, "step": 7255 }, { "epoch": 0.634282718853748, "grad_norm": 0.4297033499316875, "learning_rate": 9.626561887805261e-06, "loss": 0.3005897045135498, "step": 7260 }, { "epoch": 0.6347195526821597, "grad_norm": 0.5410320667580102, "learning_rate": 9.62559757191004e-06, "loss": 0.3160225868225098, "step": 7265 }, { "epoch": 0.6351563865105714, "grad_norm": 0.5284233582990184, "learning_rate": 9.624632060986493e-06, "loss": 0.3107737064361572, "step": 7270 }, { "epoch": 0.635593220338983, "grad_norm": 0.5476693408629675, "learning_rate": 9.623665355284055e-06, "loss": 0.31419351100921633, "step": 7275 }, { "epoch": 0.6360300541673948, "grad_norm": 0.5173094355986759, "learning_rate": 9.62269745505248e-06, "loss": 0.306463885307312, "step": 7280 }, { "epoch": 0.6364668879958064, "grad_norm": 0.4309423820202649, "learning_rate": 9.621728360541822e-06, "loss": 0.3213703393936157, "step": 7285 }, { "epoch": 0.6369037218242181, "grad_norm": 0.8664066641508853, "learning_rate": 9.62075807200245e-06, "loss": 0.27894423007965086, "step": 7290 }, { "epoch": 0.6373405556526297, "grad_norm": 0.5475509439371133, "learning_rate": 9.619786589685041e-06, "loss": 0.2879197359085083, "step": 7295 }, { "epoch": 0.6377773894810415, "grad_norm": 0.5616197871406774, "learning_rate": 9.618813913840574e-06, "loss": 0.2973952293395996, "step": 7300 }, { "epoch": 0.6382142233094531, "grad_norm": 0.41647689939121435, "learning_rate": 9.617840044720344e-06, "loss": 0.3275290966033936, "step": 7305 }, { "epoch": 0.6386510571378647, "grad_norm": 0.6050131579725936, "learning_rate": 9.616864982575952e-06, "loss": 0.35246455669403076, "step": 7310 }, { "epoch": 0.6390878909662764, "grad_norm": 0.5709696978676342, "learning_rate": 9.6158887276593e-06, "loss": 0.26271071434021, "step": 7315 }, { "epoch": 0.6395247247946881, "grad_norm": 0.5654633250156492, "learning_rate": 9.614911280222616e-06, "loss": 0.2898075103759766, "step": 7320 }, { "epoch": 0.6399615586230998, "grad_norm": 0.5871876985188702, "learning_rate": 9.613932640518416e-06, "loss": 0.339166259765625, "step": 7325 }, { "epoch": 0.6403983924515114, "grad_norm": 0.5138511095606344, "learning_rate": 9.612952808799533e-06, "loss": 0.3114602565765381, "step": 7330 }, { "epoch": 0.6408352262799231, "grad_norm": 0.5596804638497043, "learning_rate": 9.61197178531911e-06, "loss": 0.2884641647338867, "step": 7335 }, { "epoch": 0.6412720601083348, "grad_norm": 0.5215891080117341, "learning_rate": 9.610989570330595e-06, "loss": 0.2868127584457397, "step": 7340 }, { "epoch": 0.6417088939367465, "grad_norm": 0.5478419890001618, "learning_rate": 9.610006164087747e-06, "loss": 0.35109882354736327, "step": 7345 }, { "epoch": 0.6421457277651581, "grad_norm": 0.4733639691866631, "learning_rate": 9.609021566844623e-06, "loss": 0.30983400344848633, "step": 7350 }, { "epoch": 0.6425825615935699, "grad_norm": 0.45238514757264553, "learning_rate": 9.608035778855602e-06, "loss": 0.30718135833740234, "step": 7355 }, { "epoch": 0.6430193954219815, "grad_norm": 0.7764730831060531, "learning_rate": 9.60704880037536e-06, "loss": 0.28314805030822754, "step": 7360 }, { "epoch": 0.6434562292503931, "grad_norm": 0.4709673772798849, "learning_rate": 9.606060631658884e-06, "loss": 0.3177448272705078, "step": 7365 }, { "epoch": 0.6438930630788048, "grad_norm": 0.5443513180000948, "learning_rate": 9.605071272961471e-06, "loss": 0.29431743621826173, "step": 7370 }, { "epoch": 0.6443298969072165, "grad_norm": 0.5370915322415892, "learning_rate": 9.604080724538717e-06, "loss": 0.32349884510040283, "step": 7375 }, { "epoch": 0.6447667307356282, "grad_norm": 0.4557030996952698, "learning_rate": 9.60308898664654e-06, "loss": 0.3289552450180054, "step": 7380 }, { "epoch": 0.6452035645640398, "grad_norm": 0.6310331729092298, "learning_rate": 9.602096059541151e-06, "loss": 0.30401442050933836, "step": 7385 }, { "epoch": 0.6456403983924515, "grad_norm": 0.5598314527044548, "learning_rate": 9.601101943479073e-06, "loss": 0.3048851013183594, "step": 7390 }, { "epoch": 0.6460772322208632, "grad_norm": 0.45231794349386645, "learning_rate": 9.60010663871714e-06, "loss": 0.33751213550567627, "step": 7395 }, { "epoch": 0.6465140660492749, "grad_norm": 0.4570486313667312, "learning_rate": 9.599110145512486e-06, "loss": 0.32208545207977296, "step": 7400 }, { "epoch": 0.6469508998776865, "grad_norm": 0.5508691362276988, "learning_rate": 9.598112464122564e-06, "loss": 0.3154139518737793, "step": 7405 }, { "epoch": 0.6473877337060981, "grad_norm": 0.5349004600707759, "learning_rate": 9.597113594805118e-06, "loss": 0.30453853607177733, "step": 7410 }, { "epoch": 0.6478245675345099, "grad_norm": 0.3807057504184466, "learning_rate": 9.59611353781821e-06, "loss": 0.28764734268188474, "step": 7415 }, { "epoch": 0.6482614013629215, "grad_norm": 0.42262663873647976, "learning_rate": 9.595112293420207e-06, "loss": 0.30527610778808595, "step": 7420 }, { "epoch": 0.6486982351913332, "grad_norm": 0.5737757274749814, "learning_rate": 9.594109861869781e-06, "loss": 0.32306880950927735, "step": 7425 }, { "epoch": 0.6491350690197449, "grad_norm": 0.527382513329333, "learning_rate": 9.59310624342591e-06, "loss": 0.29839506149291994, "step": 7430 }, { "epoch": 0.6495719028481566, "grad_norm": 0.5044370112090308, "learning_rate": 9.59210143834788e-06, "loss": 0.30297648906707764, "step": 7435 }, { "epoch": 0.6500087366765682, "grad_norm": 0.46060308683846474, "learning_rate": 9.591095446895286e-06, "loss": 0.303938627243042, "step": 7440 }, { "epoch": 0.6504455705049799, "grad_norm": 0.43083465792319603, "learning_rate": 9.590088269328026e-06, "loss": 0.3022911548614502, "step": 7445 }, { "epoch": 0.6508824043333916, "grad_norm": 0.5972553423802266, "learning_rate": 9.589079905906303e-06, "loss": 0.31242587566375735, "step": 7450 }, { "epoch": 0.6513192381618033, "grad_norm": 0.4894027437430086, "learning_rate": 9.58807035689063e-06, "loss": 0.29987730979919436, "step": 7455 }, { "epoch": 0.6517560719902149, "grad_norm": 0.5823312398874089, "learning_rate": 9.58705962254183e-06, "loss": 0.28582539558410647, "step": 7460 }, { "epoch": 0.6521929058186265, "grad_norm": 0.6063773021342677, "learning_rate": 9.586047703121019e-06, "loss": 0.31088211536407473, "step": 7465 }, { "epoch": 0.6526297396470383, "grad_norm": 0.5075619546743689, "learning_rate": 9.585034598889633e-06, "loss": 0.3292787790298462, "step": 7470 }, { "epoch": 0.6530665734754499, "grad_norm": 0.49832108962821337, "learning_rate": 9.584020310109407e-06, "loss": 0.3205557107925415, "step": 7475 }, { "epoch": 0.6535034073038616, "grad_norm": 0.5532188635144659, "learning_rate": 9.583004837042386e-06, "loss": 0.3478519439697266, "step": 7480 }, { "epoch": 0.6539402411322733, "grad_norm": 0.486887067242441, "learning_rate": 9.581988179950914e-06, "loss": 0.311754846572876, "step": 7485 }, { "epoch": 0.654377074960685, "grad_norm": 0.5147081676903883, "learning_rate": 9.58097033909765e-06, "loss": 0.3147754192352295, "step": 7490 }, { "epoch": 0.6548139087890966, "grad_norm": 0.5724891836769769, "learning_rate": 9.57995131474555e-06, "loss": 0.2888053894042969, "step": 7495 }, { "epoch": 0.6552507426175083, "grad_norm": 0.5289121240156184, "learning_rate": 9.578931107157885e-06, "loss": 0.305270791053772, "step": 7500 }, { "epoch": 0.65568757644592, "grad_norm": 0.4966555883974772, "learning_rate": 9.577909716598223e-06, "loss": 0.31629648208618166, "step": 7505 }, { "epoch": 0.6561244102743317, "grad_norm": 0.6064924094153998, "learning_rate": 9.576887143330444e-06, "loss": 0.323093056678772, "step": 7510 }, { "epoch": 0.6565612441027433, "grad_norm": 0.5679201327673722, "learning_rate": 9.575863387618727e-06, "loss": 0.3378469467163086, "step": 7515 }, { "epoch": 0.656998077931155, "grad_norm": 0.5245887954808446, "learning_rate": 9.574838449727565e-06, "loss": 0.3113137722015381, "step": 7520 }, { "epoch": 0.6574349117595667, "grad_norm": 0.49773456146007283, "learning_rate": 9.57381232992175e-06, "loss": 0.2871575355529785, "step": 7525 }, { "epoch": 0.6578717455879783, "grad_norm": 0.49461838154392873, "learning_rate": 9.57278502846638e-06, "loss": 0.3145397186279297, "step": 7530 }, { "epoch": 0.65830857941639, "grad_norm": 0.4529600844485445, "learning_rate": 9.571756545626861e-06, "loss": 0.2944584846496582, "step": 7535 }, { "epoch": 0.6587454132448016, "grad_norm": 0.62451395344404, "learning_rate": 9.570726881668903e-06, "loss": 0.3199388265609741, "step": 7540 }, { "epoch": 0.6591822470732134, "grad_norm": 0.44747811156166495, "learning_rate": 9.569696036858518e-06, "loss": 0.2966005802154541, "step": 7545 }, { "epoch": 0.659619080901625, "grad_norm": 0.5617460553212236, "learning_rate": 9.56866401146203e-06, "loss": 0.31887083053588866, "step": 7550 }, { "epoch": 0.6600559147300367, "grad_norm": 0.5253996294944945, "learning_rate": 9.567630805746063e-06, "loss": 0.2926729440689087, "step": 7555 }, { "epoch": 0.6604927485584484, "grad_norm": 0.466255050164398, "learning_rate": 9.566596419977543e-06, "loss": 0.3051872968673706, "step": 7560 }, { "epoch": 0.6609295823868601, "grad_norm": 0.5040980497292409, "learning_rate": 9.56556085442371e-06, "loss": 0.2887491464614868, "step": 7565 }, { "epoch": 0.6613664162152717, "grad_norm": 0.5490201179970052, "learning_rate": 9.5645241093521e-06, "loss": 0.30405411720275877, "step": 7570 }, { "epoch": 0.6618032500436833, "grad_norm": 0.4841630192403661, "learning_rate": 9.563486185030557e-06, "loss": 0.3168169975280762, "step": 7575 }, { "epoch": 0.6622400838720951, "grad_norm": 0.4475476491494319, "learning_rate": 9.562447081727233e-06, "loss": 0.31593689918518064, "step": 7580 }, { "epoch": 0.6626769177005067, "grad_norm": 0.524073410214369, "learning_rate": 9.56140679971058e-06, "loss": 0.3307675361633301, "step": 7585 }, { "epoch": 0.6631137515289184, "grad_norm": 0.5285225605296404, "learning_rate": 9.560365339249354e-06, "loss": 0.31054227352142333, "step": 7590 }, { "epoch": 0.66355058535733, "grad_norm": 0.4719277138763646, "learning_rate": 9.559322700612622e-06, "loss": 0.28908610343933105, "step": 7595 }, { "epoch": 0.6639874191857418, "grad_norm": 0.4894273411398012, "learning_rate": 9.558278884069745e-06, "loss": 0.2945610523223877, "step": 7600 }, { "epoch": 0.6644242530141534, "grad_norm": 0.5051345698125113, "learning_rate": 9.557233889890398e-06, "loss": 0.30794060230255127, "step": 7605 }, { "epoch": 0.6648610868425651, "grad_norm": 0.4909439885627585, "learning_rate": 9.556187718344556e-06, "loss": 0.31220808029174807, "step": 7610 }, { "epoch": 0.6652979206709767, "grad_norm": 0.48569231228362975, "learning_rate": 9.5551403697025e-06, "loss": 0.3119745016098022, "step": 7615 }, { "epoch": 0.6657347544993885, "grad_norm": 0.5010572590414833, "learning_rate": 9.55409184423481e-06, "loss": 0.29135658740997317, "step": 7620 }, { "epoch": 0.6661715883278001, "grad_norm": 0.47435311942243563, "learning_rate": 9.553042142212376e-06, "loss": 0.3282465934753418, "step": 7625 }, { "epoch": 0.6666084221562117, "grad_norm": 0.503027320201014, "learning_rate": 9.55199126390639e-06, "loss": 0.33209824562072754, "step": 7630 }, { "epoch": 0.6670452559846235, "grad_norm": 0.49022620817289453, "learning_rate": 9.550939209588349e-06, "loss": 0.3117039203643799, "step": 7635 }, { "epoch": 0.6674820898130351, "grad_norm": 0.5196264093479254, "learning_rate": 9.549885979530048e-06, "loss": 0.30413618087768557, "step": 7640 }, { "epoch": 0.6679189236414468, "grad_norm": 0.5393426731266541, "learning_rate": 9.548831574003592e-06, "loss": 0.31572115421295166, "step": 7645 }, { "epoch": 0.6683557574698584, "grad_norm": 0.7350754727381543, "learning_rate": 9.54777599328139e-06, "loss": 0.33211312294006345, "step": 7650 }, { "epoch": 0.6687925912982702, "grad_norm": 0.6989764180879156, "learning_rate": 9.546719237636149e-06, "loss": 0.2876763820648193, "step": 7655 }, { "epoch": 0.6692294251266818, "grad_norm": 0.4890447846568486, "learning_rate": 9.545661307340886e-06, "loss": 0.27686774730682373, "step": 7660 }, { "epoch": 0.6696662589550935, "grad_norm": 0.4707508857995399, "learning_rate": 9.544602202668918e-06, "loss": 0.3263144254684448, "step": 7665 }, { "epoch": 0.6701030927835051, "grad_norm": 0.5517647221114134, "learning_rate": 9.543541923893864e-06, "loss": 0.30724287033081055, "step": 7670 }, { "epoch": 0.6705399266119169, "grad_norm": 0.5888248597912439, "learning_rate": 9.542480471289651e-06, "loss": 0.2775583744049072, "step": 7675 }, { "epoch": 0.6709767604403285, "grad_norm": 0.5420321600873875, "learning_rate": 9.541417845130506e-06, "loss": 0.29575111865997317, "step": 7680 }, { "epoch": 0.6714135942687401, "grad_norm": 0.4846958775650715, "learning_rate": 9.540354045690956e-06, "loss": 0.29367473125457766, "step": 7685 }, { "epoch": 0.6718504280971519, "grad_norm": 0.4764346102092172, "learning_rate": 9.53928907324584e-06, "loss": 0.29651246070861814, "step": 7690 }, { "epoch": 0.6722872619255635, "grad_norm": 0.4531443597515018, "learning_rate": 9.538222928070288e-06, "loss": 0.30849125385284426, "step": 7695 }, { "epoch": 0.6727240957539752, "grad_norm": 0.6728950730165781, "learning_rate": 9.537155610439748e-06, "loss": 0.29385881423950194, "step": 7700 }, { "epoch": 0.6731609295823868, "grad_norm": 0.488216650016796, "learning_rate": 9.536087120629958e-06, "loss": 0.294641375541687, "step": 7705 }, { "epoch": 0.6735977634107986, "grad_norm": 0.5686677696125949, "learning_rate": 9.535017458916965e-06, "loss": 0.29272372722625734, "step": 7710 }, { "epoch": 0.6740345972392102, "grad_norm": 0.5842169170676065, "learning_rate": 9.533946625577114e-06, "loss": 0.32037954330444335, "step": 7715 }, { "epoch": 0.6744714310676219, "grad_norm": 0.5598820569849413, "learning_rate": 9.53287462088706e-06, "loss": 0.32308435440063477, "step": 7720 }, { "epoch": 0.6749082648960335, "grad_norm": 0.491798390579892, "learning_rate": 9.531801445123756e-06, "loss": 0.27925910949707033, "step": 7725 }, { "epoch": 0.6753450987244453, "grad_norm": 0.5223742978782357, "learning_rate": 9.530727098564456e-06, "loss": 0.29980695247650146, "step": 7730 }, { "epoch": 0.6757819325528569, "grad_norm": 0.5596105571435398, "learning_rate": 9.529651581486721e-06, "loss": 0.3184674263000488, "step": 7735 }, { "epoch": 0.6762187663812685, "grad_norm": 0.4223763696021457, "learning_rate": 9.52857489416841e-06, "loss": 0.308300518989563, "step": 7740 }, { "epoch": 0.6766556002096802, "grad_norm": 0.5549942422610413, "learning_rate": 9.52749703688769e-06, "loss": 0.31522355079650877, "step": 7745 }, { "epoch": 0.6770924340380919, "grad_norm": 0.6540533046033352, "learning_rate": 9.526418009923022e-06, "loss": 0.28359546661376955, "step": 7750 }, { "epoch": 0.6775292678665036, "grad_norm": 0.5600505028087234, "learning_rate": 9.525337813553175e-06, "loss": 0.30519139766693115, "step": 7755 }, { "epoch": 0.6779661016949152, "grad_norm": 0.5594276031700339, "learning_rate": 9.524256448057221e-06, "loss": 0.2860889196395874, "step": 7760 }, { "epoch": 0.678402935523327, "grad_norm": 0.485817943343453, "learning_rate": 9.523173913714532e-06, "loss": 0.28122243881225584, "step": 7765 }, { "epoch": 0.6788397693517386, "grad_norm": 0.45988992313598603, "learning_rate": 9.52209021080478e-06, "loss": 0.3036750316619873, "step": 7770 }, { "epoch": 0.6792766031801503, "grad_norm": 0.5125294233082602, "learning_rate": 9.521005339607941e-06, "loss": 0.30034375190734863, "step": 7775 }, { "epoch": 0.6797134370085619, "grad_norm": 0.5576916627421358, "learning_rate": 9.519919300404294e-06, "loss": 0.32369394302368165, "step": 7780 }, { "epoch": 0.6801502708369737, "grad_norm": 0.5208883443159652, "learning_rate": 9.518832093474418e-06, "loss": 0.3184517383575439, "step": 7785 }, { "epoch": 0.6805871046653853, "grad_norm": 0.5116189083611284, "learning_rate": 9.517743719099193e-06, "loss": 0.3040097713470459, "step": 7790 }, { "epoch": 0.6810239384937969, "grad_norm": 0.500238500263775, "learning_rate": 9.516654177559802e-06, "loss": 0.3098776817321777, "step": 7795 }, { "epoch": 0.6814607723222086, "grad_norm": 0.49972916824841507, "learning_rate": 9.515563469137732e-06, "loss": 0.3042091608047485, "step": 7800 }, { "epoch": 0.6818976061506203, "grad_norm": 0.681115733361583, "learning_rate": 9.514471594114766e-06, "loss": 0.3124203681945801, "step": 7805 }, { "epoch": 0.682334439979032, "grad_norm": 0.5743244593079014, "learning_rate": 9.513378552772994e-06, "loss": 0.31552577018737793, "step": 7810 }, { "epoch": 0.6827712738074436, "grad_norm": 0.4184130801992797, "learning_rate": 9.5122843453948e-06, "loss": 0.3259428977966309, "step": 7815 }, { "epoch": 0.6832081076358553, "grad_norm": 0.7374061016608109, "learning_rate": 9.511188972262876e-06, "loss": 0.31448318958282473, "step": 7820 }, { "epoch": 0.683644941464267, "grad_norm": 0.5238757077543437, "learning_rate": 9.510092433660212e-06, "loss": 0.31803383827209475, "step": 7825 }, { "epoch": 0.6840817752926787, "grad_norm": 0.5027993557600409, "learning_rate": 9.508994729870102e-06, "loss": 0.30141334533691405, "step": 7830 }, { "epoch": 0.6845186091210903, "grad_norm": 0.49278856339096017, "learning_rate": 9.50789586117614e-06, "loss": 0.280211353302002, "step": 7835 }, { "epoch": 0.6849554429495021, "grad_norm": 0.47437772329871064, "learning_rate": 9.506795827862215e-06, "loss": 0.30685150623321533, "step": 7840 }, { "epoch": 0.6853922767779137, "grad_norm": 0.5510103814252979, "learning_rate": 9.505694630212524e-06, "loss": 0.2630069971084595, "step": 7845 }, { "epoch": 0.6858291106063253, "grad_norm": 0.6160212031726061, "learning_rate": 9.504592268511565e-06, "loss": 0.2977846622467041, "step": 7850 }, { "epoch": 0.686265944434737, "grad_norm": 0.44371915626892555, "learning_rate": 9.503488743044134e-06, "loss": 0.3067914962768555, "step": 7855 }, { "epoch": 0.6867027782631487, "grad_norm": 0.6127447282192251, "learning_rate": 9.502384054095326e-06, "loss": 0.2932214021682739, "step": 7860 }, { "epoch": 0.6871396120915604, "grad_norm": 0.489271220394002, "learning_rate": 9.501278201950539e-06, "loss": 0.3298938274383545, "step": 7865 }, { "epoch": 0.687576445919972, "grad_norm": 0.5093035901766583, "learning_rate": 9.50017118689547e-06, "loss": 0.2943981647491455, "step": 7870 }, { "epoch": 0.6880132797483837, "grad_norm": 0.4517112531197856, "learning_rate": 9.49906300921612e-06, "loss": 0.273573899269104, "step": 7875 }, { "epoch": 0.6884501135767954, "grad_norm": 0.5795781682742693, "learning_rate": 9.497953669198788e-06, "loss": 0.30536890029907227, "step": 7880 }, { "epoch": 0.6888869474052071, "grad_norm": 0.4444231460255522, "learning_rate": 9.496843167130073e-06, "loss": 0.27512006759643554, "step": 7885 }, { "epoch": 0.6893237812336187, "grad_norm": 0.5120091147002214, "learning_rate": 9.495731503296872e-06, "loss": 0.3153116703033447, "step": 7890 }, { "epoch": 0.6897606150620305, "grad_norm": 0.5002275052353746, "learning_rate": 9.494618677986387e-06, "loss": 0.30268182754516604, "step": 7895 }, { "epoch": 0.6901974488904421, "grad_norm": 0.5371345534116259, "learning_rate": 9.493504691486118e-06, "loss": 0.29711968898773194, "step": 7900 }, { "epoch": 0.6906342827188537, "grad_norm": 0.4651417944917144, "learning_rate": 9.492389544083862e-06, "loss": 0.2845624446868896, "step": 7905 }, { "epoch": 0.6910711165472654, "grad_norm": 0.48131145923123, "learning_rate": 9.491273236067721e-06, "loss": 0.3117487668991089, "step": 7910 }, { "epoch": 0.6915079503756771, "grad_norm": 0.5499564076020986, "learning_rate": 9.490155767726094e-06, "loss": 0.31826415061950686, "step": 7915 }, { "epoch": 0.6919447842040888, "grad_norm": 0.4970961395450139, "learning_rate": 9.48903713934768e-06, "loss": 0.28652684688568114, "step": 7920 }, { "epoch": 0.6923816180325004, "grad_norm": 0.4653316131085615, "learning_rate": 9.487917351221477e-06, "loss": 0.3063636779785156, "step": 7925 }, { "epoch": 0.6928184518609121, "grad_norm": 0.5252174095418934, "learning_rate": 9.486796403636786e-06, "loss": 0.27192039489746095, "step": 7930 }, { "epoch": 0.6932552856893238, "grad_norm": 0.5135554574026429, "learning_rate": 9.485674296883202e-06, "loss": 0.29521331787109373, "step": 7935 }, { "epoch": 0.6936921195177355, "grad_norm": 0.4909050386046712, "learning_rate": 9.484551031250623e-06, "loss": 0.3046566963195801, "step": 7940 }, { "epoch": 0.6941289533461471, "grad_norm": 0.43912995507002633, "learning_rate": 9.483426607029249e-06, "loss": 0.2900372505187988, "step": 7945 }, { "epoch": 0.6945657871745587, "grad_norm": 0.5794225346628177, "learning_rate": 9.482301024509574e-06, "loss": 0.3043113946914673, "step": 7950 }, { "epoch": 0.6950026210029705, "grad_norm": 0.46477070142933163, "learning_rate": 9.481174283982392e-06, "loss": 0.3325371026992798, "step": 7955 }, { "epoch": 0.6954394548313821, "grad_norm": 0.6195586506171853, "learning_rate": 9.4800463857388e-06, "loss": 0.28672943115234373, "step": 7960 }, { "epoch": 0.6958762886597938, "grad_norm": 0.53749558117981, "learning_rate": 9.478917330070193e-06, "loss": 0.28901751041412355, "step": 7965 }, { "epoch": 0.6963131224882055, "grad_norm": 0.5097651401042107, "learning_rate": 9.47778711726826e-06, "loss": 0.30350069999694823, "step": 7970 }, { "epoch": 0.6967499563166172, "grad_norm": 0.4965307876363164, "learning_rate": 9.476655747624995e-06, "loss": 0.27974705696105956, "step": 7975 }, { "epoch": 0.6971867901450288, "grad_norm": 0.5340193204589153, "learning_rate": 9.47552322143269e-06, "loss": 0.3149142265319824, "step": 7980 }, { "epoch": 0.6976236239734405, "grad_norm": 0.508946974586154, "learning_rate": 9.47438953898393e-06, "loss": 0.3213608503341675, "step": 7985 }, { "epoch": 0.6980604578018522, "grad_norm": 0.5101348248958107, "learning_rate": 9.473254700571607e-06, "loss": 0.3311962604522705, "step": 7990 }, { "epoch": 0.6984972916302639, "grad_norm": 0.5038299723454308, "learning_rate": 9.472118706488907e-06, "loss": 0.32758493423461915, "step": 7995 }, { "epoch": 0.6989341254586755, "grad_norm": 0.45671653185657496, "learning_rate": 9.470981557029314e-06, "loss": 0.2782686471939087, "step": 8000 }, { "epoch": 0.6993709592870871, "grad_norm": 0.6130131101111417, "learning_rate": 9.469843252486613e-06, "loss": 0.2937135696411133, "step": 8005 }, { "epoch": 0.6998077931154989, "grad_norm": 0.5220575718572027, "learning_rate": 9.468703793154887e-06, "loss": 0.3033681631088257, "step": 8010 }, { "epoch": 0.7002446269439105, "grad_norm": 0.5429526759400487, "learning_rate": 9.467563179328514e-06, "loss": 0.3281139373779297, "step": 8015 }, { "epoch": 0.7006814607723222, "grad_norm": 0.38441077909845733, "learning_rate": 9.466421411302176e-06, "loss": 0.2737737655639648, "step": 8020 }, { "epoch": 0.7011182946007338, "grad_norm": 0.5813552040874563, "learning_rate": 9.465278489370848e-06, "loss": 0.3103340625762939, "step": 8025 }, { "epoch": 0.7015551284291456, "grad_norm": 0.5601265344166206, "learning_rate": 9.464134413829806e-06, "loss": 0.303656268119812, "step": 8030 }, { "epoch": 0.7019919622575572, "grad_norm": 0.46925902990623636, "learning_rate": 9.462989184974624e-06, "loss": 0.31939306259155276, "step": 8035 }, { "epoch": 0.7024287960859689, "grad_norm": 0.7931638430324921, "learning_rate": 9.461842803101171e-06, "loss": 0.32843353748321535, "step": 8040 }, { "epoch": 0.7028656299143806, "grad_norm": 0.48055359394087177, "learning_rate": 9.460695268505618e-06, "loss": 0.31796960830688475, "step": 8045 }, { "epoch": 0.7033024637427923, "grad_norm": 0.4963534155749524, "learning_rate": 9.45954658148443e-06, "loss": 0.3189819812774658, "step": 8050 }, { "epoch": 0.7037392975712039, "grad_norm": 0.5422863654128323, "learning_rate": 9.458396742334372e-06, "loss": 0.30405893325805666, "step": 8055 }, { "epoch": 0.7041761313996155, "grad_norm": 0.5558878658263884, "learning_rate": 9.457245751352507e-06, "loss": 0.2926322460174561, "step": 8060 }, { "epoch": 0.7046129652280273, "grad_norm": 0.5697614196067642, "learning_rate": 9.456093608836195e-06, "loss": 0.27811644077301023, "step": 8065 }, { "epoch": 0.7050497990564389, "grad_norm": 0.6336738864014249, "learning_rate": 9.454940315083094e-06, "loss": 0.36019144058227537, "step": 8070 }, { "epoch": 0.7054866328848506, "grad_norm": 0.5509405175859478, "learning_rate": 9.453785870391156e-06, "loss": 0.3081229209899902, "step": 8075 }, { "epoch": 0.7059234667132622, "grad_norm": 0.6267878616101873, "learning_rate": 9.452630275058636e-06, "loss": 0.32094929218292234, "step": 8080 }, { "epoch": 0.706360300541674, "grad_norm": 0.5088808028925208, "learning_rate": 9.451473529384081e-06, "loss": 0.32646856307983396, "step": 8085 }, { "epoch": 0.7067971343700856, "grad_norm": 0.41247829533128916, "learning_rate": 9.45031563366634e-06, "loss": 0.3563928365707397, "step": 8090 }, { "epoch": 0.7072339681984973, "grad_norm": 0.48675558024728355, "learning_rate": 9.449156588204555e-06, "loss": 0.28351926803588867, "step": 8095 }, { "epoch": 0.707670802026909, "grad_norm": 0.5019250912023123, "learning_rate": 9.447996393298167e-06, "loss": 0.2921389102935791, "step": 8100 }, { "epoch": 0.7081076358553207, "grad_norm": 0.6300647199445572, "learning_rate": 9.446835049246914e-06, "loss": 0.3321222305297852, "step": 8105 }, { "epoch": 0.7085444696837323, "grad_norm": 0.5421905560333468, "learning_rate": 9.445672556350828e-06, "loss": 0.31692502498626707, "step": 8110 }, { "epoch": 0.708981303512144, "grad_norm": 0.4716092473912915, "learning_rate": 9.444508914910244e-06, "loss": 0.30401296615600587, "step": 8115 }, { "epoch": 0.7094181373405557, "grad_norm": 0.6205469757574611, "learning_rate": 9.443344125225788e-06, "loss": 0.27900333404541017, "step": 8120 }, { "epoch": 0.7098549711689673, "grad_norm": 0.5524777317140441, "learning_rate": 9.442178187598388e-06, "loss": 0.313389253616333, "step": 8125 }, { "epoch": 0.710291804997379, "grad_norm": 0.41686125349308734, "learning_rate": 9.441011102329261e-06, "loss": 0.30097036361694335, "step": 8130 }, { "epoch": 0.7107286388257906, "grad_norm": 0.6764040843169017, "learning_rate": 9.439842869719926e-06, "loss": 0.3397873878479004, "step": 8135 }, { "epoch": 0.7111654726542024, "grad_norm": 0.5346302361511487, "learning_rate": 9.438673490072197e-06, "loss": 0.3310606241226196, "step": 8140 }, { "epoch": 0.711602306482614, "grad_norm": 0.5471864821877812, "learning_rate": 9.437502963688188e-06, "loss": 0.3390693187713623, "step": 8145 }, { "epoch": 0.7120391403110257, "grad_norm": 0.44943066852860536, "learning_rate": 9.4363312908703e-06, "loss": 0.30745863914489746, "step": 8150 }, { "epoch": 0.7124759741394373, "grad_norm": 0.5985809764575422, "learning_rate": 9.43515847192124e-06, "loss": 0.3221661806106567, "step": 8155 }, { "epoch": 0.7129128079678491, "grad_norm": 0.5298111037893802, "learning_rate": 9.433984507144005e-06, "loss": 0.2964481353759766, "step": 8160 }, { "epoch": 0.7133496417962607, "grad_norm": 0.4634135169794209, "learning_rate": 9.432809396841893e-06, "loss": 0.2914870738983154, "step": 8165 }, { "epoch": 0.7137864756246723, "grad_norm": 0.505801642228538, "learning_rate": 9.431633141318494e-06, "loss": 0.3146775245666504, "step": 8170 }, { "epoch": 0.7142233094530841, "grad_norm": 0.4404817663833968, "learning_rate": 9.430455740877692e-06, "loss": 0.3075283050537109, "step": 8175 }, { "epoch": 0.7146601432814957, "grad_norm": 0.5533566647788283, "learning_rate": 9.429277195823674e-06, "loss": 0.26789259910583496, "step": 8180 }, { "epoch": 0.7150969771099074, "grad_norm": 0.8398789987786274, "learning_rate": 9.428097506460913e-06, "loss": 0.27174532413482666, "step": 8185 }, { "epoch": 0.715533810938319, "grad_norm": 0.6719947833171885, "learning_rate": 9.42691667309419e-06, "loss": 0.30710563659667967, "step": 8190 }, { "epoch": 0.7159706447667308, "grad_norm": 0.5972272734661302, "learning_rate": 9.42573469602857e-06, "loss": 0.2957906246185303, "step": 8195 }, { "epoch": 0.7164074785951424, "grad_norm": 0.44671647842979967, "learning_rate": 9.424551575569418e-06, "loss": 0.31532926559448243, "step": 8200 }, { "epoch": 0.7168443124235541, "grad_norm": 0.4300757048837226, "learning_rate": 9.423367312022398e-06, "loss": 0.3000842571258545, "step": 8205 }, { "epoch": 0.7172811462519657, "grad_norm": 0.5512633940235269, "learning_rate": 9.422181905693461e-06, "loss": 0.2996603727340698, "step": 8210 }, { "epoch": 0.7177179800803775, "grad_norm": 0.5521835528914353, "learning_rate": 9.420995356888862e-06, "loss": 0.3158989429473877, "step": 8215 }, { "epoch": 0.7181548139087891, "grad_norm": 0.5031179670362416, "learning_rate": 9.419807665915146e-06, "loss": 0.2853677272796631, "step": 8220 }, { "epoch": 0.7185916477372007, "grad_norm": 0.5117309647942997, "learning_rate": 9.418618833079153e-06, "loss": 0.2949176788330078, "step": 8225 }, { "epoch": 0.7190284815656124, "grad_norm": 0.5311351809363759, "learning_rate": 9.41742885868802e-06, "loss": 0.26585898399353025, "step": 8230 }, { "epoch": 0.7194653153940241, "grad_norm": 0.4169056766639986, "learning_rate": 9.41623774304918e-06, "loss": 0.28898162841796876, "step": 8235 }, { "epoch": 0.7199021492224358, "grad_norm": 0.5577710948045248, "learning_rate": 9.415045486470358e-06, "loss": 0.30416762828826904, "step": 8240 }, { "epoch": 0.7203389830508474, "grad_norm": 0.49314999642820384, "learning_rate": 9.413852089259573e-06, "loss": 0.3161791801452637, "step": 8245 }, { "epoch": 0.7207758168792592, "grad_norm": 0.5471848403524916, "learning_rate": 9.412657551725142e-06, "loss": 0.28891525268554685, "step": 8250 }, { "epoch": 0.7212126507076708, "grad_norm": 0.6133202438995486, "learning_rate": 9.411461874175675e-06, "loss": 0.26570839881896974, "step": 8255 }, { "epoch": 0.7216494845360825, "grad_norm": 0.5093381731013842, "learning_rate": 9.410265056920078e-06, "loss": 0.3043022394180298, "step": 8260 }, { "epoch": 0.7220863183644941, "grad_norm": 0.6451082726397516, "learning_rate": 9.409067100267548e-06, "loss": 0.3172751903533936, "step": 8265 }, { "epoch": 0.7225231521929059, "grad_norm": 0.6517490212733322, "learning_rate": 9.40786800452758e-06, "loss": 0.33050515651702883, "step": 8270 }, { "epoch": 0.7229599860213175, "grad_norm": 0.5606914218210487, "learning_rate": 9.40666777000996e-06, "loss": 0.2794440746307373, "step": 8275 }, { "epoch": 0.7233968198497291, "grad_norm": 0.6059063686142367, "learning_rate": 9.405466397024771e-06, "loss": 0.29727883338928224, "step": 8280 }, { "epoch": 0.7238336536781408, "grad_norm": 0.509278260665414, "learning_rate": 9.404263885882389e-06, "loss": 0.3065652370452881, "step": 8285 }, { "epoch": 0.7242704875065525, "grad_norm": 0.5947775653774751, "learning_rate": 9.403060236893486e-06, "loss": 0.28753085136413575, "step": 8290 }, { "epoch": 0.7247073213349642, "grad_norm": 0.5090379653027787, "learning_rate": 9.401855450369021e-06, "loss": 0.30462160110473635, "step": 8295 }, { "epoch": 0.7251441551633758, "grad_norm": 0.5433053531262294, "learning_rate": 9.400649526620258e-06, "loss": 0.30427615642547606, "step": 8300 }, { "epoch": 0.7255809889917876, "grad_norm": 0.507358918300496, "learning_rate": 9.399442465958745e-06, "loss": 0.3003854751586914, "step": 8305 }, { "epoch": 0.7260178228201992, "grad_norm": 0.5060199656188938, "learning_rate": 9.398234268696327e-06, "loss": 0.2930461883544922, "step": 8310 }, { "epoch": 0.7264546566486109, "grad_norm": 0.5328666862470585, "learning_rate": 9.397024935145147e-06, "loss": 0.3025523662567139, "step": 8315 }, { "epoch": 0.7268914904770225, "grad_norm": 0.5877418957993168, "learning_rate": 9.395814465617634e-06, "loss": 0.2831772804260254, "step": 8320 }, { "epoch": 0.7273283243054343, "grad_norm": 0.49013174214633154, "learning_rate": 9.394602860426514e-06, "loss": 0.31791350841522215, "step": 8325 }, { "epoch": 0.7277651581338459, "grad_norm": 0.4447976853243195, "learning_rate": 9.393390119884808e-06, "loss": 0.3082219362258911, "step": 8330 }, { "epoch": 0.7282019919622575, "grad_norm": 0.4913983877422477, "learning_rate": 9.39217624430583e-06, "loss": 0.29886417388916015, "step": 8335 }, { "epoch": 0.7286388257906692, "grad_norm": 0.5148032945575752, "learning_rate": 9.390961234003186e-06, "loss": 0.27787294387817385, "step": 8340 }, { "epoch": 0.7290756596190809, "grad_norm": 0.5389255889555995, "learning_rate": 9.389745089290772e-06, "loss": 0.28214349746704104, "step": 8345 }, { "epoch": 0.7295124934474926, "grad_norm": 0.5642418087501646, "learning_rate": 9.388527810482783e-06, "loss": 0.2859578847885132, "step": 8350 }, { "epoch": 0.7299493272759042, "grad_norm": 0.49652253354006987, "learning_rate": 9.387309397893705e-06, "loss": 0.325529956817627, "step": 8355 }, { "epoch": 0.7303861611043159, "grad_norm": 0.512335811369392, "learning_rate": 9.386089851838314e-06, "loss": 0.3453487157821655, "step": 8360 }, { "epoch": 0.7308229949327276, "grad_norm": 0.6217572926798953, "learning_rate": 9.384869172631682e-06, "loss": 0.3289516448974609, "step": 8365 }, { "epoch": 0.7312598287611393, "grad_norm": 0.5689874769710123, "learning_rate": 9.383647360589172e-06, "loss": 0.3116494417190552, "step": 8370 }, { "epoch": 0.7316966625895509, "grad_norm": 0.4922020433215095, "learning_rate": 9.382424416026443e-06, "loss": 0.27922854423522947, "step": 8375 }, { "epoch": 0.7321334964179627, "grad_norm": 0.49975470931955307, "learning_rate": 9.381200339259444e-06, "loss": 0.3173540115356445, "step": 8380 }, { "epoch": 0.7325703302463743, "grad_norm": 0.600160309763228, "learning_rate": 9.379975130604413e-06, "loss": 0.2876257419586182, "step": 8385 }, { "epoch": 0.7330071640747859, "grad_norm": 0.4304533870196949, "learning_rate": 9.378748790377887e-06, "loss": 0.31366815567016604, "step": 8390 }, { "epoch": 0.7334439979031976, "grad_norm": 0.5481076373724256, "learning_rate": 9.377521318896694e-06, "loss": 0.3114192485809326, "step": 8395 }, { "epoch": 0.7338808317316093, "grad_norm": 0.5704198235097355, "learning_rate": 9.376292716477947e-06, "loss": 0.2812433481216431, "step": 8400 }, { "epoch": 0.734317665560021, "grad_norm": 0.4803030213719384, "learning_rate": 9.375062983439062e-06, "loss": 0.32620749473571775, "step": 8405 }, { "epoch": 0.7347544993884326, "grad_norm": 0.5551263546050763, "learning_rate": 9.37383212009774e-06, "loss": 0.3041268825531006, "step": 8410 }, { "epoch": 0.7351913332168443, "grad_norm": 0.605173419485313, "learning_rate": 9.372600126771974e-06, "loss": 0.3268574714660645, "step": 8415 }, { "epoch": 0.735628167045256, "grad_norm": 0.4917378623756618, "learning_rate": 9.371367003780053e-06, "loss": 0.2749946594238281, "step": 8420 }, { "epoch": 0.7360650008736677, "grad_norm": 0.4552879973339583, "learning_rate": 9.370132751440559e-06, "loss": 0.3073667049407959, "step": 8425 }, { "epoch": 0.7365018347020793, "grad_norm": 0.5187778912613338, "learning_rate": 9.368897370072357e-06, "loss": 0.31197776794433596, "step": 8430 }, { "epoch": 0.736938668530491, "grad_norm": 0.5036487315090526, "learning_rate": 9.36766085999461e-06, "loss": 0.30596113204956055, "step": 8435 }, { "epoch": 0.7373755023589027, "grad_norm": 0.4876925527417063, "learning_rate": 9.366423221526775e-06, "loss": 0.27675318717956543, "step": 8440 }, { "epoch": 0.7378123361873143, "grad_norm": 0.526519146892297, "learning_rate": 9.365184454988594e-06, "loss": 0.3004322528839111, "step": 8445 }, { "epoch": 0.738249170015726, "grad_norm": 0.583869005752291, "learning_rate": 9.363944560700104e-06, "loss": 0.3128202438354492, "step": 8450 }, { "epoch": 0.7386860038441377, "grad_norm": 0.5110269347190669, "learning_rate": 9.362703538981638e-06, "loss": 0.32673087120056155, "step": 8455 }, { "epoch": 0.7391228376725494, "grad_norm": 0.6316551052912144, "learning_rate": 9.361461390153808e-06, "loss": 0.29923229217529296, "step": 8460 }, { "epoch": 0.739559671500961, "grad_norm": 0.5637014859644509, "learning_rate": 9.360218114537526e-06, "loss": 0.2998372554779053, "step": 8465 }, { "epoch": 0.7399965053293727, "grad_norm": 0.38886791027268924, "learning_rate": 9.358973712454e-06, "loss": 0.28119187355041503, "step": 8470 }, { "epoch": 0.7404333391577844, "grad_norm": 0.539136955816549, "learning_rate": 9.357728184224718e-06, "loss": 0.29202046394348147, "step": 8475 }, { "epoch": 0.7408701729861961, "grad_norm": 0.6001588386920911, "learning_rate": 9.356481530171461e-06, "loss": 0.3005113363265991, "step": 8480 }, { "epoch": 0.7413070068146077, "grad_norm": 0.47538268669333733, "learning_rate": 9.355233750616308e-06, "loss": 0.2885361909866333, "step": 8485 }, { "epoch": 0.7417438406430193, "grad_norm": 0.4383399519881402, "learning_rate": 9.353984845881623e-06, "loss": 0.3114445686340332, "step": 8490 }, { "epoch": 0.7421806744714311, "grad_norm": 0.4894572340147781, "learning_rate": 9.352734816290062e-06, "loss": 0.305023717880249, "step": 8495 }, { "epoch": 0.7426175082998427, "grad_norm": 0.531484729474869, "learning_rate": 9.35148366216457e-06, "loss": 0.2822196245193481, "step": 8500 }, { "epoch": 0.7430543421282544, "grad_norm": 0.5332863820112066, "learning_rate": 9.350231383828385e-06, "loss": 0.32847013473510744, "step": 8505 }, { "epoch": 0.7434911759566661, "grad_norm": 0.4997837753948762, "learning_rate": 9.348977981605034e-06, "loss": 0.3153341054916382, "step": 8510 }, { "epoch": 0.7439280097850778, "grad_norm": 0.48751815839879836, "learning_rate": 9.347723455818336e-06, "loss": 0.3163304328918457, "step": 8515 }, { "epoch": 0.7443648436134894, "grad_norm": 0.5317846980636756, "learning_rate": 9.346467806792402e-06, "loss": 0.2990793943405151, "step": 8520 }, { "epoch": 0.7448016774419011, "grad_norm": 0.515204658594752, "learning_rate": 9.345211034851623e-06, "loss": 0.29246320724487307, "step": 8525 }, { "epoch": 0.7452385112703128, "grad_norm": 0.569179123623869, "learning_rate": 9.343953140320693e-06, "loss": 0.2995848417282104, "step": 8530 }, { "epoch": 0.7456753450987245, "grad_norm": 0.5364728409891986, "learning_rate": 9.34269412352459e-06, "loss": 0.2610916614532471, "step": 8535 }, { "epoch": 0.7461121789271361, "grad_norm": 0.5103920189368495, "learning_rate": 9.341433984788579e-06, "loss": 0.2996052265167236, "step": 8540 }, { "epoch": 0.7465490127555477, "grad_norm": 0.5050302769725774, "learning_rate": 9.34017272443822e-06, "loss": 0.2793650388717651, "step": 8545 }, { "epoch": 0.7469858465839595, "grad_norm": 0.5505786571924016, "learning_rate": 9.338910342799365e-06, "loss": 0.3180132627487183, "step": 8550 }, { "epoch": 0.7474226804123711, "grad_norm": 0.5150347730461587, "learning_rate": 9.337646840198146e-06, "loss": 0.30477521419525144, "step": 8555 }, { "epoch": 0.7478595142407828, "grad_norm": 0.558978777490847, "learning_rate": 9.336382216960994e-06, "loss": 0.2896077871322632, "step": 8560 }, { "epoch": 0.7482963480691944, "grad_norm": 0.47970185037821444, "learning_rate": 9.335116473414626e-06, "loss": 0.28867301940917967, "step": 8565 }, { "epoch": 0.7487331818976062, "grad_norm": 0.45192272735641503, "learning_rate": 9.333849609886044e-06, "loss": 0.2784199953079224, "step": 8570 }, { "epoch": 0.7491700157260178, "grad_norm": 0.5462154519775564, "learning_rate": 9.332581626702548e-06, "loss": 0.3143450260162354, "step": 8575 }, { "epoch": 0.7496068495544295, "grad_norm": 0.5620795193890485, "learning_rate": 9.331312524191721e-06, "loss": 0.2721808910369873, "step": 8580 }, { "epoch": 0.7500436833828412, "grad_norm": 0.5040461580483534, "learning_rate": 9.330042302681437e-06, "loss": 0.2954395771026611, "step": 8585 }, { "epoch": 0.7504805172112529, "grad_norm": 0.5158732404436673, "learning_rate": 9.32877096249986e-06, "loss": 0.28891739845275877, "step": 8590 }, { "epoch": 0.7509173510396645, "grad_norm": 0.4908637338977104, "learning_rate": 9.32749850397544e-06, "loss": 0.2923166751861572, "step": 8595 }, { "epoch": 0.7513541848680761, "grad_norm": 0.4528726825170559, "learning_rate": 9.326224927436923e-06, "loss": 0.3130697250366211, "step": 8600 }, { "epoch": 0.7517910186964879, "grad_norm": 0.47783640778190506, "learning_rate": 9.324950233213332e-06, "loss": 0.27835683822631835, "step": 8605 }, { "epoch": 0.7522278525248995, "grad_norm": 0.5155867865627521, "learning_rate": 9.323674421633991e-06, "loss": 0.27945446968078613, "step": 8610 }, { "epoch": 0.7526646863533112, "grad_norm": 0.43775178533354864, "learning_rate": 9.322397493028506e-06, "loss": 0.3346781492233276, "step": 8615 }, { "epoch": 0.7531015201817228, "grad_norm": 0.6111472857788844, "learning_rate": 9.321119447726772e-06, "loss": 0.32984333038330077, "step": 8620 }, { "epoch": 0.7535383540101346, "grad_norm": 0.5336679315619542, "learning_rate": 9.319840286058974e-06, "loss": 0.28543150424957275, "step": 8625 }, { "epoch": 0.7539751878385462, "grad_norm": 0.5258669628234477, "learning_rate": 9.318560008355587e-06, "loss": 0.3123094320297241, "step": 8630 }, { "epoch": 0.7544120216669579, "grad_norm": 0.5870671896892645, "learning_rate": 9.317278614947368e-06, "loss": 0.28314876556396484, "step": 8635 }, { "epoch": 0.7548488554953695, "grad_norm": 0.6740130223970114, "learning_rate": 9.315996106165369e-06, "loss": 0.3143348217010498, "step": 8640 }, { "epoch": 0.7552856893237813, "grad_norm": 0.6026193792724935, "learning_rate": 9.314712482340927e-06, "loss": 0.28630404472351073, "step": 8645 }, { "epoch": 0.7557225231521929, "grad_norm": 0.5280789038950624, "learning_rate": 9.313427743805669e-06, "loss": 0.3054490089416504, "step": 8650 }, { "epoch": 0.7561593569806045, "grad_norm": 0.5808306946058378, "learning_rate": 9.312141890891506e-06, "loss": 0.3088539600372314, "step": 8655 }, { "epoch": 0.7565961908090163, "grad_norm": 0.49877621588305654, "learning_rate": 9.310854923930642e-06, "loss": 0.27915067672729493, "step": 8660 }, { "epoch": 0.7570330246374279, "grad_norm": 0.4537449390297727, "learning_rate": 9.309566843255565e-06, "loss": 0.29294595718383787, "step": 8665 }, { "epoch": 0.7574698584658396, "grad_norm": 0.49367930552282513, "learning_rate": 9.308277649199054e-06, "loss": 0.3213660717010498, "step": 8670 }, { "epoch": 0.7579066922942512, "grad_norm": 0.5489769324145035, "learning_rate": 9.30698734209417e-06, "loss": 0.3201817274093628, "step": 8675 }, { "epoch": 0.758343526122663, "grad_norm": 0.5503504190885447, "learning_rate": 9.305695922274267e-06, "loss": 0.30341272354125975, "step": 8680 }, { "epoch": 0.7587803599510746, "grad_norm": 0.6313606917904262, "learning_rate": 9.304403390072987e-06, "loss": 0.30737128257751467, "step": 8685 }, { "epoch": 0.7592171937794863, "grad_norm": 0.5604572662435346, "learning_rate": 9.303109745824255e-06, "loss": 0.29983344078063967, "step": 8690 }, { "epoch": 0.7596540276078979, "grad_norm": 0.5511111071556777, "learning_rate": 9.301814989862285e-06, "loss": 0.29681053161621096, "step": 8695 }, { "epoch": 0.7600908614363097, "grad_norm": 0.4315521802986221, "learning_rate": 9.300519122521582e-06, "loss": 0.31261491775512695, "step": 8700 }, { "epoch": 0.7605276952647213, "grad_norm": 0.548270373717554, "learning_rate": 9.299222144136928e-06, "loss": 0.2808777570724487, "step": 8705 }, { "epoch": 0.760964529093133, "grad_norm": 0.5268758033114386, "learning_rate": 9.297924055043406e-06, "loss": 0.2865268707275391, "step": 8710 }, { "epoch": 0.7614013629215447, "grad_norm": 0.4673297301760444, "learning_rate": 9.296624855576375e-06, "loss": 0.2918189287185669, "step": 8715 }, { "epoch": 0.7618381967499563, "grad_norm": 0.4616454306783117, "learning_rate": 9.295324546071487e-06, "loss": 0.28897294998168943, "step": 8720 }, { "epoch": 0.762275030578368, "grad_norm": 0.4928290834354302, "learning_rate": 9.294023126864675e-06, "loss": 0.3005611658096313, "step": 8725 }, { "epoch": 0.7627118644067796, "grad_norm": 0.6339454294843964, "learning_rate": 9.292720598292164e-06, "loss": 0.293149471282959, "step": 8730 }, { "epoch": 0.7631486982351914, "grad_norm": 0.48788013136404307, "learning_rate": 9.291416960690465e-06, "loss": 0.2979804754257202, "step": 8735 }, { "epoch": 0.763585532063603, "grad_norm": 0.45185595715858545, "learning_rate": 9.290112214396372e-06, "loss": 0.2765189647674561, "step": 8740 }, { "epoch": 0.7640223658920147, "grad_norm": 0.5388570047382291, "learning_rate": 9.28880635974697e-06, "loss": 0.285162091255188, "step": 8745 }, { "epoch": 0.7644591997204263, "grad_norm": 0.49938636163521344, "learning_rate": 9.287499397079626e-06, "loss": 0.28013787269592283, "step": 8750 }, { "epoch": 0.7648960335488381, "grad_norm": 0.5205308150004878, "learning_rate": 9.286191326731995e-06, "loss": 0.30118789672851565, "step": 8755 }, { "epoch": 0.7653328673772497, "grad_norm": 0.4989315611000833, "learning_rate": 9.28488214904202e-06, "loss": 0.29864523410797117, "step": 8760 }, { "epoch": 0.7657697012056613, "grad_norm": 0.4618905869941608, "learning_rate": 9.28357186434793e-06, "loss": 0.2667813777923584, "step": 8765 }, { "epoch": 0.766206535034073, "grad_norm": 0.4952188946636101, "learning_rate": 9.282260472988234e-06, "loss": 0.28930201530456545, "step": 8770 }, { "epoch": 0.7666433688624847, "grad_norm": 0.5819159403350075, "learning_rate": 9.280947975301735e-06, "loss": 0.3147002935409546, "step": 8775 }, { "epoch": 0.7670802026908964, "grad_norm": 0.691330407002587, "learning_rate": 9.279634371627516e-06, "loss": 0.2928340911865234, "step": 8780 }, { "epoch": 0.767517036519308, "grad_norm": 0.5765636856191054, "learning_rate": 9.27831966230495e-06, "loss": 0.28283860683441164, "step": 8785 }, { "epoch": 0.7679538703477198, "grad_norm": 0.5256243052770916, "learning_rate": 9.277003847673693e-06, "loss": 0.28580639362335203, "step": 8790 }, { "epoch": 0.7683907041761314, "grad_norm": 0.5155813136062488, "learning_rate": 9.275686928073687e-06, "loss": 0.31131882667541505, "step": 8795 }, { "epoch": 0.7688275380045431, "grad_norm": 0.462765200007644, "learning_rate": 9.274368903845158e-06, "loss": 0.30142874717712403, "step": 8800 }, { "epoch": 0.7692643718329547, "grad_norm": 0.5803156722197982, "learning_rate": 9.273049775328621e-06, "loss": 0.31960597038269045, "step": 8805 }, { "epoch": 0.7697012056613665, "grad_norm": 0.5024486987926072, "learning_rate": 9.271729542864875e-06, "loss": 0.3295454502105713, "step": 8810 }, { "epoch": 0.7701380394897781, "grad_norm": 0.48818027638865674, "learning_rate": 9.270408206795004e-06, "loss": 0.27219548225402834, "step": 8815 }, { "epoch": 0.7705748733181897, "grad_norm": 0.49295385454496465, "learning_rate": 9.269085767460371e-06, "loss": 0.29431474208831787, "step": 8820 }, { "epoch": 0.7710117071466014, "grad_norm": 0.5310348944492086, "learning_rate": 9.267762225202637e-06, "loss": 0.3445547103881836, "step": 8825 }, { "epoch": 0.7714485409750131, "grad_norm": 0.5326551717129075, "learning_rate": 9.266437580363736e-06, "loss": 0.3176601409912109, "step": 8830 }, { "epoch": 0.7718853748034248, "grad_norm": 0.45975481358603987, "learning_rate": 9.265111833285894e-06, "loss": 0.3213218688964844, "step": 8835 }, { "epoch": 0.7723222086318364, "grad_norm": 0.597162632193737, "learning_rate": 9.263784984311618e-06, "loss": 0.27585926055908205, "step": 8840 }, { "epoch": 0.7727590424602481, "grad_norm": 0.5542363356848153, "learning_rate": 9.262457033783701e-06, "loss": 0.25282821655273435, "step": 8845 }, { "epoch": 0.7731958762886598, "grad_norm": 0.5219981177407276, "learning_rate": 9.26112798204522e-06, "loss": 0.30734546184539796, "step": 8850 }, { "epoch": 0.7736327101170715, "grad_norm": 0.5500900040048503, "learning_rate": 9.25979782943954e-06, "loss": 0.28885159492492674, "step": 8855 }, { "epoch": 0.7740695439454831, "grad_norm": 0.5746164663217358, "learning_rate": 9.258466576310304e-06, "loss": 0.25620055198669434, "step": 8860 }, { "epoch": 0.7745063777738949, "grad_norm": 0.5013676921342946, "learning_rate": 9.257134223001443e-06, "loss": 0.31802973747253416, "step": 8865 }, { "epoch": 0.7749432116023065, "grad_norm": 0.4302694909111521, "learning_rate": 9.255800769857172e-06, "loss": 0.29840717315673826, "step": 8870 }, { "epoch": 0.7753800454307181, "grad_norm": 0.43546331868075794, "learning_rate": 9.254466217221992e-06, "loss": 0.2897490978240967, "step": 8875 }, { "epoch": 0.7758168792591298, "grad_norm": 0.43859119190768747, "learning_rate": 9.253130565440686e-06, "loss": 0.33298962116241454, "step": 8880 }, { "epoch": 0.7762537130875415, "grad_norm": 0.4790450277835275, "learning_rate": 9.251793814858319e-06, "loss": 0.3055426120758057, "step": 8885 }, { "epoch": 0.7766905469159532, "grad_norm": 0.6322261929510739, "learning_rate": 9.250455965820244e-06, "loss": 0.3224658489227295, "step": 8890 }, { "epoch": 0.7771273807443648, "grad_norm": 0.5395299737718793, "learning_rate": 9.249117018672093e-06, "loss": 0.3203346967697144, "step": 8895 }, { "epoch": 0.7775642145727765, "grad_norm": 0.43561130010323734, "learning_rate": 9.24777697375979e-06, "loss": 0.30083389282226564, "step": 8900 }, { "epoch": 0.7780010484011882, "grad_norm": 0.49552849749494804, "learning_rate": 9.24643583142953e-06, "loss": 0.2841221332550049, "step": 8905 }, { "epoch": 0.7784378822295999, "grad_norm": 0.6233064168899172, "learning_rate": 9.245093592027805e-06, "loss": 0.32137298583984375, "step": 8910 }, { "epoch": 0.7788747160580115, "grad_norm": 0.4565271815371453, "learning_rate": 9.24375025590138e-06, "loss": 0.3120421409606934, "step": 8915 }, { "epoch": 0.7793115498864231, "grad_norm": 0.5785411495788922, "learning_rate": 9.24240582339731e-06, "loss": 0.2984806537628174, "step": 8920 }, { "epoch": 0.7797483837148349, "grad_norm": 0.5050136814205372, "learning_rate": 9.241060294862929e-06, "loss": 0.28135857582092283, "step": 8925 }, { "epoch": 0.7801852175432465, "grad_norm": 0.525911237537615, "learning_rate": 9.239713670645855e-06, "loss": 0.3123392820358276, "step": 8930 }, { "epoch": 0.7806220513716582, "grad_norm": 0.5637074133509804, "learning_rate": 9.238365951093993e-06, "loss": 0.29008548259735106, "step": 8935 }, { "epoch": 0.7810588852000699, "grad_norm": 0.4883131558061438, "learning_rate": 9.237017136555524e-06, "loss": 0.3177931785583496, "step": 8940 }, { "epoch": 0.7814957190284816, "grad_norm": 0.52781050111936, "learning_rate": 9.235667227378919e-06, "loss": 0.29513702392578123, "step": 8945 }, { "epoch": 0.7819325528568932, "grad_norm": 0.5348816101040605, "learning_rate": 9.234316223912925e-06, "loss": 0.32437782287597655, "step": 8950 }, { "epoch": 0.7823693866853049, "grad_norm": 0.48590474544106993, "learning_rate": 9.23296412650658e-06, "loss": 0.29350886344909666, "step": 8955 }, { "epoch": 0.7828062205137166, "grad_norm": 0.504680182914782, "learning_rate": 9.231610935509197e-06, "loss": 0.2982045650482178, "step": 8960 }, { "epoch": 0.7832430543421283, "grad_norm": 0.5148791689120648, "learning_rate": 9.230256651270375e-06, "loss": 0.3054478168487549, "step": 8965 }, { "epoch": 0.7836798881705399, "grad_norm": 0.5313453716255355, "learning_rate": 9.228901274139993e-06, "loss": 0.28283350467681884, "step": 8970 }, { "epoch": 0.7841167219989515, "grad_norm": 0.673206555580915, "learning_rate": 9.227544804468217e-06, "loss": 0.30378429889678954, "step": 8975 }, { "epoch": 0.7845535558273633, "grad_norm": 0.5338090063690524, "learning_rate": 9.22618724260549e-06, "loss": 0.30886058807373046, "step": 8980 }, { "epoch": 0.784990389655775, "grad_norm": 0.4989222733627624, "learning_rate": 9.224828588902543e-06, "loss": 0.29489812850952146, "step": 8985 }, { "epoch": 0.7854272234841866, "grad_norm": 0.5409502878664946, "learning_rate": 9.223468843710384e-06, "loss": 0.2711538791656494, "step": 8990 }, { "epoch": 0.7858640573125983, "grad_norm": 0.4698463061399432, "learning_rate": 9.222108007380301e-06, "loss": 0.2992063522338867, "step": 8995 }, { "epoch": 0.78630089114101, "grad_norm": 0.5817184200585822, "learning_rate": 9.220746080263874e-06, "loss": 0.27962918281555177, "step": 9000 }, { "epoch": 0.7867377249694216, "grad_norm": 0.5223549507649661, "learning_rate": 9.219383062712954e-06, "loss": 0.2808762788772583, "step": 9005 }, { "epoch": 0.7871745587978333, "grad_norm": 0.47384663535025007, "learning_rate": 9.21801895507968e-06, "loss": 0.31287102699279784, "step": 9010 }, { "epoch": 0.787611392626245, "grad_norm": 0.4258567884225111, "learning_rate": 9.216653757716471e-06, "loss": 0.3016608238220215, "step": 9015 }, { "epoch": 0.7880482264546567, "grad_norm": 0.5081461396184017, "learning_rate": 9.215287470976025e-06, "loss": 0.3053655385971069, "step": 9020 }, { "epoch": 0.7884850602830683, "grad_norm": 0.6181703339353743, "learning_rate": 9.213920095211326e-06, "loss": 0.3093433380126953, "step": 9025 }, { "epoch": 0.78892189411148, "grad_norm": 0.515616610604319, "learning_rate": 9.212551630775637e-06, "loss": 0.28918981552124023, "step": 9030 }, { "epoch": 0.7893587279398917, "grad_norm": 0.4731568705777655, "learning_rate": 9.2111820780225e-06, "loss": 0.29020252227783205, "step": 9035 }, { "epoch": 0.7897955617683033, "grad_norm": 0.4380269314347323, "learning_rate": 9.209811437305743e-06, "loss": 0.30870025157928466, "step": 9040 }, { "epoch": 0.790232395596715, "grad_norm": 0.43210763877701974, "learning_rate": 9.20843970897947e-06, "loss": 0.30441741943359374, "step": 9045 }, { "epoch": 0.7906692294251266, "grad_norm": 0.5368745935834913, "learning_rate": 9.207066893398073e-06, "loss": 0.2824918508529663, "step": 9050 }, { "epoch": 0.7911060632535384, "grad_norm": 0.5736237446451283, "learning_rate": 9.205692990916217e-06, "loss": 0.29456701278686526, "step": 9055 }, { "epoch": 0.79154289708195, "grad_norm": 0.549405803121862, "learning_rate": 9.20431800188885e-06, "loss": 0.3207383155822754, "step": 9060 }, { "epoch": 0.7919797309103617, "grad_norm": 0.4950047395158497, "learning_rate": 9.202941926671205e-06, "loss": 0.31170430183410647, "step": 9065 }, { "epoch": 0.7924165647387734, "grad_norm": 0.49092777402079246, "learning_rate": 9.201564765618792e-06, "loss": 0.32594730854034426, "step": 9070 }, { "epoch": 0.7928533985671851, "grad_norm": 0.4978302748018089, "learning_rate": 9.200186519087401e-06, "loss": 0.2943294048309326, "step": 9075 }, { "epoch": 0.7932902323955967, "grad_norm": 0.6033173376066102, "learning_rate": 9.198807187433104e-06, "loss": 0.26759934425354004, "step": 9080 }, { "epoch": 0.7937270662240083, "grad_norm": 0.4805714968047806, "learning_rate": 9.197426771012252e-06, "loss": 0.27690463066101073, "step": 9085 }, { "epoch": 0.7941639000524201, "grad_norm": 0.5071951777419007, "learning_rate": 9.19604527018148e-06, "loss": 0.29041328430175783, "step": 9090 }, { "epoch": 0.7946007338808317, "grad_norm": 0.626200909127371, "learning_rate": 9.194662685297698e-06, "loss": 0.29894988536834716, "step": 9095 }, { "epoch": 0.7950375677092434, "grad_norm": 0.5691816236926036, "learning_rate": 9.193279016718097e-06, "loss": 0.266798734664917, "step": 9100 }, { "epoch": 0.795474401537655, "grad_norm": 0.48566109251194256, "learning_rate": 9.191894264800153e-06, "loss": 0.3419633388519287, "step": 9105 }, { "epoch": 0.7959112353660668, "grad_norm": 0.41537816070961964, "learning_rate": 9.190508429901616e-06, "loss": 0.264078426361084, "step": 9110 }, { "epoch": 0.7963480691944784, "grad_norm": 0.4625727791163825, "learning_rate": 9.189121512380518e-06, "loss": 0.28604860305786134, "step": 9115 }, { "epoch": 0.7967849030228901, "grad_norm": 0.431142305081851, "learning_rate": 9.187733512595172e-06, "loss": 0.2785694122314453, "step": 9120 }, { "epoch": 0.7972217368513017, "grad_norm": 0.6451192845881566, "learning_rate": 9.186344430904165e-06, "loss": 0.3034615993499756, "step": 9125 }, { "epoch": 0.7976585706797135, "grad_norm": 0.5205476811612046, "learning_rate": 9.184954267666376e-06, "loss": 0.3073569297790527, "step": 9130 }, { "epoch": 0.7980954045081251, "grad_norm": 0.481894597637011, "learning_rate": 9.183563023240946e-06, "loss": 0.3105189323425293, "step": 9135 }, { "epoch": 0.7985322383365367, "grad_norm": 0.5503568048565878, "learning_rate": 9.182170697987312e-06, "loss": 0.3032977104187012, "step": 9140 }, { "epoch": 0.7989690721649485, "grad_norm": 0.5419112044566007, "learning_rate": 9.180777292265177e-06, "loss": 0.29835972785949705, "step": 9145 }, { "epoch": 0.7994059059933601, "grad_norm": 0.426888629899615, "learning_rate": 9.179382806434534e-06, "loss": 0.2998703956604004, "step": 9150 }, { "epoch": 0.7998427398217718, "grad_norm": 0.4922663381970808, "learning_rate": 9.177987240855645e-06, "loss": 0.2691205978393555, "step": 9155 }, { "epoch": 0.8002795736501834, "grad_norm": 0.5111649820868623, "learning_rate": 9.176590595889061e-06, "loss": 0.27904682159423827, "step": 9160 }, { "epoch": 0.8007164074785952, "grad_norm": 0.5031817394858179, "learning_rate": 9.175192871895603e-06, "loss": 0.31877918243408204, "step": 9165 }, { "epoch": 0.8011532413070068, "grad_norm": 0.4863741835135263, "learning_rate": 9.173794069236377e-06, "loss": 0.28221564292907714, "step": 9170 }, { "epoch": 0.8015900751354185, "grad_norm": 0.5087881549932441, "learning_rate": 9.172394188272763e-06, "loss": 0.2914480209350586, "step": 9175 }, { "epoch": 0.8020269089638301, "grad_norm": 0.5432898235151081, "learning_rate": 9.170993229366424e-06, "loss": 0.27608590126037597, "step": 9180 }, { "epoch": 0.8024637427922419, "grad_norm": 1.0067239642042647, "learning_rate": 9.169591192879299e-06, "loss": 0.3165531396865845, "step": 9185 }, { "epoch": 0.8029005766206535, "grad_norm": 0.44234811966386384, "learning_rate": 9.168188079173603e-06, "loss": 0.31368117332458495, "step": 9190 }, { "epoch": 0.8033374104490651, "grad_norm": 0.7064889500138344, "learning_rate": 9.166783888611835e-06, "loss": 0.2946948051452637, "step": 9195 }, { "epoch": 0.8037742442774769, "grad_norm": 0.5385849702761191, "learning_rate": 9.165378621556768e-06, "loss": 0.290651273727417, "step": 9200 }, { "epoch": 0.8042110781058885, "grad_norm": 0.5769902339624573, "learning_rate": 9.163972278371453e-06, "loss": 0.3018660306930542, "step": 9205 }, { "epoch": 0.8046479119343002, "grad_norm": 0.48107694825035446, "learning_rate": 9.162564859419224e-06, "loss": 0.29888765811920165, "step": 9210 }, { "epoch": 0.8050847457627118, "grad_norm": 0.5168739561817938, "learning_rate": 9.161156365063686e-06, "loss": 0.322019100189209, "step": 9215 }, { "epoch": 0.8055215795911236, "grad_norm": 0.44194815016439165, "learning_rate": 9.159746795668728e-06, "loss": 0.3041735887527466, "step": 9220 }, { "epoch": 0.8059584134195352, "grad_norm": 0.4712391134957286, "learning_rate": 9.15833615159851e-06, "loss": 0.30842421054840086, "step": 9225 }, { "epoch": 0.8063952472479469, "grad_norm": 0.5679617770098674, "learning_rate": 9.156924433217477e-06, "loss": 0.3077878475189209, "step": 9230 }, { "epoch": 0.8068320810763585, "grad_norm": 0.4692870800243757, "learning_rate": 9.155511640890345e-06, "loss": 0.27698793411254885, "step": 9235 }, { "epoch": 0.8072689149047703, "grad_norm": 0.4908631689727386, "learning_rate": 9.154097774982113e-06, "loss": 0.2855446100234985, "step": 9240 }, { "epoch": 0.8077057487331819, "grad_norm": 0.620117487461405, "learning_rate": 9.152682835858054e-06, "loss": 0.29093360900878906, "step": 9245 }, { "epoch": 0.8081425825615935, "grad_norm": 0.5076567614151225, "learning_rate": 9.151266823883718e-06, "loss": 0.3196234941482544, "step": 9250 }, { "epoch": 0.8085794163900052, "grad_norm": 0.5439568551758829, "learning_rate": 9.149849739424937e-06, "loss": 0.32396669387817384, "step": 9255 }, { "epoch": 0.8090162502184169, "grad_norm": 0.5505744069237156, "learning_rate": 9.148431582847811e-06, "loss": 0.28449559211730957, "step": 9260 }, { "epoch": 0.8094530840468286, "grad_norm": 0.4579441934910385, "learning_rate": 9.147012354518725e-06, "loss": 0.28466434478759767, "step": 9265 }, { "epoch": 0.8098899178752402, "grad_norm": 0.526235519849724, "learning_rate": 9.14559205480434e-06, "loss": 0.2958641529083252, "step": 9270 }, { "epoch": 0.810326751703652, "grad_norm": 0.5024295241606118, "learning_rate": 9.14417068407159e-06, "loss": 0.3055263519287109, "step": 9275 }, { "epoch": 0.8107635855320636, "grad_norm": 0.6052440461386995, "learning_rate": 9.142748242687687e-06, "loss": 0.2904601335525513, "step": 9280 }, { "epoch": 0.8112004193604753, "grad_norm": 0.4029824465464888, "learning_rate": 9.141324731020122e-06, "loss": 0.3229323387145996, "step": 9285 }, { "epoch": 0.8116372531888869, "grad_norm": 0.46854995354496076, "learning_rate": 9.139900149436658e-06, "loss": 0.28477447032928466, "step": 9290 }, { "epoch": 0.8120740870172987, "grad_norm": 0.6085793081863974, "learning_rate": 9.138474498305344e-06, "loss": 0.30689315795898436, "step": 9295 }, { "epoch": 0.8125109208457103, "grad_norm": 0.5126717621836615, "learning_rate": 9.13704777799449e-06, "loss": 0.2864444017410278, "step": 9300 }, { "epoch": 0.812947754674122, "grad_norm": 0.6002148908316441, "learning_rate": 9.135619988872695e-06, "loss": 0.2953790187835693, "step": 9305 }, { "epoch": 0.8133845885025336, "grad_norm": 0.5125368682584567, "learning_rate": 9.134191131308831e-06, "loss": 0.299599289894104, "step": 9310 }, { "epoch": 0.8138214223309453, "grad_norm": 0.4866205733562623, "learning_rate": 9.132761205672043e-06, "loss": 0.30605666637420653, "step": 9315 }, { "epoch": 0.814258256159357, "grad_norm": 0.5738451395550329, "learning_rate": 9.131330212331755e-06, "loss": 0.2728219747543335, "step": 9320 }, { "epoch": 0.8146950899877686, "grad_norm": 0.49762984409175814, "learning_rate": 9.129898151657666e-06, "loss": 0.27495291233062746, "step": 9325 }, { "epoch": 0.8151319238161803, "grad_norm": 0.45176525267216133, "learning_rate": 9.128465024019749e-06, "loss": 0.29404029846191404, "step": 9330 }, { "epoch": 0.815568757644592, "grad_norm": 0.5190718705465297, "learning_rate": 9.127030829788256e-06, "loss": 0.3092677593231201, "step": 9335 }, { "epoch": 0.8160055914730037, "grad_norm": 0.49865163809661167, "learning_rate": 9.125595569333708e-06, "loss": 0.29395604133605957, "step": 9340 }, { "epoch": 0.8164424253014153, "grad_norm": 0.4802076721337686, "learning_rate": 9.124159243026913e-06, "loss": 0.2713152408599854, "step": 9345 }, { "epoch": 0.8168792591298271, "grad_norm": 0.4603989079693913, "learning_rate": 9.122721851238943e-06, "loss": 0.2836842775344849, "step": 9350 }, { "epoch": 0.8173160929582387, "grad_norm": 0.48422026186138406, "learning_rate": 9.121283394341151e-06, "loss": 0.31168928146362307, "step": 9355 }, { "epoch": 0.8177529267866503, "grad_norm": 0.6025616699335057, "learning_rate": 9.119843872705165e-06, "loss": 0.29638779163360596, "step": 9360 }, { "epoch": 0.818189760615062, "grad_norm": 0.4320487534126402, "learning_rate": 9.118403286702886e-06, "loss": 0.30595712661743163, "step": 9365 }, { "epoch": 0.8186265944434737, "grad_norm": 0.4999294781365495, "learning_rate": 9.11696163670649e-06, "loss": 0.27115085124969485, "step": 9370 }, { "epoch": 0.8190634282718854, "grad_norm": 0.5236893015558248, "learning_rate": 9.11551892308843e-06, "loss": 0.2724335193634033, "step": 9375 }, { "epoch": 0.819500262100297, "grad_norm": 0.5078914061851109, "learning_rate": 9.114075146221436e-06, "loss": 0.2676672458648682, "step": 9380 }, { "epoch": 0.8199370959287087, "grad_norm": 0.5043492157652126, "learning_rate": 9.112630306478501e-06, "loss": 0.3080162048339844, "step": 9385 }, { "epoch": 0.8203739297571204, "grad_norm": 0.4786383035159689, "learning_rate": 9.11118440423291e-06, "loss": 0.277081823348999, "step": 9390 }, { "epoch": 0.8208107635855321, "grad_norm": 0.5200150394069893, "learning_rate": 9.109737439858205e-06, "loss": 0.29818105697631836, "step": 9395 }, { "epoch": 0.8212475974139437, "grad_norm": 0.48506459882756675, "learning_rate": 9.108289413728219e-06, "loss": 0.29517674446105957, "step": 9400 }, { "epoch": 0.8216844312423555, "grad_norm": 0.5044346025738684, "learning_rate": 9.106840326217047e-06, "loss": 0.29332647323608396, "step": 9405 }, { "epoch": 0.8221212650707671, "grad_norm": 0.5659187826484066, "learning_rate": 9.105390177699061e-06, "loss": 0.3089465141296387, "step": 9410 }, { "epoch": 0.8225580988991787, "grad_norm": 0.509513631164637, "learning_rate": 9.103938968548912e-06, "loss": 0.2844001293182373, "step": 9415 }, { "epoch": 0.8229949327275904, "grad_norm": 0.6723627028025579, "learning_rate": 9.10248669914152e-06, "loss": 0.3076247453689575, "step": 9420 }, { "epoch": 0.8234317665560021, "grad_norm": 0.5229543772666685, "learning_rate": 9.10103336985208e-06, "loss": 0.31901285648345945, "step": 9425 }, { "epoch": 0.8238686003844138, "grad_norm": 0.4702687912443915, "learning_rate": 9.099578981056062e-06, "loss": 0.2803272008895874, "step": 9430 }, { "epoch": 0.8243054342128254, "grad_norm": 0.5739547576549033, "learning_rate": 9.098123533129208e-06, "loss": 0.2878105640411377, "step": 9435 }, { "epoch": 0.8247422680412371, "grad_norm": 0.5322816026313968, "learning_rate": 9.096667026447535e-06, "loss": 0.2836915969848633, "step": 9440 }, { "epoch": 0.8251791018696488, "grad_norm": 0.5168811984425601, "learning_rate": 9.095209461387334e-06, "loss": 0.28948369026184084, "step": 9445 }, { "epoch": 0.8256159356980605, "grad_norm": 0.5373233580651874, "learning_rate": 9.093750838325166e-06, "loss": 0.3011425018310547, "step": 9450 }, { "epoch": 0.8260527695264721, "grad_norm": 0.7200918417564598, "learning_rate": 9.092291157637871e-06, "loss": 0.30965001583099366, "step": 9455 }, { "epoch": 0.8264896033548838, "grad_norm": 0.5946012313921426, "learning_rate": 9.090830419702559e-06, "loss": 0.31653897762298583, "step": 9460 }, { "epoch": 0.8269264371832955, "grad_norm": 0.48631479859870186, "learning_rate": 9.089368624896611e-06, "loss": 0.2890730381011963, "step": 9465 }, { "epoch": 0.8273632710117071, "grad_norm": 0.5849860134851976, "learning_rate": 9.087905773597685e-06, "loss": 0.28948078155517576, "step": 9470 }, { "epoch": 0.8278001048401188, "grad_norm": 0.5033661871730505, "learning_rate": 9.08644186618371e-06, "loss": 0.299122953414917, "step": 9475 }, { "epoch": 0.8282369386685305, "grad_norm": 0.5981241493977453, "learning_rate": 9.084976903032887e-06, "loss": 0.31320302486419677, "step": 9480 }, { "epoch": 0.8286737724969422, "grad_norm": 0.531273131488003, "learning_rate": 9.083510884523694e-06, "loss": 0.26456222534179685, "step": 9485 }, { "epoch": 0.8291106063253538, "grad_norm": 0.5667963646936459, "learning_rate": 9.082043811034873e-06, "loss": 0.30763969421386717, "step": 9490 }, { "epoch": 0.8295474401537655, "grad_norm": 0.5864393805808308, "learning_rate": 9.08057568294545e-06, "loss": 0.28388564586639403, "step": 9495 }, { "epoch": 0.8299842739821772, "grad_norm": 0.5099573039121085, "learning_rate": 9.079106500634713e-06, "loss": 0.2784921169281006, "step": 9500 }, { "epoch": 0.8304211078105889, "grad_norm": 0.5273102934561665, "learning_rate": 9.07763626448223e-06, "loss": 0.2907400131225586, "step": 9505 }, { "epoch": 0.8308579416390005, "grad_norm": 0.5208071027273813, "learning_rate": 9.076164974867838e-06, "loss": 0.29044675827026367, "step": 9510 }, { "epoch": 0.8312947754674122, "grad_norm": 0.6633366617129018, "learning_rate": 9.074692632171644e-06, "loss": 0.2617527961730957, "step": 9515 }, { "epoch": 0.8317316092958239, "grad_norm": 0.47038286448919775, "learning_rate": 9.073219236774032e-06, "loss": 0.30174932479858396, "step": 9520 }, { "epoch": 0.8321684431242355, "grad_norm": 0.43271462015465884, "learning_rate": 9.071744789055655e-06, "loss": 0.28715875148773196, "step": 9525 }, { "epoch": 0.8326052769526472, "grad_norm": 0.5339941728941339, "learning_rate": 9.070269289397435e-06, "loss": 0.2803659200668335, "step": 9530 }, { "epoch": 0.8330421107810588, "grad_norm": 0.4294741847264774, "learning_rate": 9.068792738180572e-06, "loss": 0.31589205265045167, "step": 9535 }, { "epoch": 0.8334789446094706, "grad_norm": 0.6261044728779438, "learning_rate": 9.067315135786534e-06, "loss": 0.30124671459198, "step": 9540 }, { "epoch": 0.8339157784378822, "grad_norm": 1.8457331773504002, "learning_rate": 9.065836482597061e-06, "loss": 0.30704877376556394, "step": 9545 }, { "epoch": 0.8343526122662939, "grad_norm": 0.5648234603322343, "learning_rate": 9.064356778994166e-06, "loss": 0.31539416313171387, "step": 9550 }, { "epoch": 0.8347894460947056, "grad_norm": 0.5333310729205455, "learning_rate": 9.06287602536013e-06, "loss": 0.3128191947937012, "step": 9555 }, { "epoch": 0.8352262799231173, "grad_norm": 0.46463660755324815, "learning_rate": 9.061394222077508e-06, "loss": 0.30561351776123047, "step": 9560 }, { "epoch": 0.8356631137515289, "grad_norm": 0.5892156717184305, "learning_rate": 9.059911369529126e-06, "loss": 0.3094494104385376, "step": 9565 }, { "epoch": 0.8360999475799406, "grad_norm": 0.5390168913633064, "learning_rate": 9.058427468098081e-06, "loss": 0.3000635623931885, "step": 9570 }, { "epoch": 0.8365367814083523, "grad_norm": 0.5985928778233794, "learning_rate": 9.056942518167741e-06, "loss": 0.2696832180023193, "step": 9575 }, { "epoch": 0.836973615236764, "grad_norm": 0.5471441766823764, "learning_rate": 9.055456520121743e-06, "loss": 0.2943007230758667, "step": 9580 }, { "epoch": 0.8374104490651756, "grad_norm": 0.5768008247735374, "learning_rate": 9.053969474343997e-06, "loss": 0.3141876220703125, "step": 9585 }, { "epoch": 0.8378472828935872, "grad_norm": 0.5231632288578956, "learning_rate": 9.052481381218682e-06, "loss": 0.3127879619598389, "step": 9590 }, { "epoch": 0.838284116721999, "grad_norm": 0.6315164933651263, "learning_rate": 9.050992241130251e-06, "loss": 0.2557543754577637, "step": 9595 }, { "epoch": 0.8387209505504106, "grad_norm": 0.48102966012246273, "learning_rate": 9.049502054463421e-06, "loss": 0.30200092792510985, "step": 9600 }, { "epoch": 0.8391577843788223, "grad_norm": 0.46306035868001966, "learning_rate": 9.04801082160319e-06, "loss": 0.32693278789520264, "step": 9605 }, { "epoch": 0.839594618207234, "grad_norm": 0.5407154444904336, "learning_rate": 9.046518542934812e-06, "loss": 0.29235541820526123, "step": 9610 }, { "epoch": 0.8400314520356457, "grad_norm": 0.4604817128144195, "learning_rate": 9.045025218843822e-06, "loss": 0.260449743270874, "step": 9615 }, { "epoch": 0.8404682858640573, "grad_norm": 0.5173568745839394, "learning_rate": 9.043530849716025e-06, "loss": 0.3049675226211548, "step": 9620 }, { "epoch": 0.840905119692469, "grad_norm": 0.5604398721774003, "learning_rate": 9.042035435937488e-06, "loss": 0.2844096660614014, "step": 9625 }, { "epoch": 0.8413419535208807, "grad_norm": 0.5084569054460449, "learning_rate": 9.040538977894557e-06, "loss": 0.27511091232299806, "step": 9630 }, { "epoch": 0.8417787873492923, "grad_norm": 0.48361060431166303, "learning_rate": 9.03904147597384e-06, "loss": 0.2934545993804932, "step": 9635 }, { "epoch": 0.842215621177704, "grad_norm": 0.6342310303571166, "learning_rate": 9.03754293056222e-06, "loss": 0.28328852653503417, "step": 9640 }, { "epoch": 0.8426524550061156, "grad_norm": 0.5750750746294685, "learning_rate": 9.036043342046848e-06, "loss": 0.2844132423400879, "step": 9645 }, { "epoch": 0.8430892888345274, "grad_norm": 0.4963374291122341, "learning_rate": 9.034542710815144e-06, "loss": 0.2787686824798584, "step": 9650 }, { "epoch": 0.843526122662939, "grad_norm": 0.5159140692159055, "learning_rate": 9.033041037254801e-06, "loss": 0.2730738639831543, "step": 9655 }, { "epoch": 0.8439629564913507, "grad_norm": 0.46401824333386765, "learning_rate": 9.031538321753771e-06, "loss": 0.29308667182922366, "step": 9660 }, { "epoch": 0.8443997903197623, "grad_norm": 0.5770896494646588, "learning_rate": 9.030034564700289e-06, "loss": 0.27127861976623535, "step": 9665 }, { "epoch": 0.8448366241481741, "grad_norm": 0.5294937567258916, "learning_rate": 9.02852976648285e-06, "loss": 0.3152534246444702, "step": 9670 }, { "epoch": 0.8452734579765857, "grad_norm": 0.5713382299425901, "learning_rate": 9.027023927490218e-06, "loss": 0.27993273735046387, "step": 9675 }, { "epoch": 0.8457102918049973, "grad_norm": 0.4754304876208857, "learning_rate": 9.025517048111433e-06, "loss": 0.2899031639099121, "step": 9680 }, { "epoch": 0.8461471256334091, "grad_norm": 0.6748453935074621, "learning_rate": 9.024009128735794e-06, "loss": 0.30723042488098146, "step": 9685 }, { "epoch": 0.8465839594618207, "grad_norm": 0.4933784088194356, "learning_rate": 9.022500169752874e-06, "loss": 0.2938103675842285, "step": 9690 }, { "epoch": 0.8470207932902324, "grad_norm": 0.4873686432477035, "learning_rate": 9.020990171552518e-06, "loss": 0.2828935146331787, "step": 9695 }, { "epoch": 0.847457627118644, "grad_norm": 0.5404616018358451, "learning_rate": 9.019479134524834e-06, "loss": 0.3088389873504639, "step": 9700 }, { "epoch": 0.8478944609470558, "grad_norm": 0.6281911972038614, "learning_rate": 9.0179670590602e-06, "loss": 0.26628737449645995, "step": 9705 }, { "epoch": 0.8483312947754674, "grad_norm": 0.4254930143792816, "learning_rate": 9.016453945549261e-06, "loss": 0.29316000938415526, "step": 9710 }, { "epoch": 0.8487681286038791, "grad_norm": 0.5572487253806581, "learning_rate": 9.014939794382935e-06, "loss": 0.2960775375366211, "step": 9715 }, { "epoch": 0.8492049624322907, "grad_norm": 0.46602103339675327, "learning_rate": 9.013424605952401e-06, "loss": 0.29908578395843505, "step": 9720 }, { "epoch": 0.8496417962607025, "grad_norm": 0.4534844338902348, "learning_rate": 9.011908380649113e-06, "loss": 0.28446125984191895, "step": 9725 }, { "epoch": 0.8500786300891141, "grad_norm": 0.4918081933987608, "learning_rate": 9.010391118864785e-06, "loss": 0.2849869728088379, "step": 9730 }, { "epoch": 0.8505154639175257, "grad_norm": 0.47692445745441686, "learning_rate": 9.008872820991408e-06, "loss": 0.2719141960144043, "step": 9735 }, { "epoch": 0.8509522977459374, "grad_norm": 0.5405993521119465, "learning_rate": 9.007353487421233e-06, "loss": 0.29302937984466554, "step": 9740 }, { "epoch": 0.8513891315743491, "grad_norm": 0.44317240259744556, "learning_rate": 9.005833118546782e-06, "loss": 0.3029485702514648, "step": 9745 }, { "epoch": 0.8518259654027608, "grad_norm": 0.44408724219038753, "learning_rate": 9.004311714760844e-06, "loss": 0.28754544258117676, "step": 9750 }, { "epoch": 0.8522627992311724, "grad_norm": 0.5627851360714115, "learning_rate": 9.002789276456478e-06, "loss": 0.2632430553436279, "step": 9755 }, { "epoch": 0.8526996330595842, "grad_norm": 0.4704278886257149, "learning_rate": 9.001265804027005e-06, "loss": 0.2973446846008301, "step": 9760 }, { "epoch": 0.8531364668879958, "grad_norm": 0.7662991720918126, "learning_rate": 8.999741297866016e-06, "loss": 0.2886791229248047, "step": 9765 }, { "epoch": 0.8535733007164075, "grad_norm": 0.47852197821547116, "learning_rate": 8.998215758367369e-06, "loss": 0.28110542297363283, "step": 9770 }, { "epoch": 0.8540101345448191, "grad_norm": 0.6733288087032089, "learning_rate": 8.996689185925187e-06, "loss": 0.30501327514648435, "step": 9775 }, { "epoch": 0.8544469683732309, "grad_norm": 0.5665977323483086, "learning_rate": 8.995161580933867e-06, "loss": 0.2860919952392578, "step": 9780 }, { "epoch": 0.8548838022016425, "grad_norm": 0.639955912204246, "learning_rate": 8.993632943788063e-06, "loss": 0.278171443939209, "step": 9785 }, { "epoch": 0.8553206360300541, "grad_norm": 0.6045913333132795, "learning_rate": 8.9921032748827e-06, "loss": 0.2675478935241699, "step": 9790 }, { "epoch": 0.8557574698584658, "grad_norm": 0.4265642723044555, "learning_rate": 8.990572574612972e-06, "loss": 0.2922807216644287, "step": 9795 }, { "epoch": 0.8561943036868775, "grad_norm": 0.5833044652666969, "learning_rate": 8.989040843374337e-06, "loss": 0.292206335067749, "step": 9800 }, { "epoch": 0.8566311375152892, "grad_norm": 0.5670714958443573, "learning_rate": 8.987508081562517e-06, "loss": 0.3007469177246094, "step": 9805 }, { "epoch": 0.8570679713437008, "grad_norm": 0.5655477705032426, "learning_rate": 8.985974289573506e-06, "loss": 0.27672324180603025, "step": 9810 }, { "epoch": 0.8575048051721126, "grad_norm": 0.4412415421704956, "learning_rate": 8.984439467803559e-06, "loss": 0.29488799571990965, "step": 9815 }, { "epoch": 0.8579416390005242, "grad_norm": 0.5447307619182512, "learning_rate": 8.982903616649198e-06, "loss": 0.29896395206451415, "step": 9820 }, { "epoch": 0.8583784728289359, "grad_norm": 0.4373942622646699, "learning_rate": 8.981366736507213e-06, "loss": 0.27466020584106443, "step": 9825 }, { "epoch": 0.8588153066573475, "grad_norm": 0.47094765488111767, "learning_rate": 8.979828827774659e-06, "loss": 0.29214959144592284, "step": 9830 }, { "epoch": 0.8592521404857593, "grad_norm": 0.4868803991669792, "learning_rate": 8.978289890848857e-06, "loss": 0.302882719039917, "step": 9835 }, { "epoch": 0.8596889743141709, "grad_norm": 0.5569029093526575, "learning_rate": 8.976749926127392e-06, "loss": 0.29007325172424314, "step": 9840 }, { "epoch": 0.8601258081425825, "grad_norm": 0.4746085055793632, "learning_rate": 8.975208934008115e-06, "loss": 0.297971773147583, "step": 9845 }, { "epoch": 0.8605626419709942, "grad_norm": 0.5489435592540078, "learning_rate": 8.973666914889145e-06, "loss": 0.28307766914367677, "step": 9850 }, { "epoch": 0.8609994757994059, "grad_norm": 0.529664065369372, "learning_rate": 8.972123869168862e-06, "loss": 0.30622282028198244, "step": 9855 }, { "epoch": 0.8614363096278176, "grad_norm": 0.5293064216476124, "learning_rate": 8.970579797245917e-06, "loss": 0.3274893045425415, "step": 9860 }, { "epoch": 0.8618731434562292, "grad_norm": 0.5110132417940825, "learning_rate": 8.969034699519218e-06, "loss": 0.28120625019073486, "step": 9865 }, { "epoch": 0.8623099772846409, "grad_norm": 0.5053983163546418, "learning_rate": 8.967488576387949e-06, "loss": 0.29333019256591797, "step": 9870 }, { "epoch": 0.8627468111130526, "grad_norm": 0.5097295753669727, "learning_rate": 8.965941428251546e-06, "loss": 0.2914262771606445, "step": 9875 }, { "epoch": 0.8631836449414643, "grad_norm": 0.5272964186777498, "learning_rate": 8.96439325550972e-06, "loss": 0.2846785068511963, "step": 9880 }, { "epoch": 0.8636204787698759, "grad_norm": 0.5349728076979288, "learning_rate": 8.962844058562444e-06, "loss": 0.2895301103591919, "step": 9885 }, { "epoch": 0.8640573125982877, "grad_norm": 0.5719772502338318, "learning_rate": 8.961293837809952e-06, "loss": 0.2857767105102539, "step": 9890 }, { "epoch": 0.8644941464266993, "grad_norm": 0.5165479811497082, "learning_rate": 8.959742593652748e-06, "loss": 0.2666506052017212, "step": 9895 }, { "epoch": 0.864930980255111, "grad_norm": 0.4281934110860751, "learning_rate": 8.958190326491595e-06, "loss": 0.26433520317077636, "step": 9900 }, { "epoch": 0.8653678140835226, "grad_norm": 0.5174640512360321, "learning_rate": 8.956637036727524e-06, "loss": 0.2807903289794922, "step": 9905 }, { "epoch": 0.8658046479119343, "grad_norm": 0.5466673573683437, "learning_rate": 8.95508272476183e-06, "loss": 0.3116171836853027, "step": 9910 }, { "epoch": 0.866241481740346, "grad_norm": 0.5239911139517776, "learning_rate": 8.953527390996072e-06, "loss": 0.2943114280700684, "step": 9915 }, { "epoch": 0.8666783155687576, "grad_norm": 0.6656049669989575, "learning_rate": 8.95197103583207e-06, "loss": 0.2909384250640869, "step": 9920 }, { "epoch": 0.8671151493971693, "grad_norm": 0.5686384544810765, "learning_rate": 8.950413659671911e-06, "loss": 0.26167998313903806, "step": 9925 }, { "epoch": 0.867551983225581, "grad_norm": 0.4229184149331338, "learning_rate": 8.948855262917945e-06, "loss": 0.25649571418762207, "step": 9930 }, { "epoch": 0.8679888170539927, "grad_norm": 0.514458542507941, "learning_rate": 8.947295845972783e-06, "loss": 0.30271594524383544, "step": 9935 }, { "epoch": 0.8684256508824043, "grad_norm": 0.5314181052221946, "learning_rate": 8.945735409239309e-06, "loss": 0.30550398826599123, "step": 9940 }, { "epoch": 0.868862484710816, "grad_norm": 0.6174344557872538, "learning_rate": 8.944173953120657e-06, "loss": 0.30131778717041013, "step": 9945 }, { "epoch": 0.8692993185392277, "grad_norm": 0.4423676874148629, "learning_rate": 8.942611478020231e-06, "loss": 0.2722295045852661, "step": 9950 }, { "epoch": 0.8697361523676393, "grad_norm": 0.4492207612248739, "learning_rate": 8.941047984341704e-06, "loss": 0.29335243701934816, "step": 9955 }, { "epoch": 0.870172986196051, "grad_norm": 0.4890947305108611, "learning_rate": 8.939483472489001e-06, "loss": 0.2960677146911621, "step": 9960 }, { "epoch": 0.8706098200244627, "grad_norm": 0.584296605518741, "learning_rate": 8.937917942866317e-06, "loss": 0.2922086238861084, "step": 9965 }, { "epoch": 0.8710466538528744, "grad_norm": 0.5480831338036931, "learning_rate": 8.93635139587811e-06, "loss": 0.30340800285339353, "step": 9970 }, { "epoch": 0.871483487681286, "grad_norm": 0.5061808506172065, "learning_rate": 8.934783831929094e-06, "loss": 0.2786545753479004, "step": 9975 }, { "epoch": 0.8719203215096977, "grad_norm": 0.5205866572704437, "learning_rate": 8.933215251424258e-06, "loss": 0.2826559066772461, "step": 9980 }, { "epoch": 0.8723571553381094, "grad_norm": 0.571282798209333, "learning_rate": 8.93164565476884e-06, "loss": 0.2909469366073608, "step": 9985 }, { "epoch": 0.8727939891665211, "grad_norm": 0.46296399632381496, "learning_rate": 8.930075042368354e-06, "loss": 0.28118209838867186, "step": 9990 }, { "epoch": 0.8732308229949327, "grad_norm": 0.5162483077694128, "learning_rate": 8.928503414628563e-06, "loss": 0.301605749130249, "step": 9995 }, { "epoch": 0.8736676568233444, "grad_norm": 0.5436259939286978, "learning_rate": 8.9269307719555e-06, "loss": 0.255997896194458, "step": 10000 }, { "epoch": 0.8741044906517561, "grad_norm": 0.4915581319322936, "learning_rate": 8.925357114755461e-06, "loss": 0.2709792137145996, "step": 10005 }, { "epoch": 0.8745413244801677, "grad_norm": 0.5079622479286326, "learning_rate": 8.923782443435004e-06, "loss": 0.279314661026001, "step": 10010 }, { "epoch": 0.8749781583085794, "grad_norm": 0.48977581057886416, "learning_rate": 8.922206758400943e-06, "loss": 0.3149538516998291, "step": 10015 }, { "epoch": 0.8754149921369911, "grad_norm": 0.4587952487141323, "learning_rate": 8.92063006006036e-06, "loss": 0.2704309940338135, "step": 10020 }, { "epoch": 0.8758518259654028, "grad_norm": 0.49367606181232115, "learning_rate": 8.919052348820596e-06, "loss": 0.2849855899810791, "step": 10025 }, { "epoch": 0.8762886597938144, "grad_norm": 0.884573095453113, "learning_rate": 8.917473625089255e-06, "loss": 0.3110016345977783, "step": 10030 }, { "epoch": 0.8767254936222261, "grad_norm": 0.5113506488170436, "learning_rate": 8.9158938892742e-06, "loss": 0.2856534481048584, "step": 10035 }, { "epoch": 0.8771623274506378, "grad_norm": 0.418017475691793, "learning_rate": 8.914313141783562e-06, "loss": 0.2690437793731689, "step": 10040 }, { "epoch": 0.8775991612790495, "grad_norm": 0.5158018632905408, "learning_rate": 8.912731383025725e-06, "loss": 0.27952477931976316, "step": 10045 }, { "epoch": 0.8780359951074611, "grad_norm": 0.47966980215120886, "learning_rate": 8.911148613409339e-06, "loss": 0.268095064163208, "step": 10050 }, { "epoch": 0.8784728289358728, "grad_norm": 0.498504322982874, "learning_rate": 8.909564833343316e-06, "loss": 0.25452380180358886, "step": 10055 }, { "epoch": 0.8789096627642845, "grad_norm": 0.5081630355065242, "learning_rate": 8.907980043236826e-06, "loss": 0.29064116477966306, "step": 10060 }, { "epoch": 0.8793464965926961, "grad_norm": 0.5204742903859017, "learning_rate": 8.906394243499301e-06, "loss": 0.28669133186340334, "step": 10065 }, { "epoch": 0.8797833304211078, "grad_norm": 0.4179921440273538, "learning_rate": 8.904807434540434e-06, "loss": 0.27633256912231446, "step": 10070 }, { "epoch": 0.8802201642495194, "grad_norm": 0.51221816533718, "learning_rate": 8.90321961677018e-06, "loss": 0.2652554988861084, "step": 10075 }, { "epoch": 0.8806569980779312, "grad_norm": 0.5583640075043546, "learning_rate": 8.901630790598752e-06, "loss": 0.3196396827697754, "step": 10080 }, { "epoch": 0.8810938319063428, "grad_norm": 0.43353051542851656, "learning_rate": 8.900040956436626e-06, "loss": 0.2838418006896973, "step": 10085 }, { "epoch": 0.8815306657347545, "grad_norm": 0.5382295485552839, "learning_rate": 8.898450114694539e-06, "loss": 0.2636082649230957, "step": 10090 }, { "epoch": 0.8819674995631662, "grad_norm": 0.5108738788615099, "learning_rate": 8.896858265783483e-06, "loss": 0.2808398962020874, "step": 10095 }, { "epoch": 0.8824043333915779, "grad_norm": 0.5083812600486852, "learning_rate": 8.895265410114719e-06, "loss": 0.26941637992858886, "step": 10100 }, { "epoch": 0.8828411672199895, "grad_norm": 0.526823634407415, "learning_rate": 8.893671548099758e-06, "loss": 0.2935339450836182, "step": 10105 }, { "epoch": 0.8832780010484012, "grad_norm": 0.7060408668368587, "learning_rate": 8.892076680150379e-06, "loss": 0.26912741661071776, "step": 10110 }, { "epoch": 0.8837148348768129, "grad_norm": 0.44922799620569137, "learning_rate": 8.890480806678616e-06, "loss": 0.2686115264892578, "step": 10115 }, { "epoch": 0.8841516687052245, "grad_norm": 0.4923992428923496, "learning_rate": 8.888883928096767e-06, "loss": 0.3045828819274902, "step": 10120 }, { "epoch": 0.8845885025336362, "grad_norm": 0.4519010479638227, "learning_rate": 8.887286044817386e-06, "loss": 0.28792080879211424, "step": 10125 }, { "epoch": 0.8850253363620478, "grad_norm": 0.6622494769449881, "learning_rate": 8.885687157253288e-06, "loss": 0.2674598455429077, "step": 10130 }, { "epoch": 0.8854621701904596, "grad_norm": 0.46677204299955166, "learning_rate": 8.884087265817548e-06, "loss": 0.3082862138748169, "step": 10135 }, { "epoch": 0.8858990040188712, "grad_norm": 0.5138239965800577, "learning_rate": 8.882486370923498e-06, "loss": 0.2831382751464844, "step": 10140 }, { "epoch": 0.8863358378472829, "grad_norm": 0.4806337479875103, "learning_rate": 8.880884472984734e-06, "loss": 0.2643167495727539, "step": 10145 }, { "epoch": 0.8867726716756945, "grad_norm": 0.46131138292722856, "learning_rate": 8.879281572415104e-06, "loss": 0.28806700706481936, "step": 10150 }, { "epoch": 0.8872095055041063, "grad_norm": 0.4426084404275845, "learning_rate": 8.877677669628724e-06, "loss": 0.2844530582427979, "step": 10155 }, { "epoch": 0.8876463393325179, "grad_norm": 0.521353562130687, "learning_rate": 8.876072765039961e-06, "loss": 0.2611421585083008, "step": 10160 }, { "epoch": 0.8880831731609296, "grad_norm": 0.5924832930444568, "learning_rate": 8.874466859063444e-06, "loss": 0.2867248058319092, "step": 10165 }, { "epoch": 0.8885200069893413, "grad_norm": 0.47294317603536695, "learning_rate": 8.872859952114061e-06, "loss": 0.28222217559814455, "step": 10170 }, { "epoch": 0.888956840817753, "grad_norm": 0.43724261105925405, "learning_rate": 8.871252044606959e-06, "loss": 0.2911630630493164, "step": 10175 }, { "epoch": 0.8893936746461646, "grad_norm": 0.5516299398196985, "learning_rate": 8.869643136957541e-06, "loss": 0.2627579927444458, "step": 10180 }, { "epoch": 0.8898305084745762, "grad_norm": 0.5146445649821181, "learning_rate": 8.868033229581473e-06, "loss": 0.25904068946838377, "step": 10185 }, { "epoch": 0.890267342302988, "grad_norm": 0.6208409116912962, "learning_rate": 8.866422322894674e-06, "loss": 0.26237752437591555, "step": 10190 }, { "epoch": 0.8907041761313996, "grad_norm": 0.5523021659221502, "learning_rate": 8.864810417313324e-06, "loss": 0.2923889636993408, "step": 10195 }, { "epoch": 0.8911410099598113, "grad_norm": 0.47300234256059365, "learning_rate": 8.86319751325386e-06, "loss": 0.29525620937347413, "step": 10200 }, { "epoch": 0.8915778437882229, "grad_norm": 0.5737258433976085, "learning_rate": 8.86158361113298e-06, "loss": 0.2864107131958008, "step": 10205 }, { "epoch": 0.8920146776166347, "grad_norm": 0.4506855743705989, "learning_rate": 8.859968711367635e-06, "loss": 0.28885436058044434, "step": 10210 }, { "epoch": 0.8924515114450463, "grad_norm": 0.49798357504053387, "learning_rate": 8.858352814375035e-06, "loss": 0.2798494338989258, "step": 10215 }, { "epoch": 0.892888345273458, "grad_norm": 0.5945576518114688, "learning_rate": 8.856735920572653e-06, "loss": 0.2786175966262817, "step": 10220 }, { "epoch": 0.8933251791018697, "grad_norm": 0.5074212543424347, "learning_rate": 8.855118030378213e-06, "loss": 0.31025376319885256, "step": 10225 }, { "epoch": 0.8937620129302813, "grad_norm": 0.5159934604054921, "learning_rate": 8.853499144209699e-06, "loss": 0.31079936027526855, "step": 10230 }, { "epoch": 0.894198846758693, "grad_norm": 0.5733415254018701, "learning_rate": 8.85187926248535e-06, "loss": 0.29919188022613524, "step": 10235 }, { "epoch": 0.8946356805871046, "grad_norm": 0.4787453050542764, "learning_rate": 8.850258385623666e-06, "loss": 0.2887302875518799, "step": 10240 }, { "epoch": 0.8950725144155164, "grad_norm": 0.4706524247398021, "learning_rate": 8.848636514043405e-06, "loss": 0.28795561790466306, "step": 10245 }, { "epoch": 0.895509348243928, "grad_norm": 0.5510197996989042, "learning_rate": 8.847013648163576e-06, "loss": 0.3117710828781128, "step": 10250 }, { "epoch": 0.8959461820723397, "grad_norm": 0.6094033118804294, "learning_rate": 8.84538978840345e-06, "loss": 0.2942368507385254, "step": 10255 }, { "epoch": 0.8963830159007513, "grad_norm": 0.5149884820135032, "learning_rate": 8.84376493518255e-06, "loss": 0.28515594005584716, "step": 10260 }, { "epoch": 0.8968198497291631, "grad_norm": 0.5289391918856972, "learning_rate": 8.842139088920661e-06, "loss": 0.306098198890686, "step": 10265 }, { "epoch": 0.8972566835575747, "grad_norm": 0.5486157401027159, "learning_rate": 8.840512250037823e-06, "loss": 0.2867579936981201, "step": 10270 }, { "epoch": 0.8976935173859864, "grad_norm": 0.6030841888380469, "learning_rate": 8.83888441895433e-06, "loss": 0.2904814720153809, "step": 10275 }, { "epoch": 0.898130351214398, "grad_norm": 0.47749030322527297, "learning_rate": 8.837255596090734e-06, "loss": 0.29210762977600097, "step": 10280 }, { "epoch": 0.8985671850428097, "grad_norm": 0.4979129995985084, "learning_rate": 8.835625781867844e-06, "loss": 0.2946381807327271, "step": 10285 }, { "epoch": 0.8990040188712214, "grad_norm": 0.5268763200689197, "learning_rate": 8.833994976706723e-06, "loss": 0.3053908824920654, "step": 10290 }, { "epoch": 0.899440852699633, "grad_norm": 0.5350591306307596, "learning_rate": 8.832363181028695e-06, "loss": 0.3086702346801758, "step": 10295 }, { "epoch": 0.8998776865280448, "grad_norm": 0.5829410696162621, "learning_rate": 8.830730395255333e-06, "loss": 0.2613457202911377, "step": 10300 }, { "epoch": 0.9003145203564564, "grad_norm": 0.4831641030030435, "learning_rate": 8.829096619808469e-06, "loss": 0.27321629524230956, "step": 10305 }, { "epoch": 0.9007513541848681, "grad_norm": 0.4789066940800895, "learning_rate": 8.82746185511019e-06, "loss": 0.28621573448181153, "step": 10310 }, { "epoch": 0.9011881880132797, "grad_norm": 0.45194344042869833, "learning_rate": 8.825826101582844e-06, "loss": 0.27152695655822756, "step": 10315 }, { "epoch": 0.9016250218416915, "grad_norm": 0.5134355920088303, "learning_rate": 8.824189359649025e-06, "loss": 0.27720372676849364, "step": 10320 }, { "epoch": 0.9020618556701031, "grad_norm": 0.5427258380868573, "learning_rate": 8.822551629731588e-06, "loss": 0.2778025150299072, "step": 10325 }, { "epoch": 0.9024986894985148, "grad_norm": 0.49730254948194197, "learning_rate": 8.820912912253644e-06, "loss": 0.31234302520751955, "step": 10330 }, { "epoch": 0.9029355233269264, "grad_norm": 0.45301248484282275, "learning_rate": 8.819273207638556e-06, "loss": 0.26839261054992675, "step": 10335 }, { "epoch": 0.9033723571553381, "grad_norm": 0.5136492164770579, "learning_rate": 8.817632516309944e-06, "loss": 0.3363525390625, "step": 10340 }, { "epoch": 0.9038091909837498, "grad_norm": 0.4924304337330986, "learning_rate": 8.815990838691685e-06, "loss": 0.2993743419647217, "step": 10345 }, { "epoch": 0.9042460248121614, "grad_norm": 0.48026808196915566, "learning_rate": 8.8143481752079e-06, "loss": 0.291105318069458, "step": 10350 }, { "epoch": 0.9046828586405731, "grad_norm": 0.5360687667028303, "learning_rate": 8.812704526282984e-06, "loss": 0.27686028480529784, "step": 10355 }, { "epoch": 0.9051196924689848, "grad_norm": 0.572477897287098, "learning_rate": 8.811059892341569e-06, "loss": 0.2741427421569824, "step": 10360 }, { "epoch": 0.9055565262973965, "grad_norm": 0.56998719380745, "learning_rate": 8.809414273808547e-06, "loss": 0.2614758968353271, "step": 10365 }, { "epoch": 0.9059933601258081, "grad_norm": 0.420364516020887, "learning_rate": 8.80776767110907e-06, "loss": 0.24618017673492432, "step": 10370 }, { "epoch": 0.9064301939542199, "grad_norm": 0.5428612023198405, "learning_rate": 8.806120084668538e-06, "loss": 0.2704817533493042, "step": 10375 }, { "epoch": 0.9068670277826315, "grad_norm": 0.49352811076389985, "learning_rate": 8.804471514912602e-06, "loss": 0.2879830837249756, "step": 10380 }, { "epoch": 0.9073038616110431, "grad_norm": 0.5624703936187743, "learning_rate": 8.802821962267181e-06, "loss": 0.2823371887207031, "step": 10385 }, { "epoch": 0.9077406954394548, "grad_norm": 0.6869960175468536, "learning_rate": 8.801171427158431e-06, "loss": 0.2701408863067627, "step": 10390 }, { "epoch": 0.9081775292678665, "grad_norm": 0.523762125098418, "learning_rate": 8.799519910012772e-06, "loss": 0.28600664138793946, "step": 10395 }, { "epoch": 0.9086143630962782, "grad_norm": 0.5262629043207792, "learning_rate": 8.797867411256877e-06, "loss": 0.27770869731903075, "step": 10400 }, { "epoch": 0.9090511969246898, "grad_norm": 0.6046547644345269, "learning_rate": 8.796213931317668e-06, "loss": 0.32122802734375, "step": 10405 }, { "epoch": 0.9094880307531015, "grad_norm": 0.46255982527192624, "learning_rate": 8.794559470622327e-06, "loss": 0.2811276435852051, "step": 10410 }, { "epoch": 0.9099248645815132, "grad_norm": 0.5093384194427939, "learning_rate": 8.792904029598283e-06, "loss": 0.3168567895889282, "step": 10415 }, { "epoch": 0.9103616984099249, "grad_norm": 0.543394297176325, "learning_rate": 8.79124760867322e-06, "loss": 0.29462831020355223, "step": 10420 }, { "epoch": 0.9107985322383365, "grad_norm": 0.431137964916728, "learning_rate": 8.78959020827508e-06, "loss": 0.2789581060409546, "step": 10425 }, { "epoch": 0.9112353660667482, "grad_norm": 0.5231613234179577, "learning_rate": 8.787931828832053e-06, "loss": 0.29688570499420164, "step": 10430 }, { "epoch": 0.9116721998951599, "grad_norm": 0.45225936969653613, "learning_rate": 8.786272470772582e-06, "loss": 0.27682545185089114, "step": 10435 }, { "epoch": 0.9121090337235715, "grad_norm": 0.4920460144839862, "learning_rate": 8.784612134525363e-06, "loss": 0.27374792098999023, "step": 10440 }, { "epoch": 0.9125458675519832, "grad_norm": 0.5073395756573644, "learning_rate": 8.782950820519348e-06, "loss": 0.2781497955322266, "step": 10445 }, { "epoch": 0.912982701380395, "grad_norm": 0.5577999740035496, "learning_rate": 8.781288529183738e-06, "loss": 0.2777007818222046, "step": 10450 }, { "epoch": 0.9134195352088066, "grad_norm": 0.5684324469503185, "learning_rate": 8.779625260947991e-06, "loss": 0.2743826866149902, "step": 10455 }, { "epoch": 0.9138563690372182, "grad_norm": 0.4895726380995986, "learning_rate": 8.777961016241809e-06, "loss": 0.2810529232025146, "step": 10460 }, { "epoch": 0.9142932028656299, "grad_norm": 0.47957797199178026, "learning_rate": 8.776295795495154e-06, "loss": 0.2950024127960205, "step": 10465 }, { "epoch": 0.9147300366940416, "grad_norm": 0.6643903710419007, "learning_rate": 8.774629599138238e-06, "loss": 0.2549773693084717, "step": 10470 }, { "epoch": 0.9151668705224533, "grad_norm": 0.5859315054353562, "learning_rate": 8.772962427601525e-06, "loss": 0.2823502063751221, "step": 10475 }, { "epoch": 0.9156037043508649, "grad_norm": 0.6084868757802835, "learning_rate": 8.771294281315729e-06, "loss": 0.26756126880645753, "step": 10480 }, { "epoch": 0.9160405381792766, "grad_norm": 0.4558705977032391, "learning_rate": 8.769625160711822e-06, "loss": 0.26926794052124026, "step": 10485 }, { "epoch": 0.9164773720076883, "grad_norm": 0.43877285525518545, "learning_rate": 8.767955066221017e-06, "loss": 0.25567634105682374, "step": 10490 }, { "epoch": 0.9169142058361, "grad_norm": 0.6022604039366686, "learning_rate": 8.766283998274788e-06, "loss": 0.278735089302063, "step": 10495 }, { "epoch": 0.9173510396645116, "grad_norm": 0.6361977624398163, "learning_rate": 8.764611957304856e-06, "loss": 0.2700082063674927, "step": 10500 }, { "epoch": 0.9177878734929233, "grad_norm": 0.5872281792234103, "learning_rate": 8.762938943743196e-06, "loss": 0.2874948978424072, "step": 10505 }, { "epoch": 0.918224707321335, "grad_norm": 0.4563822036791866, "learning_rate": 8.761264958022031e-06, "loss": 0.2735483169555664, "step": 10510 }, { "epoch": 0.9186615411497466, "grad_norm": 0.54350175463169, "learning_rate": 8.75959000057384e-06, "loss": 0.3002284049987793, "step": 10515 }, { "epoch": 0.9190983749781583, "grad_norm": 0.6174615647819738, "learning_rate": 8.75791407183135e-06, "loss": 0.29213852882385255, "step": 10520 }, { "epoch": 0.91953520880657, "grad_norm": 0.445637621547036, "learning_rate": 8.756237172227535e-06, "loss": 0.3214211940765381, "step": 10525 }, { "epoch": 0.9199720426349817, "grad_norm": 0.5338518275151459, "learning_rate": 8.754559302195628e-06, "loss": 0.32288126945495604, "step": 10530 }, { "epoch": 0.9204088764633933, "grad_norm": 0.5346667177157777, "learning_rate": 8.752880462169107e-06, "loss": 0.2911365032196045, "step": 10535 }, { "epoch": 0.920845710291805, "grad_norm": 0.5004247208707846, "learning_rate": 8.751200652581703e-06, "loss": 0.2654887199401855, "step": 10540 }, { "epoch": 0.9212825441202167, "grad_norm": 0.5222648140619507, "learning_rate": 8.749519873867397e-06, "loss": 0.28133273124694824, "step": 10545 }, { "epoch": 0.9217193779486283, "grad_norm": 0.39805713687193617, "learning_rate": 8.747838126460417e-06, "loss": 0.26159119606018066, "step": 10550 }, { "epoch": 0.92215621177704, "grad_norm": 0.5324427630062945, "learning_rate": 8.746155410795248e-06, "loss": 0.30904972553253174, "step": 10555 }, { "epoch": 0.9225930456054516, "grad_norm": 0.445452343403099, "learning_rate": 8.744471727306619e-06, "loss": 0.26029210090637206, "step": 10560 }, { "epoch": 0.9230298794338634, "grad_norm": 0.7021234908457972, "learning_rate": 8.74278707642951e-06, "loss": 0.27826662063598634, "step": 10565 }, { "epoch": 0.923466713262275, "grad_norm": 0.48651422893981827, "learning_rate": 8.74110145859916e-06, "loss": 0.2956676959991455, "step": 10570 }, { "epoch": 0.9239035470906867, "grad_norm": 0.4943775987600608, "learning_rate": 8.739414874251043e-06, "loss": 0.30953378677368165, "step": 10575 }, { "epoch": 0.9243403809190984, "grad_norm": 0.4546413068126174, "learning_rate": 8.737727323820892e-06, "loss": 0.24652657508850098, "step": 10580 }, { "epoch": 0.9247772147475101, "grad_norm": 0.48179690522768454, "learning_rate": 8.736038807744687e-06, "loss": 0.2775883197784424, "step": 10585 }, { "epoch": 0.9252140485759217, "grad_norm": 0.43025717666480146, "learning_rate": 8.73434932645866e-06, "loss": 0.28451900482177733, "step": 10590 }, { "epoch": 0.9256508824043334, "grad_norm": 0.47082593497355885, "learning_rate": 8.73265888039929e-06, "loss": 0.2620342493057251, "step": 10595 }, { "epoch": 0.9260877162327451, "grad_norm": 0.5569120849692186, "learning_rate": 8.730967470003302e-06, "loss": 0.27934813499450684, "step": 10600 }, { "epoch": 0.9265245500611567, "grad_norm": 0.8508530159651286, "learning_rate": 8.729275095707679e-06, "loss": 0.29230637550354005, "step": 10605 }, { "epoch": 0.9269613838895684, "grad_norm": 0.5697451119770883, "learning_rate": 8.727581757949644e-06, "loss": 0.2873208045959473, "step": 10610 }, { "epoch": 0.92739821771798, "grad_norm": 0.48588141289189557, "learning_rate": 8.725887457166675e-06, "loss": 0.26313271522521975, "step": 10615 }, { "epoch": 0.9278350515463918, "grad_norm": 0.4241570147439287, "learning_rate": 8.724192193796495e-06, "loss": 0.2802678823471069, "step": 10620 }, { "epoch": 0.9282718853748034, "grad_norm": 0.6097762730045373, "learning_rate": 8.722495968277079e-06, "loss": 0.28146777153015134, "step": 10625 }, { "epoch": 0.9287087192032151, "grad_norm": 0.46061690772681757, "learning_rate": 8.720798781046644e-06, "loss": 0.2877309322357178, "step": 10630 }, { "epoch": 0.9291455530316267, "grad_norm": 0.5500870350257885, "learning_rate": 8.719100632543667e-06, "loss": 0.3034529685974121, "step": 10635 }, { "epoch": 0.9295823868600385, "grad_norm": 0.524743926594118, "learning_rate": 8.717401523206863e-06, "loss": 0.2856189966201782, "step": 10640 }, { "epoch": 0.9300192206884501, "grad_norm": 0.4441544593126037, "learning_rate": 8.715701453475199e-06, "loss": 0.2722090005874634, "step": 10645 }, { "epoch": 0.9304560545168618, "grad_norm": 0.5237765891859478, "learning_rate": 8.71400042378789e-06, "loss": 0.3116408348083496, "step": 10650 }, { "epoch": 0.9308928883452735, "grad_norm": 0.5070948773100931, "learning_rate": 8.712298434584398e-06, "loss": 0.2788883686065674, "step": 10655 }, { "epoch": 0.9313297221736851, "grad_norm": 0.5496174416386514, "learning_rate": 8.710595486304436e-06, "loss": 0.2871818542480469, "step": 10660 }, { "epoch": 0.9317665560020968, "grad_norm": 0.6615333258049721, "learning_rate": 8.708891579387962e-06, "loss": 0.30494251251220705, "step": 10665 }, { "epoch": 0.9322033898305084, "grad_norm": 0.5005230518902967, "learning_rate": 8.70718671427518e-06, "loss": 0.29863953590393066, "step": 10670 }, { "epoch": 0.9326402236589202, "grad_norm": 0.49224131059520515, "learning_rate": 8.705480891406545e-06, "loss": 0.27972636222839353, "step": 10675 }, { "epoch": 0.9330770574873318, "grad_norm": 0.5063382129523563, "learning_rate": 8.703774111222761e-06, "loss": 0.28886871337890624, "step": 10680 }, { "epoch": 0.9335138913157435, "grad_norm": 0.5622212557726296, "learning_rate": 8.702066374164774e-06, "loss": 0.27976651191711427, "step": 10685 }, { "epoch": 0.9339507251441551, "grad_norm": 0.5620677260232957, "learning_rate": 8.700357680673779e-06, "loss": 0.2987724542617798, "step": 10690 }, { "epoch": 0.9343875589725669, "grad_norm": 0.586378981243592, "learning_rate": 8.69864803119122e-06, "loss": 0.2690612316131592, "step": 10695 }, { "epoch": 0.9348243928009785, "grad_norm": 0.6298938493319123, "learning_rate": 8.696937426158789e-06, "loss": 0.27891201972961427, "step": 10700 }, { "epoch": 0.9352612266293902, "grad_norm": 0.5913029808439382, "learning_rate": 8.69522586601842e-06, "loss": 0.2761885166168213, "step": 10705 }, { "epoch": 0.9356980604578019, "grad_norm": 0.521560230974983, "learning_rate": 8.693513351212294e-06, "loss": 0.2820094108581543, "step": 10710 }, { "epoch": 0.9361348942862135, "grad_norm": 0.46879648476663965, "learning_rate": 8.691799882182847e-06, "loss": 0.2823336124420166, "step": 10715 }, { "epoch": 0.9365717281146252, "grad_norm": 0.5171563164158199, "learning_rate": 8.690085459372753e-06, "loss": 0.2974942684173584, "step": 10720 }, { "epoch": 0.9370085619430368, "grad_norm": 0.5296498438227163, "learning_rate": 8.688370083224934e-06, "loss": 0.2513756275177002, "step": 10725 }, { "epoch": 0.9374453957714486, "grad_norm": 0.48176398995674147, "learning_rate": 8.686653754182562e-06, "loss": 0.26259889602661135, "step": 10730 }, { "epoch": 0.9378822295998602, "grad_norm": 0.5195445589315876, "learning_rate": 8.684936472689048e-06, "loss": 0.30336833000183105, "step": 10735 }, { "epoch": 0.9383190634282719, "grad_norm": 0.4439531792756147, "learning_rate": 8.683218239188058e-06, "loss": 0.26585214138031005, "step": 10740 }, { "epoch": 0.9387558972566835, "grad_norm": 0.45708339949272586, "learning_rate": 8.681499054123498e-06, "loss": 0.286708664894104, "step": 10745 }, { "epoch": 0.9391927310850953, "grad_norm": 0.4564611426399935, "learning_rate": 8.679778917939521e-06, "loss": 0.27121312618255616, "step": 10750 }, { "epoch": 0.9396295649135069, "grad_norm": 0.4760115843109403, "learning_rate": 8.678057831080527e-06, "loss": 0.3104276180267334, "step": 10755 }, { "epoch": 0.9400663987419186, "grad_norm": 0.44669318812411846, "learning_rate": 8.676335793991159e-06, "loss": 0.3325453281402588, "step": 10760 }, { "epoch": 0.9405032325703302, "grad_norm": 0.5771298001977173, "learning_rate": 8.674612807116309e-06, "loss": 0.28998730182647703, "step": 10765 }, { "epoch": 0.940940066398742, "grad_norm": 0.5433756890833089, "learning_rate": 8.672888870901113e-06, "loss": 0.3067406415939331, "step": 10770 }, { "epoch": 0.9413769002271536, "grad_norm": 0.5183925989555878, "learning_rate": 8.67116398579095e-06, "loss": 0.27137026786804197, "step": 10775 }, { "epoch": 0.9418137340555652, "grad_norm": 0.5053980986349551, "learning_rate": 8.669438152231446e-06, "loss": 0.2968433141708374, "step": 10780 }, { "epoch": 0.942250567883977, "grad_norm": 0.46432854788064853, "learning_rate": 8.667711370668474e-06, "loss": 0.28692376613616943, "step": 10785 }, { "epoch": 0.9426874017123886, "grad_norm": 0.486827017438211, "learning_rate": 8.66598364154815e-06, "loss": 0.29697418212890625, "step": 10790 }, { "epoch": 0.9431242355408003, "grad_norm": 0.49048251533681325, "learning_rate": 8.664254965316833e-06, "loss": 0.2735886573791504, "step": 10795 }, { "epoch": 0.9435610693692119, "grad_norm": 0.48181194300328783, "learning_rate": 8.662525342421128e-06, "loss": 0.2616727828979492, "step": 10800 }, { "epoch": 0.9439979031976237, "grad_norm": 0.6972323511784498, "learning_rate": 8.660794773307888e-06, "loss": 0.2876762390136719, "step": 10805 }, { "epoch": 0.9444347370260353, "grad_norm": 0.5209329503719812, "learning_rate": 8.659063258424203e-06, "loss": 0.2846013069152832, "step": 10810 }, { "epoch": 0.944871570854447, "grad_norm": 0.6024179117316298, "learning_rate": 8.657330798217417e-06, "loss": 0.26872682571411133, "step": 10815 }, { "epoch": 0.9453084046828586, "grad_norm": 0.4599264108870192, "learning_rate": 8.655597393135108e-06, "loss": 0.2771960735321045, "step": 10820 }, { "epoch": 0.9457452385112703, "grad_norm": 0.5152973116287402, "learning_rate": 8.653863043625108e-06, "loss": 0.3070712089538574, "step": 10825 }, { "epoch": 0.946182072339682, "grad_norm": 0.4453046416884014, "learning_rate": 8.652127750135484e-06, "loss": 0.31016757488250735, "step": 10830 }, { "epoch": 0.9466189061680936, "grad_norm": 0.5073992445977769, "learning_rate": 8.650391513114553e-06, "loss": 0.2656559944152832, "step": 10835 }, { "epoch": 0.9470557399965053, "grad_norm": 0.430763600711409, "learning_rate": 8.648654333010876e-06, "loss": 0.3247964859008789, "step": 10840 }, { "epoch": 0.947492573824917, "grad_norm": 0.5719142224191331, "learning_rate": 8.646916210273248e-06, "loss": 0.3126096725463867, "step": 10845 }, { "epoch": 0.9479294076533287, "grad_norm": 0.46953560760620106, "learning_rate": 8.645177145350724e-06, "loss": 0.29030890464782716, "step": 10850 }, { "epoch": 0.9483662414817403, "grad_norm": 0.5110254397487802, "learning_rate": 8.643437138692587e-06, "loss": 0.27594945430755613, "step": 10855 }, { "epoch": 0.9488030753101521, "grad_norm": 0.43356231700916115, "learning_rate": 8.641696190748374e-06, "loss": 0.26840934753417967, "step": 10860 }, { "epoch": 0.9492399091385637, "grad_norm": 0.5397857283199405, "learning_rate": 8.639954301967858e-06, "loss": 0.2869222164154053, "step": 10865 }, { "epoch": 0.9496767429669754, "grad_norm": 0.549771007790222, "learning_rate": 8.638211472801058e-06, "loss": 0.27577033042907717, "step": 10870 }, { "epoch": 0.950113576795387, "grad_norm": 0.500273741709047, "learning_rate": 8.636467703698238e-06, "loss": 0.29017593860626223, "step": 10875 }, { "epoch": 0.9505504106237987, "grad_norm": 0.5774284988379409, "learning_rate": 8.634722995109901e-06, "loss": 0.25912938117980955, "step": 10880 }, { "epoch": 0.9509872444522104, "grad_norm": 0.5630743511012558, "learning_rate": 8.632977347486794e-06, "loss": 0.2883486270904541, "step": 10885 }, { "epoch": 0.951424078280622, "grad_norm": 0.4481632247464094, "learning_rate": 8.63123076127991e-06, "loss": 0.2876639127731323, "step": 10890 }, { "epoch": 0.9518609121090337, "grad_norm": 0.42948311809957557, "learning_rate": 8.629483236940477e-06, "loss": 0.3106717824935913, "step": 10895 }, { "epoch": 0.9522977459374454, "grad_norm": 0.5644449075081798, "learning_rate": 8.62773477491997e-06, "loss": 0.2764299392700195, "step": 10900 }, { "epoch": 0.9527345797658571, "grad_norm": 0.5477393929431045, "learning_rate": 8.625985375670112e-06, "loss": 0.2695319175720215, "step": 10905 }, { "epoch": 0.9531714135942687, "grad_norm": 0.5220725584864737, "learning_rate": 8.624235039642858e-06, "loss": 0.2724383592605591, "step": 10910 }, { "epoch": 0.9536082474226805, "grad_norm": 0.5551609509236548, "learning_rate": 8.62248376729041e-06, "loss": 0.27652454376220703, "step": 10915 }, { "epoch": 0.9540450812510921, "grad_norm": 0.5489626419226257, "learning_rate": 8.620731559065212e-06, "loss": 0.28460376262664794, "step": 10920 }, { "epoch": 0.9544819150795038, "grad_norm": 0.5375205365959127, "learning_rate": 8.618978415419945e-06, "loss": 0.28477160930633544, "step": 10925 }, { "epoch": 0.9549187489079154, "grad_norm": 0.5586733936879019, "learning_rate": 8.61722433680754e-06, "loss": 0.31400156021118164, "step": 10930 }, { "epoch": 0.9553555827363271, "grad_norm": 0.5053310520422, "learning_rate": 8.615469323681165e-06, "loss": 0.279723596572876, "step": 10935 }, { "epoch": 0.9557924165647388, "grad_norm": 0.5333196867227767, "learning_rate": 8.613713376494226e-06, "loss": 0.2908888816833496, "step": 10940 }, { "epoch": 0.9562292503931504, "grad_norm": 0.45177702823358323, "learning_rate": 8.611956495700379e-06, "loss": 0.2731215000152588, "step": 10945 }, { "epoch": 0.9566660842215621, "grad_norm": 0.5770322081231619, "learning_rate": 8.610198681753513e-06, "loss": 0.2937196731567383, "step": 10950 }, { "epoch": 0.9571029180499738, "grad_norm": 0.5016326480688182, "learning_rate": 8.608439935107759e-06, "loss": 0.26808629035949705, "step": 10955 }, { "epoch": 0.9575397518783855, "grad_norm": 0.5272199393311584, "learning_rate": 8.606680256217497e-06, "loss": 0.2863886594772339, "step": 10960 }, { "epoch": 0.9579765857067971, "grad_norm": 0.5304580745782556, "learning_rate": 8.604919645537338e-06, "loss": 0.2725583553314209, "step": 10965 }, { "epoch": 0.9584134195352088, "grad_norm": 0.5518178034225579, "learning_rate": 8.603158103522138e-06, "loss": 0.263462495803833, "step": 10970 }, { "epoch": 0.9588502533636205, "grad_norm": 0.4760997028643968, "learning_rate": 8.601395630626995e-06, "loss": 0.27234175205230715, "step": 10975 }, { "epoch": 0.9592870871920322, "grad_norm": 0.46393886068711565, "learning_rate": 8.599632227307242e-06, "loss": 0.2724531412124634, "step": 10980 }, { "epoch": 0.9597239210204438, "grad_norm": 0.5922209690081247, "learning_rate": 8.597867894018462e-06, "loss": 0.28583564758300783, "step": 10985 }, { "epoch": 0.9601607548488555, "grad_norm": 0.4424659526036808, "learning_rate": 8.596102631216468e-06, "loss": 0.2862715721130371, "step": 10990 }, { "epoch": 0.9605975886772672, "grad_norm": 0.4958350171096283, "learning_rate": 8.59433643935732e-06, "loss": 0.27404394149780276, "step": 10995 }, { "epoch": 0.9610344225056788, "grad_norm": 0.6773540502297165, "learning_rate": 8.592569318897315e-06, "loss": 0.2961676597595215, "step": 11000 }, { "epoch": 0.9614712563340905, "grad_norm": 0.5302822573860192, "learning_rate": 8.59080127029299e-06, "loss": 0.29593462944030763, "step": 11005 }, { "epoch": 0.9619080901625022, "grad_norm": 0.45096191806807195, "learning_rate": 8.589032294001122e-06, "loss": 0.26218857765197756, "step": 11010 }, { "epoch": 0.9623449239909139, "grad_norm": 0.5062265232474031, "learning_rate": 8.58726239047873e-06, "loss": 0.30715322494506836, "step": 11015 }, { "epoch": 0.9627817578193255, "grad_norm": 0.4541737760163574, "learning_rate": 8.585491560183068e-06, "loss": 0.3018615484237671, "step": 11020 }, { "epoch": 0.9632185916477372, "grad_norm": 0.46913940425146833, "learning_rate": 8.583719803571635e-06, "loss": 0.28756203651428225, "step": 11025 }, { "epoch": 0.9636554254761489, "grad_norm": 0.5107802433782712, "learning_rate": 8.581947121102162e-06, "loss": 0.280017614364624, "step": 11030 }, { "epoch": 0.9640922593045606, "grad_norm": 0.5233644675842907, "learning_rate": 8.580173513232629e-06, "loss": 0.2984398603439331, "step": 11035 }, { "epoch": 0.9645290931329722, "grad_norm": 0.48215079338154604, "learning_rate": 8.578398980421245e-06, "loss": 0.2756936550140381, "step": 11040 }, { "epoch": 0.9649659269613838, "grad_norm": 0.4946771549737981, "learning_rate": 8.576623523126465e-06, "loss": 0.26265032291412355, "step": 11045 }, { "epoch": 0.9654027607897956, "grad_norm": 0.46745019075692346, "learning_rate": 8.57484714180698e-06, "loss": 0.277689528465271, "step": 11050 }, { "epoch": 0.9658395946182072, "grad_norm": 0.532733572139839, "learning_rate": 8.57306983692172e-06, "loss": 0.3172933578491211, "step": 11055 }, { "epoch": 0.9662764284466189, "grad_norm": 0.513477138257485, "learning_rate": 8.57129160892985e-06, "loss": 0.2783055305480957, "step": 11060 }, { "epoch": 0.9667132622750306, "grad_norm": 0.4912532073306008, "learning_rate": 8.569512458290786e-06, "loss": 0.29609832763671873, "step": 11065 }, { "epoch": 0.9671500961034423, "grad_norm": 0.5833503288120316, "learning_rate": 8.567732385464164e-06, "loss": 0.26279287338256835, "step": 11070 }, { "epoch": 0.9675869299318539, "grad_norm": 0.6438998712980241, "learning_rate": 8.565951390909874e-06, "loss": 0.2823063850402832, "step": 11075 }, { "epoch": 0.9680237637602656, "grad_norm": 0.5071857643524723, "learning_rate": 8.564169475088036e-06, "loss": 0.2732293128967285, "step": 11080 }, { "epoch": 0.9684605975886773, "grad_norm": 0.5973403050530177, "learning_rate": 8.562386638459008e-06, "loss": 0.2865267753601074, "step": 11085 }, { "epoch": 0.968897431417089, "grad_norm": 0.5505383290522706, "learning_rate": 8.560602881483391e-06, "loss": 0.2739409923553467, "step": 11090 }, { "epoch": 0.9693342652455006, "grad_norm": 0.5286546198150442, "learning_rate": 8.55881820462202e-06, "loss": 0.2815336942672729, "step": 11095 }, { "epoch": 0.9697710990739122, "grad_norm": 0.48188281304466174, "learning_rate": 8.557032608335966e-06, "loss": 0.2749753952026367, "step": 11100 }, { "epoch": 0.970207932902324, "grad_norm": 0.6147813713398422, "learning_rate": 8.555246093086544e-06, "loss": 0.2645130634307861, "step": 11105 }, { "epoch": 0.9706447667307356, "grad_norm": 0.639242010718234, "learning_rate": 8.553458659335298e-06, "loss": 0.2975849390029907, "step": 11110 }, { "epoch": 0.9710816005591473, "grad_norm": 0.5470844025076144, "learning_rate": 8.551670307544015e-06, "loss": 0.26600644588470457, "step": 11115 }, { "epoch": 0.971518434387559, "grad_norm": 0.6212145482697885, "learning_rate": 8.54988103817472e-06, "loss": 0.26743259429931643, "step": 11120 }, { "epoch": 0.9719552682159707, "grad_norm": 0.5458997759712017, "learning_rate": 8.54809085168967e-06, "loss": 0.2958528518676758, "step": 11125 }, { "epoch": 0.9723921020443823, "grad_norm": 0.5150665027796802, "learning_rate": 8.546299748551361e-06, "loss": 0.2590750217437744, "step": 11130 }, { "epoch": 0.972828935872794, "grad_norm": 0.47436004185658787, "learning_rate": 8.544507729222528e-06, "loss": 0.29920573234558107, "step": 11135 }, { "epoch": 0.9732657697012057, "grad_norm": 0.5311629949728095, "learning_rate": 8.542714794166142e-06, "loss": 0.29096450805664065, "step": 11140 }, { "epoch": 0.9737026035296173, "grad_norm": 0.45691226654746675, "learning_rate": 8.540920943845408e-06, "loss": 0.2665391445159912, "step": 11145 }, { "epoch": 0.974139437358029, "grad_norm": 0.61007230797278, "learning_rate": 8.539126178723771e-06, "loss": 0.2802713871002197, "step": 11150 }, { "epoch": 0.9745762711864406, "grad_norm": 0.5643119848430814, "learning_rate": 8.537330499264908e-06, "loss": 0.24997665882110595, "step": 11155 }, { "epoch": 0.9750131050148524, "grad_norm": 0.4710683026346761, "learning_rate": 8.535533905932739e-06, "loss": 0.2878725528717041, "step": 11160 }, { "epoch": 0.975449938843264, "grad_norm": 0.5440530964094672, "learning_rate": 8.533736399191411e-06, "loss": 0.27296762466430663, "step": 11165 }, { "epoch": 0.9758867726716757, "grad_norm": 0.4985310507234747, "learning_rate": 8.531937979505316e-06, "loss": 0.2818188190460205, "step": 11170 }, { "epoch": 0.9763236065000873, "grad_norm": 0.4839965967735579, "learning_rate": 8.530138647339074e-06, "loss": 0.286635160446167, "step": 11175 }, { "epoch": 0.9767604403284991, "grad_norm": 0.4383865636341414, "learning_rate": 8.528338403157549e-06, "loss": 0.27664780616760254, "step": 11180 }, { "epoch": 0.9771972741569107, "grad_norm": 0.6710290241305132, "learning_rate": 8.526537247425832e-06, "loss": 0.2792991638183594, "step": 11185 }, { "epoch": 0.9776341079853224, "grad_norm": 0.4996264857515186, "learning_rate": 8.524735180609256e-06, "loss": 0.27917909622192383, "step": 11190 }, { "epoch": 0.9780709418137341, "grad_norm": 0.4589085664629315, "learning_rate": 8.522932203173387e-06, "loss": 0.290206241607666, "step": 11195 }, { "epoch": 0.9785077756421457, "grad_norm": 0.5362085267372223, "learning_rate": 8.521128315584024e-06, "loss": 0.2944842100143433, "step": 11200 }, { "epoch": 0.9789446094705574, "grad_norm": 0.5008322101862667, "learning_rate": 8.519323518307205e-06, "loss": 0.2615218162536621, "step": 11205 }, { "epoch": 0.979381443298969, "grad_norm": 0.4956657847208969, "learning_rate": 8.517517811809202e-06, "loss": 0.2744802951812744, "step": 11210 }, { "epoch": 0.9798182771273808, "grad_norm": 0.5207122885930808, "learning_rate": 8.51571119655652e-06, "loss": 0.2751176118850708, "step": 11215 }, { "epoch": 0.9802551109557924, "grad_norm": 0.5817084347894079, "learning_rate": 8.5139036730159e-06, "loss": 0.26561365127563474, "step": 11220 }, { "epoch": 0.9806919447842041, "grad_norm": 0.5024127494999139, "learning_rate": 8.51209524165432e-06, "loss": 0.2925488710403442, "step": 11225 }, { "epoch": 0.9811287786126157, "grad_norm": 0.5584630885921095, "learning_rate": 8.510285902938986e-06, "loss": 0.28539655208587644, "step": 11230 }, { "epoch": 0.9815656124410275, "grad_norm": 0.529697487876979, "learning_rate": 8.508475657337349e-06, "loss": 0.27278847694396974, "step": 11235 }, { "epoch": 0.9820024462694391, "grad_norm": 0.5441509582978508, "learning_rate": 8.506664505317082e-06, "loss": 0.27901079654693606, "step": 11240 }, { "epoch": 0.9824392800978508, "grad_norm": 0.4513419608326286, "learning_rate": 8.5048524473461e-06, "loss": 0.2869394063949585, "step": 11245 }, { "epoch": 0.9828761139262624, "grad_norm": 0.5397686520556272, "learning_rate": 8.503039483892552e-06, "loss": 0.3091031312942505, "step": 11250 }, { "epoch": 0.9833129477546741, "grad_norm": 0.5960915433541455, "learning_rate": 8.501225615424818e-06, "loss": 0.29305472373962405, "step": 11255 }, { "epoch": 0.9837497815830858, "grad_norm": 0.43332875297727197, "learning_rate": 8.499410842411511e-06, "loss": 0.24571938514709474, "step": 11260 }, { "epoch": 0.9841866154114974, "grad_norm": 0.4515420981966719, "learning_rate": 8.497595165321484e-06, "loss": 0.26895937919616697, "step": 11265 }, { "epoch": 0.9846234492399092, "grad_norm": 0.45220679497279936, "learning_rate": 8.495778584623815e-06, "loss": 0.27781190872192385, "step": 11270 }, { "epoch": 0.9850602830683208, "grad_norm": 0.5900993148982746, "learning_rate": 8.49396110078782e-06, "loss": 0.29890098571777346, "step": 11275 }, { "epoch": 0.9854971168967325, "grad_norm": 0.4638676762970239, "learning_rate": 8.492142714283051e-06, "loss": 0.26296770572662354, "step": 11280 }, { "epoch": 0.9859339507251441, "grad_norm": 0.6076806806239896, "learning_rate": 8.49032342557929e-06, "loss": 0.28972439765930175, "step": 11285 }, { "epoch": 0.9863707845535559, "grad_norm": 0.4961610702538568, "learning_rate": 8.488503235146548e-06, "loss": 0.2875235080718994, "step": 11290 }, { "epoch": 0.9868076183819675, "grad_norm": 0.5843738670969392, "learning_rate": 8.486682143455077e-06, "loss": 0.257581090927124, "step": 11295 }, { "epoch": 0.9872444522103792, "grad_norm": 0.559136902254784, "learning_rate": 8.484860150975359e-06, "loss": 0.2770348072052002, "step": 11300 }, { "epoch": 0.9876812860387908, "grad_norm": 0.4299218526511328, "learning_rate": 8.483037258178103e-06, "loss": 0.28555328845977784, "step": 11305 }, { "epoch": 0.9881181198672025, "grad_norm": 0.47750060642855985, "learning_rate": 8.48121346553426e-06, "loss": 0.2831839323043823, "step": 11310 }, { "epoch": 0.9885549536956142, "grad_norm": 0.46930723966038207, "learning_rate": 8.479388773515007e-06, "loss": 0.29001269340515134, "step": 11315 }, { "epoch": 0.9889917875240258, "grad_norm": 0.6932589010407768, "learning_rate": 8.477563182591755e-06, "loss": 0.28151645660400393, "step": 11320 }, { "epoch": 0.9894286213524376, "grad_norm": 0.4741428308926247, "learning_rate": 8.475736693236148e-06, "loss": 0.27585668563842775, "step": 11325 }, { "epoch": 0.9898654551808492, "grad_norm": 0.5082043554656452, "learning_rate": 8.473909305920062e-06, "loss": 0.2617331981658936, "step": 11330 }, { "epoch": 0.9903022890092609, "grad_norm": 0.6919151914104212, "learning_rate": 8.472081021115604e-06, "loss": 0.2987868070602417, "step": 11335 }, { "epoch": 0.9907391228376725, "grad_norm": 0.5450524887219702, "learning_rate": 8.470251839295113e-06, "loss": 0.28712151050567625, "step": 11340 }, { "epoch": 0.9911759566660843, "grad_norm": 0.5692819784671554, "learning_rate": 8.46842176093116e-06, "loss": 0.2864240169525146, "step": 11345 }, { "epoch": 0.9916127904944959, "grad_norm": 0.45456864227535676, "learning_rate": 8.466590786496548e-06, "loss": 0.26450314521789553, "step": 11350 }, { "epoch": 0.9920496243229076, "grad_norm": 0.5011020778089775, "learning_rate": 8.464758916464315e-06, "loss": 0.28657922744750974, "step": 11355 }, { "epoch": 0.9924864581513192, "grad_norm": 0.4477037003162742, "learning_rate": 8.462926151307722e-06, "loss": 0.2700667858123779, "step": 11360 }, { "epoch": 0.992923291979731, "grad_norm": 0.4334509437096671, "learning_rate": 8.461092491500267e-06, "loss": 0.28173744678497314, "step": 11365 }, { "epoch": 0.9933601258081426, "grad_norm": 0.5019587620922998, "learning_rate": 8.459257937515678e-06, "loss": 0.30713481903076173, "step": 11370 }, { "epoch": 0.9937969596365542, "grad_norm": 0.6342614552845118, "learning_rate": 8.457422489827913e-06, "loss": 0.28800134658813475, "step": 11375 }, { "epoch": 0.9942337934649659, "grad_norm": 0.4649457550501972, "learning_rate": 8.455586148911166e-06, "loss": 0.27587318420410156, "step": 11380 }, { "epoch": 0.9946706272933776, "grad_norm": 0.5869598276908126, "learning_rate": 8.453748915239855e-06, "loss": 0.2965510845184326, "step": 11385 }, { "epoch": 0.9951074611217893, "grad_norm": 0.6424035709028756, "learning_rate": 8.451910789288631e-06, "loss": 0.2767165184020996, "step": 11390 }, { "epoch": 0.9955442949502009, "grad_norm": 0.5441980438783974, "learning_rate": 8.450071771532377e-06, "loss": 0.2807156562805176, "step": 11395 }, { "epoch": 0.9959811287786127, "grad_norm": 0.5855468673730057, "learning_rate": 8.448231862446204e-06, "loss": 0.2849952459335327, "step": 11400 }, { "epoch": 0.9964179626070243, "grad_norm": 0.4563841782078231, "learning_rate": 8.446391062505456e-06, "loss": 0.2945958375930786, "step": 11405 }, { "epoch": 0.996854796435436, "grad_norm": 0.48801621386528843, "learning_rate": 8.444549372185707e-06, "loss": 0.2713450908660889, "step": 11410 }, { "epoch": 0.9972916302638476, "grad_norm": 0.5478158638712148, "learning_rate": 8.442706791962754e-06, "loss": 0.2603780508041382, "step": 11415 }, { "epoch": 0.9977284640922593, "grad_norm": 0.4778904794931985, "learning_rate": 8.440863322312636e-06, "loss": 0.2687117576599121, "step": 11420 }, { "epoch": 0.998165297920671, "grad_norm": 0.5404619303888638, "learning_rate": 8.439018963711613e-06, "loss": 0.2521535396575928, "step": 11425 }, { "epoch": 0.9986021317490826, "grad_norm": 0.5834850876031541, "learning_rate": 8.437173716636176e-06, "loss": 0.2878591775894165, "step": 11430 }, { "epoch": 0.9990389655774943, "grad_norm": 0.5237402629419017, "learning_rate": 8.43532758156305e-06, "loss": 0.2754480838775635, "step": 11435 }, { "epoch": 0.999475799405906, "grad_norm": 0.4822383388954611, "learning_rate": 8.433480558969183e-06, "loss": 0.2656769037246704, "step": 11440 }, { "epoch": 0.9999126332343177, "grad_norm": 0.578714171040683, "learning_rate": 8.431632649331754e-06, "loss": 0.2830246925354004, "step": 11445 }, { "epoch": 1.0, "eval_loss": 0.2398964762687683, "eval_runtime": 0.812, "eval_samples_per_second": 11.084, "eval_steps_per_second": 2.463, "eval_token_acc": 0.905162899072068, "step": 11446 }, { "epoch": 1.0003494670627293, "grad_norm": 0.5539060475514858, "learning_rate": 8.429783853128176e-06, "loss": 0.22842612266540527, "step": 11450 }, { "epoch": 1.000786300891141, "grad_norm": 0.5881147320469493, "learning_rate": 8.427934170836087e-06, "loss": 0.18868544101715087, "step": 11455 }, { "epoch": 1.0012231347195526, "grad_norm": 0.6190724730257601, "learning_rate": 8.426083602933355e-06, "loss": 0.22684342861175538, "step": 11460 }, { "epoch": 1.0016599685479644, "grad_norm": 0.6064444641812423, "learning_rate": 8.424232149898073e-06, "loss": 0.21175966262817383, "step": 11465 }, { "epoch": 1.002096802376376, "grad_norm": 0.5714710726132796, "learning_rate": 8.422379812208568e-06, "loss": 0.20163121223449706, "step": 11470 }, { "epoch": 1.0025336362047876, "grad_norm": 0.49324538729151135, "learning_rate": 8.420526590343395e-06, "loss": 0.2064953327178955, "step": 11475 }, { "epoch": 1.0029704700331994, "grad_norm": 0.5858807637706407, "learning_rate": 8.418672484781335e-06, "loss": 0.22922167778015137, "step": 11480 }, { "epoch": 1.0034073038616111, "grad_norm": 0.523589369770196, "learning_rate": 8.416817496001396e-06, "loss": 0.2061837673187256, "step": 11485 }, { "epoch": 1.0038441376900227, "grad_norm": 0.5766871440458601, "learning_rate": 8.414961624482821e-06, "loss": 0.21291182041168213, "step": 11490 }, { "epoch": 1.0042809715184344, "grad_norm": 0.44107000109470584, "learning_rate": 8.41310487070507e-06, "loss": 0.23874399662017823, "step": 11495 }, { "epoch": 1.004717805346846, "grad_norm": 0.4728201147719466, "learning_rate": 8.411247235147844e-06, "loss": 0.18068938255310057, "step": 11500 }, { "epoch": 1.0051546391752577, "grad_norm": 0.4568706407838821, "learning_rate": 8.409388718291061e-06, "loss": 0.20360305309295654, "step": 11505 }, { "epoch": 1.0055914730036695, "grad_norm": 0.527187810326415, "learning_rate": 8.407529320614874e-06, "loss": 0.1976691246032715, "step": 11510 }, { "epoch": 1.006028306832081, "grad_norm": 0.5609587100576682, "learning_rate": 8.405669042599657e-06, "loss": 0.22570700645446778, "step": 11515 }, { "epoch": 1.0064651406604928, "grad_norm": 0.566649719911523, "learning_rate": 8.403807884726018e-06, "loss": 0.21502537727355958, "step": 11520 }, { "epoch": 1.0069019744889045, "grad_norm": 0.5775191336312627, "learning_rate": 8.401945847474786e-06, "loss": 0.2046968460083008, "step": 11525 }, { "epoch": 1.007338808317316, "grad_norm": 0.5448920924879067, "learning_rate": 8.400082931327025e-06, "loss": 0.2298736095428467, "step": 11530 }, { "epoch": 1.0077756421457278, "grad_norm": 0.48123334328780853, "learning_rate": 8.398219136764017e-06, "loss": 0.2048954486846924, "step": 11535 }, { "epoch": 1.0082124759741395, "grad_norm": 0.5369712328536916, "learning_rate": 8.396354464267278e-06, "loss": 0.19684526920318604, "step": 11540 }, { "epoch": 1.008649309802551, "grad_norm": 0.6039382720124554, "learning_rate": 8.394488914318549e-06, "loss": 0.23232922554016114, "step": 11545 }, { "epoch": 1.0090861436309628, "grad_norm": 0.5652252148947527, "learning_rate": 8.392622487399793e-06, "loss": 0.23913979530334473, "step": 11550 }, { "epoch": 1.0095229774593744, "grad_norm": 0.592918661280847, "learning_rate": 8.390755183993206e-06, "loss": 0.2224879264831543, "step": 11555 }, { "epoch": 1.0099598112877861, "grad_norm": 0.5427791259597883, "learning_rate": 8.38888700458121e-06, "loss": 0.24510765075683594, "step": 11560 }, { "epoch": 1.0103966451161979, "grad_norm": 0.4920644719002412, "learning_rate": 8.387017949646448e-06, "loss": 0.2279806613922119, "step": 11565 }, { "epoch": 1.0108334789446094, "grad_norm": 0.47004039044720497, "learning_rate": 8.385148019671793e-06, "loss": 0.2248149871826172, "step": 11570 }, { "epoch": 1.0112703127730212, "grad_norm": 0.4492623024785014, "learning_rate": 8.383277215140345e-06, "loss": 0.21764492988586426, "step": 11575 }, { "epoch": 1.011707146601433, "grad_norm": 0.44384578586269585, "learning_rate": 8.381405536535425e-06, "loss": 0.23100061416625978, "step": 11580 }, { "epoch": 1.0121439804298444, "grad_norm": 0.509801557173113, "learning_rate": 8.379532984340589e-06, "loss": 0.23776633739471437, "step": 11585 }, { "epoch": 1.0125808142582562, "grad_norm": 0.5580196929354918, "learning_rate": 8.377659559039608e-06, "loss": 0.24029746055603027, "step": 11590 }, { "epoch": 1.013017648086668, "grad_norm": 0.5601435932701433, "learning_rate": 8.375785261116487e-06, "loss": 0.22023723125457764, "step": 11595 }, { "epoch": 1.0134544819150795, "grad_norm": 0.5622483236754214, "learning_rate": 8.373910091055451e-06, "loss": 0.21713497638702392, "step": 11600 }, { "epoch": 1.0138913157434912, "grad_norm": 0.551558330027011, "learning_rate": 8.37203404934095e-06, "loss": 0.22477328777313232, "step": 11605 }, { "epoch": 1.0143281495719028, "grad_norm": 0.5846035686401251, "learning_rate": 8.370157136457666e-06, "loss": 0.2224438190460205, "step": 11610 }, { "epoch": 1.0147649834003145, "grad_norm": 0.5396255877121436, "learning_rate": 8.368279352890499e-06, "loss": 0.21595315933227538, "step": 11615 }, { "epoch": 1.0152018172287263, "grad_norm": 0.6799450384989506, "learning_rate": 8.366400699124578e-06, "loss": 0.2302614450454712, "step": 11620 }, { "epoch": 1.0156386510571378, "grad_norm": 0.5421405445047992, "learning_rate": 8.364521175645253e-06, "loss": 0.19817644357681274, "step": 11625 }, { "epoch": 1.0160754848855496, "grad_norm": 0.6417835178948736, "learning_rate": 8.362640782938103e-06, "loss": 0.21627793312072754, "step": 11630 }, { "epoch": 1.0165123187139613, "grad_norm": 0.6014005903156078, "learning_rate": 8.36075952148893e-06, "loss": 0.2027726650238037, "step": 11635 }, { "epoch": 1.0169491525423728, "grad_norm": 0.6539114533316293, "learning_rate": 8.358877391783757e-06, "loss": 0.21222012042999266, "step": 11640 }, { "epoch": 1.0173859863707846, "grad_norm": 0.5834908934801362, "learning_rate": 8.356994394308838e-06, "loss": 0.20724754333496093, "step": 11645 }, { "epoch": 1.0178228201991961, "grad_norm": 0.4981946184249738, "learning_rate": 8.355110529550645e-06, "loss": 0.23500256538391112, "step": 11650 }, { "epoch": 1.0182596540276079, "grad_norm": 0.5516772297151828, "learning_rate": 8.353225797995877e-06, "loss": 0.21117873191833497, "step": 11655 }, { "epoch": 1.0186964878560196, "grad_norm": 0.4923880222241355, "learning_rate": 8.351340200131456e-06, "loss": 0.23821547031402587, "step": 11660 }, { "epoch": 1.0191333216844312, "grad_norm": 0.46099003509361475, "learning_rate": 8.349453736444532e-06, "loss": 0.23482828140258788, "step": 11665 }, { "epoch": 1.019570155512843, "grad_norm": 0.5422118711465019, "learning_rate": 8.347566407422471e-06, "loss": 0.2415933132171631, "step": 11670 }, { "epoch": 1.0200069893412547, "grad_norm": 0.518638512653724, "learning_rate": 8.345678213552868e-06, "loss": 0.2277397632598877, "step": 11675 }, { "epoch": 1.0204438231696662, "grad_norm": 0.5576872059953074, "learning_rate": 8.343789155323538e-06, "loss": 0.2284313678741455, "step": 11680 }, { "epoch": 1.020880656998078, "grad_norm": 0.6380718033831536, "learning_rate": 8.341899233222525e-06, "loss": 0.2214797019958496, "step": 11685 }, { "epoch": 1.0213174908264897, "grad_norm": 0.6238440271144222, "learning_rate": 8.34000844773809e-06, "loss": 0.23145537376403807, "step": 11690 }, { "epoch": 1.0217543246549012, "grad_norm": 0.5829608936811427, "learning_rate": 8.338116799358722e-06, "loss": 0.2007359504699707, "step": 11695 }, { "epoch": 1.022191158483313, "grad_norm": 0.5647683299182086, "learning_rate": 8.336224288573128e-06, "loss": 0.217714262008667, "step": 11700 }, { "epoch": 1.0226279923117245, "grad_norm": 0.6173615401934484, "learning_rate": 8.334330915870239e-06, "loss": 0.19979406595230104, "step": 11705 }, { "epoch": 1.0230648261401363, "grad_norm": 0.4765574272807639, "learning_rate": 8.332436681739213e-06, "loss": 0.2216360330581665, "step": 11710 }, { "epoch": 1.023501659968548, "grad_norm": 0.4991023990631885, "learning_rate": 8.330541586669425e-06, "loss": 0.21386637687683105, "step": 11715 }, { "epoch": 1.0239384937969596, "grad_norm": 0.6166037884239641, "learning_rate": 8.328645631150478e-06, "loss": 0.2124884843826294, "step": 11720 }, { "epoch": 1.0243753276253713, "grad_norm": 0.5761142394402088, "learning_rate": 8.326748815672192e-06, "loss": 0.19391849040985107, "step": 11725 }, { "epoch": 1.024812161453783, "grad_norm": 0.6340699449378124, "learning_rate": 8.324851140724614e-06, "loss": 0.22210874557495117, "step": 11730 }, { "epoch": 1.0252489952821946, "grad_norm": 0.5760578468937798, "learning_rate": 8.322952606798007e-06, "loss": 0.24115843772888185, "step": 11735 }, { "epoch": 1.0256858291106064, "grad_norm": 0.5275126702878856, "learning_rate": 8.321053214382859e-06, "loss": 0.23240630626678466, "step": 11740 }, { "epoch": 1.026122662939018, "grad_norm": 0.5541457331966875, "learning_rate": 8.319152963969885e-06, "loss": 0.22120208740234376, "step": 11745 }, { "epoch": 1.0265594967674296, "grad_norm": 0.7616253963009586, "learning_rate": 8.317251856050013e-06, "loss": 0.20449528694152833, "step": 11750 }, { "epoch": 1.0269963305958414, "grad_norm": 0.48313799056730306, "learning_rate": 8.315349891114396e-06, "loss": 0.21050572395324707, "step": 11755 }, { "epoch": 1.027433164424253, "grad_norm": 0.6521346013138561, "learning_rate": 8.313447069654412e-06, "loss": 0.1869420051574707, "step": 11760 }, { "epoch": 1.0278699982526647, "grad_norm": 0.5010001951513522, "learning_rate": 8.311543392161653e-06, "loss": 0.2034611225128174, "step": 11765 }, { "epoch": 1.0283068320810764, "grad_norm": 0.6093183529520644, "learning_rate": 8.30963885912794e-06, "loss": 0.20770010948181153, "step": 11770 }, { "epoch": 1.028743665909488, "grad_norm": 0.5091566973101349, "learning_rate": 8.307733471045309e-06, "loss": 0.23159391880035402, "step": 11775 }, { "epoch": 1.0291804997378997, "grad_norm": 0.5622359218100934, "learning_rate": 8.305827228406019e-06, "loss": 0.22453179359436035, "step": 11780 }, { "epoch": 1.0296173335663115, "grad_norm": 0.6165805858847171, "learning_rate": 8.303920131702549e-06, "loss": 0.22826757431030273, "step": 11785 }, { "epoch": 1.030054167394723, "grad_norm": 0.4946438652206574, "learning_rate": 8.302012181427605e-06, "loss": 0.20661299228668212, "step": 11790 }, { "epoch": 1.0304910012231348, "grad_norm": 0.4528168103599959, "learning_rate": 8.300103378074103e-06, "loss": 0.19442272186279297, "step": 11795 }, { "epoch": 1.0309278350515463, "grad_norm": 0.489600375235478, "learning_rate": 8.298193722135183e-06, "loss": 0.22984585762023926, "step": 11800 }, { "epoch": 1.031364668879958, "grad_norm": 0.5441701131289708, "learning_rate": 8.296283214104212e-06, "loss": 0.21106834411621095, "step": 11805 }, { "epoch": 1.0318015027083698, "grad_norm": 0.5030969540752934, "learning_rate": 8.294371854474769e-06, "loss": 0.25221638679504393, "step": 11810 }, { "epoch": 1.0322383365367813, "grad_norm": 0.5503615616168877, "learning_rate": 8.292459643740658e-06, "loss": 0.2174626350402832, "step": 11815 }, { "epoch": 1.032675170365193, "grad_norm": 0.5333068932305692, "learning_rate": 8.290546582395898e-06, "loss": 0.2068784713745117, "step": 11820 }, { "epoch": 1.0331120041936048, "grad_norm": 0.5017800204736158, "learning_rate": 8.288632670934731e-06, "loss": 0.1969527006149292, "step": 11825 }, { "epoch": 1.0335488380220164, "grad_norm": 0.5301166624702629, "learning_rate": 8.286717909851617e-06, "loss": 0.22935922145843507, "step": 11830 }, { "epoch": 1.0339856718504281, "grad_norm": 0.538810011226642, "learning_rate": 8.284802299641242e-06, "loss": 0.19878160953521729, "step": 11835 }, { "epoch": 1.0344225056788399, "grad_norm": 0.6397638967993797, "learning_rate": 8.2828858407985e-06, "loss": 0.22042350769042968, "step": 11840 }, { "epoch": 1.0348593395072514, "grad_norm": 0.4407208373392854, "learning_rate": 8.280968533818515e-06, "loss": 0.23090157508850098, "step": 11845 }, { "epoch": 1.0352961733356632, "grad_norm": 0.4841518231382711, "learning_rate": 8.279050379196623e-06, "loss": 0.19876365661621093, "step": 11850 }, { "epoch": 1.0357330071640747, "grad_norm": 0.511359879969172, "learning_rate": 8.27713137742838e-06, "loss": 0.21133086681365967, "step": 11855 }, { "epoch": 1.0361698409924864, "grad_norm": 0.5192953780892022, "learning_rate": 8.275211529009565e-06, "loss": 0.24060652256011963, "step": 11860 }, { "epoch": 1.0366066748208982, "grad_norm": 0.5894890480810564, "learning_rate": 8.273290834436172e-06, "loss": 0.20869057178497313, "step": 11865 }, { "epoch": 1.0370435086493097, "grad_norm": 0.6076939493390349, "learning_rate": 8.271369294204414e-06, "loss": 0.21034085750579834, "step": 11870 }, { "epoch": 1.0374803424777215, "grad_norm": 1.4261781546187995, "learning_rate": 8.269446908810726e-06, "loss": 0.21701807975769044, "step": 11875 }, { "epoch": 1.0379171763061332, "grad_norm": 0.5468331798714547, "learning_rate": 8.267523678751752e-06, "loss": 0.21032843589782715, "step": 11880 }, { "epoch": 1.0383540101345448, "grad_norm": 0.4940939404041514, "learning_rate": 8.265599604524367e-06, "loss": 0.2447211742401123, "step": 11885 }, { "epoch": 1.0387908439629565, "grad_norm": 0.589081928254534, "learning_rate": 8.263674686625655e-06, "loss": 0.19758530855178832, "step": 11890 }, { "epoch": 1.0392276777913683, "grad_norm": 0.6992419446905136, "learning_rate": 8.261748925552922e-06, "loss": 0.22735462188720704, "step": 11895 }, { "epoch": 1.0396645116197798, "grad_norm": 0.5167396191293591, "learning_rate": 8.259822321803689e-06, "loss": 0.22797017097473143, "step": 11900 }, { "epoch": 1.0401013454481915, "grad_norm": 0.5065442026464932, "learning_rate": 8.257894875875698e-06, "loss": 0.21162524223327636, "step": 11905 }, { "epoch": 1.040538179276603, "grad_norm": 0.5359712999823026, "learning_rate": 8.255966588266903e-06, "loss": 0.2526949644088745, "step": 11910 }, { "epoch": 1.0409750131050148, "grad_norm": 0.5949205686921112, "learning_rate": 8.254037459475484e-06, "loss": 0.214188289642334, "step": 11915 }, { "epoch": 1.0414118469334266, "grad_norm": 0.5734264863926901, "learning_rate": 8.252107489999831e-06, "loss": 0.18569064140319824, "step": 11920 }, { "epoch": 1.0418486807618381, "grad_norm": 0.46020928171660025, "learning_rate": 8.250176680338556e-06, "loss": 0.19381299018859863, "step": 11925 }, { "epoch": 1.0422855145902499, "grad_norm": 0.5287726671364652, "learning_rate": 8.24824503099048e-06, "loss": 0.24189884662628175, "step": 11930 }, { "epoch": 1.0427223484186616, "grad_norm": 0.572524319637107, "learning_rate": 8.246312542454655e-06, "loss": 0.19112069606781007, "step": 11935 }, { "epoch": 1.0431591822470732, "grad_norm": 0.5573657328283587, "learning_rate": 8.244379215230336e-06, "loss": 0.22727546691894532, "step": 11940 }, { "epoch": 1.043596016075485, "grad_norm": 0.5396614924118164, "learning_rate": 8.242445049817003e-06, "loss": 0.2327432632446289, "step": 11945 }, { "epoch": 1.0440328499038967, "grad_norm": 0.5789489238084433, "learning_rate": 8.240510046714347e-06, "loss": 0.21447715759277344, "step": 11950 }, { "epoch": 1.0444696837323082, "grad_norm": 0.5852326651398383, "learning_rate": 8.238574206422282e-06, "loss": 0.21853988170623778, "step": 11955 }, { "epoch": 1.04490651756072, "grad_norm": 0.6434239800144129, "learning_rate": 8.23663752944093e-06, "loss": 0.19979621171951295, "step": 11960 }, { "epoch": 1.0453433513891315, "grad_norm": 0.5253234971644062, "learning_rate": 8.234700016270638e-06, "loss": 0.23935203552246093, "step": 11965 }, { "epoch": 1.0457801852175432, "grad_norm": 0.5137639093001132, "learning_rate": 8.232761667411962e-06, "loss": 0.2070690155029297, "step": 11970 }, { "epoch": 1.046217019045955, "grad_norm": 0.565896539752874, "learning_rate": 8.230822483365676e-06, "loss": 0.20216641426086426, "step": 11975 }, { "epoch": 1.0466538528743665, "grad_norm": 0.5860480820047214, "learning_rate": 8.228882464632772e-06, "loss": 0.2005773067474365, "step": 11980 }, { "epoch": 1.0470906867027783, "grad_norm": 0.6858013415287256, "learning_rate": 8.226941611714458e-06, "loss": 0.20558626651763917, "step": 11985 }, { "epoch": 1.04752752053119, "grad_norm": 0.5753384101307125, "learning_rate": 8.224999925112152e-06, "loss": 0.21753945350646972, "step": 11990 }, { "epoch": 1.0479643543596016, "grad_norm": 0.4894844914621523, "learning_rate": 8.223057405327493e-06, "loss": 0.22581093311309813, "step": 11995 }, { "epoch": 1.0484011881880133, "grad_norm": 0.5873582820657384, "learning_rate": 8.221114052862332e-06, "loss": 0.18577258586883544, "step": 12000 }, { "epoch": 1.048838022016425, "grad_norm": 0.734717419854974, "learning_rate": 8.219169868218735e-06, "loss": 0.23306987285614014, "step": 12005 }, { "epoch": 1.0492748558448366, "grad_norm": 0.6303221556366384, "learning_rate": 8.217224851898988e-06, "loss": 0.20268735885620118, "step": 12010 }, { "epoch": 1.0497116896732483, "grad_norm": 0.5455684609919287, "learning_rate": 8.215279004405586e-06, "loss": 0.19650136232376098, "step": 12015 }, { "epoch": 1.0501485235016599, "grad_norm": 0.6134008670289578, "learning_rate": 8.213332326241239e-06, "loss": 0.23042707443237304, "step": 12020 }, { "epoch": 1.0505853573300716, "grad_norm": 0.5760888407819196, "learning_rate": 8.211384817908877e-06, "loss": 0.23069436550140382, "step": 12025 }, { "epoch": 1.0510221911584834, "grad_norm": 0.5530658529308132, "learning_rate": 8.20943647991164e-06, "loss": 0.21607136726379395, "step": 12030 }, { "epoch": 1.051459024986895, "grad_norm": 0.5526587959597785, "learning_rate": 8.207487312752882e-06, "loss": 0.21252901554107667, "step": 12035 }, { "epoch": 1.0518958588153067, "grad_norm": 0.5596399682812966, "learning_rate": 8.205537316936174e-06, "loss": 0.20187878608703613, "step": 12040 }, { "epoch": 1.0523326926437184, "grad_norm": 0.6534721426094339, "learning_rate": 8.203586492965298e-06, "loss": 0.2199857234954834, "step": 12045 }, { "epoch": 1.05276952647213, "grad_norm": 0.5540121019067007, "learning_rate": 8.201634841344253e-06, "loss": 0.21825170516967773, "step": 12050 }, { "epoch": 1.0532063603005417, "grad_norm": 0.5902692580233077, "learning_rate": 8.199682362577249e-06, "loss": 0.21472768783569335, "step": 12055 }, { "epoch": 1.0536431941289532, "grad_norm": 0.4648999786073315, "learning_rate": 8.197729057168712e-06, "loss": 0.20673375129699706, "step": 12060 }, { "epoch": 1.054080027957365, "grad_norm": 0.5034839433569218, "learning_rate": 8.195774925623282e-06, "loss": 0.20670609474182128, "step": 12065 }, { "epoch": 1.0545168617857767, "grad_norm": 0.5853223843619799, "learning_rate": 8.19381996844581e-06, "loss": 0.2160050630569458, "step": 12070 }, { "epoch": 1.0549536956141883, "grad_norm": 0.5061742077995458, "learning_rate": 8.191864186141359e-06, "loss": 0.19185309410095214, "step": 12075 }, { "epoch": 1.0553905294426, "grad_norm": 0.5691192419728005, "learning_rate": 8.18990757921521e-06, "loss": 0.1943892478942871, "step": 12080 }, { "epoch": 1.0558273632710118, "grad_norm": 0.5058328776279594, "learning_rate": 8.187950148172854e-06, "loss": 0.21460800170898436, "step": 12085 }, { "epoch": 1.0562641970994233, "grad_norm": 0.5023610164620778, "learning_rate": 8.185991893519997e-06, "loss": 0.2258467674255371, "step": 12090 }, { "epoch": 1.056701030927835, "grad_norm": 0.6153733335926718, "learning_rate": 8.184032815762555e-06, "loss": 0.24834184646606444, "step": 12095 }, { "epoch": 1.0571378647562468, "grad_norm": 0.526079655324656, "learning_rate": 8.182072915406656e-06, "loss": 0.2109243392944336, "step": 12100 }, { "epoch": 1.0575746985846584, "grad_norm": 0.5423640958631781, "learning_rate": 8.180112192958646e-06, "loss": 0.23975539207458496, "step": 12105 }, { "epoch": 1.05801153241307, "grad_norm": 0.6302589064968857, "learning_rate": 8.178150648925079e-06, "loss": 0.23275861740112305, "step": 12110 }, { "epoch": 1.0584483662414816, "grad_norm": 0.5041526530363026, "learning_rate": 8.176188283812721e-06, "loss": 0.1979214072227478, "step": 12115 }, { "epoch": 1.0588852000698934, "grad_norm": 0.6897487078414678, "learning_rate": 8.17422509812855e-06, "loss": 0.20526156425476075, "step": 12120 }, { "epoch": 1.0593220338983051, "grad_norm": 0.5927665066739105, "learning_rate": 8.17226109237976e-06, "loss": 0.23104534149169922, "step": 12125 }, { "epoch": 1.0597588677267167, "grad_norm": 0.5666683701643219, "learning_rate": 8.170296267073752e-06, "loss": 0.24179587364196778, "step": 12130 }, { "epoch": 1.0601957015551284, "grad_norm": 0.4726120712308709, "learning_rate": 8.168330622718141e-06, "loss": 0.20081379413604736, "step": 12135 }, { "epoch": 1.0606325353835402, "grad_norm": 0.5183779685092746, "learning_rate": 8.166364159820757e-06, "loss": 0.236832332611084, "step": 12140 }, { "epoch": 1.0610693692119517, "grad_norm": 0.6506420019433177, "learning_rate": 8.164396878889633e-06, "loss": 0.2321639060974121, "step": 12145 }, { "epoch": 1.0615062030403635, "grad_norm": 0.4801590667103163, "learning_rate": 8.16242878043302e-06, "loss": 0.20804753303527831, "step": 12150 }, { "epoch": 1.0619430368687752, "grad_norm": 0.49836394037937476, "learning_rate": 8.16045986495938e-06, "loss": 0.19398231506347657, "step": 12155 }, { "epoch": 1.0623798706971868, "grad_norm": 0.5338173984597502, "learning_rate": 8.158490132977384e-06, "loss": 0.19348809719085694, "step": 12160 }, { "epoch": 1.0628167045255985, "grad_norm": 0.499436614430467, "learning_rate": 8.156519584995913e-06, "loss": 0.2121037244796753, "step": 12165 }, { "epoch": 1.06325353835401, "grad_norm": 0.5376997705652349, "learning_rate": 8.154548221524058e-06, "loss": 0.2339644432067871, "step": 12170 }, { "epoch": 1.0636903721824218, "grad_norm": 0.571120278807604, "learning_rate": 8.15257604307113e-06, "loss": 0.21467945575714112, "step": 12175 }, { "epoch": 1.0641272060108335, "grad_norm": 0.4996731913536443, "learning_rate": 8.150603050146639e-06, "loss": 0.24246103763580323, "step": 12180 }, { "epoch": 1.064564039839245, "grad_norm": 0.4542562371145667, "learning_rate": 8.148629243260308e-06, "loss": 0.2285412311553955, "step": 12185 }, { "epoch": 1.0650008736676568, "grad_norm": 0.6011748964906238, "learning_rate": 8.146654622922077e-06, "loss": 0.26097912788391114, "step": 12190 }, { "epoch": 1.0654377074960686, "grad_norm": 0.5439116096191826, "learning_rate": 8.144679189642087e-06, "loss": 0.19139249324798585, "step": 12195 }, { "epoch": 1.0658745413244801, "grad_norm": 0.5165570245121289, "learning_rate": 8.142702943930697e-06, "loss": 0.22013430595397948, "step": 12200 }, { "epoch": 1.0663113751528919, "grad_norm": 0.5475232004288781, "learning_rate": 8.14072588629847e-06, "loss": 0.22654972076416016, "step": 12205 }, { "epoch": 1.0667482089813034, "grad_norm": 0.5703297143859326, "learning_rate": 8.138748017256184e-06, "loss": 0.22369897365570068, "step": 12210 }, { "epoch": 1.0671850428097152, "grad_norm": 0.5281233950151043, "learning_rate": 8.136769337314818e-06, "loss": 0.18966902494430543, "step": 12215 }, { "epoch": 1.067621876638127, "grad_norm": 0.5730003840365119, "learning_rate": 8.134789846985573e-06, "loss": 0.23808012008666993, "step": 12220 }, { "epoch": 1.0680587104665384, "grad_norm": 0.47350056422740017, "learning_rate": 8.132809546779848e-06, "loss": 0.2124497652053833, "step": 12225 }, { "epoch": 1.0684955442949502, "grad_norm": 0.5647055307786312, "learning_rate": 8.130828437209254e-06, "loss": 0.2474133253097534, "step": 12230 }, { "epoch": 1.068932378123362, "grad_norm": 0.6039366797010116, "learning_rate": 8.12884651878562e-06, "loss": 0.1903379440307617, "step": 12235 }, { "epoch": 1.0693692119517735, "grad_norm": 0.4774080408863145, "learning_rate": 8.126863792020972e-06, "loss": 0.22240819931030273, "step": 12240 }, { "epoch": 1.0698060457801852, "grad_norm": 0.5074687602356013, "learning_rate": 8.12488025742755e-06, "loss": 0.19922246932983398, "step": 12245 }, { "epoch": 1.070242879608597, "grad_norm": 0.623698097644071, "learning_rate": 8.122895915517802e-06, "loss": 0.20528244972229004, "step": 12250 }, { "epoch": 1.0706797134370085, "grad_norm": 0.5121073405054667, "learning_rate": 8.120910766804386e-06, "loss": 0.2267075777053833, "step": 12255 }, { "epoch": 1.0711165472654203, "grad_norm": 0.5018550679774587, "learning_rate": 8.118924811800168e-06, "loss": 0.2190169334411621, "step": 12260 }, { "epoch": 1.0715533810938318, "grad_norm": 0.4943072116941466, "learning_rate": 8.11693805101822e-06, "loss": 0.22936620712280273, "step": 12265 }, { "epoch": 1.0719902149222436, "grad_norm": 0.6360111415531746, "learning_rate": 8.114950484971827e-06, "loss": 0.21024596691131592, "step": 12270 }, { "epoch": 1.0724270487506553, "grad_norm": 0.582409658373802, "learning_rate": 8.112962114174474e-06, "loss": 0.17971601486206054, "step": 12275 }, { "epoch": 1.0728638825790668, "grad_norm": 0.7287708935303929, "learning_rate": 8.110972939139863e-06, "loss": 0.20834064483642578, "step": 12280 }, { "epoch": 1.0733007164074786, "grad_norm": 0.5001300104342367, "learning_rate": 8.108982960381897e-06, "loss": 0.23156986236572266, "step": 12285 }, { "epoch": 1.0737375502358903, "grad_norm": 0.5172283088359346, "learning_rate": 8.106992178414692e-06, "loss": 0.19927055835723878, "step": 12290 }, { "epoch": 1.0741743840643019, "grad_norm": 0.5213787030444709, "learning_rate": 8.105000593752565e-06, "loss": 0.19553452730178833, "step": 12295 }, { "epoch": 1.0746112178927136, "grad_norm": 0.5734870949137542, "learning_rate": 8.103008206910048e-06, "loss": 0.21995954513549804, "step": 12300 }, { "epoch": 1.0750480517211254, "grad_norm": 0.5017927214078578, "learning_rate": 8.101015018401874e-06, "loss": 0.22842864990234374, "step": 12305 }, { "epoch": 1.075484885549537, "grad_norm": 0.5680205259218273, "learning_rate": 8.099021028742986e-06, "loss": 0.2211442470550537, "step": 12310 }, { "epoch": 1.0759217193779487, "grad_norm": 0.5079271658104461, "learning_rate": 8.097026238448535e-06, "loss": 0.22803523540496826, "step": 12315 }, { "epoch": 1.0763585532063602, "grad_norm": 0.4827999767630567, "learning_rate": 8.095030648033874e-06, "loss": 0.20344090461730957, "step": 12320 }, { "epoch": 1.076795387034772, "grad_norm": 0.46772614888467023, "learning_rate": 8.093034258014569e-06, "loss": 0.22079482078552246, "step": 12325 }, { "epoch": 1.0772322208631837, "grad_norm": 0.5127220035354346, "learning_rate": 8.091037068906388e-06, "loss": 0.22682929039001465, "step": 12330 }, { "epoch": 1.0776690546915952, "grad_norm": 0.5360590844267957, "learning_rate": 8.08903908122531e-06, "loss": 0.21321706771850585, "step": 12335 }, { "epoch": 1.078105888520007, "grad_norm": 0.5327025392076641, "learning_rate": 8.087040295487511e-06, "loss": 0.22420005798339843, "step": 12340 }, { "epoch": 1.0785427223484187, "grad_norm": 0.5201204236221589, "learning_rate": 8.085040712209387e-06, "loss": 0.21088774204254152, "step": 12345 }, { "epoch": 1.0789795561768303, "grad_norm": 0.510133086086692, "learning_rate": 8.083040331907527e-06, "loss": 0.22390317916870117, "step": 12350 }, { "epoch": 1.079416390005242, "grad_norm": 0.5489388021115206, "learning_rate": 8.081039155098734e-06, "loss": 0.2011104106903076, "step": 12355 }, { "epoch": 1.0798532238336538, "grad_norm": 0.5809674236678462, "learning_rate": 8.079037182300014e-06, "loss": 0.21478445529937745, "step": 12360 }, { "epoch": 1.0802900576620653, "grad_norm": 0.5984970149136862, "learning_rate": 8.07703441402858e-06, "loss": 0.2094465970993042, "step": 12365 }, { "epoch": 1.080726891490477, "grad_norm": 0.44458730362881715, "learning_rate": 8.075030850801848e-06, "loss": 0.21033105850219727, "step": 12370 }, { "epoch": 1.0811637253188886, "grad_norm": 0.5229880325263175, "learning_rate": 8.073026493137439e-06, "loss": 0.21313009262084961, "step": 12375 }, { "epoch": 1.0816005591473004, "grad_norm": 0.5715743882337018, "learning_rate": 8.071021341553185e-06, "loss": 0.20194654464721679, "step": 12380 }, { "epoch": 1.082037392975712, "grad_norm": 0.5735563425069432, "learning_rate": 8.069015396567117e-06, "loss": 0.2283034324645996, "step": 12385 }, { "epoch": 1.0824742268041236, "grad_norm": 0.5522926132565819, "learning_rate": 8.067008658697473e-06, "loss": 0.20628619194030762, "step": 12390 }, { "epoch": 1.0829110606325354, "grad_norm": 0.5181602775210733, "learning_rate": 8.065001128462695e-06, "loss": 0.20762860774993896, "step": 12395 }, { "epoch": 1.0833478944609471, "grad_norm": 0.537694570651194, "learning_rate": 8.062992806381433e-06, "loss": 0.20755889415740966, "step": 12400 }, { "epoch": 1.0837847282893587, "grad_norm": 0.5015885752994725, "learning_rate": 8.06098369297254e-06, "loss": 0.19032233953475952, "step": 12405 }, { "epoch": 1.0842215621177704, "grad_norm": 0.49974851917388025, "learning_rate": 8.05897378875507e-06, "loss": 0.20304415225982667, "step": 12410 }, { "epoch": 1.0846583959461822, "grad_norm": 0.5245473278816685, "learning_rate": 8.056963094248285e-06, "loss": 0.21568596363067627, "step": 12415 }, { "epoch": 1.0850952297745937, "grad_norm": 0.49826620087647855, "learning_rate": 8.05495160997165e-06, "loss": 0.2166433811187744, "step": 12420 }, { "epoch": 1.0855320636030055, "grad_norm": 0.8344008315895047, "learning_rate": 8.052939336444835e-06, "loss": 0.23002095222473146, "step": 12425 }, { "epoch": 1.085968897431417, "grad_norm": 0.6259418213807865, "learning_rate": 8.050926274187711e-06, "loss": 0.2239530563354492, "step": 12430 }, { "epoch": 1.0864057312598288, "grad_norm": 0.5846667798221642, "learning_rate": 8.04891242372036e-06, "loss": 0.23141560554504395, "step": 12435 }, { "epoch": 1.0868425650882405, "grad_norm": 0.511136849052489, "learning_rate": 8.046897785563054e-06, "loss": 0.22752628326416016, "step": 12440 }, { "epoch": 1.087279398916652, "grad_norm": 0.5703157291235891, "learning_rate": 8.044882360236286e-06, "loss": 0.19233757257461548, "step": 12445 }, { "epoch": 1.0877162327450638, "grad_norm": 0.7231314302426983, "learning_rate": 8.042866148260738e-06, "loss": 0.22310137748718262, "step": 12450 }, { "epoch": 1.0881530665734755, "grad_norm": 0.5820652904276481, "learning_rate": 8.040849150157302e-06, "loss": 0.23062772750854493, "step": 12455 }, { "epoch": 1.088589900401887, "grad_norm": 0.5215060940744991, "learning_rate": 8.038831366447073e-06, "loss": 0.2216115951538086, "step": 12460 }, { "epoch": 1.0890267342302988, "grad_norm": 0.5044954418440933, "learning_rate": 8.036812797651345e-06, "loss": 0.21200296878814698, "step": 12465 }, { "epoch": 1.0894635680587104, "grad_norm": 0.5308577360416153, "learning_rate": 8.034793444291619e-06, "loss": 0.22200171947479247, "step": 12470 }, { "epoch": 1.0899004018871221, "grad_norm": 0.5781507921365213, "learning_rate": 8.032773306889598e-06, "loss": 0.19005593061447143, "step": 12475 }, { "epoch": 1.0903372357155339, "grad_norm": 0.5063299213760575, "learning_rate": 8.030752385967186e-06, "loss": 0.21613609790802002, "step": 12480 }, { "epoch": 1.0907740695439454, "grad_norm": 0.5247944106841883, "learning_rate": 8.028730682046491e-06, "loss": 0.20753297805786133, "step": 12485 }, { "epoch": 1.0912109033723572, "grad_norm": 0.7964626162592042, "learning_rate": 8.026708195649819e-06, "loss": 0.231054425239563, "step": 12490 }, { "epoch": 1.091647737200769, "grad_norm": 0.5314535518671178, "learning_rate": 8.024684927299688e-06, "loss": 0.1986405611038208, "step": 12495 }, { "epoch": 1.0920845710291804, "grad_norm": 0.4371758055226367, "learning_rate": 8.022660877518804e-06, "loss": 0.21954526901245117, "step": 12500 }, { "epoch": 1.0925214048575922, "grad_norm": 0.7615316308035058, "learning_rate": 8.02063604683009e-06, "loss": 0.19786134958267212, "step": 12505 }, { "epoch": 1.092958238686004, "grad_norm": 0.5311726862595162, "learning_rate": 8.018610435756658e-06, "loss": 0.2377143383026123, "step": 12510 }, { "epoch": 1.0933950725144155, "grad_norm": 0.4904746369218933, "learning_rate": 8.01658404482183e-06, "loss": 0.20292670726776124, "step": 12515 }, { "epoch": 1.0938319063428272, "grad_norm": 0.5435901164664535, "learning_rate": 8.014556874549122e-06, "loss": 0.21793127059936523, "step": 12520 }, { "epoch": 1.0942687401712388, "grad_norm": 0.6737187774714043, "learning_rate": 8.012528925462262e-06, "loss": 0.21143760681152343, "step": 12525 }, { "epoch": 1.0947055739996505, "grad_norm": 0.7534167572560517, "learning_rate": 8.010500198085167e-06, "loss": 0.20636289119720458, "step": 12530 }, { "epoch": 1.0951424078280623, "grad_norm": 0.5346345840566873, "learning_rate": 8.008470692941965e-06, "loss": 0.2374185800552368, "step": 12535 }, { "epoch": 1.0955792416564738, "grad_norm": 0.4999798268417625, "learning_rate": 8.00644041055698e-06, "loss": 0.23432722091674804, "step": 12540 }, { "epoch": 1.0960160754848856, "grad_norm": 0.6353025697174824, "learning_rate": 8.004409351454736e-06, "loss": 0.2102632999420166, "step": 12545 }, { "epoch": 1.0964529093132973, "grad_norm": 0.5464188084570746, "learning_rate": 8.002377516159962e-06, "loss": 0.19209811687469483, "step": 12550 }, { "epoch": 1.0968897431417088, "grad_norm": 0.5957955433353749, "learning_rate": 8.000344905197584e-06, "loss": 0.1742907166481018, "step": 12555 }, { "epoch": 1.0973265769701206, "grad_norm": 0.4918014437763153, "learning_rate": 7.998311519092728e-06, "loss": 0.20224227905273437, "step": 12560 }, { "epoch": 1.0977634107985321, "grad_norm": 0.47182848863686105, "learning_rate": 7.996277358370723e-06, "loss": 0.20952353477478028, "step": 12565 }, { "epoch": 1.0982002446269439, "grad_norm": 0.6337716624579167, "learning_rate": 7.994242423557094e-06, "loss": 0.22393887042999266, "step": 12570 }, { "epoch": 1.0986370784553556, "grad_norm": 0.4782662272409883, "learning_rate": 7.992206715177576e-06, "loss": 0.1808383584022522, "step": 12575 }, { "epoch": 1.0990739122837672, "grad_norm": 0.499995470097094, "learning_rate": 7.990170233758088e-06, "loss": 0.21991877555847167, "step": 12580 }, { "epoch": 1.099510746112179, "grad_norm": 0.4539603541399064, "learning_rate": 7.98813297982476e-06, "loss": 0.20223360061645507, "step": 12585 }, { "epoch": 1.0999475799405907, "grad_norm": 0.49453448804489897, "learning_rate": 7.986094953903923e-06, "loss": 0.19967079162597656, "step": 12590 }, { "epoch": 1.1003844137690022, "grad_norm": 0.48722488852298446, "learning_rate": 7.984056156522097e-06, "loss": 0.24856297969818114, "step": 12595 }, { "epoch": 1.100821247597414, "grad_norm": 0.5693257589370673, "learning_rate": 7.982016588206012e-06, "loss": 0.21470293998718262, "step": 12600 }, { "epoch": 1.1012580814258257, "grad_norm": 0.5540406875966503, "learning_rate": 7.97997624948259e-06, "loss": 0.2318356990814209, "step": 12605 }, { "epoch": 1.1016949152542372, "grad_norm": 0.618191612568772, "learning_rate": 7.977935140878958e-06, "loss": 0.20261807441711427, "step": 12610 }, { "epoch": 1.102131749082649, "grad_norm": 0.49123279077408694, "learning_rate": 7.975893262922435e-06, "loss": 0.2184213161468506, "step": 12615 }, { "epoch": 1.1025685829110605, "grad_norm": 0.5206635744819507, "learning_rate": 7.973850616140546e-06, "loss": 0.21278033256530762, "step": 12620 }, { "epoch": 1.1030054167394723, "grad_norm": 0.6334948544683018, "learning_rate": 7.971807201061008e-06, "loss": 0.197745943069458, "step": 12625 }, { "epoch": 1.103442250567884, "grad_norm": 0.5537257273734113, "learning_rate": 7.96976301821174e-06, "loss": 0.21638712882995606, "step": 12630 }, { "epoch": 1.1038790843962956, "grad_norm": 0.5728976582564221, "learning_rate": 7.967718068120862e-06, "loss": 0.16696603298187257, "step": 12635 }, { "epoch": 1.1043159182247073, "grad_norm": 0.7405934729959677, "learning_rate": 7.965672351316685e-06, "loss": 0.22389650344848633, "step": 12640 }, { "epoch": 1.104752752053119, "grad_norm": 0.5654959877140695, "learning_rate": 7.963625868327726e-06, "loss": 0.20279297828674317, "step": 12645 }, { "epoch": 1.1051895858815306, "grad_norm": 0.57636136901573, "learning_rate": 7.961578619682692e-06, "loss": 0.2183175802230835, "step": 12650 }, { "epoch": 1.1056264197099424, "grad_norm": 0.4742720708691008, "learning_rate": 7.959530605910498e-06, "loss": 0.22365541458129884, "step": 12655 }, { "epoch": 1.106063253538354, "grad_norm": 0.49621298729747304, "learning_rate": 7.957481827540245e-06, "loss": 0.20326976776123046, "step": 12660 }, { "epoch": 1.1065000873667656, "grad_norm": 0.48640343189549345, "learning_rate": 7.955432285101238e-06, "loss": 0.20489072799682617, "step": 12665 }, { "epoch": 1.1069369211951774, "grad_norm": 0.7414717138238202, "learning_rate": 7.953381979122982e-06, "loss": 0.20765080451965331, "step": 12670 }, { "epoch": 1.107373755023589, "grad_norm": 0.6133865147421809, "learning_rate": 7.951330910135174e-06, "loss": 0.22950356006622313, "step": 12675 }, { "epoch": 1.1078105888520007, "grad_norm": 0.5674568566863545, "learning_rate": 7.949279078667708e-06, "loss": 0.19296677112579347, "step": 12680 }, { "epoch": 1.1082474226804124, "grad_norm": 0.5347542315901128, "learning_rate": 7.947226485250681e-06, "loss": 0.21423099040985108, "step": 12685 }, { "epoch": 1.108684256508824, "grad_norm": 0.5561283200709259, "learning_rate": 7.945173130414379e-06, "loss": 0.20292277336120607, "step": 12690 }, { "epoch": 1.1091210903372357, "grad_norm": 0.5368052422410877, "learning_rate": 7.943119014689288e-06, "loss": 0.22844715118408204, "step": 12695 }, { "epoch": 1.1095579241656475, "grad_norm": 0.5504176355442918, "learning_rate": 7.941064138606095e-06, "loss": 0.2521472930908203, "step": 12700 }, { "epoch": 1.109994757994059, "grad_norm": 0.5742491043696804, "learning_rate": 7.939008502695679e-06, "loss": 0.18804428577423096, "step": 12705 }, { "epoch": 1.1104315918224708, "grad_norm": 0.49273638579410023, "learning_rate": 7.936952107489112e-06, "loss": 0.2097111701965332, "step": 12710 }, { "epoch": 1.1108684256508825, "grad_norm": 0.48381372118782423, "learning_rate": 7.934894953517672e-06, "loss": 0.22905099391937256, "step": 12715 }, { "epoch": 1.111305259479294, "grad_norm": 0.4782308136043025, "learning_rate": 7.93283704131282e-06, "loss": 0.22480340003967286, "step": 12720 }, { "epoch": 1.1117420933077058, "grad_norm": 0.508377329832453, "learning_rate": 7.930778371406224e-06, "loss": 0.21059772968292237, "step": 12725 }, { "epoch": 1.1121789271361173, "grad_norm": 0.5427294615940981, "learning_rate": 7.928718944329743e-06, "loss": 0.2051009178161621, "step": 12730 }, { "epoch": 1.112615760964529, "grad_norm": 0.5788074519432015, "learning_rate": 7.926658760615434e-06, "loss": 0.19163529872894286, "step": 12735 }, { "epoch": 1.1130525947929408, "grad_norm": 0.5943172034502926, "learning_rate": 7.924597820795545e-06, "loss": 0.19194207191467286, "step": 12740 }, { "epoch": 1.1134894286213524, "grad_norm": 0.6394854844469973, "learning_rate": 7.922536125402524e-06, "loss": 0.19191473722457886, "step": 12745 }, { "epoch": 1.1139262624497641, "grad_norm": 0.5528504510454403, "learning_rate": 7.920473674969011e-06, "loss": 0.2237565040588379, "step": 12750 }, { "epoch": 1.1143630962781759, "grad_norm": 0.6368663557210619, "learning_rate": 7.918410470027845e-06, "loss": 0.19453643560409545, "step": 12755 }, { "epoch": 1.1147999301065874, "grad_norm": 0.5841915031991906, "learning_rate": 7.916346511112054e-06, "loss": 0.2183788776397705, "step": 12760 }, { "epoch": 1.1152367639349992, "grad_norm": 0.6169673553574483, "learning_rate": 7.914281798754866e-06, "loss": 0.19406358003616334, "step": 12765 }, { "epoch": 1.115673597763411, "grad_norm": 0.5453518317713286, "learning_rate": 7.9122163334897e-06, "loss": 0.2064366817474365, "step": 12770 }, { "epoch": 1.1161104315918224, "grad_norm": 0.5084314514498975, "learning_rate": 7.910150115850173e-06, "loss": 0.2084209442138672, "step": 12775 }, { "epoch": 1.1165472654202342, "grad_norm": 0.5462649981732857, "learning_rate": 7.908083146370093e-06, "loss": 0.22436378002166749, "step": 12780 }, { "epoch": 1.1169840992486457, "grad_norm": 0.48313013742563793, "learning_rate": 7.906015425583466e-06, "loss": 0.2032853126525879, "step": 12785 }, { "epoch": 1.1174209330770575, "grad_norm": 0.5968872081827775, "learning_rate": 7.90394695402449e-06, "loss": 0.2082759380340576, "step": 12790 }, { "epoch": 1.1178577669054692, "grad_norm": 0.5200923006260809, "learning_rate": 7.901877732227557e-06, "loss": 0.20855214595794677, "step": 12795 }, { "epoch": 1.1182946007338808, "grad_norm": 0.532199451970177, "learning_rate": 7.89980776072725e-06, "loss": 0.21797432899475097, "step": 12800 }, { "epoch": 1.1187314345622925, "grad_norm": 0.6589821455629851, "learning_rate": 7.897737040058348e-06, "loss": 0.19756112098693848, "step": 12805 }, { "epoch": 1.1191682683907043, "grad_norm": 0.5291308835234555, "learning_rate": 7.89566557075583e-06, "loss": 0.19473559856414796, "step": 12810 }, { "epoch": 1.1196051022191158, "grad_norm": 0.7403569810810646, "learning_rate": 7.893593353354857e-06, "loss": 0.19483411312103271, "step": 12815 }, { "epoch": 1.1200419360475276, "grad_norm": 0.7431197381031536, "learning_rate": 7.89152038839079e-06, "loss": 0.20451536178588867, "step": 12820 }, { "epoch": 1.1204787698759393, "grad_norm": 0.6207409573332858, "learning_rate": 7.889446676399183e-06, "loss": 0.20016708374023437, "step": 12825 }, { "epoch": 1.1209156037043508, "grad_norm": 0.5407105547527169, "learning_rate": 7.88737221791578e-06, "loss": 0.19392238855361937, "step": 12830 }, { "epoch": 1.1213524375327626, "grad_norm": 0.5547639627245905, "learning_rate": 7.885297013476524e-06, "loss": 0.22403998374938966, "step": 12835 }, { "epoch": 1.1217892713611741, "grad_norm": 0.6190135729136481, "learning_rate": 7.88322106361754e-06, "loss": 0.22588508129119872, "step": 12840 }, { "epoch": 1.1222261051895859, "grad_norm": 0.5009350716970855, "learning_rate": 7.881144368875154e-06, "loss": 0.22514374256134034, "step": 12845 }, { "epoch": 1.1226629390179976, "grad_norm": 0.6111440843826945, "learning_rate": 7.879066929785885e-06, "loss": 0.20147833824157715, "step": 12850 }, { "epoch": 1.1230997728464092, "grad_norm": 0.5706895479893539, "learning_rate": 7.876988746886441e-06, "loss": 0.22193877696990966, "step": 12855 }, { "epoch": 1.123536606674821, "grad_norm": 0.5496855892926164, "learning_rate": 7.87490982071372e-06, "loss": 0.21773390769958495, "step": 12860 }, { "epoch": 1.1239734405032327, "grad_norm": 0.5106585177943721, "learning_rate": 7.872830151804819e-06, "loss": 0.23239359855651856, "step": 12865 }, { "epoch": 1.1244102743316442, "grad_norm": 0.6121825133653978, "learning_rate": 7.870749740697019e-06, "loss": 0.21877870559692383, "step": 12870 }, { "epoch": 1.124847108160056, "grad_norm": 0.6252470907105817, "learning_rate": 7.8686685879278e-06, "loss": 0.20937957763671874, "step": 12875 }, { "epoch": 1.1252839419884677, "grad_norm": 0.5029750164709903, "learning_rate": 7.866586694034828e-06, "loss": 0.23023638725280762, "step": 12880 }, { "epoch": 1.1257207758168792, "grad_norm": 0.5570634018660051, "learning_rate": 7.864504059555962e-06, "loss": 0.21897897720336915, "step": 12885 }, { "epoch": 1.126157609645291, "grad_norm": 0.6146432382278499, "learning_rate": 7.862420685029257e-06, "loss": 0.22900309562683105, "step": 12890 }, { "epoch": 1.1265944434737025, "grad_norm": 0.6449362370784254, "learning_rate": 7.86033657099295e-06, "loss": 0.21685569286346434, "step": 12895 }, { "epoch": 1.1270312773021143, "grad_norm": 0.5145318250753733, "learning_rate": 7.858251717985477e-06, "loss": 0.21894631385803223, "step": 12900 }, { "epoch": 1.127468111130526, "grad_norm": 0.5449429729768065, "learning_rate": 7.85616612654546e-06, "loss": 0.18963146209716797, "step": 12905 }, { "epoch": 1.1279049449589376, "grad_norm": 0.5699008481124302, "learning_rate": 7.854079797211717e-06, "loss": 0.22016892433166504, "step": 12910 }, { "epoch": 1.1283417787873493, "grad_norm": 0.6109780865141603, "learning_rate": 7.851992730523252e-06, "loss": 0.20352027416229249, "step": 12915 }, { "epoch": 1.1287786126157608, "grad_norm": 0.5467693047526215, "learning_rate": 7.849904927019261e-06, "loss": 0.2058732032775879, "step": 12920 }, { "epoch": 1.1292154464441726, "grad_norm": 0.5377671199745037, "learning_rate": 7.84781638723913e-06, "loss": 0.2002046823501587, "step": 12925 }, { "epoch": 1.1296522802725844, "grad_norm": 0.5598087728431552, "learning_rate": 7.845727111722437e-06, "loss": 0.17757863998413087, "step": 12930 }, { "epoch": 1.1300891141009959, "grad_norm": 0.8325518681209622, "learning_rate": 7.843637101008945e-06, "loss": 0.20939760208129882, "step": 12935 }, { "epoch": 1.1305259479294076, "grad_norm": 0.5658070129053795, "learning_rate": 7.841546355638616e-06, "loss": 0.191656756401062, "step": 12940 }, { "epoch": 1.1309627817578194, "grad_norm": 0.7565796242561383, "learning_rate": 7.839454876151591e-06, "loss": 0.19037199020385742, "step": 12945 }, { "epoch": 1.131399615586231, "grad_norm": 0.6152574004040373, "learning_rate": 7.837362663088211e-06, "loss": 0.2339259624481201, "step": 12950 }, { "epoch": 1.1318364494146427, "grad_norm": 0.5546800822777019, "learning_rate": 7.835269716989e-06, "loss": 0.2043598175048828, "step": 12955 }, { "epoch": 1.1322732832430544, "grad_norm": 0.5374105844655581, "learning_rate": 7.83317603839467e-06, "loss": 0.20198941230773926, "step": 12960 }, { "epoch": 1.132710117071466, "grad_norm": 0.49121334720045423, "learning_rate": 7.831081627846128e-06, "loss": 0.2158205986022949, "step": 12965 }, { "epoch": 1.1331469508998777, "grad_norm": 0.5551302439906096, "learning_rate": 7.82898648588447e-06, "loss": 0.2052175521850586, "step": 12970 }, { "epoch": 1.1335837847282892, "grad_norm": 0.5344318901460606, "learning_rate": 7.82689061305097e-06, "loss": 0.177542781829834, "step": 12975 }, { "epoch": 1.134020618556701, "grad_norm": 0.5637239677547108, "learning_rate": 7.824794009887108e-06, "loss": 0.20840773582458497, "step": 12980 }, { "epoch": 1.1344574523851128, "grad_norm": 0.5582737768791519, "learning_rate": 7.82269667693454e-06, "loss": 0.19569610357284545, "step": 12985 }, { "epoch": 1.1348942862135243, "grad_norm": 0.596090492820514, "learning_rate": 7.820598614735112e-06, "loss": 0.19673852920532225, "step": 12990 }, { "epoch": 1.135331120041936, "grad_norm": 0.6144743250146303, "learning_rate": 7.818499823830864e-06, "loss": 0.22245275974273682, "step": 12995 }, { "epoch": 1.1357679538703478, "grad_norm": 0.6091403429212608, "learning_rate": 7.81640030476402e-06, "loss": 0.22815876007080077, "step": 13000 }, { "epoch": 1.1362047876987593, "grad_norm": 0.613938128496238, "learning_rate": 7.814300058076993e-06, "loss": 0.19764311313629152, "step": 13005 }, { "epoch": 1.136641621527171, "grad_norm": 0.538963815380852, "learning_rate": 7.812199084312386e-06, "loss": 0.21861491203308106, "step": 13010 }, { "epoch": 1.1370784553555828, "grad_norm": 0.5381701607761781, "learning_rate": 7.810097384012985e-06, "loss": 0.18655867576599122, "step": 13015 }, { "epoch": 1.1375152891839944, "grad_norm": 1.441898769699339, "learning_rate": 7.807994957721767e-06, "loss": 0.2220008373260498, "step": 13020 }, { "epoch": 1.1379521230124061, "grad_norm": 0.513553718921987, "learning_rate": 7.805891805981898e-06, "loss": 0.1982468008995056, "step": 13025 }, { "epoch": 1.1383889568408176, "grad_norm": 0.5906859658212167, "learning_rate": 7.803787929336727e-06, "loss": 0.19191422462463378, "step": 13030 }, { "epoch": 1.1388257906692294, "grad_norm": 0.5026810814092894, "learning_rate": 7.801683328329798e-06, "loss": 0.21341524124145508, "step": 13035 }, { "epoch": 1.1392626244976412, "grad_norm": 0.6036556105834123, "learning_rate": 7.799578003504831e-06, "loss": 0.22568883895874023, "step": 13040 }, { "epoch": 1.1396994583260527, "grad_norm": 0.5418441482071403, "learning_rate": 7.797471955405743e-06, "loss": 0.23218798637390137, "step": 13045 }, { "epoch": 1.1401362921544644, "grad_norm": 0.6233065659342898, "learning_rate": 7.79536518457663e-06, "loss": 0.19984216690063478, "step": 13050 }, { "epoch": 1.1405731259828762, "grad_norm": 0.600308479844316, "learning_rate": 7.793257691561784e-06, "loss": 0.20632686614990234, "step": 13055 }, { "epoch": 1.1410099598112877, "grad_norm": 0.5713818773379827, "learning_rate": 7.791149476905673e-06, "loss": 0.21870779991149902, "step": 13060 }, { "epoch": 1.1414467936396995, "grad_norm": 0.5627846023867276, "learning_rate": 7.78904054115296e-06, "loss": 0.21721992492675782, "step": 13065 }, { "epoch": 1.1418836274681112, "grad_norm": 0.5449864677063179, "learning_rate": 7.78693088484849e-06, "loss": 0.18525075912475586, "step": 13070 }, { "epoch": 1.1423204612965228, "grad_norm": 0.6166116011747653, "learning_rate": 7.784820508537296e-06, "loss": 0.21398420333862306, "step": 13075 }, { "epoch": 1.1427572951249345, "grad_norm": 0.5639396154566475, "learning_rate": 7.782709412764594e-06, "loss": 0.19293111562728882, "step": 13080 }, { "epoch": 1.143194128953346, "grad_norm": 0.4697467944147267, "learning_rate": 7.780597598075788e-06, "loss": 0.21572680473327638, "step": 13085 }, { "epoch": 1.1436309627817578, "grad_norm": 0.5700031047252818, "learning_rate": 7.77848506501647e-06, "loss": 0.22657437324523927, "step": 13090 }, { "epoch": 1.1440677966101696, "grad_norm": 0.5337308254246409, "learning_rate": 7.776371814132412e-06, "loss": 0.20457987785339354, "step": 13095 }, { "epoch": 1.144504630438581, "grad_norm": 0.9812580920763226, "learning_rate": 7.774257845969579e-06, "loss": 0.2045222282409668, "step": 13100 }, { "epoch": 1.1449414642669928, "grad_norm": 0.4309364137084918, "learning_rate": 7.772143161074114e-06, "loss": 0.2322650671005249, "step": 13105 }, { "epoch": 1.1453782980954046, "grad_norm": 0.5751855806537476, "learning_rate": 7.770027759992347e-06, "loss": 0.218648099899292, "step": 13110 }, { "epoch": 1.1458151319238161, "grad_norm": 0.5761421094921659, "learning_rate": 7.767911643270799e-06, "loss": 0.19181275367736816, "step": 13115 }, { "epoch": 1.1462519657522279, "grad_norm": 0.4901020610795011, "learning_rate": 7.765794811456167e-06, "loss": 0.2082974910736084, "step": 13120 }, { "epoch": 1.1466887995806396, "grad_norm": 0.5067181827857423, "learning_rate": 7.763677265095338e-06, "loss": 0.1912222146987915, "step": 13125 }, { "epoch": 1.1471256334090512, "grad_norm": 0.5384702238906296, "learning_rate": 7.761559004735383e-06, "loss": 0.2207408905029297, "step": 13130 }, { "epoch": 1.147562467237463, "grad_norm": 0.514271284776163, "learning_rate": 7.759440030923557e-06, "loss": 0.20198369026184082, "step": 13135 }, { "epoch": 1.1479993010658744, "grad_norm": 0.6245170363664017, "learning_rate": 7.757320344207299e-06, "loss": 0.16876089572906494, "step": 13140 }, { "epoch": 1.1484361348942862, "grad_norm": 0.5677689899171586, "learning_rate": 7.755199945134234e-06, "loss": 0.20861403942108153, "step": 13145 }, { "epoch": 1.148872968722698, "grad_norm": 0.5762382009931446, "learning_rate": 7.753078834252164e-06, "loss": 0.22028143405914308, "step": 13150 }, { "epoch": 1.1493098025511095, "grad_norm": 0.6158207146085695, "learning_rate": 7.750957012109086e-06, "loss": 0.2187018632888794, "step": 13155 }, { "epoch": 1.1497466363795212, "grad_norm": 0.7204013808117026, "learning_rate": 7.748834479253174e-06, "loss": 0.1994011640548706, "step": 13160 }, { "epoch": 1.150183470207933, "grad_norm": 0.5297627187532603, "learning_rate": 7.746711236232786e-06, "loss": 0.21248264312744142, "step": 13165 }, { "epoch": 1.1506203040363445, "grad_norm": 0.5330120396927016, "learning_rate": 7.744587283596463e-06, "loss": 0.2106560707092285, "step": 13170 }, { "epoch": 1.1510571378647563, "grad_norm": 0.5905032655420092, "learning_rate": 7.742462621892933e-06, "loss": 0.2271711826324463, "step": 13175 }, { "epoch": 1.151493971693168, "grad_norm": 0.46494720714965476, "learning_rate": 7.740337251671104e-06, "loss": 0.2226266860961914, "step": 13180 }, { "epoch": 1.1519308055215796, "grad_norm": 0.5833203167326987, "learning_rate": 7.738211173480067e-06, "loss": 0.20212559700012206, "step": 13185 }, { "epoch": 1.1523676393499913, "grad_norm": 0.6178494372128179, "learning_rate": 7.736084387869095e-06, "loss": 0.21377913951873778, "step": 13190 }, { "epoch": 1.1528044731784028, "grad_norm": 0.5229868920866233, "learning_rate": 7.733956895387651e-06, "loss": 0.21124825477600098, "step": 13195 }, { "epoch": 1.1532413070068146, "grad_norm": 0.4689511255911209, "learning_rate": 7.73182869658537e-06, "loss": 0.2089022159576416, "step": 13200 }, { "epoch": 1.1536781408352264, "grad_norm": 0.48347936711532485, "learning_rate": 7.729699792012076e-06, "loss": 0.22180466651916503, "step": 13205 }, { "epoch": 1.1541149746636379, "grad_norm": 0.5041565821879674, "learning_rate": 7.727570182217775e-06, "loss": 0.18135842084884643, "step": 13210 }, { "epoch": 1.1545518084920496, "grad_norm": 0.5826460407374244, "learning_rate": 7.725439867752653e-06, "loss": 0.21405625343322754, "step": 13215 }, { "epoch": 1.1549886423204614, "grad_norm": 0.6696525564346536, "learning_rate": 7.723308849167077e-06, "loss": 0.1973870277404785, "step": 13220 }, { "epoch": 1.155425476148873, "grad_norm": 0.6268048579948561, "learning_rate": 7.721177127011603e-06, "loss": 0.21957173347473144, "step": 13225 }, { "epoch": 1.1558623099772847, "grad_norm": 0.5103396996764288, "learning_rate": 7.71904470183696e-06, "loss": 0.2019728899002075, "step": 13230 }, { "epoch": 1.1562991438056964, "grad_norm": 0.6494039843671913, "learning_rate": 7.716911574194064e-06, "loss": 0.2030958890914917, "step": 13235 }, { "epoch": 1.156735977634108, "grad_norm": 0.5815836800885202, "learning_rate": 7.71477774463401e-06, "loss": 0.20739421844482422, "step": 13240 }, { "epoch": 1.1571728114625197, "grad_norm": 0.5602656490948331, "learning_rate": 7.712643213708075e-06, "loss": 0.20896446704864502, "step": 13245 }, { "epoch": 1.1576096452909312, "grad_norm": 0.49477542668460994, "learning_rate": 7.71050798196772e-06, "loss": 0.21357965469360352, "step": 13250 }, { "epoch": 1.158046479119343, "grad_norm": 0.614135535737574, "learning_rate": 7.70837204996458e-06, "loss": 0.19353317022323607, "step": 13255 }, { "epoch": 1.1584833129477548, "grad_norm": 0.6227168842433258, "learning_rate": 7.70623541825048e-06, "loss": 0.22809078693389892, "step": 13260 }, { "epoch": 1.1589201467761663, "grad_norm": 0.45457602127719504, "learning_rate": 7.704098087377421e-06, "loss": 0.20728120803833008, "step": 13265 }, { "epoch": 1.159356980604578, "grad_norm": 0.4675770435739207, "learning_rate": 7.701960057897581e-06, "loss": 0.22159123420715332, "step": 13270 }, { "epoch": 1.1597938144329896, "grad_norm": 0.597316368166362, "learning_rate": 7.699821330363325e-06, "loss": 0.1873644232749939, "step": 13275 }, { "epoch": 1.1602306482614013, "grad_norm": 0.5728703667872503, "learning_rate": 7.697681905327195e-06, "loss": 0.19177842140197754, "step": 13280 }, { "epoch": 1.160667482089813, "grad_norm": 0.6223054528630566, "learning_rate": 7.695541783341914e-06, "loss": 0.2034766912460327, "step": 13285 }, { "epoch": 1.1611043159182248, "grad_norm": 0.5568278728458512, "learning_rate": 7.693400964960386e-06, "loss": 0.22752048969268798, "step": 13290 }, { "epoch": 1.1615411497466364, "grad_norm": 0.530106795096586, "learning_rate": 7.691259450735691e-06, "loss": 0.20742225646972656, "step": 13295 }, { "epoch": 1.1619779835750481, "grad_norm": 0.5272300574969996, "learning_rate": 7.689117241221096e-06, "loss": 0.2222903251647949, "step": 13300 }, { "epoch": 1.1624148174034596, "grad_norm": 0.4785980927017729, "learning_rate": 7.686974336970039e-06, "loss": 0.21317877769470214, "step": 13305 }, { "epoch": 1.1628516512318714, "grad_norm": 0.5846386169147331, "learning_rate": 7.684830738536143e-06, "loss": 0.1875956654548645, "step": 13310 }, { "epoch": 1.1632884850602832, "grad_norm": 0.538403976038177, "learning_rate": 7.682686446473208e-06, "loss": 0.1782707929611206, "step": 13315 }, { "epoch": 1.1637253188886947, "grad_norm": 0.549845520961799, "learning_rate": 7.68054146133522e-06, "loss": 0.22872729301452638, "step": 13320 }, { "epoch": 1.1641621527171064, "grad_norm": 0.7451584837924543, "learning_rate": 7.678395783676332e-06, "loss": 0.22764267921447753, "step": 13325 }, { "epoch": 1.164598986545518, "grad_norm": 0.5449536789222399, "learning_rate": 7.676249414050882e-06, "loss": 0.24149565696716307, "step": 13330 }, { "epoch": 1.1650358203739297, "grad_norm": 0.5954882897123586, "learning_rate": 7.67410235301339e-06, "loss": 0.19085569381713868, "step": 13335 }, { "epoch": 1.1654726542023415, "grad_norm": 0.5507732847839744, "learning_rate": 7.671954601118554e-06, "loss": 0.22427549362182617, "step": 13340 }, { "epoch": 1.165909488030753, "grad_norm": 0.5644648834931075, "learning_rate": 7.669806158921242e-06, "loss": 0.21392550468444824, "step": 13345 }, { "epoch": 1.1663463218591648, "grad_norm": 0.5580554655569588, "learning_rate": 7.66765702697651e-06, "loss": 0.1922219514846802, "step": 13350 }, { "epoch": 1.1667831556875765, "grad_norm": 0.5635297830899624, "learning_rate": 7.665507205839588e-06, "loss": 0.22944164276123047, "step": 13355 }, { "epoch": 1.167219989515988, "grad_norm": 0.6100255828164336, "learning_rate": 7.663356696065887e-06, "loss": 0.19629513025283812, "step": 13360 }, { "epoch": 1.1676568233443998, "grad_norm": 0.5152229700440725, "learning_rate": 7.661205498210989e-06, "loss": 0.2249232530593872, "step": 13365 }, { "epoch": 1.1680936571728116, "grad_norm": 0.4539285625795342, "learning_rate": 7.65905361283066e-06, "loss": 0.20431127548217773, "step": 13370 }, { "epoch": 1.168530491001223, "grad_norm": 0.4903719606960929, "learning_rate": 7.656901040480845e-06, "loss": 0.23330307006835938, "step": 13375 }, { "epoch": 1.1689673248296348, "grad_norm": 0.6019446081939523, "learning_rate": 7.654747781717659e-06, "loss": 0.23774001598358155, "step": 13380 }, { "epoch": 1.1694041586580464, "grad_norm": 0.6568798905968514, "learning_rate": 7.652593837097404e-06, "loss": 0.20699987411499024, "step": 13385 }, { "epoch": 1.1698409924864581, "grad_norm": 0.5335376287579539, "learning_rate": 7.65043920717655e-06, "loss": 0.1976545810699463, "step": 13390 }, { "epoch": 1.1702778263148699, "grad_norm": 0.5717994650531789, "learning_rate": 7.648283892511748e-06, "loss": 0.21765706539154053, "step": 13395 }, { "epoch": 1.1707146601432814, "grad_norm": 0.6567070977022926, "learning_rate": 7.646127893659829e-06, "loss": 0.21556329727172852, "step": 13400 }, { "epoch": 1.1711514939716932, "grad_norm": 0.5786568922833696, "learning_rate": 7.643971211177795e-06, "loss": 0.19286584854125977, "step": 13405 }, { "epoch": 1.171588327800105, "grad_norm": 0.5899091191360736, "learning_rate": 7.641813845622828e-06, "loss": 0.20370523929595946, "step": 13410 }, { "epoch": 1.1720251616285164, "grad_norm": 0.616460670862284, "learning_rate": 7.639655797552285e-06, "loss": 0.21753401756286622, "step": 13415 }, { "epoch": 1.1724619954569282, "grad_norm": 0.5330677728435612, "learning_rate": 7.637497067523704e-06, "loss": 0.20036332607269286, "step": 13420 }, { "epoch": 1.17289882928534, "grad_norm": 0.6077517599428377, "learning_rate": 7.63533765609479e-06, "loss": 0.20272090435028076, "step": 13425 }, { "epoch": 1.1733356631137515, "grad_norm": 0.6558293995663184, "learning_rate": 7.633177563823433e-06, "loss": 0.20389852523803711, "step": 13430 }, { "epoch": 1.1737724969421632, "grad_norm": 0.4330133938451995, "learning_rate": 7.631016791267694e-06, "loss": 0.21364946365356446, "step": 13435 }, { "epoch": 1.1742093307705748, "grad_norm": 0.5461646826192149, "learning_rate": 7.628855338985811e-06, "loss": 0.2167203426361084, "step": 13440 }, { "epoch": 1.1746461645989865, "grad_norm": 0.5001761392149898, "learning_rate": 7.626693207536198e-06, "loss": 0.2201094150543213, "step": 13445 }, { "epoch": 1.1750829984273983, "grad_norm": 0.52947658270695, "learning_rate": 7.624530397477443e-06, "loss": 0.21399455070495604, "step": 13450 }, { "epoch": 1.1755198322558098, "grad_norm": 0.5632883739201707, "learning_rate": 7.622366909368313e-06, "loss": 0.2266789197921753, "step": 13455 }, { "epoch": 1.1759566660842216, "grad_norm": 0.5065685028425087, "learning_rate": 7.620202743767744e-06, "loss": 0.22647507190704347, "step": 13460 }, { "epoch": 1.1763934999126333, "grad_norm": 0.5109129926663101, "learning_rate": 7.618037901234854e-06, "loss": 0.21035480499267578, "step": 13465 }, { "epoch": 1.1768303337410448, "grad_norm": 0.5836324972356554, "learning_rate": 7.61587238232893e-06, "loss": 0.17751502990722656, "step": 13470 }, { "epoch": 1.1772671675694566, "grad_norm": 0.5453183608993846, "learning_rate": 7.613706187609438e-06, "loss": 0.22310233116149902, "step": 13475 }, { "epoch": 1.1777040013978683, "grad_norm": 0.460828792711975, "learning_rate": 7.611539317636015e-06, "loss": 0.20764153003692626, "step": 13480 }, { "epoch": 1.1781408352262799, "grad_norm": 0.5972326801513111, "learning_rate": 7.6093717729684755e-06, "loss": 0.21112985610961915, "step": 13485 }, { "epoch": 1.1785776690546916, "grad_norm": 0.514146680279674, "learning_rate": 7.607203554166807e-06, "loss": 0.18931055068969727, "step": 13490 }, { "epoch": 1.1790145028831032, "grad_norm": 0.45805488357077656, "learning_rate": 7.605034661791173e-06, "loss": 0.2124117374420166, "step": 13495 }, { "epoch": 1.179451336711515, "grad_norm": 0.6122404633646391, "learning_rate": 7.602865096401905e-06, "loss": 0.2176229476928711, "step": 13500 }, { "epoch": 1.1798881705399267, "grad_norm": 0.48491943783260893, "learning_rate": 7.600694858559517e-06, "loss": 0.22236232757568358, "step": 13505 }, { "epoch": 1.1803250043683382, "grad_norm": 0.742808455919493, "learning_rate": 7.59852394882469e-06, "loss": 0.20496485233306885, "step": 13510 }, { "epoch": 1.18076183819675, "grad_norm": 0.5558199107701085, "learning_rate": 7.59635236775828e-06, "loss": 0.20523037910461425, "step": 13515 }, { "epoch": 1.1811986720251617, "grad_norm": 0.5042389125874633, "learning_rate": 7.594180115921321e-06, "loss": 0.21000297069549562, "step": 13520 }, { "epoch": 1.1816355058535732, "grad_norm": 0.5289379552115565, "learning_rate": 7.592007193875013e-06, "loss": 0.21725189685821533, "step": 13525 }, { "epoch": 1.182072339681985, "grad_norm": 0.43534009882563907, "learning_rate": 7.589833602180736e-06, "loss": 0.19560387134552001, "step": 13530 }, { "epoch": 1.1825091735103967, "grad_norm": 0.5761808337022875, "learning_rate": 7.587659341400037e-06, "loss": 0.22058210372924805, "step": 13535 }, { "epoch": 1.1829460073388083, "grad_norm": 0.47122294531446923, "learning_rate": 7.585484412094639e-06, "loss": 0.20670151710510254, "step": 13540 }, { "epoch": 1.18338284116722, "grad_norm": 0.452601501840219, "learning_rate": 7.583308814826442e-06, "loss": 0.23625473976135253, "step": 13545 }, { "epoch": 1.1838196749956316, "grad_norm": 0.6142005100452724, "learning_rate": 7.581132550157508e-06, "loss": 0.2079953670501709, "step": 13550 }, { "epoch": 1.1842565088240433, "grad_norm": 0.6241092121459901, "learning_rate": 7.578955618650081e-06, "loss": 0.21525936126708983, "step": 13555 }, { "epoch": 1.184693342652455, "grad_norm": 0.5247845293815844, "learning_rate": 7.576778020866572e-06, "loss": 0.2177363395690918, "step": 13560 }, { "epoch": 1.1851301764808666, "grad_norm": 0.5333287713290533, "learning_rate": 7.5745997573695666e-06, "loss": 0.1983396053314209, "step": 13565 }, { "epoch": 1.1855670103092784, "grad_norm": 0.5710869760595715, "learning_rate": 7.572420828721821e-06, "loss": 0.21999022960662842, "step": 13570 }, { "epoch": 1.18600384413769, "grad_norm": 0.5459802417115887, "learning_rate": 7.570241235486264e-06, "loss": 0.21573920249938966, "step": 13575 }, { "epoch": 1.1864406779661016, "grad_norm": 0.5372018075527568, "learning_rate": 7.568060978225997e-06, "loss": 0.21620469093322753, "step": 13580 }, { "epoch": 1.1868775117945134, "grad_norm": 0.5242050262024621, "learning_rate": 7.565880057504291e-06, "loss": 0.1979835271835327, "step": 13585 }, { "epoch": 1.1873143456229251, "grad_norm": 0.588110747255791, "learning_rate": 7.563698473884589e-06, "loss": 0.21347708702087403, "step": 13590 }, { "epoch": 1.1877511794513367, "grad_norm": 0.5611322944597811, "learning_rate": 7.561516227930506e-06, "loss": 0.18831450939178468, "step": 13595 }, { "epoch": 1.1881880132797484, "grad_norm": 0.528438623010727, "learning_rate": 7.5593333202058285e-06, "loss": 0.19900681972503662, "step": 13600 }, { "epoch": 1.18862484710816, "grad_norm": 0.6808727833987206, "learning_rate": 7.557149751274513e-06, "loss": 0.21549267768859864, "step": 13605 }, { "epoch": 1.1890616809365717, "grad_norm": 0.4996778093841175, "learning_rate": 7.554965521700685e-06, "loss": 0.2206289291381836, "step": 13610 }, { "epoch": 1.1894985147649835, "grad_norm": 0.5656734995794951, "learning_rate": 7.552780632048646e-06, "loss": 0.22596158981323242, "step": 13615 }, { "epoch": 1.189935348593395, "grad_norm": 0.5073404238640047, "learning_rate": 7.5505950828828615e-06, "loss": 0.21472673416137694, "step": 13620 }, { "epoch": 1.1903721824218068, "grad_norm": 0.5122569167329831, "learning_rate": 7.548408874767972e-06, "loss": 0.2403160810470581, "step": 13625 }, { "epoch": 1.1908090162502185, "grad_norm": 0.584689693017508, "learning_rate": 7.5462220082687865e-06, "loss": 0.19935956001281738, "step": 13630 }, { "epoch": 1.19124585007863, "grad_norm": 0.6242761442309043, "learning_rate": 7.5440344839502845e-06, "loss": 0.2160736083984375, "step": 13635 }, { "epoch": 1.1916826839070418, "grad_norm": 0.49755201212096567, "learning_rate": 7.541846302377616e-06, "loss": 0.19709064960479736, "step": 13640 }, { "epoch": 1.1921195177354535, "grad_norm": 0.5003466976860507, "learning_rate": 7.539657464116099e-06, "loss": 0.1943000078201294, "step": 13645 }, { "epoch": 1.192556351563865, "grad_norm": 0.536578089030191, "learning_rate": 7.537467969731223e-06, "loss": 0.21794772148132324, "step": 13650 }, { "epoch": 1.1929931853922768, "grad_norm": 0.5733366642302539, "learning_rate": 7.535277819788647e-06, "loss": 0.2047109365463257, "step": 13655 }, { "epoch": 1.1934300192206884, "grad_norm": 0.4681917380821514, "learning_rate": 7.533087014854198e-06, "loss": 0.19006133079528809, "step": 13660 }, { "epoch": 1.1938668530491001, "grad_norm": 0.5704380573609414, "learning_rate": 7.530895555493871e-06, "loss": 0.20139081478118898, "step": 13665 }, { "epoch": 1.1943036868775119, "grad_norm": 0.5139011398669833, "learning_rate": 7.528703442273837e-06, "loss": 0.19792354106903076, "step": 13670 }, { "epoch": 1.1947405207059234, "grad_norm": 0.5116154537219421, "learning_rate": 7.526510675760425e-06, "loss": 0.22982265949249267, "step": 13675 }, { "epoch": 1.1951773545343352, "grad_norm": 0.5052383679538223, "learning_rate": 7.524317256520143e-06, "loss": 0.19529930353164673, "step": 13680 }, { "epoch": 1.1956141883627467, "grad_norm": 0.5726730586053079, "learning_rate": 7.5221231851196604e-06, "loss": 0.1991356372833252, "step": 13685 }, { "epoch": 1.1960510221911584, "grad_norm": 0.4845904975530908, "learning_rate": 7.5199284621258216e-06, "loss": 0.24194765090942383, "step": 13690 }, { "epoch": 1.1964878560195702, "grad_norm": 0.5164080281518945, "learning_rate": 7.517733088105632e-06, "loss": 0.18854289054870604, "step": 13695 }, { "epoch": 1.196924689847982, "grad_norm": 0.6628212793284659, "learning_rate": 7.5155370636262705e-06, "loss": 0.21457891464233397, "step": 13700 }, { "epoch": 1.1973615236763935, "grad_norm": 0.5619828950773139, "learning_rate": 7.5133403892550835e-06, "loss": 0.20414676666259765, "step": 13705 }, { "epoch": 1.1977983575048052, "grad_norm": 0.5198329060485444, "learning_rate": 7.511143065559584e-06, "loss": 0.22049520015716553, "step": 13710 }, { "epoch": 1.1982351913332168, "grad_norm": 0.4902821985313662, "learning_rate": 7.5089450931074514e-06, "loss": 0.18178515434265136, "step": 13715 }, { "epoch": 1.1986720251616285, "grad_norm": 0.5910816591887298, "learning_rate": 7.506746472466535e-06, "loss": 0.20643420219421388, "step": 13720 }, { "epoch": 1.1991088589900403, "grad_norm": 0.5754930984524821, "learning_rate": 7.50454720420485e-06, "loss": 0.18542881011962892, "step": 13725 }, { "epoch": 1.1995456928184518, "grad_norm": 0.6712864856441426, "learning_rate": 7.502347288890584e-06, "loss": 0.18799202442169188, "step": 13730 }, { "epoch": 1.1999825266468636, "grad_norm": 0.7696369683604866, "learning_rate": 7.500146727092084e-06, "loss": 0.1992427349090576, "step": 13735 }, { "epoch": 1.200419360475275, "grad_norm": 0.5761419502523834, "learning_rate": 7.497945519377866e-06, "loss": 0.2287731647491455, "step": 13740 }, { "epoch": 1.2008561943036868, "grad_norm": 0.6844201146532598, "learning_rate": 7.495743666316617e-06, "loss": 0.20844106674194335, "step": 13745 }, { "epoch": 1.2012930281320986, "grad_norm": 0.5879730816492291, "learning_rate": 7.493541168477191e-06, "loss": 0.19726736545562745, "step": 13750 }, { "epoch": 1.2017298619605101, "grad_norm": 0.6068037755904142, "learning_rate": 7.491338026428601e-06, "loss": 0.1755337357521057, "step": 13755 }, { "epoch": 1.2021666957889219, "grad_norm": 0.6976483849977948, "learning_rate": 7.489134240740033e-06, "loss": 0.1847466230392456, "step": 13760 }, { "epoch": 1.2026035296173336, "grad_norm": 0.6115314217151067, "learning_rate": 7.486929811980836e-06, "loss": 0.21033811569213867, "step": 13765 }, { "epoch": 1.2030403634457452, "grad_norm": 0.541131317630834, "learning_rate": 7.48472474072053e-06, "loss": 0.21759815216064454, "step": 13770 }, { "epoch": 1.203477197274157, "grad_norm": 0.48226948862835706, "learning_rate": 7.4825190275287955e-06, "loss": 0.21630871295928955, "step": 13775 }, { "epoch": 1.2039140311025687, "grad_norm": 0.5739238580036995, "learning_rate": 7.480312672975481e-06, "loss": 0.1836596369743347, "step": 13780 }, { "epoch": 1.2043508649309802, "grad_norm": 0.6595378317964824, "learning_rate": 7.4781056776306005e-06, "loss": 0.19076216220855713, "step": 13785 }, { "epoch": 1.204787698759392, "grad_norm": 0.4878050161858482, "learning_rate": 7.475898042064333e-06, "loss": 0.21428146362304687, "step": 13790 }, { "epoch": 1.2052245325878035, "grad_norm": 0.5517303169060835, "learning_rate": 7.4736897668470275e-06, "loss": 0.25053465366363525, "step": 13795 }, { "epoch": 1.2056613664162152, "grad_norm": 0.5802247763129492, "learning_rate": 7.471480852549191e-06, "loss": 0.20055663585662842, "step": 13800 }, { "epoch": 1.206098200244627, "grad_norm": 0.5787325224579082, "learning_rate": 7.4692712997414985e-06, "loss": 0.1984941005706787, "step": 13805 }, { "epoch": 1.2065350340730385, "grad_norm": 0.47918550492335693, "learning_rate": 7.467061108994793e-06, "loss": 0.20884194374084472, "step": 13810 }, { "epoch": 1.2069718679014503, "grad_norm": 0.5274331915317091, "learning_rate": 7.464850280880076e-06, "loss": 0.2035358428955078, "step": 13815 }, { "epoch": 1.207408701729862, "grad_norm": 1.0390366856224498, "learning_rate": 7.462638815968522e-06, "loss": 0.208212947845459, "step": 13820 }, { "epoch": 1.2078455355582736, "grad_norm": 0.5124254322099452, "learning_rate": 7.460426714831463e-06, "loss": 0.22092053890228272, "step": 13825 }, { "epoch": 1.2082823693866853, "grad_norm": 0.6154294440305947, "learning_rate": 7.458213978040398e-06, "loss": 0.20896027088165284, "step": 13830 }, { "epoch": 1.208719203215097, "grad_norm": 0.623493003751794, "learning_rate": 7.4560006061669885e-06, "loss": 0.2219170093536377, "step": 13835 }, { "epoch": 1.2091560370435086, "grad_norm": 0.5043856024288965, "learning_rate": 7.453786599783065e-06, "loss": 0.19662806987762452, "step": 13840 }, { "epoch": 1.2095928708719204, "grad_norm": 0.5957774277700917, "learning_rate": 7.451571959460615e-06, "loss": 0.20221300125122071, "step": 13845 }, { "epoch": 1.2100297047003319, "grad_norm": 0.5118232373735684, "learning_rate": 7.449356685771797e-06, "loss": 0.20011613368988038, "step": 13850 }, { "epoch": 1.2104665385287436, "grad_norm": 0.6085509877869405, "learning_rate": 7.447140779288926e-06, "loss": 0.19483718872070313, "step": 13855 }, { "epoch": 1.2109033723571554, "grad_norm": 0.6295642033259395, "learning_rate": 7.444924240584488e-06, "loss": 0.19517654180526733, "step": 13860 }, { "epoch": 1.211340206185567, "grad_norm": 0.5067675695811348, "learning_rate": 7.4427070702311235e-06, "loss": 0.1990424394607544, "step": 13865 }, { "epoch": 1.2117770400139787, "grad_norm": 0.5450016650195822, "learning_rate": 7.440489268801643e-06, "loss": 0.21263327598571777, "step": 13870 }, { "epoch": 1.2122138738423904, "grad_norm": 0.6379746975693984, "learning_rate": 7.438270836869018e-06, "loss": 0.20650920867919922, "step": 13875 }, { "epoch": 1.212650707670802, "grad_norm": 0.5624940345595228, "learning_rate": 7.436051775006384e-06, "loss": 0.22516083717346191, "step": 13880 }, { "epoch": 1.2130875414992137, "grad_norm": 0.5630843522180167, "learning_rate": 7.433832083787037e-06, "loss": 0.18361983299255372, "step": 13885 }, { "epoch": 1.2135243753276255, "grad_norm": 0.5158959458129887, "learning_rate": 7.431611763784436e-06, "loss": 0.1816021680831909, "step": 13890 }, { "epoch": 1.213961209156037, "grad_norm": 0.76314704647592, "learning_rate": 7.429390815572205e-06, "loss": 0.21007833480834961, "step": 13895 }, { "epoch": 1.2143980429844488, "grad_norm": 0.706990961081674, "learning_rate": 7.4271692397241265e-06, "loss": 0.21286411285400392, "step": 13900 }, { "epoch": 1.2148348768128603, "grad_norm": 0.5961697889076748, "learning_rate": 7.424947036814148e-06, "loss": 0.20116772651672363, "step": 13905 }, { "epoch": 1.215271710641272, "grad_norm": 0.5442568208944337, "learning_rate": 7.4227242074163806e-06, "loss": 0.2087008237838745, "step": 13910 }, { "epoch": 1.2157085444696838, "grad_norm": 0.4923731626077173, "learning_rate": 7.420500752105089e-06, "loss": 0.20843143463134767, "step": 13915 }, { "epoch": 1.2161453782980953, "grad_norm": 0.6770224314283567, "learning_rate": 7.418276671454711e-06, "loss": 0.20803732872009278, "step": 13920 }, { "epoch": 1.216582212126507, "grad_norm": 0.4737647505498788, "learning_rate": 7.416051966039839e-06, "loss": 0.21142282485961914, "step": 13925 }, { "epoch": 1.2170190459549188, "grad_norm": 0.5319022486039231, "learning_rate": 7.413826636435226e-06, "loss": 0.19577900171279908, "step": 13930 }, { "epoch": 1.2174558797833304, "grad_norm": 0.5590385766686778, "learning_rate": 7.41160068321579e-06, "loss": 0.18469560146331787, "step": 13935 }, { "epoch": 1.2178927136117421, "grad_norm": 0.46214681572007316, "learning_rate": 7.4093741069566064e-06, "loss": 0.23874340057373047, "step": 13940 }, { "epoch": 1.2183295474401539, "grad_norm": 0.6328783195388225, "learning_rate": 7.407146908232917e-06, "loss": 0.2019500255584717, "step": 13945 }, { "epoch": 1.2187663812685654, "grad_norm": 0.5159589165481088, "learning_rate": 7.404919087620119e-06, "loss": 0.18449442386627196, "step": 13950 }, { "epoch": 1.2192032150969772, "grad_norm": 0.5588314346508092, "learning_rate": 7.402690645693773e-06, "loss": 0.21559789180755615, "step": 13955 }, { "epoch": 1.2196400489253887, "grad_norm": 0.5076970763543327, "learning_rate": 7.4004615830296e-06, "loss": 0.21538331508636474, "step": 13960 }, { "epoch": 1.2200768827538004, "grad_norm": 0.610044159584297, "learning_rate": 7.39823190020348e-06, "loss": 0.19028609991073608, "step": 13965 }, { "epoch": 1.2205137165822122, "grad_norm": 0.6180867473773495, "learning_rate": 7.396001597791455e-06, "loss": 0.20694401264190673, "step": 13970 }, { "epoch": 1.2209505504106237, "grad_norm": 0.530116828623235, "learning_rate": 7.393770676369725e-06, "loss": 0.20100395679473876, "step": 13975 }, { "epoch": 1.2213873842390355, "grad_norm": 0.6484766330278832, "learning_rate": 7.3915391365146496e-06, "loss": 0.23042030334472657, "step": 13980 }, { "epoch": 1.2218242180674472, "grad_norm": 0.6933872551019239, "learning_rate": 7.389306978802753e-06, "loss": 0.20130882263183594, "step": 13985 }, { "epoch": 1.2222610518958588, "grad_norm": 0.5341578557931803, "learning_rate": 7.387074203810714e-06, "loss": 0.21723155975341796, "step": 13990 }, { "epoch": 1.2226978857242705, "grad_norm": 0.5482626246551636, "learning_rate": 7.384840812115373e-06, "loss": 0.22504129409790039, "step": 13995 }, { "epoch": 1.2231347195526823, "grad_norm": 0.4960525284505456, "learning_rate": 7.382606804293728e-06, "loss": 0.18506333827972413, "step": 14000 }, { "epoch": 1.2235715533810938, "grad_norm": 0.5796679021429844, "learning_rate": 7.380372180922939e-06, "loss": 0.22101023197174072, "step": 14005 }, { "epoch": 1.2240083872095056, "grad_norm": 0.516372170163841, "learning_rate": 7.378136942580323e-06, "loss": 0.20207207202911376, "step": 14010 }, { "epoch": 1.224445221037917, "grad_norm": 0.5477682653388039, "learning_rate": 7.375901089843356e-06, "loss": 0.20686588287353516, "step": 14015 }, { "epoch": 1.2248820548663288, "grad_norm": 0.5725397131681695, "learning_rate": 7.373664623289674e-06, "loss": 0.22452201843261718, "step": 14020 }, { "epoch": 1.2253188886947406, "grad_norm": 0.4854571008659246, "learning_rate": 7.371427543497069e-06, "loss": 0.19791386127471924, "step": 14025 }, { "epoch": 1.2257557225231521, "grad_norm": 0.6709162402116583, "learning_rate": 7.369189851043494e-06, "loss": 0.249634051322937, "step": 14030 }, { "epoch": 1.2261925563515639, "grad_norm": 0.6324088177973365, "learning_rate": 7.366951546507059e-06, "loss": 0.20604047775268555, "step": 14035 }, { "epoch": 1.2266293901799756, "grad_norm": 0.44407154810292127, "learning_rate": 7.3647126304660336e-06, "loss": 0.1868537187576294, "step": 14040 }, { "epoch": 1.2270662240083872, "grad_norm": 0.647209501111749, "learning_rate": 7.362473103498842e-06, "loss": 0.20652780532836915, "step": 14045 }, { "epoch": 1.227503057836799, "grad_norm": 0.5454042488988456, "learning_rate": 7.360232966184071e-06, "loss": 0.20865678787231445, "step": 14050 }, { "epoch": 1.2279398916652107, "grad_norm": 0.5591584734254718, "learning_rate": 7.357992219100463e-06, "loss": 0.19295227527618408, "step": 14055 }, { "epoch": 1.2283767254936222, "grad_norm": 0.5748443054322526, "learning_rate": 7.355750862826914e-06, "loss": 0.19314515590667725, "step": 14060 }, { "epoch": 1.228813559322034, "grad_norm": 0.5888302177357703, "learning_rate": 7.353508897942483e-06, "loss": 0.22514917850494384, "step": 14065 }, { "epoch": 1.2292503931504455, "grad_norm": 0.49154052195532977, "learning_rate": 7.351266325026385e-06, "loss": 0.22116415500640868, "step": 14070 }, { "epoch": 1.2296872269788572, "grad_norm": 0.4398726254353763, "learning_rate": 7.349023144657991e-06, "loss": 0.22567672729492189, "step": 14075 }, { "epoch": 1.230124060807269, "grad_norm": 0.660835729701488, "learning_rate": 7.3467793574168275e-06, "loss": 0.2077984094619751, "step": 14080 }, { "epoch": 1.2305608946356805, "grad_norm": 0.7027145658612826, "learning_rate": 7.344534963882582e-06, "loss": 0.22260100841522218, "step": 14085 }, { "epoch": 1.2309977284640923, "grad_norm": 0.6089530952551181, "learning_rate": 7.342289964635092e-06, "loss": 0.2127762794494629, "step": 14090 }, { "epoch": 1.2314345622925038, "grad_norm": 0.7621359835988399, "learning_rate": 7.340044360254359e-06, "loss": 0.22343058586120607, "step": 14095 }, { "epoch": 1.2318713961209156, "grad_norm": 0.5859080053031585, "learning_rate": 7.337798151320538e-06, "loss": 0.19640698432922363, "step": 14100 }, { "epoch": 1.2323082299493273, "grad_norm": 0.6641195246708449, "learning_rate": 7.335551338413937e-06, "loss": 0.2230691432952881, "step": 14105 }, { "epoch": 1.232745063777739, "grad_norm": 0.5174636123633884, "learning_rate": 7.333303922115025e-06, "loss": 0.19155142307281495, "step": 14110 }, { "epoch": 1.2331818976061506, "grad_norm": 0.5169834957096799, "learning_rate": 7.331055903004421e-06, "loss": 0.19082165956497193, "step": 14115 }, { "epoch": 1.2336187314345624, "grad_norm": 0.4999129875180656, "learning_rate": 7.328807281662909e-06, "loss": 0.19111964702606202, "step": 14120 }, { "epoch": 1.2340555652629739, "grad_norm": 0.5320528906720309, "learning_rate": 7.326558058671418e-06, "loss": 0.21087746620178222, "step": 14125 }, { "epoch": 1.2344923990913856, "grad_norm": 0.6165318101209444, "learning_rate": 7.324308234611037e-06, "loss": 0.18223986625671387, "step": 14130 }, { "epoch": 1.2349292329197974, "grad_norm": 0.7597885213109039, "learning_rate": 7.322057810063015e-06, "loss": 0.20830039978027343, "step": 14135 }, { "epoch": 1.235366066748209, "grad_norm": 0.5344820777805532, "learning_rate": 7.3198067856087475e-06, "loss": 0.2178797245025635, "step": 14140 }, { "epoch": 1.2358029005766207, "grad_norm": 0.500647618802638, "learning_rate": 7.317555161829791e-06, "loss": 0.2254789352416992, "step": 14145 }, { "epoch": 1.2362397344050322, "grad_norm": 0.6006619530078029, "learning_rate": 7.315302939307856e-06, "loss": 0.21158690452575685, "step": 14150 }, { "epoch": 1.236676568233444, "grad_norm": 0.6272478153539779, "learning_rate": 7.313050118624804e-06, "loss": 0.2321688175201416, "step": 14155 }, { "epoch": 1.2371134020618557, "grad_norm": 0.6192344029059438, "learning_rate": 7.3107967003626545e-06, "loss": 0.20386385917663574, "step": 14160 }, { "epoch": 1.2375502358902672, "grad_norm": 0.46074567391325477, "learning_rate": 7.308542685103584e-06, "loss": 0.18208105564117433, "step": 14165 }, { "epoch": 1.237987069718679, "grad_norm": 0.5297910831829781, "learning_rate": 7.306288073429915e-06, "loss": 0.20774564743041993, "step": 14170 }, { "epoch": 1.2384239035470908, "grad_norm": 0.5459680976924727, "learning_rate": 7.304032865924132e-06, "loss": 0.1818066954612732, "step": 14175 }, { "epoch": 1.2388607373755023, "grad_norm": 0.6410826511199523, "learning_rate": 7.301777063168871e-06, "loss": 0.1843437671661377, "step": 14180 }, { "epoch": 1.239297571203914, "grad_norm": 0.6656274678632742, "learning_rate": 7.299520665746918e-06, "loss": 0.21028380393981932, "step": 14185 }, { "epoch": 1.2397344050323258, "grad_norm": 0.617339046148684, "learning_rate": 7.297263674241217e-06, "loss": 0.2088325023651123, "step": 14190 }, { "epoch": 1.2401712388607373, "grad_norm": 0.5802672902229319, "learning_rate": 7.295006089234866e-06, "loss": 0.21828632354736327, "step": 14195 }, { "epoch": 1.240608072689149, "grad_norm": 0.6120743852119986, "learning_rate": 7.292747911311115e-06, "loss": 0.19835201501846314, "step": 14200 }, { "epoch": 1.2410449065175606, "grad_norm": 0.6303608131009455, "learning_rate": 7.290489141053363e-06, "loss": 0.18548606634140014, "step": 14205 }, { "epoch": 1.2414817403459724, "grad_norm": 0.6371062762264215, "learning_rate": 7.2882297790451685e-06, "loss": 0.21124825477600098, "step": 14210 }, { "epoch": 1.2419185741743841, "grad_norm": 0.5559216036013028, "learning_rate": 7.28596982587024e-06, "loss": 0.20526669025421143, "step": 14215 }, { "epoch": 1.2423554080027956, "grad_norm": 0.6126235113854697, "learning_rate": 7.283709282112439e-06, "loss": 0.22869391441345216, "step": 14220 }, { "epoch": 1.2427922418312074, "grad_norm": 0.47101247942514474, "learning_rate": 7.28144814835578e-06, "loss": 0.19998183250427246, "step": 14225 }, { "epoch": 1.2432290756596192, "grad_norm": 0.48861762414676024, "learning_rate": 7.2791864251844295e-06, "loss": 0.1869894742965698, "step": 14230 }, { "epoch": 1.2436659094880307, "grad_norm": 0.5674027224843221, "learning_rate": 7.276924113182703e-06, "loss": 0.20323915481567384, "step": 14235 }, { "epoch": 1.2441027433164424, "grad_norm": 0.6161509734528241, "learning_rate": 7.274661212935075e-06, "loss": 0.18517624139785765, "step": 14240 }, { "epoch": 1.2445395771448542, "grad_norm": 0.6381080048462459, "learning_rate": 7.2723977250261675e-06, "loss": 0.21601827144622804, "step": 14245 }, { "epoch": 1.2449764109732657, "grad_norm": 0.5960982061714104, "learning_rate": 7.270133650040754e-06, "loss": 0.21985917091369628, "step": 14250 }, { "epoch": 1.2454132448016775, "grad_norm": 0.6398689149514394, "learning_rate": 7.267868988563762e-06, "loss": 0.20944397449493407, "step": 14255 }, { "epoch": 1.245850078630089, "grad_norm": 0.5472782980840308, "learning_rate": 7.265603741180269e-06, "loss": 0.2176764965057373, "step": 14260 }, { "epoch": 1.2462869124585008, "grad_norm": 0.5287339177410156, "learning_rate": 7.263337908475505e-06, "loss": 0.18161338567733765, "step": 14265 }, { "epoch": 1.2467237462869125, "grad_norm": 0.5929895628659575, "learning_rate": 7.261071491034848e-06, "loss": 0.2311274528503418, "step": 14270 }, { "epoch": 1.247160580115324, "grad_norm": 0.46370291468276725, "learning_rate": 7.258804489443833e-06, "loss": 0.20047519207000733, "step": 14275 }, { "epoch": 1.2475974139437358, "grad_norm": 0.4952490087289498, "learning_rate": 7.256536904288139e-06, "loss": 0.18464584350585939, "step": 14280 }, { "epoch": 1.2480342477721476, "grad_norm": 0.5092065524067978, "learning_rate": 7.254268736153601e-06, "loss": 0.19317243099212647, "step": 14285 }, { "epoch": 1.248471081600559, "grad_norm": 0.5770187399187691, "learning_rate": 7.251999985626203e-06, "loss": 0.20741424560546876, "step": 14290 }, { "epoch": 1.2489079154289708, "grad_norm": 0.6099554639078982, "learning_rate": 7.2497306532920785e-06, "loss": 0.21500983238220214, "step": 14295 }, { "epoch": 1.2493447492573826, "grad_norm": 0.5450310166205475, "learning_rate": 7.24746073973751e-06, "loss": 0.20808515548706055, "step": 14300 }, { "epoch": 1.2497815830857941, "grad_norm": 0.5294281658050731, "learning_rate": 7.245190245548937e-06, "loss": 0.2134557247161865, "step": 14305 }, { "epoch": 1.2502184169142059, "grad_norm": 0.6950936961860337, "learning_rate": 7.242919171312942e-06, "loss": 0.187199866771698, "step": 14310 }, { "epoch": 1.2506552507426174, "grad_norm": 0.6037869601787249, "learning_rate": 7.240647517616258e-06, "loss": 0.19746706485748292, "step": 14315 }, { "epoch": 1.2510920845710292, "grad_norm": 0.5341827077983068, "learning_rate": 7.238375285045771e-06, "loss": 0.2112839937210083, "step": 14320 }, { "epoch": 1.251528918399441, "grad_norm": 0.6147990821124708, "learning_rate": 7.236102474188515e-06, "loss": 0.19745209217071533, "step": 14325 }, { "epoch": 1.2519657522278524, "grad_norm": 0.4962613637787663, "learning_rate": 7.233829085631673e-06, "loss": 0.22779154777526855, "step": 14330 }, { "epoch": 1.2524025860562642, "grad_norm": 0.6132271525725164, "learning_rate": 7.231555119962576e-06, "loss": 0.21057815551757814, "step": 14335 }, { "epoch": 1.2528394198846757, "grad_norm": 0.6101253954054177, "learning_rate": 7.229280577768707e-06, "loss": 0.16915512084960938, "step": 14340 }, { "epoch": 1.2532762537130875, "grad_norm": 0.516969775020166, "learning_rate": 7.2270054596376955e-06, "loss": 0.20716626644134523, "step": 14345 }, { "epoch": 1.2537130875414992, "grad_norm": 0.7119462256645089, "learning_rate": 7.224729766157321e-06, "loss": 0.20879135131835938, "step": 14350 }, { "epoch": 1.254149921369911, "grad_norm": 0.710153657933422, "learning_rate": 7.222453497915514e-06, "loss": 0.19944531917572023, "step": 14355 }, { "epoch": 1.2545867551983225, "grad_norm": 0.6822663752353977, "learning_rate": 7.220176655500348e-06, "loss": 0.16791799068450927, "step": 14360 }, { "epoch": 1.2550235890267343, "grad_norm": 0.45766153447402974, "learning_rate": 7.217899239500049e-06, "loss": 0.223526668548584, "step": 14365 }, { "epoch": 1.2554604228551458, "grad_norm": 0.5227455439211627, "learning_rate": 7.21562125050299e-06, "loss": 0.20431084632873536, "step": 14370 }, { "epoch": 1.2558972566835576, "grad_norm": 0.64576938563781, "learning_rate": 7.213342689097692e-06, "loss": 0.1994416356086731, "step": 14375 }, { "epoch": 1.2563340905119693, "grad_norm": 0.5526867647652645, "learning_rate": 7.211063555872824e-06, "loss": 0.18822476863861085, "step": 14380 }, { "epoch": 1.2567709243403808, "grad_norm": 0.5889494923618389, "learning_rate": 7.2087838514172005e-06, "loss": 0.18256698846817015, "step": 14385 }, { "epoch": 1.2572077581687926, "grad_norm": 0.5998581251071919, "learning_rate": 7.20650357631979e-06, "loss": 0.19493480920791625, "step": 14390 }, { "epoch": 1.2576445919972041, "grad_norm": 0.5309307012097413, "learning_rate": 7.204222731169699e-06, "loss": 0.21269626617431642, "step": 14395 }, { "epoch": 1.2580814258256159, "grad_norm": 0.5581733809064433, "learning_rate": 7.201941316556191e-06, "loss": 0.19212124347686768, "step": 14400 }, { "epoch": 1.2585182596540276, "grad_norm": 0.610937749699916, "learning_rate": 7.1996593330686694e-06, "loss": 0.17587404251098632, "step": 14405 }, { "epoch": 1.2589550934824394, "grad_norm": 0.5715804590899995, "learning_rate": 7.197376781296688e-06, "loss": 0.1984747290611267, "step": 14410 }, { "epoch": 1.259391927310851, "grad_norm": 0.5551731307935533, "learning_rate": 7.195093661829946e-06, "loss": 0.20591998100280762, "step": 14415 }, { "epoch": 1.2598287611392627, "grad_norm": 0.5351962022565965, "learning_rate": 7.19280997525829e-06, "loss": 0.21172478199005126, "step": 14420 }, { "epoch": 1.2602655949676742, "grad_norm": 0.6082158848693613, "learning_rate": 7.190525722171715e-06, "loss": 0.19986107349395751, "step": 14425 }, { "epoch": 1.260702428796086, "grad_norm": 0.5275380383859921, "learning_rate": 7.1882409031603585e-06, "loss": 0.1873807430267334, "step": 14430 }, { "epoch": 1.2611392626244977, "grad_norm": 0.5325482413128204, "learning_rate": 7.1859555188145044e-06, "loss": 0.21128449440002442, "step": 14435 }, { "epoch": 1.2615760964529092, "grad_norm": 0.565253723103066, "learning_rate": 7.183669569724589e-06, "loss": 0.21474266052246094, "step": 14440 }, { "epoch": 1.262012930281321, "grad_norm": 0.7485679520026778, "learning_rate": 7.181383056481185e-06, "loss": 0.2003714323043823, "step": 14445 }, { "epoch": 1.2624497641097325, "grad_norm": 0.664448783432084, "learning_rate": 7.179095979675017e-06, "loss": 0.22943789958953859, "step": 14450 }, { "epoch": 1.2628865979381443, "grad_norm": 0.6034362755248465, "learning_rate": 7.1768083398969566e-06, "loss": 0.19631998538970946, "step": 14455 }, { "epoch": 1.263323431766556, "grad_norm": 0.5478023085901093, "learning_rate": 7.174520137738015e-06, "loss": 0.20935001373291015, "step": 14460 }, { "epoch": 1.2637602655949678, "grad_norm": 0.54842625187583, "learning_rate": 7.1722313737893524e-06, "loss": 0.2294788360595703, "step": 14465 }, { "epoch": 1.2641970994233793, "grad_norm": 0.5455595093535032, "learning_rate": 7.169942048642275e-06, "loss": 0.21540956497192382, "step": 14470 }, { "epoch": 1.264633933251791, "grad_norm": 0.5065872931194529, "learning_rate": 7.167652162888231e-06, "loss": 0.2056530475616455, "step": 14475 }, { "epoch": 1.2650707670802026, "grad_norm": 0.5649814647040072, "learning_rate": 7.165361717118816e-06, "loss": 0.20969486236572266, "step": 14480 }, { "epoch": 1.2655076009086144, "grad_norm": 0.5734770526402962, "learning_rate": 7.163070711925768e-06, "loss": 0.20530462265014648, "step": 14485 }, { "epoch": 1.2659444347370261, "grad_norm": 0.49096850091623095, "learning_rate": 7.160779147900971e-06, "loss": 0.20380125045776368, "step": 14490 }, { "epoch": 1.2663812685654376, "grad_norm": 0.7442728339482947, "learning_rate": 7.158487025636455e-06, "loss": 0.22067337036132811, "step": 14495 }, { "epoch": 1.2668181023938494, "grad_norm": 0.44160759824186224, "learning_rate": 7.156194345724391e-06, "loss": 0.19451137781143188, "step": 14500 }, { "epoch": 1.267254936222261, "grad_norm": 0.609983032724517, "learning_rate": 7.153901108757095e-06, "loss": 0.2087002992630005, "step": 14505 }, { "epoch": 1.2676917700506727, "grad_norm": 0.579044963619826, "learning_rate": 7.151607315327026e-06, "loss": 0.19568625688552857, "step": 14510 }, { "epoch": 1.2681286038790844, "grad_norm": 0.7205154177597614, "learning_rate": 7.149312966026793e-06, "loss": 0.19879299402236938, "step": 14515 }, { "epoch": 1.2685654377074962, "grad_norm": 0.6344133973323812, "learning_rate": 7.147018061449138e-06, "loss": 0.20491113662719726, "step": 14520 }, { "epoch": 1.2690022715359077, "grad_norm": 0.6572486514651711, "learning_rate": 7.144722602186955e-06, "loss": 0.19736173152923583, "step": 14525 }, { "epoch": 1.2694391053643195, "grad_norm": 0.5633355894221934, "learning_rate": 7.142426588833279e-06, "loss": 0.22274541854858398, "step": 14530 }, { "epoch": 1.269875939192731, "grad_norm": 0.6361030177328849, "learning_rate": 7.140130021981286e-06, "loss": 0.20746092796325682, "step": 14535 }, { "epoch": 1.2703127730211428, "grad_norm": 0.5374735596332895, "learning_rate": 7.1378329022243e-06, "loss": 0.22432827949523926, "step": 14540 }, { "epoch": 1.2707496068495545, "grad_norm": 0.5300899548321768, "learning_rate": 7.135535230155779e-06, "loss": 0.20038235187530518, "step": 14545 }, { "epoch": 1.271186440677966, "grad_norm": 0.5975589249309817, "learning_rate": 7.133237006369334e-06, "loss": 0.17775024175643922, "step": 14550 }, { "epoch": 1.2716232745063778, "grad_norm": 0.5887795439103718, "learning_rate": 7.130938231458711e-06, "loss": 0.21580824851989747, "step": 14555 }, { "epoch": 1.2720601083347893, "grad_norm": 0.4885114905001732, "learning_rate": 7.1286389060178005e-06, "loss": 0.19905858039855956, "step": 14560 }, { "epoch": 1.272496942163201, "grad_norm": 0.596893970611733, "learning_rate": 7.12633903064064e-06, "loss": 0.22916202545166015, "step": 14565 }, { "epoch": 1.2729337759916128, "grad_norm": 0.73594010903506, "learning_rate": 7.124038605921401e-06, "loss": 0.22252602577209474, "step": 14570 }, { "epoch": 1.2733706098200246, "grad_norm": 0.5573943644319207, "learning_rate": 7.121737632454401e-06, "loss": 0.1987122654914856, "step": 14575 }, { "epoch": 1.2738074436484361, "grad_norm": 0.5085630685469241, "learning_rate": 7.119436110834101e-06, "loss": 0.22831134796142577, "step": 14580 }, { "epoch": 1.2742442774768479, "grad_norm": 0.5370787198102925, "learning_rate": 7.1171340416551e-06, "loss": 0.21591465473175048, "step": 14585 }, { "epoch": 1.2746811113052594, "grad_norm": 0.575442044623214, "learning_rate": 7.114831425512141e-06, "loss": 0.2174166202545166, "step": 14590 }, { "epoch": 1.2751179451336712, "grad_norm": 0.4582322137275844, "learning_rate": 7.112528263000109e-06, "loss": 0.24170286655426027, "step": 14595 }, { "epoch": 1.275554778962083, "grad_norm": 0.6176055917869488, "learning_rate": 7.110224554714025e-06, "loss": 0.19081509113311768, "step": 14600 }, { "epoch": 1.2759916127904944, "grad_norm": 0.5913125432474755, "learning_rate": 7.1079203012490565e-06, "loss": 0.19847574234008789, "step": 14605 }, { "epoch": 1.2764284466189062, "grad_norm": 0.5260633169939055, "learning_rate": 7.105615503200509e-06, "loss": 0.19095617532730103, "step": 14610 }, { "epoch": 1.2768652804473177, "grad_norm": 0.5191676158308931, "learning_rate": 7.103310161163833e-06, "loss": 0.18541154861450196, "step": 14615 }, { "epoch": 1.2773021142757295, "grad_norm": 0.4951420265894193, "learning_rate": 7.101004275734613e-06, "loss": 0.21021676063537598, "step": 14620 }, { "epoch": 1.2777389481041412, "grad_norm": 0.6022187199379193, "learning_rate": 7.098697847508578e-06, "loss": 0.21486301422119142, "step": 14625 }, { "epoch": 1.278175781932553, "grad_norm": 0.6105843179947353, "learning_rate": 7.096390877081597e-06, "loss": 0.19872332811355592, "step": 14630 }, { "epoch": 1.2786126157609645, "grad_norm": 0.6179762542379752, "learning_rate": 7.094083365049677e-06, "loss": 0.2083073616027832, "step": 14635 }, { "epoch": 1.2790494495893763, "grad_norm": 0.6500996146403082, "learning_rate": 7.091775312008967e-06, "loss": 0.22033615112304689, "step": 14640 }, { "epoch": 1.2794862834177878, "grad_norm": 0.5650867204328295, "learning_rate": 7.089466718555756e-06, "loss": 0.19009519815444947, "step": 14645 }, { "epoch": 1.2799231172461996, "grad_norm": 0.7196421344025229, "learning_rate": 7.087157585286469e-06, "loss": 0.21915068626403808, "step": 14650 }, { "epoch": 1.2803599510746113, "grad_norm": 0.6050379631054437, "learning_rate": 7.084847912797677e-06, "loss": 0.21338295936584473, "step": 14655 }, { "epoch": 1.2807967849030228, "grad_norm": 0.5999349833684121, "learning_rate": 7.082537701686083e-06, "loss": 0.22252497673034669, "step": 14660 }, { "epoch": 1.2812336187314346, "grad_norm": 0.5524650035375667, "learning_rate": 7.080226952548534e-06, "loss": 0.20177922248840333, "step": 14665 }, { "epoch": 1.2816704525598461, "grad_norm": 0.5106838548509315, "learning_rate": 7.077915665982016e-06, "loss": 0.20268700122833253, "step": 14670 }, { "epoch": 1.2821072863882579, "grad_norm": 0.49664391018142584, "learning_rate": 7.075603842583651e-06, "loss": 0.21928012371063232, "step": 14675 }, { "epoch": 1.2825441202166696, "grad_norm": 0.6321073526319839, "learning_rate": 7.073291482950701e-06, "loss": 0.21574106216430664, "step": 14680 }, { "epoch": 1.2829809540450814, "grad_norm": 0.5839210062949368, "learning_rate": 7.070978587680567e-06, "loss": 0.20138232707977294, "step": 14685 }, { "epoch": 1.283417787873493, "grad_norm": 0.5787163223184941, "learning_rate": 7.068665157370789e-06, "loss": 0.23652687072753906, "step": 14690 }, { "epoch": 1.2838546217019047, "grad_norm": 0.47937963936254635, "learning_rate": 7.0663511926190445e-06, "loss": 0.23120565414428712, "step": 14695 }, { "epoch": 1.2842914555303162, "grad_norm": 0.5781713922717251, "learning_rate": 7.064036694023145e-06, "loss": 0.22206051349639894, "step": 14700 }, { "epoch": 1.284728289358728, "grad_norm": 0.6708452942919911, "learning_rate": 7.061721662181049e-06, "loss": 0.19101982116699218, "step": 14705 }, { "epoch": 1.2851651231871397, "grad_norm": 0.6833619110659465, "learning_rate": 7.059406097690845e-06, "loss": 0.20747904777526854, "step": 14710 }, { "epoch": 1.2856019570155512, "grad_norm": 0.6387293536654374, "learning_rate": 7.057090001150761e-06, "loss": 0.20778958797454833, "step": 14715 }, { "epoch": 1.286038790843963, "grad_norm": 0.5160446279819987, "learning_rate": 7.054773373159165e-06, "loss": 0.18350273370742798, "step": 14720 }, { "epoch": 1.2864756246723745, "grad_norm": 0.8116932945314613, "learning_rate": 7.052456214314559e-06, "loss": 0.19076173305511473, "step": 14725 }, { "epoch": 1.2869124585007863, "grad_norm": 0.529779101542606, "learning_rate": 7.050138525215584e-06, "loss": 0.20140938758850097, "step": 14730 }, { "epoch": 1.287349292329198, "grad_norm": 0.5713314264088671, "learning_rate": 7.047820306461018e-06, "loss": 0.20522053241729737, "step": 14735 }, { "epoch": 1.2877861261576096, "grad_norm": 0.8657344874385996, "learning_rate": 7.045501558649776e-06, "loss": 0.2026134967803955, "step": 14740 }, { "epoch": 1.2882229599860213, "grad_norm": 0.5307563027711413, "learning_rate": 7.0431822823809095e-06, "loss": 0.20891084671020507, "step": 14745 }, { "epoch": 1.2886597938144329, "grad_norm": 0.6540192510536097, "learning_rate": 7.040862478253603e-06, "loss": 0.21188414096832275, "step": 14750 }, { "epoch": 1.2890966276428446, "grad_norm": 0.4693053633273341, "learning_rate": 7.038542146867184e-06, "loss": 0.19607479572296144, "step": 14755 }, { "epoch": 1.2895334614712564, "grad_norm": 0.5457846540666635, "learning_rate": 7.036221288821108e-06, "loss": 0.2024383068084717, "step": 14760 }, { "epoch": 1.2899702952996681, "grad_norm": 0.5364313819879163, "learning_rate": 7.033899904714976e-06, "loss": 0.20746517181396484, "step": 14765 }, { "epoch": 1.2904071291280796, "grad_norm": 0.5750293986161845, "learning_rate": 7.03157799514852e-06, "loss": 0.21237998008728026, "step": 14770 }, { "epoch": 1.2908439629564914, "grad_norm": 0.5172252563316472, "learning_rate": 7.0292555607216055e-06, "loss": 0.1996644377708435, "step": 14775 }, { "epoch": 1.291280796784903, "grad_norm": 0.7073293935765733, "learning_rate": 7.026932602034237e-06, "loss": 0.2083993911743164, "step": 14780 }, { "epoch": 1.2917176306133147, "grad_norm": 0.6046557965934503, "learning_rate": 7.024609119686553e-06, "loss": 0.2001891851425171, "step": 14785 }, { "epoch": 1.2921544644417264, "grad_norm": 0.5563135868500787, "learning_rate": 7.022285114278829e-06, "loss": 0.20107274055480956, "step": 14790 }, { "epoch": 1.292591298270138, "grad_norm": 0.7203513336289166, "learning_rate": 7.019960586411475e-06, "loss": 0.18661625385284425, "step": 14795 }, { "epoch": 1.2930281320985497, "grad_norm": 0.5659830040879013, "learning_rate": 7.0176355366850325e-06, "loss": 0.22418599128723143, "step": 14800 }, { "epoch": 1.2934649659269613, "grad_norm": 0.605290513677476, "learning_rate": 7.015309965700184e-06, "loss": 0.22504446506500245, "step": 14805 }, { "epoch": 1.293901799755373, "grad_norm": 0.6663156746561478, "learning_rate": 7.012983874057739e-06, "loss": 0.1917961597442627, "step": 14810 }, { "epoch": 1.2943386335837848, "grad_norm": 0.46712913693458935, "learning_rate": 7.010657262358651e-06, "loss": 0.1715124249458313, "step": 14815 }, { "epoch": 1.2947754674121965, "grad_norm": 0.5630404037866887, "learning_rate": 7.008330131204e-06, "loss": 0.194452702999115, "step": 14820 }, { "epoch": 1.295212301240608, "grad_norm": 0.5777971094457478, "learning_rate": 7.006002481195003e-06, "loss": 0.20895664691925048, "step": 14825 }, { "epoch": 1.2956491350690198, "grad_norm": 0.7804117062852411, "learning_rate": 7.0036743129330106e-06, "loss": 0.21864991188049315, "step": 14830 }, { "epoch": 1.2960859688974313, "grad_norm": 0.5420372960191161, "learning_rate": 7.00134562701951e-06, "loss": 0.20040569305419922, "step": 14835 }, { "epoch": 1.296522802725843, "grad_norm": 0.5010715272133137, "learning_rate": 6.999016424056117e-06, "loss": 0.18505071401596068, "step": 14840 }, { "epoch": 1.2969596365542548, "grad_norm": 0.5327240817189779, "learning_rate": 6.9966867046445865e-06, "loss": 0.22399568557739258, "step": 14845 }, { "epoch": 1.2973964703826664, "grad_norm": 0.6128428516163764, "learning_rate": 6.9943564693868025e-06, "loss": 0.2046356678009033, "step": 14850 }, { "epoch": 1.2978333042110781, "grad_norm": 0.5531368177789748, "learning_rate": 6.992025718884783e-06, "loss": 0.19824676513671874, "step": 14855 }, { "epoch": 1.2982701380394897, "grad_norm": 0.49311939769405627, "learning_rate": 6.989694453740681e-06, "loss": 0.21672439575195312, "step": 14860 }, { "epoch": 1.2987069718679014, "grad_norm": 0.5423141765744972, "learning_rate": 6.987362674556781e-06, "loss": 0.23439693450927734, "step": 14865 }, { "epoch": 1.2991438056963132, "grad_norm": 0.6631194595034559, "learning_rate": 6.985030381935502e-06, "loss": 0.17450084686279296, "step": 14870 }, { "epoch": 1.299580639524725, "grad_norm": 0.6950480055066135, "learning_rate": 6.982697576479393e-06, "loss": 0.17264168262481688, "step": 14875 }, { "epoch": 1.3000174733531364, "grad_norm": 0.6135527985473315, "learning_rate": 6.980364258791139e-06, "loss": 0.18357843160629272, "step": 14880 }, { "epoch": 1.3004543071815482, "grad_norm": 0.6278221186412195, "learning_rate": 6.978030429473554e-06, "loss": 0.1850365400314331, "step": 14885 }, { "epoch": 1.3008911410099597, "grad_norm": 0.6097147493049767, "learning_rate": 6.975696089129585e-06, "loss": 0.20594508647918702, "step": 14890 }, { "epoch": 1.3013279748383715, "grad_norm": 0.5307460004043753, "learning_rate": 6.973361238362312e-06, "loss": 0.17430646419525148, "step": 14895 }, { "epoch": 1.3017648086667832, "grad_norm": 0.5782377987065931, "learning_rate": 6.971025877774947e-06, "loss": 0.22181954383850097, "step": 14900 }, { "epoch": 1.3022016424951948, "grad_norm": 0.4954518362820385, "learning_rate": 6.96869000797083e-06, "loss": 0.19985085725784302, "step": 14905 }, { "epoch": 1.3026384763236065, "grad_norm": 0.5123652725362314, "learning_rate": 6.966353629553437e-06, "loss": 0.19389970302581788, "step": 14910 }, { "epoch": 1.303075310152018, "grad_norm": 0.5755482390216407, "learning_rate": 6.964016743126378e-06, "loss": 0.19247351884841918, "step": 14915 }, { "epoch": 1.3035121439804298, "grad_norm": 0.7699997977745161, "learning_rate": 6.961679349293384e-06, "loss": 0.22491440773010254, "step": 14920 }, { "epoch": 1.3039489778088416, "grad_norm": 0.5432807628220803, "learning_rate": 6.959341448658328e-06, "loss": 0.21104509830474855, "step": 14925 }, { "epoch": 1.3043858116372533, "grad_norm": 0.5436427982518772, "learning_rate": 6.9570030418252066e-06, "loss": 0.2057502508163452, "step": 14930 }, { "epoch": 1.3048226454656648, "grad_norm": 0.6040383645518179, "learning_rate": 6.954664129398151e-06, "loss": 0.19508085250854493, "step": 14935 }, { "epoch": 1.3052594792940766, "grad_norm": 0.6959972222120505, "learning_rate": 6.952324711981422e-06, "loss": 0.2022184371948242, "step": 14940 }, { "epoch": 1.3056963131224881, "grad_norm": 0.6169733083170426, "learning_rate": 6.94998479017941e-06, "loss": 0.20123820304870604, "step": 14945 }, { "epoch": 1.3061331469508999, "grad_norm": 0.6028413278425824, "learning_rate": 6.947644364596637e-06, "loss": 0.2013258934020996, "step": 14950 }, { "epoch": 1.3065699807793116, "grad_norm": 0.523842503518505, "learning_rate": 6.945303435837754e-06, "loss": 0.17740294933319092, "step": 14955 }, { "epoch": 1.3070068146077232, "grad_norm": 0.6005477507486542, "learning_rate": 6.942962004507542e-06, "loss": 0.21088461875915526, "step": 14960 }, { "epoch": 1.307443648436135, "grad_norm": 0.5040580477316112, "learning_rate": 6.940620071210915e-06, "loss": 0.22354769706726074, "step": 14965 }, { "epoch": 1.3078804822645465, "grad_norm": 0.5377443015394102, "learning_rate": 6.938277636552912e-06, "loss": 0.2242039680480957, "step": 14970 }, { "epoch": 1.3083173160929582, "grad_norm": 0.6124140862773594, "learning_rate": 6.935934701138702e-06, "loss": 0.2076946973800659, "step": 14975 }, { "epoch": 1.30875414992137, "grad_norm": 0.5760951022041942, "learning_rate": 6.9335912655735895e-06, "loss": 0.20374999046325684, "step": 14980 }, { "epoch": 1.3091909837497817, "grad_norm": 0.5905196894391954, "learning_rate": 6.931247330463e-06, "loss": 0.18130413293838502, "step": 14985 }, { "epoch": 1.3096278175781932, "grad_norm": 0.5646787884139651, "learning_rate": 6.928902896412494e-06, "loss": 0.1873328447341919, "step": 14990 }, { "epoch": 1.310064651406605, "grad_norm": 0.6539405237009965, "learning_rate": 6.926557964027757e-06, "loss": 0.2020854949951172, "step": 14995 }, { "epoch": 1.3105014852350165, "grad_norm": 0.6516756043409504, "learning_rate": 6.924212533914607e-06, "loss": 0.20637822151184082, "step": 15000 }, { "epoch": 1.3109383190634283, "grad_norm": 0.5744902291117484, "learning_rate": 6.921866606678985e-06, "loss": 0.18148322105407716, "step": 15005 }, { "epoch": 1.31137515289184, "grad_norm": 0.5687224599966373, "learning_rate": 6.919520182926968e-06, "loss": 0.17671386003494263, "step": 15010 }, { "epoch": 1.3118119867202516, "grad_norm": 0.5246865144771748, "learning_rate": 6.917173263264753e-06, "loss": 0.22848820686340332, "step": 15015 }, { "epoch": 1.3122488205486633, "grad_norm": 0.5444831114807109, "learning_rate": 6.914825848298673e-06, "loss": 0.1977104902267456, "step": 15020 }, { "epoch": 1.3126856543770749, "grad_norm": 0.4995955388552544, "learning_rate": 6.912477938635184e-06, "loss": 0.24890599250793458, "step": 15025 }, { "epoch": 1.3131224882054866, "grad_norm": 0.5577597311637884, "learning_rate": 6.910129534880869e-06, "loss": 0.2272031784057617, "step": 15030 }, { "epoch": 1.3135593220338984, "grad_norm": 0.5554511819718528, "learning_rate": 6.907780637642445e-06, "loss": 0.19444431066513063, "step": 15035 }, { "epoch": 1.3139961558623101, "grad_norm": 0.47210992703107596, "learning_rate": 6.905431247526748e-06, "loss": 0.1827130913734436, "step": 15040 }, { "epoch": 1.3144329896907216, "grad_norm": 0.6300132921374952, "learning_rate": 6.90308136514075e-06, "loss": 0.20508880615234376, "step": 15045 }, { "epoch": 1.3148698235191334, "grad_norm": 0.6353329405451456, "learning_rate": 6.900730991091543e-06, "loss": 0.2027278423309326, "step": 15050 }, { "epoch": 1.315306657347545, "grad_norm": 0.5207294744233094, "learning_rate": 6.898380125986348e-06, "loss": 0.20973472595214843, "step": 15055 }, { "epoch": 1.3157434911759567, "grad_norm": 0.8550351408677278, "learning_rate": 6.896028770432515e-06, "loss": 0.18946750164031984, "step": 15060 }, { "epoch": 1.3161803250043684, "grad_norm": 0.49471544394275646, "learning_rate": 6.893676925037518e-06, "loss": 0.18097131252288817, "step": 15065 }, { "epoch": 1.31661715883278, "grad_norm": 0.5630503807969608, "learning_rate": 6.891324590408961e-06, "loss": 0.18084454536437988, "step": 15070 }, { "epoch": 1.3170539926611917, "grad_norm": 0.5471043817260511, "learning_rate": 6.888971767154572e-06, "loss": 0.20627951622009277, "step": 15075 }, { "epoch": 1.3174908264896033, "grad_norm": 0.5838705835283988, "learning_rate": 6.8866184558822034e-06, "loss": 0.21151244640350342, "step": 15080 }, { "epoch": 1.317927660318015, "grad_norm": 0.6407153011530082, "learning_rate": 6.884264657199837e-06, "loss": 0.21148252487182617, "step": 15085 }, { "epoch": 1.3183644941464268, "grad_norm": 0.6144783586244709, "learning_rate": 6.881910371715581e-06, "loss": 0.17221513986587525, "step": 15090 }, { "epoch": 1.3188013279748383, "grad_norm": 0.6001597719693587, "learning_rate": 6.879555600037665e-06, "loss": 0.22056326866149903, "step": 15095 }, { "epoch": 1.31923816180325, "grad_norm": 0.543998810451564, "learning_rate": 6.87720034277445e-06, "loss": 0.19385459423065185, "step": 15100 }, { "epoch": 1.3196749956316618, "grad_norm": 0.5305357003399263, "learning_rate": 6.874844600534415e-06, "loss": 0.20253872871398926, "step": 15105 }, { "epoch": 1.3201118294600733, "grad_norm": 0.6927757337840575, "learning_rate": 6.872488373926173e-06, "loss": 0.24268145561218263, "step": 15110 }, { "epoch": 1.320548663288485, "grad_norm": 0.5305614918961334, "learning_rate": 6.870131663558455e-06, "loss": 0.2182760715484619, "step": 15115 }, { "epoch": 1.3209854971168968, "grad_norm": 0.617194795823799, "learning_rate": 6.86777447004012e-06, "loss": 0.1879274368286133, "step": 15120 }, { "epoch": 1.3214223309453084, "grad_norm": 0.6361403841585198, "learning_rate": 6.865416793980154e-06, "loss": 0.1926802635192871, "step": 15125 }, { "epoch": 1.3218591647737201, "grad_norm": 0.6634427445931147, "learning_rate": 6.863058635987663e-06, "loss": 0.21209514141082764, "step": 15130 }, { "epoch": 1.3222959986021317, "grad_norm": 0.591966894422455, "learning_rate": 6.860699996671879e-06, "loss": 0.20687296390533447, "step": 15135 }, { "epoch": 1.3227328324305434, "grad_norm": 0.545681363142807, "learning_rate": 6.8583408766421625e-06, "loss": 0.206196928024292, "step": 15140 }, { "epoch": 1.3231696662589552, "grad_norm": 0.5031068394666666, "learning_rate": 6.855981276507991e-06, "loss": 0.21155138015747071, "step": 15145 }, { "epoch": 1.3236065000873667, "grad_norm": 0.6827196155494981, "learning_rate": 6.853621196878973e-06, "loss": 0.21107656955718995, "step": 15150 }, { "epoch": 1.3240433339157784, "grad_norm": 0.678915318512434, "learning_rate": 6.851260638364836e-06, "loss": 0.21038007736206055, "step": 15155 }, { "epoch": 1.32448016774419, "grad_norm": 0.6725644397382972, "learning_rate": 6.848899601575434e-06, "loss": 0.2039898157119751, "step": 15160 }, { "epoch": 1.3249170015726017, "grad_norm": 0.48088041964989764, "learning_rate": 6.846538087120741e-06, "loss": 0.19673629999160766, "step": 15165 }, { "epoch": 1.3253538354010135, "grad_norm": 0.5496672682979702, "learning_rate": 6.8441760956108605e-06, "loss": 0.20082995891571045, "step": 15170 }, { "epoch": 1.3257906692294252, "grad_norm": 0.49322503304649506, "learning_rate": 6.8418136276560125e-06, "loss": 0.18131961822509765, "step": 15175 }, { "epoch": 1.3262275030578368, "grad_norm": 0.6382908610023801, "learning_rate": 6.839450683866545e-06, "loss": 0.22051832675933838, "step": 15180 }, { "epoch": 1.3266643368862485, "grad_norm": 0.6784181467021807, "learning_rate": 6.837087264852926e-06, "loss": 0.195631742477417, "step": 15185 }, { "epoch": 1.32710117071466, "grad_norm": 0.5280681188781274, "learning_rate": 6.834723371225749e-06, "loss": 0.1847706913948059, "step": 15190 }, { "epoch": 1.3275380045430718, "grad_norm": 0.6042376055571508, "learning_rate": 6.832359003595728e-06, "loss": 0.19291436672210693, "step": 15195 }, { "epoch": 1.3279748383714836, "grad_norm": 0.532558361124557, "learning_rate": 6.8299941625737e-06, "loss": 0.2141528844833374, "step": 15200 }, { "epoch": 1.328411672199895, "grad_norm": 0.5372505570584769, "learning_rate": 6.827628848770623e-06, "loss": 0.18524014949798584, "step": 15205 }, { "epoch": 1.3288485060283068, "grad_norm": 0.6120001146097078, "learning_rate": 6.825263062797582e-06, "loss": 0.20353612899780274, "step": 15210 }, { "epoch": 1.3292853398567184, "grad_norm": 0.5603019915621564, "learning_rate": 6.822896805265777e-06, "loss": 0.2344193935394287, "step": 15215 }, { "epoch": 1.3297221736851301, "grad_norm": 0.5578690484385131, "learning_rate": 6.820530076786533e-06, "loss": 0.186834979057312, "step": 15220 }, { "epoch": 1.3301590075135419, "grad_norm": 0.4471626106338507, "learning_rate": 6.8181628779713e-06, "loss": 0.1743377685546875, "step": 15225 }, { "epoch": 1.3305958413419536, "grad_norm": 0.5553074687503644, "learning_rate": 6.815795209431644e-06, "loss": 0.19689064025878905, "step": 15230 }, { "epoch": 1.3310326751703652, "grad_norm": 0.7229972692377697, "learning_rate": 6.813427071779256e-06, "loss": 0.21266641616821289, "step": 15235 }, { "epoch": 1.331469508998777, "grad_norm": 0.6740650223435944, "learning_rate": 6.8110584656259475e-06, "loss": 0.18256065845489503, "step": 15240 }, { "epoch": 1.3319063428271884, "grad_norm": 0.5487009813201843, "learning_rate": 6.808689391583648e-06, "loss": 0.22006335258483886, "step": 15245 }, { "epoch": 1.3323431766556002, "grad_norm": 0.5243410940886797, "learning_rate": 6.806319850264413e-06, "loss": 0.17703739404678345, "step": 15250 }, { "epoch": 1.332780010484012, "grad_norm": 0.4842899553316911, "learning_rate": 6.8039498422804166e-06, "loss": 0.216084623336792, "step": 15255 }, { "epoch": 1.3332168443124235, "grad_norm": 0.5012534292429014, "learning_rate": 6.801579368243951e-06, "loss": 0.21176457405090332, "step": 15260 }, { "epoch": 1.3336536781408352, "grad_norm": 0.6714017419549121, "learning_rate": 6.799208428767432e-06, "loss": 0.1764833450317383, "step": 15265 }, { "epoch": 1.3340905119692468, "grad_norm": 0.6071580991994651, "learning_rate": 6.796837024463393e-06, "loss": 0.23534913063049318, "step": 15270 }, { "epoch": 1.3345273457976585, "grad_norm": 0.5535661325985528, "learning_rate": 6.79446515594449e-06, "loss": 0.20677518844604492, "step": 15275 }, { "epoch": 1.3349641796260703, "grad_norm": 0.5238457311792567, "learning_rate": 6.7920928238234995e-06, "loss": 0.18484864234924317, "step": 15280 }, { "epoch": 1.335401013454482, "grad_norm": 0.6996157227744358, "learning_rate": 6.789720028713313e-06, "loss": 0.2105794906616211, "step": 15285 }, { "epoch": 1.3358378472828936, "grad_norm": 0.5372459932087821, "learning_rate": 6.787346771226947e-06, "loss": 0.19033688306808472, "step": 15290 }, { "epoch": 1.3362746811113053, "grad_norm": 0.5652100256580478, "learning_rate": 6.784973051977534e-06, "loss": 0.1891411542892456, "step": 15295 }, { "epoch": 1.3367115149397168, "grad_norm": 0.5194220255359245, "learning_rate": 6.7825988715783294e-06, "loss": 0.20218257904052733, "step": 15300 }, { "epoch": 1.3371483487681286, "grad_norm": 0.5521266947210872, "learning_rate": 6.780224230642702e-06, "loss": 0.2019361972808838, "step": 15305 }, { "epoch": 1.3375851825965404, "grad_norm": 0.5344098055147508, "learning_rate": 6.777849129784146e-06, "loss": 0.2246175527572632, "step": 15310 }, { "epoch": 1.3380220164249519, "grad_norm": 0.5739151512505005, "learning_rate": 6.77547356961627e-06, "loss": 0.20157923698425292, "step": 15315 }, { "epoch": 1.3384588502533636, "grad_norm": 0.6041534387979193, "learning_rate": 6.773097550752801e-06, "loss": 0.19776949882507325, "step": 15320 }, { "epoch": 1.3388956840817752, "grad_norm": 0.5502347373778569, "learning_rate": 6.770721073807589e-06, "loss": 0.19610799551010133, "step": 15325 }, { "epoch": 1.339332517910187, "grad_norm": 0.502821640228216, "learning_rate": 6.768344139394598e-06, "loss": 0.2172839879989624, "step": 15330 }, { "epoch": 1.3397693517385987, "grad_norm": 0.5967298159220114, "learning_rate": 6.765966748127912e-06, "loss": 0.19793262481689453, "step": 15335 }, { "epoch": 1.3402061855670104, "grad_norm": 0.5286110193196549, "learning_rate": 6.763588900621731e-06, "loss": 0.19220278263092042, "step": 15340 }, { "epoch": 1.340643019395422, "grad_norm": 0.6316636070659797, "learning_rate": 6.7612105974903795e-06, "loss": 0.1957947015762329, "step": 15345 }, { "epoch": 1.3410798532238337, "grad_norm": 0.5713871746421824, "learning_rate": 6.7588318393482885e-06, "loss": 0.1848907232284546, "step": 15350 }, { "epoch": 1.3415166870522452, "grad_norm": 0.6732326989606218, "learning_rate": 6.756452626810017e-06, "loss": 0.22365059852600097, "step": 15355 }, { "epoch": 1.341953520880657, "grad_norm": 0.6179901191973252, "learning_rate": 6.754072960490237e-06, "loss": 0.199831759929657, "step": 15360 }, { "epoch": 1.3423903547090688, "grad_norm": 0.5119197286478567, "learning_rate": 6.751692841003736e-06, "loss": 0.20071237087249755, "step": 15365 }, { "epoch": 1.3428271885374803, "grad_norm": 0.49730575170056335, "learning_rate": 6.749312268965422e-06, "loss": 0.20085620880126953, "step": 15370 }, { "epoch": 1.343264022365892, "grad_norm": 0.5297478301122835, "learning_rate": 6.746931244990318e-06, "loss": 0.20823462009429933, "step": 15375 }, { "epoch": 1.3437008561943036, "grad_norm": 0.4980717932667083, "learning_rate": 6.744549769693565e-06, "loss": 0.1975444436073303, "step": 15380 }, { "epoch": 1.3441376900227153, "grad_norm": 0.5647570185978246, "learning_rate": 6.742167843690417e-06, "loss": 0.1812394976615906, "step": 15385 }, { "epoch": 1.344574523851127, "grad_norm": 0.6063573178629332, "learning_rate": 6.73978546759625e-06, "loss": 0.2041318893432617, "step": 15390 }, { "epoch": 1.3450113576795388, "grad_norm": 0.6095024640101353, "learning_rate": 6.737402642026554e-06, "loss": 0.2036651849746704, "step": 15395 }, { "epoch": 1.3454481915079504, "grad_norm": 0.6062761501555091, "learning_rate": 6.735019367596933e-06, "loss": 0.16822588443756104, "step": 15400 }, { "epoch": 1.3458850253363621, "grad_norm": 0.5444076826933234, "learning_rate": 6.732635644923109e-06, "loss": 0.22547645568847657, "step": 15405 }, { "epoch": 1.3463218591647736, "grad_norm": 0.573622242511249, "learning_rate": 6.730251474620921e-06, "loss": 0.18130872249603272, "step": 15410 }, { "epoch": 1.3467586929931854, "grad_norm": 0.48981178284438054, "learning_rate": 6.727866857306319e-06, "loss": 0.20472316741943358, "step": 15415 }, { "epoch": 1.3471955268215972, "grad_norm": 0.5401609262712548, "learning_rate": 6.725481793595373e-06, "loss": 0.18751707077026367, "step": 15420 }, { "epoch": 1.3476323606500087, "grad_norm": 0.7546582970334298, "learning_rate": 6.7230962841042695e-06, "loss": 0.21064162254333496, "step": 15425 }, { "epoch": 1.3480691944784204, "grad_norm": 0.6663713812857492, "learning_rate": 6.720710329449302e-06, "loss": 0.1577957272529602, "step": 15430 }, { "epoch": 1.348506028306832, "grad_norm": 0.701373902982418, "learning_rate": 6.718323930246888e-06, "loss": 0.186648952960968, "step": 15435 }, { "epoch": 1.3489428621352437, "grad_norm": 0.5800556973579184, "learning_rate": 6.715937087113559e-06, "loss": 0.19714784622192383, "step": 15440 }, { "epoch": 1.3493796959636555, "grad_norm": 0.5435149275409968, "learning_rate": 6.713549800665954e-06, "loss": 0.1993152379989624, "step": 15445 }, { "epoch": 1.3498165297920672, "grad_norm": 0.5380418329536343, "learning_rate": 6.7111620715208325e-06, "loss": 0.21161153316497802, "step": 15450 }, { "epoch": 1.3502533636204788, "grad_norm": 0.7041225527599083, "learning_rate": 6.708773900295068e-06, "loss": 0.20658626556396484, "step": 15455 }, { "epoch": 1.3506901974488905, "grad_norm": 0.6724748905759764, "learning_rate": 6.706385287605647e-06, "loss": 0.1985664486885071, "step": 15460 }, { "epoch": 1.351127031277302, "grad_norm": 0.6174345900977155, "learning_rate": 6.7039962340696695e-06, "loss": 0.21696128845214843, "step": 15465 }, { "epoch": 1.3515638651057138, "grad_norm": 0.5687339099684152, "learning_rate": 6.701606740304351e-06, "loss": 0.1905431866645813, "step": 15470 }, { "epoch": 1.3520006989341256, "grad_norm": 0.5372956433788941, "learning_rate": 6.6992168069270195e-06, "loss": 0.19614713191986083, "step": 15475 }, { "epoch": 1.352437532762537, "grad_norm": 0.4874059854060385, "learning_rate": 6.696826434555115e-06, "loss": 0.17478950023651124, "step": 15480 }, { "epoch": 1.3528743665909488, "grad_norm": 0.5715142777894973, "learning_rate": 6.694435623806197e-06, "loss": 0.20718035697937012, "step": 15485 }, { "epoch": 1.3533112004193604, "grad_norm": 0.6478077189175071, "learning_rate": 6.692044375297931e-06, "loss": 0.20202951431274413, "step": 15490 }, { "epoch": 1.3537480342477721, "grad_norm": 0.5138790292075658, "learning_rate": 6.6896526896481e-06, "loss": 0.20382957458496093, "step": 15495 }, { "epoch": 1.3541848680761839, "grad_norm": 0.6274550173726507, "learning_rate": 6.687260567474599e-06, "loss": 0.19039760828018187, "step": 15500 }, { "epoch": 1.3546217019045954, "grad_norm": 0.695612712176462, "learning_rate": 6.6848680093954336e-06, "loss": 0.17999646663665772, "step": 15505 }, { "epoch": 1.3550585357330072, "grad_norm": 0.5733410692698356, "learning_rate": 6.682475016028725e-06, "loss": 0.23214459419250488, "step": 15510 }, { "epoch": 1.355495369561419, "grad_norm": 0.7381109791077318, "learning_rate": 6.680081587992707e-06, "loss": 0.18528461456298828, "step": 15515 }, { "epoch": 1.3559322033898304, "grad_norm": 0.5137874064041675, "learning_rate": 6.677687725905724e-06, "loss": 0.20740633010864257, "step": 15520 }, { "epoch": 1.3563690372182422, "grad_norm": 0.647603773412755, "learning_rate": 6.6752934303862305e-06, "loss": 0.1886524200439453, "step": 15525 }, { "epoch": 1.356805871046654, "grad_norm": 0.5896576907219843, "learning_rate": 6.672898702052797e-06, "loss": 0.20972371101379395, "step": 15530 }, { "epoch": 1.3572427048750655, "grad_norm": 0.6164540958010437, "learning_rate": 6.670503541524106e-06, "loss": 0.1909210205078125, "step": 15535 }, { "epoch": 1.3576795387034772, "grad_norm": 0.6533738830275786, "learning_rate": 6.668107949418947e-06, "loss": 0.22050042152404786, "step": 15540 }, { "epoch": 1.3581163725318888, "grad_norm": 0.5736452596777725, "learning_rate": 6.665711926356225e-06, "loss": 0.19423093795776367, "step": 15545 }, { "epoch": 1.3585532063603005, "grad_norm": 0.5737576372493108, "learning_rate": 6.6633154729549556e-06, "loss": 0.1974717378616333, "step": 15550 }, { "epoch": 1.3589900401887123, "grad_norm": 0.5062575449293495, "learning_rate": 6.660918589834267e-06, "loss": 0.2124626636505127, "step": 15555 }, { "epoch": 1.3594268740171238, "grad_norm": 0.5361787632222594, "learning_rate": 6.658521277613392e-06, "loss": 0.2051474332809448, "step": 15560 }, { "epoch": 1.3598637078455356, "grad_norm": 0.49328744222886955, "learning_rate": 6.656123536911684e-06, "loss": 0.21662635803222657, "step": 15565 }, { "epoch": 1.360300541673947, "grad_norm": 0.5658812768731639, "learning_rate": 6.653725368348599e-06, "loss": 0.21406385898590088, "step": 15570 }, { "epoch": 1.3607373755023588, "grad_norm": 0.6452993062822419, "learning_rate": 6.651326772543706e-06, "loss": 0.18789215087890626, "step": 15575 }, { "epoch": 1.3611742093307706, "grad_norm": 0.5533608896689374, "learning_rate": 6.648927750116687e-06, "loss": 0.2159799814224243, "step": 15580 }, { "epoch": 1.3616110431591824, "grad_norm": 0.5576217575010217, "learning_rate": 6.6465283016873315e-06, "loss": 0.19790980815887452, "step": 15585 }, { "epoch": 1.3620478769875939, "grad_norm": 0.6599626209051975, "learning_rate": 6.644128427875539e-06, "loss": 0.20406997203826904, "step": 15590 }, { "epoch": 1.3624847108160056, "grad_norm": 0.5608249203625436, "learning_rate": 6.641728129301319e-06, "loss": 0.20917634963989257, "step": 15595 }, { "epoch": 1.3629215446444172, "grad_norm": 0.6987859222654167, "learning_rate": 6.639327406584795e-06, "loss": 0.20110273361206055, "step": 15600 }, { "epoch": 1.363358378472829, "grad_norm": 0.5080047131877372, "learning_rate": 6.6369262603461924e-06, "loss": 0.20471620559692383, "step": 15605 }, { "epoch": 1.3637952123012407, "grad_norm": 0.5449470842667234, "learning_rate": 6.634524691205851e-06, "loss": 0.2079195499420166, "step": 15610 }, { "epoch": 1.3642320461296522, "grad_norm": 0.5913070416689458, "learning_rate": 6.63212269978422e-06, "loss": 0.20872654914855956, "step": 15615 }, { "epoch": 1.364668879958064, "grad_norm": 0.68896814964604, "learning_rate": 6.629720286701856e-06, "loss": 0.18878650665283203, "step": 15620 }, { "epoch": 1.3651057137864755, "grad_norm": 0.5793081836550683, "learning_rate": 6.627317452579425e-06, "loss": 0.18932392597198486, "step": 15625 }, { "epoch": 1.3655425476148872, "grad_norm": 0.6105535839720965, "learning_rate": 6.6249141980377005e-06, "loss": 0.17351758480072021, "step": 15630 }, { "epoch": 1.365979381443299, "grad_norm": 0.641500908317494, "learning_rate": 6.62251052369757e-06, "loss": 0.1954630970954895, "step": 15635 }, { "epoch": 1.3664162152717108, "grad_norm": 0.7003797909338777, "learning_rate": 6.62010643018002e-06, "loss": 0.16300259828567504, "step": 15640 }, { "epoch": 1.3668530491001223, "grad_norm": 0.5585722517706877, "learning_rate": 6.617701918106155e-06, "loss": 0.22333848476409912, "step": 15645 }, { "epoch": 1.367289882928534, "grad_norm": 0.5768601394999089, "learning_rate": 6.615296988097183e-06, "loss": 0.16481428146362304, "step": 15650 }, { "epoch": 1.3677267167569456, "grad_norm": 0.6266628832717431, "learning_rate": 6.6128916407744185e-06, "loss": 0.1911777973175049, "step": 15655 }, { "epoch": 1.3681635505853573, "grad_norm": 0.6542898032607343, "learning_rate": 6.610485876759286e-06, "loss": 0.20757043361663818, "step": 15660 }, { "epoch": 1.368600384413769, "grad_norm": 0.5454248518637465, "learning_rate": 6.6080796966733195e-06, "loss": 0.1864437699317932, "step": 15665 }, { "epoch": 1.3690372182421806, "grad_norm": 0.5398639022997699, "learning_rate": 6.605673101138156e-06, "loss": 0.19809799194335936, "step": 15670 }, { "epoch": 1.3694740520705924, "grad_norm": 0.5225304309739495, "learning_rate": 6.603266090775542e-06, "loss": 0.23480374813079835, "step": 15675 }, { "epoch": 1.369910885899004, "grad_norm": 0.5525544451083674, "learning_rate": 6.600858666207334e-06, "loss": 0.17027584314346314, "step": 15680 }, { "epoch": 1.3703477197274156, "grad_norm": 0.6006049897891764, "learning_rate": 6.5984508280554895e-06, "loss": 0.1874699831008911, "step": 15685 }, { "epoch": 1.3707845535558274, "grad_norm": 0.5672697025353033, "learning_rate": 6.596042576942079e-06, "loss": 0.18213547468185426, "step": 15690 }, { "epoch": 1.3712213873842392, "grad_norm": 0.5918938979469022, "learning_rate": 6.5936339134892746e-06, "loss": 0.22742667198181152, "step": 15695 }, { "epoch": 1.3716582212126507, "grad_norm": 0.4622106086460335, "learning_rate": 6.591224838319359e-06, "loss": 0.1773201584815979, "step": 15700 }, { "epoch": 1.3720950550410624, "grad_norm": 0.6623166875983872, "learning_rate": 6.5888153520547185e-06, "loss": 0.1704052448272705, "step": 15705 }, { "epoch": 1.372531888869474, "grad_norm": 0.5416166132456899, "learning_rate": 6.586405455317847e-06, "loss": 0.2073071002960205, "step": 15710 }, { "epoch": 1.3729687226978857, "grad_norm": 0.8878665010798629, "learning_rate": 6.583995148731343e-06, "loss": 0.19909827709197997, "step": 15715 }, { "epoch": 1.3734055565262975, "grad_norm": 0.5846521064952351, "learning_rate": 6.581584432917914e-06, "loss": 0.19672441482543945, "step": 15720 }, { "epoch": 1.373842390354709, "grad_norm": 0.6732728956562977, "learning_rate": 6.579173308500369e-06, "loss": 0.22251346111297607, "step": 15725 }, { "epoch": 1.3742792241831208, "grad_norm": 0.5931199619779184, "learning_rate": 6.576761776101627e-06, "loss": 0.18110520839691163, "step": 15730 }, { "epoch": 1.3747160580115323, "grad_norm": 0.5360156572571301, "learning_rate": 6.574349836344707e-06, "loss": 0.22585413455963135, "step": 15735 }, { "epoch": 1.375152891839944, "grad_norm": 0.5747325981320998, "learning_rate": 6.571937489852739e-06, "loss": 0.19865384101867675, "step": 15740 }, { "epoch": 1.3755897256683558, "grad_norm": 0.6004704588552195, "learning_rate": 6.569524737248955e-06, "loss": 0.1867504596710205, "step": 15745 }, { "epoch": 1.3760265594967676, "grad_norm": 0.7759068232674908, "learning_rate": 6.567111579156691e-06, "loss": 0.18474867343902587, "step": 15750 }, { "epoch": 1.376463393325179, "grad_norm": 0.567447915177241, "learning_rate": 6.564698016199389e-06, "loss": 0.2013455867767334, "step": 15755 }, { "epoch": 1.3769002271535908, "grad_norm": 0.6026911873979145, "learning_rate": 6.562284049000598e-06, "loss": 0.18738937377929688, "step": 15760 }, { "epoch": 1.3773370609820024, "grad_norm": 0.6757302734680127, "learning_rate": 6.559869678183968e-06, "loss": 0.19006240367889404, "step": 15765 }, { "epoch": 1.3777738948104141, "grad_norm": 0.8577817154409864, "learning_rate": 6.557454904373254e-06, "loss": 0.18262454271316528, "step": 15770 }, { "epoch": 1.3782107286388259, "grad_norm": 0.5828009489670505, "learning_rate": 6.555039728192316e-06, "loss": 0.19801061153411864, "step": 15775 }, { "epoch": 1.3786475624672374, "grad_norm": 0.6230293639661317, "learning_rate": 6.5526241502651165e-06, "loss": 0.2123096466064453, "step": 15780 }, { "epoch": 1.3790843962956492, "grad_norm": 0.7110472579604911, "learning_rate": 6.550208171215723e-06, "loss": 0.18170931339263915, "step": 15785 }, { "epoch": 1.3795212301240607, "grad_norm": 0.49464768186555313, "learning_rate": 6.547791791668309e-06, "loss": 0.20493297576904296, "step": 15790 }, { "epoch": 1.3799580639524724, "grad_norm": 0.7792012051064108, "learning_rate": 6.5453750122471435e-06, "loss": 0.17378745079040528, "step": 15795 }, { "epoch": 1.3803948977808842, "grad_norm": 0.4475029889464294, "learning_rate": 6.54295783357661e-06, "loss": 0.20463082790374756, "step": 15800 }, { "epoch": 1.380831731609296, "grad_norm": 0.6939178932047269, "learning_rate": 6.540540256281185e-06, "loss": 0.20679922103881837, "step": 15805 }, { "epoch": 1.3812685654377075, "grad_norm": 0.6262671292133292, "learning_rate": 6.538122280985456e-06, "loss": 0.19225544929504396, "step": 15810 }, { "epoch": 1.3817053992661192, "grad_norm": 0.5990785603757303, "learning_rate": 6.535703908314106e-06, "loss": 0.1697854518890381, "step": 15815 }, { "epoch": 1.3821422330945308, "grad_norm": 0.5092886737239501, "learning_rate": 6.533285138891927e-06, "loss": 0.20452704429626464, "step": 15820 }, { "epoch": 1.3825790669229425, "grad_norm": 0.6871674474604182, "learning_rate": 6.53086597334381e-06, "loss": 0.21067590713500978, "step": 15825 }, { "epoch": 1.3830159007513543, "grad_norm": 0.6572384944289733, "learning_rate": 6.528446412294749e-06, "loss": 0.19690864086151122, "step": 15830 }, { "epoch": 1.3834527345797658, "grad_norm": 0.7439297503531732, "learning_rate": 6.526026456369839e-06, "loss": 0.1763787031173706, "step": 15835 }, { "epoch": 1.3838895684081776, "grad_norm": 0.6416304593002485, "learning_rate": 6.523606106194281e-06, "loss": 0.21046149730682373, "step": 15840 }, { "epoch": 1.384326402236589, "grad_norm": 0.5211079130018736, "learning_rate": 6.521185362393373e-06, "loss": 0.1855154037475586, "step": 15845 }, { "epoch": 1.3847632360650008, "grad_norm": 0.4985597695615439, "learning_rate": 6.518764225592518e-06, "loss": 0.217000412940979, "step": 15850 }, { "epoch": 1.3852000698934126, "grad_norm": 0.5321738551083209, "learning_rate": 6.516342696417218e-06, "loss": 0.20793752670288085, "step": 15855 }, { "epoch": 1.3856369037218244, "grad_norm": 0.57630666477551, "learning_rate": 6.51392077549308e-06, "loss": 0.18812735080718995, "step": 15860 }, { "epoch": 1.3860737375502359, "grad_norm": 0.6630996939626728, "learning_rate": 6.511498463445808e-06, "loss": 0.19932384490966798, "step": 15865 }, { "epoch": 1.3865105713786476, "grad_norm": 0.5618657423681588, "learning_rate": 6.509075760901211e-06, "loss": 0.18846118450164795, "step": 15870 }, { "epoch": 1.3869474052070592, "grad_norm": 0.6054750802196791, "learning_rate": 6.506652668485196e-06, "loss": 0.19253194332122803, "step": 15875 }, { "epoch": 1.387384239035471, "grad_norm": 0.5282545732350963, "learning_rate": 6.504229186823769e-06, "loss": 0.2105107307434082, "step": 15880 }, { "epoch": 1.3878210728638827, "grad_norm": 0.6110177017043734, "learning_rate": 6.501805316543044e-06, "loss": 0.17432128190994262, "step": 15885 }, { "epoch": 1.3882579066922942, "grad_norm": 0.5702616209013621, "learning_rate": 6.499381058269227e-06, "loss": 0.1990957260131836, "step": 15890 }, { "epoch": 1.388694740520706, "grad_norm": 0.6063237392128514, "learning_rate": 6.49695641262863e-06, "loss": 0.19542305469512938, "step": 15895 }, { "epoch": 1.3891315743491175, "grad_norm": 0.6118737119546886, "learning_rate": 6.494531380247661e-06, "loss": 0.17961210012435913, "step": 15900 }, { "epoch": 1.3895684081775292, "grad_norm": 0.7382714811015503, "learning_rate": 6.492105961752833e-06, "loss": 0.2248448133468628, "step": 15905 }, { "epoch": 1.390005242005941, "grad_norm": 0.6484629004337078, "learning_rate": 6.48968015777075e-06, "loss": 0.2266906499862671, "step": 15910 }, { "epoch": 1.3904420758343525, "grad_norm": 0.6861774308145732, "learning_rate": 6.487253968928127e-06, "loss": 0.20934314727783204, "step": 15915 }, { "epoch": 1.3908789096627643, "grad_norm": 0.5315834071891486, "learning_rate": 6.48482739585177e-06, "loss": 0.19841675758361815, "step": 15920 }, { "epoch": 1.391315743491176, "grad_norm": 0.6033743763797619, "learning_rate": 6.482400439168588e-06, "loss": 0.18315274715423585, "step": 15925 }, { "epoch": 1.3917525773195876, "grad_norm": 0.612577447032494, "learning_rate": 6.479973099505585e-06, "loss": 0.17418861389160156, "step": 15930 }, { "epoch": 1.3921894111479993, "grad_norm": 0.6707663450332069, "learning_rate": 6.477545377489872e-06, "loss": 0.21188220977783204, "step": 15935 }, { "epoch": 1.392626244976411, "grad_norm": 0.5994248171290472, "learning_rate": 6.475117273748648e-06, "loss": 0.18733866214752198, "step": 15940 }, { "epoch": 1.3930630788048226, "grad_norm": 0.5429905204831461, "learning_rate": 6.472688788909221e-06, "loss": 0.17437710762023925, "step": 15945 }, { "epoch": 1.3934999126332344, "grad_norm": 0.9893183080083672, "learning_rate": 6.47025992359899e-06, "loss": 0.21510715484619142, "step": 15950 }, { "epoch": 1.393936746461646, "grad_norm": 0.595233069571719, "learning_rate": 6.4678306784454545e-06, "loss": 0.17619731426239013, "step": 15955 }, { "epoch": 1.3943735802900576, "grad_norm": 0.5295505837517165, "learning_rate": 6.465401054076216e-06, "loss": 0.20235424041748046, "step": 15960 }, { "epoch": 1.3948104141184694, "grad_norm": 0.7258013644602632, "learning_rate": 6.462971051118969e-06, "loss": 0.1824526906013489, "step": 15965 }, { "epoch": 1.395247247946881, "grad_norm": 0.5212914453421293, "learning_rate": 6.460540670201507e-06, "loss": 0.22169017791748047, "step": 15970 }, { "epoch": 1.3956840817752927, "grad_norm": 0.5029307122342246, "learning_rate": 6.458109911951722e-06, "loss": 0.19771091938018798, "step": 15975 }, { "epoch": 1.3961209156037042, "grad_norm": 0.5770072415179307, "learning_rate": 6.455678776997602e-06, "loss": 0.2194532871246338, "step": 15980 }, { "epoch": 1.396557749432116, "grad_norm": 0.5389109381344593, "learning_rate": 6.453247265967237e-06, "loss": 0.21529912948608398, "step": 15985 }, { "epoch": 1.3969945832605277, "grad_norm": 0.5910126630505018, "learning_rate": 6.450815379488806e-06, "loss": 0.20934679508209228, "step": 15990 }, { "epoch": 1.3974314170889395, "grad_norm": 0.6359439501948908, "learning_rate": 6.448383118190593e-06, "loss": 0.20536556243896484, "step": 15995 }, { "epoch": 1.397868250917351, "grad_norm": 0.6765408388984456, "learning_rate": 6.445950482700974e-06, "loss": 0.21382904052734375, "step": 16000 }, { "epoch": 1.3983050847457628, "grad_norm": 0.5593942941087191, "learning_rate": 6.443517473648423e-06, "loss": 0.19032318592071534, "step": 16005 }, { "epoch": 1.3987419185741743, "grad_norm": 0.8422965280452291, "learning_rate": 6.441084091661512e-06, "loss": 0.1939000368118286, "step": 16010 }, { "epoch": 1.399178752402586, "grad_norm": 0.5722550120689904, "learning_rate": 6.438650337368906e-06, "loss": 0.17928175926208495, "step": 16015 }, { "epoch": 1.3996155862309978, "grad_norm": 0.5890500578811143, "learning_rate": 6.436216211399368e-06, "loss": 0.1945176601409912, "step": 16020 }, { "epoch": 1.4000524200594093, "grad_norm": 0.5334557168406168, "learning_rate": 6.433781714381761e-06, "loss": 0.21066584587097167, "step": 16025 }, { "epoch": 1.400489253887821, "grad_norm": 0.6532822574978125, "learning_rate": 6.431346846945034e-06, "loss": 0.1655795931816101, "step": 16030 }, { "epoch": 1.4009260877162326, "grad_norm": 0.6964951391988332, "learning_rate": 6.428911609718244e-06, "loss": 0.19650464057922362, "step": 16035 }, { "epoch": 1.4013629215446444, "grad_norm": 0.5945893092589206, "learning_rate": 6.426476003330533e-06, "loss": 0.21693000793457032, "step": 16040 }, { "epoch": 1.4017997553730561, "grad_norm": 0.5705964265818169, "learning_rate": 6.4240400284111445e-06, "loss": 0.23656721115112306, "step": 16045 }, { "epoch": 1.4022365892014679, "grad_norm": 0.5895817276800782, "learning_rate": 6.421603685589414e-06, "loss": 0.19954833984375, "step": 16050 }, { "epoch": 1.4026734230298794, "grad_norm": 0.5192500762841603, "learning_rate": 6.419166975494774e-06, "loss": 0.15657535791397095, "step": 16055 }, { "epoch": 1.4031102568582912, "grad_norm": 0.5099894898444248, "learning_rate": 6.416729898756752e-06, "loss": 0.20177831649780273, "step": 16060 }, { "epoch": 1.4035470906867027, "grad_norm": 0.4591594466131166, "learning_rate": 6.414292456004969e-06, "loss": 0.2147972345352173, "step": 16065 }, { "epoch": 1.4039839245151144, "grad_norm": 0.6536825951733847, "learning_rate": 6.411854647869141e-06, "loss": 0.20162053108215333, "step": 16070 }, { "epoch": 1.4044207583435262, "grad_norm": 0.6188750028990232, "learning_rate": 6.40941647497908e-06, "loss": 0.1836063504219055, "step": 16075 }, { "epoch": 1.4048575921719377, "grad_norm": 0.5031812162483558, "learning_rate": 6.406977937964686e-06, "loss": 0.2031106472015381, "step": 16080 }, { "epoch": 1.4052944260003495, "grad_norm": 0.4678061332441204, "learning_rate": 6.404539037455965e-06, "loss": 0.20632777214050294, "step": 16085 }, { "epoch": 1.405731259828761, "grad_norm": 0.6003795946509533, "learning_rate": 6.402099774083004e-06, "loss": 0.19311048984527587, "step": 16090 }, { "epoch": 1.4061680936571728, "grad_norm": 0.5844718932541936, "learning_rate": 6.39966014847599e-06, "loss": 0.18274080753326416, "step": 16095 }, { "epoch": 1.4066049274855845, "grad_norm": 0.5890572459282578, "learning_rate": 6.397220161265205e-06, "loss": 0.19891546964645385, "step": 16100 }, { "epoch": 1.4070417613139963, "grad_norm": 0.5978538409678502, "learning_rate": 6.394779813081023e-06, "loss": 0.19114620685577394, "step": 16105 }, { "epoch": 1.4074785951424078, "grad_norm": 0.698379370795522, "learning_rate": 6.392339104553907e-06, "loss": 0.19117145538330077, "step": 16110 }, { "epoch": 1.4079154289708196, "grad_norm": 0.6510335406116871, "learning_rate": 6.38989803631442e-06, "loss": 0.19856700897216797, "step": 16115 }, { "epoch": 1.408352262799231, "grad_norm": 0.6055572451489536, "learning_rate": 6.387456608993214e-06, "loss": 0.19600929021835328, "step": 16120 }, { "epoch": 1.4087890966276428, "grad_norm": 0.593751390946943, "learning_rate": 6.385014823221035e-06, "loss": 0.21646394729614257, "step": 16125 }, { "epoch": 1.4092259304560546, "grad_norm": 0.5602747929723167, "learning_rate": 6.3825726796287206e-06, "loss": 0.18508018255233766, "step": 16130 }, { "epoch": 1.4096627642844661, "grad_norm": 0.7246765549562352, "learning_rate": 6.3801301788472e-06, "loss": 0.18390365839004516, "step": 16135 }, { "epoch": 1.4100995981128779, "grad_norm": 0.6088524494437242, "learning_rate": 6.377687321507497e-06, "loss": 0.21191282272338868, "step": 16140 }, { "epoch": 1.4105364319412894, "grad_norm": 0.6592328080891072, "learning_rate": 6.375244108240728e-06, "loss": 0.19807368516921997, "step": 16145 }, { "epoch": 1.4109732657697012, "grad_norm": 0.5584764361223412, "learning_rate": 6.372800539678098e-06, "loss": 0.2070678949356079, "step": 16150 }, { "epoch": 1.411410099598113, "grad_norm": 0.6643180584299198, "learning_rate": 6.3703566164509055e-06, "loss": 0.18621289730072021, "step": 16155 }, { "epoch": 1.4118469334265247, "grad_norm": 0.5063379139594588, "learning_rate": 6.367912339190543e-06, "loss": 0.1863294005393982, "step": 16160 }, { "epoch": 1.4122837672549362, "grad_norm": 0.6170078609219439, "learning_rate": 6.36546770852849e-06, "loss": 0.1752922773361206, "step": 16165 }, { "epoch": 1.412720601083348, "grad_norm": 0.5376682029071902, "learning_rate": 6.36302272509632e-06, "loss": 0.22407071590423583, "step": 16170 }, { "epoch": 1.4131574349117595, "grad_norm": 0.678982613942435, "learning_rate": 6.3605773895257e-06, "loss": 0.2129345417022705, "step": 16175 }, { "epoch": 1.4135942687401712, "grad_norm": 0.6387147196440773, "learning_rate": 6.3581317024483805e-06, "loss": 0.18040544986724855, "step": 16180 }, { "epoch": 1.414031102568583, "grad_norm": 0.5152064677279159, "learning_rate": 6.35568566449621e-06, "loss": 0.2162109613418579, "step": 16185 }, { "epoch": 1.4144679363969945, "grad_norm": 0.6039767995452069, "learning_rate": 6.353239276301127e-06, "loss": 0.21920437812805177, "step": 16190 }, { "epoch": 1.4149047702254063, "grad_norm": 0.6519340293921059, "learning_rate": 6.350792538495155e-06, "loss": 0.19237382411956788, "step": 16195 }, { "epoch": 1.4153416040538178, "grad_norm": 0.6228305947662427, "learning_rate": 6.348345451710413e-06, "loss": 0.21116721630096436, "step": 16200 }, { "epoch": 1.4157784378822296, "grad_norm": 0.5189744633827522, "learning_rate": 6.34589801657911e-06, "loss": 0.1974470853805542, "step": 16205 }, { "epoch": 1.4162152717106413, "grad_norm": 0.5246255138129005, "learning_rate": 6.343450233733541e-06, "loss": 0.1889462947845459, "step": 16210 }, { "epoch": 1.416652105539053, "grad_norm": 0.7249582994082127, "learning_rate": 6.3410021038060955e-06, "loss": 0.19577068090438843, "step": 16215 }, { "epoch": 1.4170889393674646, "grad_norm": 0.49175210905623373, "learning_rate": 6.338553627429249e-06, "loss": 0.21900572776794433, "step": 16220 }, { "epoch": 1.4175257731958764, "grad_norm": 0.5559537051676914, "learning_rate": 6.33610480523557e-06, "loss": 0.18409332036972045, "step": 16225 }, { "epoch": 1.417962607024288, "grad_norm": 0.5964543775244938, "learning_rate": 6.333655637857713e-06, "loss": 0.16717157363891602, "step": 16230 }, { "epoch": 1.4183994408526996, "grad_norm": 0.5885648291309808, "learning_rate": 6.331206125928425e-06, "loss": 0.1828668475151062, "step": 16235 }, { "epoch": 1.4188362746811114, "grad_norm": 0.6142139027568898, "learning_rate": 6.328756270080539e-06, "loss": 0.19785737991333008, "step": 16240 }, { "epoch": 1.419273108509523, "grad_norm": 0.6197464100432358, "learning_rate": 6.326306070946976e-06, "loss": 0.16915279626846313, "step": 16245 }, { "epoch": 1.4197099423379347, "grad_norm": 0.5614029015092918, "learning_rate": 6.3238555291607496e-06, "loss": 0.19366796016693116, "step": 16250 }, { "epoch": 1.4201467761663462, "grad_norm": 0.8941448572525291, "learning_rate": 6.32140464535496e-06, "loss": 0.18658462762832642, "step": 16255 }, { "epoch": 1.420583609994758, "grad_norm": 0.7685344339672138, "learning_rate": 6.318953420162796e-06, "loss": 0.19524056911468507, "step": 16260 }, { "epoch": 1.4210204438231697, "grad_norm": 0.5985292092517848, "learning_rate": 6.316501854217535e-06, "loss": 0.20327937602996826, "step": 16265 }, { "epoch": 1.4214572776515815, "grad_norm": 0.7978566352192716, "learning_rate": 6.314049948152541e-06, "loss": 0.20733098983764647, "step": 16270 }, { "epoch": 1.421894111479993, "grad_norm": 0.6892314219321355, "learning_rate": 6.311597702601266e-06, "loss": 0.21753735542297364, "step": 16275 }, { "epoch": 1.4223309453084048, "grad_norm": 0.6022567029111819, "learning_rate": 6.309145118197249e-06, "loss": 0.2221273422241211, "step": 16280 }, { "epoch": 1.4227677791368163, "grad_norm": 0.5418281905584784, "learning_rate": 6.306692195574123e-06, "loss": 0.1928692102432251, "step": 16285 }, { "epoch": 1.423204612965228, "grad_norm": 0.5602572985979976, "learning_rate": 6.304238935365601e-06, "loss": 0.2196345806121826, "step": 16290 }, { "epoch": 1.4236414467936398, "grad_norm": 0.548002363928378, "learning_rate": 6.301785338205482e-06, "loss": 0.2086233139038086, "step": 16295 }, { "epoch": 1.4240782806220513, "grad_norm": 0.5848366461904769, "learning_rate": 6.29933140472766e-06, "loss": 0.22305567264556886, "step": 16300 }, { "epoch": 1.424515114450463, "grad_norm": 0.5521531657743378, "learning_rate": 6.296877135566108e-06, "loss": 0.21330056190490723, "step": 16305 }, { "epoch": 1.4249519482788746, "grad_norm": 0.5851744060360765, "learning_rate": 6.294422531354892e-06, "loss": 0.202727746963501, "step": 16310 }, { "epoch": 1.4253887821072864, "grad_norm": 0.5876495206795808, "learning_rate": 6.291967592728161e-06, "loss": 0.1946438193321228, "step": 16315 }, { "epoch": 1.4258256159356981, "grad_norm": 0.5405877415732977, "learning_rate": 6.2895123203201525e-06, "loss": 0.18161113262176515, "step": 16320 }, { "epoch": 1.4262624497641097, "grad_norm": 0.6248471096901485, "learning_rate": 6.287056714765185e-06, "loss": 0.20969161987304688, "step": 16325 }, { "epoch": 1.4266992835925214, "grad_norm": 0.5342915420577455, "learning_rate": 6.284600776697669e-06, "loss": 0.20658230781555176, "step": 16330 }, { "epoch": 1.4271361174209332, "grad_norm": 0.520547327350681, "learning_rate": 6.2821445067521016e-06, "loss": 0.19562921524047852, "step": 16335 }, { "epoch": 1.4275729512493447, "grad_norm": 0.5811292934358164, "learning_rate": 6.27968790556306e-06, "loss": 0.18006640672683716, "step": 16340 }, { "epoch": 1.4280097850777564, "grad_norm": 0.5969219667561079, "learning_rate": 6.277230973765209e-06, "loss": 0.20011017322540284, "step": 16345 }, { "epoch": 1.4284466189061682, "grad_norm": 0.5058485798699724, "learning_rate": 6.274773711993302e-06, "loss": 0.20900359153747558, "step": 16350 }, { "epoch": 1.4288834527345797, "grad_norm": 1.4848188750295166, "learning_rate": 6.272316120882174e-06, "loss": 0.19243083000183106, "step": 16355 }, { "epoch": 1.4293202865629915, "grad_norm": 0.6088008457150733, "learning_rate": 6.2698582010667455e-06, "loss": 0.1897615075111389, "step": 16360 }, { "epoch": 1.429757120391403, "grad_norm": 0.6247019612602548, "learning_rate": 6.267399953182026e-06, "loss": 0.20245800018310547, "step": 16365 }, { "epoch": 1.4301939542198148, "grad_norm": 0.5149407166481421, "learning_rate": 6.264941377863104e-06, "loss": 0.18923406600952147, "step": 16370 }, { "epoch": 1.4306307880482265, "grad_norm": 0.5704267330722687, "learning_rate": 6.262482475745155e-06, "loss": 0.20376086235046387, "step": 16375 }, { "epoch": 1.431067621876638, "grad_norm": 0.5968795163090598, "learning_rate": 6.260023247463443e-06, "loss": 0.19563891887664794, "step": 16380 }, { "epoch": 1.4315044557050498, "grad_norm": 0.6641658258360885, "learning_rate": 6.257563693653306e-06, "loss": 0.20100510120391846, "step": 16385 }, { "epoch": 1.4319412895334613, "grad_norm": 0.5497048168163754, "learning_rate": 6.255103814950179e-06, "loss": 0.19175965785980226, "step": 16390 }, { "epoch": 1.432378123361873, "grad_norm": 0.7027873030770501, "learning_rate": 6.252643611989568e-06, "loss": 0.20394742488861084, "step": 16395 }, { "epoch": 1.4328149571902848, "grad_norm": 0.5488494222604602, "learning_rate": 6.250183085407076e-06, "loss": 0.18273742198944093, "step": 16400 }, { "epoch": 1.4332517910186966, "grad_norm": 0.5105130927466005, "learning_rate": 6.247722235838376e-06, "loss": 0.2006554365158081, "step": 16405 }, { "epoch": 1.4336886248471081, "grad_norm": 0.6264433272007217, "learning_rate": 6.245261063919237e-06, "loss": 0.18924869298934938, "step": 16410 }, { "epoch": 1.4341254586755199, "grad_norm": 0.6635966746611316, "learning_rate": 6.242799570285502e-06, "loss": 0.19123412370681764, "step": 16415 }, { "epoch": 1.4345622925039314, "grad_norm": 0.5231782185689835, "learning_rate": 6.2403377555731025e-06, "loss": 0.18824031352996826, "step": 16420 }, { "epoch": 1.4349991263323432, "grad_norm": 0.8518496808305758, "learning_rate": 6.237875620418049e-06, "loss": 0.1803314208984375, "step": 16425 }, { "epoch": 1.435435960160755, "grad_norm": 0.630269864887374, "learning_rate": 6.23541316545644e-06, "loss": 0.20741596221923828, "step": 16430 }, { "epoch": 1.4358727939891665, "grad_norm": 0.5532527336357947, "learning_rate": 6.2329503913244505e-06, "loss": 0.19267245531082153, "step": 16435 }, { "epoch": 1.4363096278175782, "grad_norm": 0.5989814502357986, "learning_rate": 6.230487298658344e-06, "loss": 0.21414875984191895, "step": 16440 }, { "epoch": 1.4367464616459897, "grad_norm": 0.526313674951174, "learning_rate": 6.228023888094461e-06, "loss": 0.19122716188430786, "step": 16445 }, { "epoch": 1.4371832954744015, "grad_norm": 0.7356736989337967, "learning_rate": 6.2255601602692265e-06, "loss": 0.2136219024658203, "step": 16450 }, { "epoch": 1.4376201293028132, "grad_norm": 0.501036265862632, "learning_rate": 6.223096115819146e-06, "loss": 0.19518530368804932, "step": 16455 }, { "epoch": 1.438056963131225, "grad_norm": 0.5553627982358802, "learning_rate": 6.220631755380813e-06, "loss": 0.20363264083862304, "step": 16460 }, { "epoch": 1.4384937969596365, "grad_norm": 0.6985221613842147, "learning_rate": 6.2181670795908924e-06, "loss": 0.20105323791503907, "step": 16465 }, { "epoch": 1.4389306307880483, "grad_norm": 0.536443674946208, "learning_rate": 6.215702089086139e-06, "loss": 0.17218310832977296, "step": 16470 }, { "epoch": 1.4393674646164598, "grad_norm": 0.5416770145360004, "learning_rate": 6.213236784503385e-06, "loss": 0.18469569683074952, "step": 16475 }, { "epoch": 1.4398042984448716, "grad_norm": 0.6458003529912907, "learning_rate": 6.210771166479545e-06, "loss": 0.2039346694946289, "step": 16480 }, { "epoch": 1.4402411322732833, "grad_norm": 0.594775352001962, "learning_rate": 6.208305235651613e-06, "loss": 0.19689081907272338, "step": 16485 }, { "epoch": 1.4406779661016949, "grad_norm": 0.5369816563030971, "learning_rate": 6.2058389926566665e-06, "loss": 0.19602649211883544, "step": 16490 }, { "epoch": 1.4411147999301066, "grad_norm": 0.6716980480461351, "learning_rate": 6.203372438131862e-06, "loss": 0.20982847213745118, "step": 16495 }, { "epoch": 1.4415516337585181, "grad_norm": 0.6452920051972538, "learning_rate": 6.2009055727144355e-06, "loss": 0.17999342679977418, "step": 16500 }, { "epoch": 1.44198846758693, "grad_norm": 0.6882705385653373, "learning_rate": 6.1984383970417054e-06, "loss": 0.21689155101776122, "step": 16505 }, { "epoch": 1.4424253014153416, "grad_norm": 0.6587853949303337, "learning_rate": 6.195970911751069e-06, "loss": 0.19800305366516113, "step": 16510 }, { "epoch": 1.4428621352437534, "grad_norm": 0.5864185632303502, "learning_rate": 6.193503117480003e-06, "loss": 0.19132236242294312, "step": 16515 }, { "epoch": 1.443298969072165, "grad_norm": 0.8463073932122366, "learning_rate": 6.191035014866067e-06, "loss": 0.2186068296432495, "step": 16520 }, { "epoch": 1.4437358029005767, "grad_norm": 0.5614978138061282, "learning_rate": 6.188566604546897e-06, "loss": 0.19662578105926515, "step": 16525 }, { "epoch": 1.4441726367289882, "grad_norm": 0.623885955898118, "learning_rate": 6.186097887160208e-06, "loss": 0.21580686569213867, "step": 16530 }, { "epoch": 1.4446094705574, "grad_norm": 0.5004549901473171, "learning_rate": 6.183628863343798e-06, "loss": 0.19485074281692505, "step": 16535 }, { "epoch": 1.4450463043858117, "grad_norm": 0.5138316836299627, "learning_rate": 6.181159533735542e-06, "loss": 0.1914502739906311, "step": 16540 }, { "epoch": 1.4454831382142233, "grad_norm": 0.5552210559235301, "learning_rate": 6.178689898973394e-06, "loss": 0.22385261058807374, "step": 16545 }, { "epoch": 1.445919972042635, "grad_norm": 0.6713836644830485, "learning_rate": 6.176219959695386e-06, "loss": 0.17487473487854005, "step": 16550 }, { "epoch": 1.4463568058710465, "grad_norm": 0.6834038019374131, "learning_rate": 6.17374971653963e-06, "loss": 0.1937771439552307, "step": 16555 }, { "epoch": 1.4467936396994583, "grad_norm": 0.6510968079594283, "learning_rate": 6.171279170144314e-06, "loss": 0.24119811058044432, "step": 16560 }, { "epoch": 1.44723047352787, "grad_norm": 0.5550732868739915, "learning_rate": 6.16880832114771e-06, "loss": 0.17284758090972902, "step": 16565 }, { "epoch": 1.4476673073562818, "grad_norm": 0.6189066162702005, "learning_rate": 6.166337170188164e-06, "loss": 0.1899041175842285, "step": 16570 }, { "epoch": 1.4481041411846933, "grad_norm": 0.5470449838893177, "learning_rate": 6.163865717904098e-06, "loss": 0.17862166166305543, "step": 16575 }, { "epoch": 1.448540975013105, "grad_norm": 0.6419424122368007, "learning_rate": 6.161393964934017e-06, "loss": 0.1893848180770874, "step": 16580 }, { "epoch": 1.4489778088415166, "grad_norm": 0.6073542326384072, "learning_rate": 6.1589219119165015e-06, "loss": 0.1753084897994995, "step": 16585 }, { "epoch": 1.4494146426699284, "grad_norm": 0.5967871087637936, "learning_rate": 6.1564495594902075e-06, "loss": 0.16677802801132202, "step": 16590 }, { "epoch": 1.4498514764983401, "grad_norm": 0.6102895505408222, "learning_rate": 6.153976908293871e-06, "loss": 0.23101472854614258, "step": 16595 }, { "epoch": 1.4502883103267517, "grad_norm": 0.7036364662454725, "learning_rate": 6.1515039589663024e-06, "loss": 0.22623374462127685, "step": 16600 }, { "epoch": 1.4507251441551634, "grad_norm": 0.6738816458970215, "learning_rate": 6.149030712146393e-06, "loss": 0.19777061939239501, "step": 16605 }, { "epoch": 1.451161977983575, "grad_norm": 0.5959616516083921, "learning_rate": 6.146557168473109e-06, "loss": 0.20510497093200683, "step": 16610 }, { "epoch": 1.4515988118119867, "grad_norm": 0.6184587436789252, "learning_rate": 6.144083328585491e-06, "loss": 0.1866826057434082, "step": 16615 }, { "epoch": 1.4520356456403984, "grad_norm": 0.5981257419157808, "learning_rate": 6.141609193122661e-06, "loss": 0.20686559677124022, "step": 16620 }, { "epoch": 1.4524724794688102, "grad_norm": 0.6648313503307236, "learning_rate": 6.139134762723812e-06, "loss": 0.18336758613586426, "step": 16625 }, { "epoch": 1.4529093132972217, "grad_norm": 0.689978529501631, "learning_rate": 6.136660038028218e-06, "loss": 0.1948979139328003, "step": 16630 }, { "epoch": 1.4533461471256335, "grad_norm": 0.6001285916881469, "learning_rate": 6.134185019675227e-06, "loss": 0.1930086612701416, "step": 16635 }, { "epoch": 1.453782980954045, "grad_norm": 0.4546242551697723, "learning_rate": 6.131709708304261e-06, "loss": 0.20622241497039795, "step": 16640 }, { "epoch": 1.4542198147824568, "grad_norm": 0.6382752918211585, "learning_rate": 6.129234104554821e-06, "loss": 0.2204207420349121, "step": 16645 }, { "epoch": 1.4546566486108685, "grad_norm": 0.5386177330621505, "learning_rate": 6.126758209066481e-06, "loss": 0.1772557020187378, "step": 16650 }, { "epoch": 1.45509348243928, "grad_norm": 0.5527824715356692, "learning_rate": 6.124282022478892e-06, "loss": 0.1974060893058777, "step": 16655 }, { "epoch": 1.4555303162676918, "grad_norm": 0.5267852401317376, "learning_rate": 6.121805545431779e-06, "loss": 0.20096845626831056, "step": 16660 }, { "epoch": 1.4559671500961033, "grad_norm": 0.6103452213362828, "learning_rate": 6.119328778564944e-06, "loss": 0.18090907335281373, "step": 16665 }, { "epoch": 1.456403983924515, "grad_norm": 0.6250826000946013, "learning_rate": 6.116851722518261e-06, "loss": 0.17071524858474732, "step": 16670 }, { "epoch": 1.4568408177529268, "grad_norm": 0.5622107615352151, "learning_rate": 6.11437437793168e-06, "loss": 0.16408846378326417, "step": 16675 }, { "epoch": 1.4572776515813386, "grad_norm": 0.6069802786445233, "learning_rate": 6.111896745445227e-06, "loss": 0.18676974773406982, "step": 16680 }, { "epoch": 1.4577144854097501, "grad_norm": 0.6005416602217231, "learning_rate": 6.1094188256990016e-06, "loss": 0.1933762788772583, "step": 16685 }, { "epoch": 1.4581513192381619, "grad_norm": 0.5552540770639237, "learning_rate": 6.1069406193331745e-06, "loss": 0.2001436710357666, "step": 16690 }, { "epoch": 1.4585881530665734, "grad_norm": 0.5854502233069818, "learning_rate": 6.104462126987997e-06, "loss": 0.2012087345123291, "step": 16695 }, { "epoch": 1.4590249868949852, "grad_norm": 0.6126489401334075, "learning_rate": 6.1019833493037865e-06, "loss": 0.20096902847290038, "step": 16700 }, { "epoch": 1.459461820723397, "grad_norm": 0.5877200238048447, "learning_rate": 6.099504286920941e-06, "loss": 0.16282408237457274, "step": 16705 }, { "epoch": 1.4598986545518085, "grad_norm": 0.5652170825765076, "learning_rate": 6.097024940479927e-06, "loss": 0.22291021347045897, "step": 16710 }, { "epoch": 1.4603354883802202, "grad_norm": 0.4541153875827209, "learning_rate": 6.094545310621287e-06, "loss": 0.22137026786804198, "step": 16715 }, { "epoch": 1.4607723222086317, "grad_norm": 0.8426374804520719, "learning_rate": 6.092065397985636e-06, "loss": 0.17189903259277345, "step": 16720 }, { "epoch": 1.4612091560370435, "grad_norm": 0.6363885225064005, "learning_rate": 6.089585203213662e-06, "loss": 0.175591778755188, "step": 16725 }, { "epoch": 1.4616459898654552, "grad_norm": 0.7312082940300322, "learning_rate": 6.087104726946129e-06, "loss": 0.2077488899230957, "step": 16730 }, { "epoch": 1.4620828236938668, "grad_norm": 0.5275075350969177, "learning_rate": 6.084623969823866e-06, "loss": 0.1836326003074646, "step": 16735 }, { "epoch": 1.4625196575222785, "grad_norm": 0.5478827225514695, "learning_rate": 6.0821429324877835e-06, "loss": 0.19713051319122316, "step": 16740 }, { "epoch": 1.4629564913506903, "grad_norm": 0.6264154437044149, "learning_rate": 6.079661615578858e-06, "loss": 0.19023621082305908, "step": 16745 }, { "epoch": 1.4633933251791018, "grad_norm": 0.5439722728850274, "learning_rate": 6.0771800197381414e-06, "loss": 0.1882038116455078, "step": 16750 }, { "epoch": 1.4638301590075136, "grad_norm": 0.6339433096645462, "learning_rate": 6.074698145606757e-06, "loss": 0.17650964260101318, "step": 16755 }, { "epoch": 1.4642669928359253, "grad_norm": 0.6581514707576581, "learning_rate": 6.072215993825899e-06, "loss": 0.18166956901550294, "step": 16760 }, { "epoch": 1.4647038266643368, "grad_norm": 0.6473399848174952, "learning_rate": 6.069733565036834e-06, "loss": 0.196710467338562, "step": 16765 }, { "epoch": 1.4651406604927486, "grad_norm": 0.5494285468121387, "learning_rate": 6.0672508598809e-06, "loss": 0.1857455253601074, "step": 16770 }, { "epoch": 1.4655774943211601, "grad_norm": 0.6356539790406144, "learning_rate": 6.064767878999508e-06, "loss": 0.20911035537719727, "step": 16775 }, { "epoch": 1.4660143281495719, "grad_norm": 0.6050872594603162, "learning_rate": 6.062284623034139e-06, "loss": 0.17935744524002076, "step": 16780 }, { "epoch": 1.4664511619779836, "grad_norm": 0.5708174971253224, "learning_rate": 6.059801092626343e-06, "loss": 0.1964101552963257, "step": 16785 }, { "epoch": 1.4668879958063952, "grad_norm": 0.589803707324903, "learning_rate": 6.057317288417744e-06, "loss": 0.1694033145904541, "step": 16790 }, { "epoch": 1.467324829634807, "grad_norm": 0.6650208087963037, "learning_rate": 6.054833211050037e-06, "loss": 0.22651784420013427, "step": 16795 }, { "epoch": 1.4677616634632185, "grad_norm": 0.6173869648128193, "learning_rate": 6.052348861164985e-06, "loss": 0.18015002012252807, "step": 16800 }, { "epoch": 1.4681984972916302, "grad_norm": 0.6259870798690419, "learning_rate": 6.049864239404422e-06, "loss": 0.17447137832641602, "step": 16805 }, { "epoch": 1.468635331120042, "grad_norm": 0.6055185284594367, "learning_rate": 6.047379346410254e-06, "loss": 0.18356056213378907, "step": 16810 }, { "epoch": 1.4690721649484537, "grad_norm": 0.5786857726350664, "learning_rate": 6.044894182824453e-06, "loss": 0.21986207962036133, "step": 16815 }, { "epoch": 1.4695089987768652, "grad_norm": 0.6457398145642189, "learning_rate": 6.042408749289067e-06, "loss": 0.1662203073501587, "step": 16820 }, { "epoch": 1.469945832605277, "grad_norm": 0.6163200996382643, "learning_rate": 6.039923046446209e-06, "loss": 0.18955127000808716, "step": 16825 }, { "epoch": 1.4703826664336885, "grad_norm": 0.5953028244465738, "learning_rate": 6.037437074938065e-06, "loss": 0.18750298023223877, "step": 16830 }, { "epoch": 1.4708195002621003, "grad_norm": 0.5323216508909961, "learning_rate": 6.034950835406886e-06, "loss": 0.18226401805877684, "step": 16835 }, { "epoch": 1.471256334090512, "grad_norm": 0.5927538284864443, "learning_rate": 6.0324643284949955e-06, "loss": 0.19970691204071045, "step": 16840 }, { "epoch": 1.4716931679189236, "grad_norm": 0.6683048573379453, "learning_rate": 6.029977554844788e-06, "loss": 0.19287407398223877, "step": 16845 }, { "epoch": 1.4721300017473353, "grad_norm": 0.6483849900134188, "learning_rate": 6.027490515098721e-06, "loss": 0.18621068000793456, "step": 16850 }, { "epoch": 1.4725668355757469, "grad_norm": 0.684801868956243, "learning_rate": 6.025003209899325e-06, "loss": 0.21359014511108398, "step": 16855 }, { "epoch": 1.4730036694041586, "grad_norm": 0.5393470870467493, "learning_rate": 6.022515639889199e-06, "loss": 0.22878069877624513, "step": 16860 }, { "epoch": 1.4734405032325704, "grad_norm": 0.6905440261499444, "learning_rate": 6.020027805711007e-06, "loss": 0.1813215970993042, "step": 16865 }, { "epoch": 1.4738773370609821, "grad_norm": 0.6610953625172303, "learning_rate": 6.017539708007486e-06, "loss": 0.20993947982788086, "step": 16870 }, { "epoch": 1.4743141708893936, "grad_norm": 0.6385232859740532, "learning_rate": 6.015051347421439e-06, "loss": 0.16418867111206054, "step": 16875 }, { "epoch": 1.4747510047178054, "grad_norm": 0.6040297914843682, "learning_rate": 6.012562724595735e-06, "loss": 0.20021605491638184, "step": 16880 }, { "epoch": 1.475187838546217, "grad_norm": 0.5114017263203643, "learning_rate": 6.010073840173314e-06, "loss": 0.19398126602172852, "step": 16885 }, { "epoch": 1.4756246723746287, "grad_norm": 0.6553980292238039, "learning_rate": 6.007584694797183e-06, "loss": 0.18577256202697753, "step": 16890 }, { "epoch": 1.4760615062030404, "grad_norm": 0.587897400654432, "learning_rate": 6.005095289110412e-06, "loss": 0.16873621940612793, "step": 16895 }, { "epoch": 1.476498340031452, "grad_norm": 0.8155672198986086, "learning_rate": 6.0026056237561434e-06, "loss": 0.22185144424438477, "step": 16900 }, { "epoch": 1.4769351738598637, "grad_norm": 0.6093879076244894, "learning_rate": 6.000115699377588e-06, "loss": 0.19990212917327882, "step": 16905 }, { "epoch": 1.4773720076882753, "grad_norm": 0.6074089232949873, "learning_rate": 5.997625516618018e-06, "loss": 0.1964162826538086, "step": 16910 }, { "epoch": 1.477808841516687, "grad_norm": 0.6449964107658906, "learning_rate": 5.995135076120773e-06, "loss": 0.17752933502197266, "step": 16915 }, { "epoch": 1.4782456753450988, "grad_norm": 0.7203332928986533, "learning_rate": 5.992644378529264e-06, "loss": 0.18225282430648804, "step": 16920 }, { "epoch": 1.4786825091735105, "grad_norm": 0.6409918471244275, "learning_rate": 5.9901534244869655e-06, "loss": 0.20408172607421876, "step": 16925 }, { "epoch": 1.479119343001922, "grad_norm": 0.4811534740520326, "learning_rate": 5.987662214637415e-06, "loss": 0.17953412532806395, "step": 16930 }, { "epoch": 1.4795561768303338, "grad_norm": 0.7206093351423712, "learning_rate": 5.985170749624224e-06, "loss": 0.20593671798706054, "step": 16935 }, { "epoch": 1.4799930106587453, "grad_norm": 0.5125160164320411, "learning_rate": 5.982679030091063e-06, "loss": 0.1791846513748169, "step": 16940 }, { "epoch": 1.480429844487157, "grad_norm": 0.6636034828513336, "learning_rate": 5.980187056681669e-06, "loss": 0.19675612449645996, "step": 16945 }, { "epoch": 1.4808666783155688, "grad_norm": 0.5879795747761559, "learning_rate": 5.977694830039848e-06, "loss": 0.193877649307251, "step": 16950 }, { "epoch": 1.4813035121439804, "grad_norm": 0.6224636900510917, "learning_rate": 5.975202350809471e-06, "loss": 0.19039669036865234, "step": 16955 }, { "epoch": 1.4817403459723921, "grad_norm": 0.6178760829443509, "learning_rate": 5.97270961963447e-06, "loss": 0.17318378686904906, "step": 16960 }, { "epoch": 1.4821771798008037, "grad_norm": 0.5984111843749298, "learning_rate": 5.970216637158845e-06, "loss": 0.19229737520217896, "step": 16965 }, { "epoch": 1.4826140136292154, "grad_norm": 0.5687469653354994, "learning_rate": 5.967723404026664e-06, "loss": 0.1823662042617798, "step": 16970 }, { "epoch": 1.4830508474576272, "grad_norm": 0.6340420746089296, "learning_rate": 5.965229920882053e-06, "loss": 0.16720069646835328, "step": 16975 }, { "epoch": 1.483487681286039, "grad_norm": 0.5898067614315929, "learning_rate": 5.962736188369207e-06, "loss": 0.15898025035858154, "step": 16980 }, { "epoch": 1.4839245151144504, "grad_norm": 0.6031196533524705, "learning_rate": 5.960242207132387e-06, "loss": 0.20632777214050294, "step": 16985 }, { "epoch": 1.4843613489428622, "grad_norm": 0.6004876830868852, "learning_rate": 5.957747977815912e-06, "loss": 0.17935031652450562, "step": 16990 }, { "epoch": 1.4847981827712737, "grad_norm": 0.5440892996983412, "learning_rate": 5.955253501064171e-06, "loss": 0.1686359763145447, "step": 16995 }, { "epoch": 1.4852350165996855, "grad_norm": 0.6053402803272985, "learning_rate": 5.9527587775216154e-06, "loss": 0.1699061155319214, "step": 17000 }, { "epoch": 1.4856718504280972, "grad_norm": 0.5650640072910115, "learning_rate": 5.95026380783276e-06, "loss": 0.20723319053649902, "step": 17005 }, { "epoch": 1.4861086842565088, "grad_norm": 0.72498222489436, "learning_rate": 5.947768592642183e-06, "loss": 0.1831466317176819, "step": 17010 }, { "epoch": 1.4865455180849205, "grad_norm": 0.5101021826116664, "learning_rate": 5.945273132594522e-06, "loss": 0.1881713628768921, "step": 17015 }, { "epoch": 1.486982351913332, "grad_norm": 0.5338111158109573, "learning_rate": 5.942777428334488e-06, "loss": 0.19528813362121583, "step": 17020 }, { "epoch": 1.4874191857417438, "grad_norm": 0.5221734738452948, "learning_rate": 5.940281480506845e-06, "loss": 0.16904209852218627, "step": 17025 }, { "epoch": 1.4878560195701556, "grad_norm": 0.5271330827073201, "learning_rate": 5.937785289756426e-06, "loss": 0.2021691083908081, "step": 17030 }, { "epoch": 1.4882928533985673, "grad_norm": 0.6186665890589874, "learning_rate": 5.935288856728124e-06, "loss": 0.18314790725708008, "step": 17035 }, { "epoch": 1.4887296872269788, "grad_norm": 0.5185404325970259, "learning_rate": 5.932792182066894e-06, "loss": 0.2099764108657837, "step": 17040 }, { "epoch": 1.4891665210553906, "grad_norm": 0.583991406811151, "learning_rate": 5.930295266417756e-06, "loss": 0.16860675811767578, "step": 17045 }, { "epoch": 1.4896033548838021, "grad_norm": 0.5447066523347975, "learning_rate": 5.927798110425792e-06, "loss": 0.22932310104370118, "step": 17050 }, { "epoch": 1.4900401887122139, "grad_norm": 0.4994880450362924, "learning_rate": 5.925300714736142e-06, "loss": 0.18322105407714845, "step": 17055 }, { "epoch": 1.4904770225406256, "grad_norm": 0.6581484000333736, "learning_rate": 5.922803079994015e-06, "loss": 0.2035806655883789, "step": 17060 }, { "epoch": 1.4909138563690372, "grad_norm": 0.577178862515109, "learning_rate": 5.920305206844674e-06, "loss": 0.18716152906417846, "step": 17065 }, { "epoch": 1.491350690197449, "grad_norm": 0.5258397097373655, "learning_rate": 5.91780709593345e-06, "loss": 0.21327929496765136, "step": 17070 }, { "epoch": 1.4917875240258605, "grad_norm": 0.4986435749326805, "learning_rate": 5.91530874790573e-06, "loss": 0.19999630451202394, "step": 17075 }, { "epoch": 1.4922243578542722, "grad_norm": 0.5265018331351926, "learning_rate": 5.912810163406966e-06, "loss": 0.17518173456192015, "step": 17080 }, { "epoch": 1.492661191682684, "grad_norm": 0.5919160882782767, "learning_rate": 5.910311343082674e-06, "loss": 0.17539104223251342, "step": 17085 }, { "epoch": 1.4930980255110957, "grad_norm": 0.6162713839162642, "learning_rate": 5.907812287578422e-06, "loss": 0.19597746133804322, "step": 17090 }, { "epoch": 1.4935348593395072, "grad_norm": 1.476679089347169, "learning_rate": 5.905312997539845e-06, "loss": 0.20449514389038087, "step": 17095 }, { "epoch": 1.493971693167919, "grad_norm": 0.5477066442064596, "learning_rate": 5.90281347361264e-06, "loss": 0.18165791034698486, "step": 17100 }, { "epoch": 1.4944085269963305, "grad_norm": 0.5290372779076847, "learning_rate": 5.900313716442558e-06, "loss": 0.18737552165985108, "step": 17105 }, { "epoch": 1.4948453608247423, "grad_norm": 0.5853312651053828, "learning_rate": 5.897813726675418e-06, "loss": 0.18501358032226561, "step": 17110 }, { "epoch": 1.495282194653154, "grad_norm": 0.684322348301226, "learning_rate": 5.895313504957092e-06, "loss": 0.20751357078552246, "step": 17115 }, { "epoch": 1.4957190284815656, "grad_norm": 0.7000869411760535, "learning_rate": 5.892813051933515e-06, "loss": 0.1781686782836914, "step": 17120 }, { "epoch": 1.4961558623099773, "grad_norm": 0.8849243126051562, "learning_rate": 5.890312368250684e-06, "loss": 0.22133712768554686, "step": 17125 }, { "epoch": 1.4965926961383889, "grad_norm": 0.611927780939651, "learning_rate": 5.887811454554652e-06, "loss": 0.16545482873916625, "step": 17130 }, { "epoch": 1.4970295299668006, "grad_norm": 0.5508143951755371, "learning_rate": 5.885310311491533e-06, "loss": 0.1875913381576538, "step": 17135 }, { "epoch": 1.4974663637952124, "grad_norm": 0.7323527163881689, "learning_rate": 5.8828089397075005e-06, "loss": 0.22935292720794678, "step": 17140 }, { "epoch": 1.497903197623624, "grad_norm": 0.5966928551027323, "learning_rate": 5.880307339848788e-06, "loss": 0.19658564329147338, "step": 17145 }, { "epoch": 1.4983400314520356, "grad_norm": 0.6155572606637449, "learning_rate": 5.877805512561683e-06, "loss": 0.17584338188171386, "step": 17150 }, { "epoch": 1.4987768652804472, "grad_norm": 0.7633448133325327, "learning_rate": 5.8753034584925394e-06, "loss": 0.19362366199493408, "step": 17155 }, { "epoch": 1.499213699108859, "grad_norm": 0.6028336095993002, "learning_rate": 5.872801178287764e-06, "loss": 0.1646486282348633, "step": 17160 }, { "epoch": 1.4996505329372707, "grad_norm": 0.6334217607841863, "learning_rate": 5.870298672593824e-06, "loss": 0.19665693044662474, "step": 17165 }, { "epoch": 1.5000873667656824, "grad_norm": 0.6283096792122671, "learning_rate": 5.867795942057243e-06, "loss": 0.1887303590774536, "step": 17170 }, { "epoch": 1.500524200594094, "grad_norm": 0.6267795106251282, "learning_rate": 5.865292987324605e-06, "loss": 0.18643317222595215, "step": 17175 }, { "epoch": 1.5009610344225057, "grad_norm": 0.6355801271231195, "learning_rate": 5.862789809042553e-06, "loss": 0.17391622066497803, "step": 17180 }, { "epoch": 1.5013978682509173, "grad_norm": 0.5700234625364002, "learning_rate": 5.8602864078577834e-06, "loss": 0.19019533395767213, "step": 17185 }, { "epoch": 1.501834702079329, "grad_norm": 0.5217032789764777, "learning_rate": 5.857782784417054e-06, "loss": 0.1961112141609192, "step": 17190 }, { "epoch": 1.5022715359077408, "grad_norm": 0.5333712535477938, "learning_rate": 5.855278939367179e-06, "loss": 0.17773562669754028, "step": 17195 }, { "epoch": 1.5027083697361525, "grad_norm": 0.6549452505487372, "learning_rate": 5.8527748733550295e-06, "loss": 0.22125546932220458, "step": 17200 }, { "epoch": 1.503145203564564, "grad_norm": 0.6306060402674866, "learning_rate": 5.850270587027533e-06, "loss": 0.17556424140930177, "step": 17205 }, { "epoch": 1.5035820373929756, "grad_norm": 0.7044513713376084, "learning_rate": 5.847766081031675e-06, "loss": 0.1719510316848755, "step": 17210 }, { "epoch": 1.5040188712213873, "grad_norm": 0.6932618508779467, "learning_rate": 5.8452613560144976e-06, "loss": 0.16908320188522338, "step": 17215 }, { "epoch": 1.504455705049799, "grad_norm": 0.6896153028406921, "learning_rate": 5.8427564126230995e-06, "loss": 0.18314542770385742, "step": 17220 }, { "epoch": 1.5048925388782108, "grad_norm": 0.5665266878221873, "learning_rate": 5.840251251504636e-06, "loss": 0.2058760404586792, "step": 17225 }, { "epoch": 1.5053293727066224, "grad_norm": 0.4866182310353826, "learning_rate": 5.837745873306317e-06, "loss": 0.20254459381103515, "step": 17230 }, { "epoch": 1.5057662065350341, "grad_norm": 0.6252890086923594, "learning_rate": 5.835240278675409e-06, "loss": 0.18917207717895507, "step": 17235 }, { "epoch": 1.5062030403634457, "grad_norm": 0.5545993377987223, "learning_rate": 5.832734468259239e-06, "loss": 0.2102170467376709, "step": 17240 }, { "epoch": 1.5066398741918574, "grad_norm": 0.5064778444561764, "learning_rate": 5.830228442705181e-06, "loss": 0.21224472522735596, "step": 17245 }, { "epoch": 1.5070767080202692, "grad_norm": 0.6338152907502328, "learning_rate": 5.827722202660674e-06, "loss": 0.16021184921264647, "step": 17250 }, { "epoch": 1.507513541848681, "grad_norm": 0.5366250424769802, "learning_rate": 5.825215748773205e-06, "loss": 0.18752458095550537, "step": 17255 }, { "epoch": 1.5079503756770924, "grad_norm": 0.5858215164472836, "learning_rate": 5.822709081690322e-06, "loss": 0.17199273109436036, "step": 17260 }, { "epoch": 1.508387209505504, "grad_norm": 0.6431096163688066, "learning_rate": 5.8202022020596226e-06, "loss": 0.17046196460723878, "step": 17265 }, { "epoch": 1.5088240433339157, "grad_norm": 0.49649251998842353, "learning_rate": 5.8176951105287625e-06, "loss": 0.17922009229660035, "step": 17270 }, { "epoch": 1.5092608771623275, "grad_norm": 0.7718330081446303, "learning_rate": 5.815187807745453e-06, "loss": 0.15907827615737916, "step": 17275 }, { "epoch": 1.5096977109907392, "grad_norm": 0.6649989206125718, "learning_rate": 5.812680294357456e-06, "loss": 0.21008458137512206, "step": 17280 }, { "epoch": 1.5101345448191508, "grad_norm": 0.6665623255529831, "learning_rate": 5.810172571012592e-06, "loss": 0.18806259632110595, "step": 17285 }, { "epoch": 1.5105713786475623, "grad_norm": 0.6171847907100126, "learning_rate": 5.8076646383587355e-06, "loss": 0.18278579711914061, "step": 17290 }, { "epoch": 1.511008212475974, "grad_norm": 0.5511322818030202, "learning_rate": 5.805156497043811e-06, "loss": 0.20013151168823243, "step": 17295 }, { "epoch": 1.5114450463043858, "grad_norm": 0.5326913736034616, "learning_rate": 5.802648147715802e-06, "loss": 0.20823235511779786, "step": 17300 }, { "epoch": 1.5118818801327976, "grad_norm": 0.6465019817880424, "learning_rate": 5.800139591022742e-06, "loss": 0.1927370548248291, "step": 17305 }, { "epoch": 1.5123187139612093, "grad_norm": 0.5733170130882054, "learning_rate": 5.797630827612719e-06, "loss": 0.18541522026062013, "step": 17310 }, { "epoch": 1.5127555477896208, "grad_norm": 0.49144661237202053, "learning_rate": 5.795121858133876e-06, "loss": 0.18161000013351442, "step": 17315 }, { "epoch": 1.5131923816180324, "grad_norm": 0.6465099638559021, "learning_rate": 5.792612683234408e-06, "loss": 0.1878972053527832, "step": 17320 }, { "epoch": 1.5136292154464441, "grad_norm": 0.4467549552612733, "learning_rate": 5.790103303562563e-06, "loss": 0.17769625186920165, "step": 17325 }, { "epoch": 1.5140660492748559, "grad_norm": 0.6835050937825266, "learning_rate": 5.787593719766641e-06, "loss": 0.17734158039093018, "step": 17330 }, { "epoch": 1.5145028831032676, "grad_norm": 0.5469217473983995, "learning_rate": 5.7850839324949955e-06, "loss": 0.20690252780914306, "step": 17335 }, { "epoch": 1.5149397169316792, "grad_norm": 0.5785339944413598, "learning_rate": 5.782573942396037e-06, "loss": 0.19971376657485962, "step": 17340 }, { "epoch": 1.5153765507600907, "grad_norm": 1.3306182159002875, "learning_rate": 5.78006375011822e-06, "loss": 0.20263419151306153, "step": 17345 }, { "epoch": 1.5158133845885025, "grad_norm": 0.4618618644060804, "learning_rate": 5.777553356310056e-06, "loss": 0.17345092296600342, "step": 17350 }, { "epoch": 1.5162502184169142, "grad_norm": 0.6684802267194879, "learning_rate": 5.77504276162011e-06, "loss": 0.19676005840301514, "step": 17355 }, { "epoch": 1.516687052245326, "grad_norm": 0.5369840696458945, "learning_rate": 5.772531966696995e-06, "loss": 0.17170028686523436, "step": 17360 }, { "epoch": 1.5171238860737377, "grad_norm": 0.7350830889748774, "learning_rate": 5.77002097218938e-06, "loss": 0.18918516635894775, "step": 17365 }, { "epoch": 1.5175607199021492, "grad_norm": 0.5987255405346112, "learning_rate": 5.76750977874598e-06, "loss": 0.17274949550628663, "step": 17370 }, { "epoch": 1.5179975537305608, "grad_norm": 0.5689821928231271, "learning_rate": 5.764998387015568e-06, "loss": 0.17566388845443726, "step": 17375 }, { "epoch": 1.5184343875589725, "grad_norm": 0.5004043171474826, "learning_rate": 5.762486797646961e-06, "loss": 0.17663402557373048, "step": 17380 }, { "epoch": 1.5188712213873843, "grad_norm": 0.670372655777269, "learning_rate": 5.759975011289035e-06, "loss": 0.1764540672302246, "step": 17385 }, { "epoch": 1.519308055215796, "grad_norm": 0.6465839634536974, "learning_rate": 5.757463028590711e-06, "loss": 0.1750849485397339, "step": 17390 }, { "epoch": 1.5197448890442076, "grad_norm": 0.5381302694335688, "learning_rate": 5.754950850200961e-06, "loss": 0.17126692533493043, "step": 17395 }, { "epoch": 1.520181722872619, "grad_norm": 0.6506191504292557, "learning_rate": 5.752438476768813e-06, "loss": 0.17359042167663574, "step": 17400 }, { "epoch": 1.5206185567010309, "grad_norm": 0.599198571485543, "learning_rate": 5.749925908943338e-06, "loss": 0.21698670387268065, "step": 17405 }, { "epoch": 1.5210553905294426, "grad_norm": 0.6367879688607603, "learning_rate": 5.7474131473736616e-06, "loss": 0.19738357067108153, "step": 17410 }, { "epoch": 1.5214922243578544, "grad_norm": 0.6552202892453525, "learning_rate": 5.7449001927089595e-06, "loss": 0.2077183723449707, "step": 17415 }, { "epoch": 1.521929058186266, "grad_norm": 0.6843333198576707, "learning_rate": 5.742387045598456e-06, "loss": 0.20442681312561034, "step": 17420 }, { "epoch": 1.5223658920146776, "grad_norm": 0.5849399988502798, "learning_rate": 5.7398737066914245e-06, "loss": 0.19541828632354735, "step": 17425 }, { "epoch": 1.5228027258430892, "grad_norm": 0.6368675271458036, "learning_rate": 5.737360176637189e-06, "loss": 0.19873721599578859, "step": 17430 }, { "epoch": 1.523239559671501, "grad_norm": 0.5669472637862008, "learning_rate": 5.734846456085126e-06, "loss": 0.17873270511627198, "step": 17435 }, { "epoch": 1.5236763934999127, "grad_norm": 0.5027433646192361, "learning_rate": 5.732332545684653e-06, "loss": 0.17882574796676637, "step": 17440 }, { "epoch": 1.5241132273283244, "grad_norm": 0.5225124400421238, "learning_rate": 5.7298184460852445e-06, "loss": 0.1931428551673889, "step": 17445 }, { "epoch": 1.524550061156736, "grad_norm": 0.5272400467850884, "learning_rate": 5.727304157936422e-06, "loss": 0.18032827377319335, "step": 17450 }, { "epoch": 1.5249868949851475, "grad_norm": 0.6010650233863641, "learning_rate": 5.7247896818877515e-06, "loss": 0.2033385753631592, "step": 17455 }, { "epoch": 1.5254237288135593, "grad_norm": 0.5641546250108102, "learning_rate": 5.722275018588852e-06, "loss": 0.2169647693634033, "step": 17460 }, { "epoch": 1.525860562641971, "grad_norm": 0.5726628016523947, "learning_rate": 5.719760168689392e-06, "loss": 0.16400526762008666, "step": 17465 }, { "epoch": 1.5262973964703828, "grad_norm": 0.6178260942616, "learning_rate": 5.7172451328390834e-06, "loss": 0.19262717962265014, "step": 17470 }, { "epoch": 1.5267342302987943, "grad_norm": 0.5952218773556824, "learning_rate": 5.7147299116876886e-06, "loss": 0.185699987411499, "step": 17475 }, { "epoch": 1.527171064127206, "grad_norm": 0.635517614529806, "learning_rate": 5.712214505885019e-06, "loss": 0.21802563667297364, "step": 17480 }, { "epoch": 1.5276078979556176, "grad_norm": 0.6048079377674453, "learning_rate": 5.70969891608093e-06, "loss": 0.20456018447875976, "step": 17485 }, { "epoch": 1.5280447317840293, "grad_norm": 0.7992065440428795, "learning_rate": 5.707183142925329e-06, "loss": 0.14444346427917482, "step": 17490 }, { "epoch": 1.528481565612441, "grad_norm": 0.49598633973424716, "learning_rate": 5.7046671870681695e-06, "loss": 0.19082267284393312, "step": 17495 }, { "epoch": 1.5289183994408528, "grad_norm": 0.5918657751039577, "learning_rate": 5.70215104915945e-06, "loss": 0.1751593232154846, "step": 17500 }, { "epoch": 1.5293552332692644, "grad_norm": 0.547999913772686, "learning_rate": 5.699634729849217e-06, "loss": 0.20549440383911133, "step": 17505 }, { "epoch": 1.529792067097676, "grad_norm": 0.5776227307441538, "learning_rate": 5.697118229787566e-06, "loss": 0.1989351511001587, "step": 17510 }, { "epoch": 1.5302289009260877, "grad_norm": 0.5995491039480064, "learning_rate": 5.694601549624639e-06, "loss": 0.20526504516601562, "step": 17515 }, { "epoch": 1.5306657347544994, "grad_norm": 0.60764736138305, "learning_rate": 5.6920846900106195e-06, "loss": 0.2060934066772461, "step": 17520 }, { "epoch": 1.5311025685829112, "grad_norm": 0.6149404656988766, "learning_rate": 5.689567651595745e-06, "loss": 0.19281502962112426, "step": 17525 }, { "epoch": 1.5315394024113227, "grad_norm": 0.5461322507599065, "learning_rate": 5.687050435030292e-06, "loss": 0.1928807497024536, "step": 17530 }, { "epoch": 1.5319762362397344, "grad_norm": 0.7532577465007533, "learning_rate": 5.684533040964586e-06, "loss": 0.18386974334716796, "step": 17535 }, { "epoch": 1.532413070068146, "grad_norm": 0.5781489115662546, "learning_rate": 5.6820154700489996e-06, "loss": 0.2324425220489502, "step": 17540 }, { "epoch": 1.5328499038965577, "grad_norm": 0.626053845288777, "learning_rate": 5.679497722933952e-06, "loss": 0.21561236381530763, "step": 17545 }, { "epoch": 1.5332867377249695, "grad_norm": 0.6216890432817287, "learning_rate": 5.676979800269902e-06, "loss": 0.211777400970459, "step": 17550 }, { "epoch": 1.5337235715533812, "grad_norm": 0.5468596968198594, "learning_rate": 5.674461702707362e-06, "loss": 0.18985626697540284, "step": 17555 }, { "epoch": 1.5341604053817928, "grad_norm": 0.6307768417139675, "learning_rate": 5.6719434308968834e-06, "loss": 0.1862906575202942, "step": 17560 }, { "epoch": 1.5345972392102043, "grad_norm": 0.5717654781098307, "learning_rate": 5.669424985489064e-06, "loss": 0.19996240139007568, "step": 17565 }, { "epoch": 1.535034073038616, "grad_norm": 0.694229515506501, "learning_rate": 5.666906367134547e-06, "loss": 0.20167028903961182, "step": 17570 }, { "epoch": 1.5354709068670278, "grad_norm": 0.707623686477424, "learning_rate": 5.664387576484023e-06, "loss": 0.17576906681060792, "step": 17575 }, { "epoch": 1.5359077406954396, "grad_norm": 0.6367842539249956, "learning_rate": 5.661868614188221e-06, "loss": 0.20425279140472413, "step": 17580 }, { "epoch": 1.536344574523851, "grad_norm": 0.49225272500356343, "learning_rate": 5.659349480897918e-06, "loss": 0.20955147743225097, "step": 17585 }, { "epoch": 1.5367814083522628, "grad_norm": 0.6448800248279355, "learning_rate": 5.656830177263936e-06, "loss": 0.19139323234558106, "step": 17590 }, { "epoch": 1.5372182421806744, "grad_norm": 0.6544052976308502, "learning_rate": 5.65431070393714e-06, "loss": 0.20857446193695067, "step": 17595 }, { "epoch": 1.5376550760090861, "grad_norm": 0.6278978000383005, "learning_rate": 5.651791061568439e-06, "loss": 0.19624109268188478, "step": 17600 }, { "epoch": 1.5380919098374979, "grad_norm": 0.5097503593188968, "learning_rate": 5.649271250808784e-06, "loss": 0.17754743099212647, "step": 17605 }, { "epoch": 1.5385287436659096, "grad_norm": 0.60923605109401, "learning_rate": 5.646751272309173e-06, "loss": 0.2039968490600586, "step": 17610 }, { "epoch": 1.5389655774943212, "grad_norm": 0.7116535566330552, "learning_rate": 5.644231126720644e-06, "loss": 0.17534942626953126, "step": 17615 }, { "epoch": 1.5394024113227327, "grad_norm": 0.5694527329613673, "learning_rate": 5.641710814694279e-06, "loss": 0.1962894916534424, "step": 17620 }, { "epoch": 1.5398392451511445, "grad_norm": 0.5325264013384423, "learning_rate": 5.6391903368812055e-06, "loss": 0.1828845262527466, "step": 17625 }, { "epoch": 1.5402760789795562, "grad_norm": 0.6523431497093033, "learning_rate": 5.636669693932591e-06, "loss": 0.1963653564453125, "step": 17630 }, { "epoch": 1.540712912807968, "grad_norm": 0.5794120430003329, "learning_rate": 5.634148886499643e-06, "loss": 0.17312939167022706, "step": 17635 }, { "epoch": 1.5411497466363795, "grad_norm": 0.544852222355658, "learning_rate": 5.6316279152336195e-06, "loss": 0.18050765991210938, "step": 17640 }, { "epoch": 1.5415865804647912, "grad_norm": 0.6330475061338459, "learning_rate": 5.629106780785813e-06, "loss": 0.18647842407226561, "step": 17645 }, { "epoch": 1.5420234142932028, "grad_norm": 0.5792966438400109, "learning_rate": 5.626585483807564e-06, "loss": 0.18839653730392455, "step": 17650 }, { "epoch": 1.5424602481216145, "grad_norm": 0.6075347674278292, "learning_rate": 5.624064024950252e-06, "loss": 0.1991940140724182, "step": 17655 }, { "epoch": 1.5428970819500263, "grad_norm": 0.6557401739933629, "learning_rate": 5.621542404865297e-06, "loss": 0.19561680555343627, "step": 17660 }, { "epoch": 1.543333915778438, "grad_norm": 0.5865849570432754, "learning_rate": 5.619020624204163e-06, "loss": 0.18347567319869995, "step": 17665 }, { "epoch": 1.5437707496068496, "grad_norm": 0.5614204767744883, "learning_rate": 5.616498683618355e-06, "loss": 0.17357815504074098, "step": 17670 }, { "epoch": 1.544207583435261, "grad_norm": 0.6311258379304582, "learning_rate": 5.613976583759422e-06, "loss": 0.18614388704299928, "step": 17675 }, { "epoch": 1.5446444172636729, "grad_norm": 0.5151809952231131, "learning_rate": 5.611454325278948e-06, "loss": 0.19493248462677001, "step": 17680 }, { "epoch": 1.5450812510920846, "grad_norm": 0.6005514020927112, "learning_rate": 5.608931908828561e-06, "loss": 0.19247908592224122, "step": 17685 }, { "epoch": 1.5455180849204964, "grad_norm": 0.6174929872410082, "learning_rate": 5.6064093350599326e-06, "loss": 0.18455673456192018, "step": 17690 }, { "epoch": 1.545954918748908, "grad_norm": 0.4885843982973166, "learning_rate": 5.60388660462477e-06, "loss": 0.18384567499160767, "step": 17695 }, { "epoch": 1.5463917525773194, "grad_norm": 0.571702706495367, "learning_rate": 5.6013637181748235e-06, "loss": 0.1877178430557251, "step": 17700 }, { "epoch": 1.5468285864057312, "grad_norm": 0.6034140643758239, "learning_rate": 5.5988406763618875e-06, "loss": 0.21818194389343262, "step": 17705 }, { "epoch": 1.547265420234143, "grad_norm": 0.5279830895700732, "learning_rate": 5.596317479837788e-06, "loss": 0.1813034176826477, "step": 17710 }, { "epoch": 1.5477022540625547, "grad_norm": 0.7546583709061787, "learning_rate": 5.593794129254399e-06, "loss": 0.1975954294204712, "step": 17715 }, { "epoch": 1.5481390878909664, "grad_norm": 0.710259735630053, "learning_rate": 5.5912706252636295e-06, "loss": 0.1848433017730713, "step": 17720 }, { "epoch": 1.548575921719378, "grad_norm": 0.5987142574228995, "learning_rate": 5.588746968517429e-06, "loss": 0.18217675685882567, "step": 17725 }, { "epoch": 1.5490127555477895, "grad_norm": 0.6384865965410877, "learning_rate": 5.586223159667789e-06, "loss": 0.22677431106567383, "step": 17730 }, { "epoch": 1.5494495893762013, "grad_norm": 0.5324808848452806, "learning_rate": 5.583699199366735e-06, "loss": 0.20019853115081787, "step": 17735 }, { "epoch": 1.549886423204613, "grad_norm": 0.6530639129085902, "learning_rate": 5.581175088266339e-06, "loss": 0.18315472602844238, "step": 17740 }, { "epoch": 1.5503232570330248, "grad_norm": 0.6583609871114102, "learning_rate": 5.578650827018704e-06, "loss": 0.19273477792739868, "step": 17745 }, { "epoch": 1.5507600908614363, "grad_norm": 0.6365926716743382, "learning_rate": 5.57612641627598e-06, "loss": 0.18403751850128175, "step": 17750 }, { "epoch": 1.5511969246898478, "grad_norm": 0.5550699038662168, "learning_rate": 5.573601856690347e-06, "loss": 0.17317304611206055, "step": 17755 }, { "epoch": 1.5516337585182596, "grad_norm": 0.5533489840360206, "learning_rate": 5.571077148914031e-06, "loss": 0.20718271732330323, "step": 17760 }, { "epoch": 1.5520705923466713, "grad_norm": 0.827150584742355, "learning_rate": 5.568552293599292e-06, "loss": 0.16805722713470458, "step": 17765 }, { "epoch": 1.552507426175083, "grad_norm": 0.6389367291005897, "learning_rate": 5.56602729139843e-06, "loss": 0.19956933259963988, "step": 17770 }, { "epoch": 1.5529442600034948, "grad_norm": 0.5617869524394238, "learning_rate": 5.563502142963779e-06, "loss": 0.1876317262649536, "step": 17775 }, { "epoch": 1.5533810938319064, "grad_norm": 0.6949555152469364, "learning_rate": 5.5609768489477186e-06, "loss": 0.1543951988220215, "step": 17780 }, { "epoch": 1.553817927660318, "grad_norm": 0.5586136578951963, "learning_rate": 5.55845141000266e-06, "loss": 0.22500598430633545, "step": 17785 }, { "epoch": 1.5542547614887297, "grad_norm": 0.5342229854386065, "learning_rate": 5.55592582678105e-06, "loss": 0.16829168796539307, "step": 17790 }, { "epoch": 1.5546915953171414, "grad_norm": 0.7535645821040897, "learning_rate": 5.553400099935377e-06, "loss": 0.16491479873657228, "step": 17795 }, { "epoch": 1.5551284291455532, "grad_norm": 0.7199232355442312, "learning_rate": 5.550874230118169e-06, "loss": 0.174999463558197, "step": 17800 }, { "epoch": 1.5555652629739647, "grad_norm": 0.4940811131603153, "learning_rate": 5.548348217981984e-06, "loss": 0.18872324228286744, "step": 17805 }, { "epoch": 1.5560020968023762, "grad_norm": 0.5994048537667555, "learning_rate": 5.54582206417942e-06, "loss": 0.2136749029159546, "step": 17810 }, { "epoch": 1.556438930630788, "grad_norm": 0.6041741084830162, "learning_rate": 5.543295769363114e-06, "loss": 0.19923231601715088, "step": 17815 }, { "epoch": 1.5568757644591997, "grad_norm": 0.49943342386145173, "learning_rate": 5.5407693341857335e-06, "loss": 0.1654196262359619, "step": 17820 }, { "epoch": 1.5573125982876115, "grad_norm": 0.6072621468841813, "learning_rate": 5.53824275929999e-06, "loss": 0.19989384412765504, "step": 17825 }, { "epoch": 1.557749432116023, "grad_norm": 0.6254208203247883, "learning_rate": 5.535716045358624e-06, "loss": 0.18314303159713746, "step": 17830 }, { "epoch": 1.5581862659444348, "grad_norm": 0.5605930041948807, "learning_rate": 5.533189193014418e-06, "loss": 0.22113502025604248, "step": 17835 }, { "epoch": 1.5586230997728463, "grad_norm": 0.5735099413552962, "learning_rate": 5.530662202920182e-06, "loss": 0.17929137945175172, "step": 17840 }, { "epoch": 1.559059933601258, "grad_norm": 0.5370881635961339, "learning_rate": 5.52813507572877e-06, "loss": 0.17349853515625, "step": 17845 }, { "epoch": 1.5594967674296698, "grad_norm": 0.5419146551824164, "learning_rate": 5.525607812093069e-06, "loss": 0.17638897895812988, "step": 17850 }, { "epoch": 1.5599336012580816, "grad_norm": 0.6189510926435706, "learning_rate": 5.5230804126659985e-06, "loss": 0.18203877210617064, "step": 17855 }, { "epoch": 1.560370435086493, "grad_norm": 0.5554445574818413, "learning_rate": 5.520552878100515e-06, "loss": 0.17423681020736695, "step": 17860 }, { "epoch": 1.5608072689149046, "grad_norm": 0.6350112551795326, "learning_rate": 5.518025209049613e-06, "loss": 0.1828685522079468, "step": 17865 }, { "epoch": 1.5612441027433164, "grad_norm": 0.5311888437442609, "learning_rate": 5.515497406166315e-06, "loss": 0.18738994598388672, "step": 17870 }, { "epoch": 1.5616809365717281, "grad_norm": 0.6064087120925099, "learning_rate": 5.512969470103682e-06, "loss": 0.16869833469390869, "step": 17875 }, { "epoch": 1.5621177704001399, "grad_norm": 0.5420362483660799, "learning_rate": 5.510441401514813e-06, "loss": 0.16871850490570067, "step": 17880 }, { "epoch": 1.5625546042285514, "grad_norm": 0.5257191141242221, "learning_rate": 5.507913201052832e-06, "loss": 0.17241373062133789, "step": 17885 }, { "epoch": 1.5629914380569632, "grad_norm": 0.5493998278651347, "learning_rate": 5.5053848693709055e-06, "loss": 0.233168888092041, "step": 17890 }, { "epoch": 1.5634282718853747, "grad_norm": 0.5086413311555694, "learning_rate": 5.5028564071222305e-06, "loss": 0.182183837890625, "step": 17895 }, { "epoch": 1.5638651057137865, "grad_norm": 0.5708892018750606, "learning_rate": 5.500327814960037e-06, "loss": 0.19690461158752443, "step": 17900 }, { "epoch": 1.5643019395421982, "grad_norm": 0.728491484155753, "learning_rate": 5.49779909353759e-06, "loss": 0.17382142543792725, "step": 17905 }, { "epoch": 1.56473877337061, "grad_norm": 0.5227479558316055, "learning_rate": 5.4952702435081896e-06, "loss": 0.18301303386688234, "step": 17910 }, { "epoch": 1.5651756071990215, "grad_norm": 0.6232327622681776, "learning_rate": 5.492741265525165e-06, "loss": 0.19269819259643556, "step": 17915 }, { "epoch": 1.565612441027433, "grad_norm": 0.5555429624676023, "learning_rate": 5.490212160241879e-06, "loss": 0.15974276065826415, "step": 17920 }, { "epoch": 1.5660492748558448, "grad_norm": 0.535662640677499, "learning_rate": 5.487682928311732e-06, "loss": 0.19085416793823243, "step": 17925 }, { "epoch": 1.5664861086842565, "grad_norm": 0.5608721628659205, "learning_rate": 5.485153570388153e-06, "loss": 0.19644901752471924, "step": 17930 }, { "epoch": 1.5669229425126683, "grad_norm": 0.5901466317115963, "learning_rate": 5.482624087124604e-06, "loss": 0.19150185585021973, "step": 17935 }, { "epoch": 1.5673597763410798, "grad_norm": 0.5310099152801857, "learning_rate": 5.480094479174579e-06, "loss": 0.1850261688232422, "step": 17940 }, { "epoch": 1.5677966101694916, "grad_norm": 0.6889732640827079, "learning_rate": 5.477564747191608e-06, "loss": 0.2172178268432617, "step": 17945 }, { "epoch": 1.568233443997903, "grad_norm": 0.6430023050788737, "learning_rate": 5.475034891829246e-06, "loss": 0.15092604160308837, "step": 17950 }, { "epoch": 1.5686702778263149, "grad_norm": 0.5409391178820251, "learning_rate": 5.472504913741086e-06, "loss": 0.18232262134552002, "step": 17955 }, { "epoch": 1.5691071116547266, "grad_norm": 0.621697388182877, "learning_rate": 5.469974813580751e-06, "loss": 0.20843157768249512, "step": 17960 }, { "epoch": 1.5695439454831384, "grad_norm": 0.6336640711360454, "learning_rate": 5.467444592001896e-06, "loss": 0.17706214189529418, "step": 17965 }, { "epoch": 1.56998077931155, "grad_norm": 0.5544507430313872, "learning_rate": 5.464914249658204e-06, "loss": 0.15970993041992188, "step": 17970 }, { "epoch": 1.5704176131399614, "grad_norm": 0.5269603086953409, "learning_rate": 5.462383787203395e-06, "loss": 0.17787938117980956, "step": 17975 }, { "epoch": 1.5708544469683732, "grad_norm": 0.627993975681069, "learning_rate": 5.459853205291214e-06, "loss": 0.21664111614227294, "step": 17980 }, { "epoch": 1.571291280796785, "grad_norm": 0.5592763173394791, "learning_rate": 5.457322504575441e-06, "loss": 0.18050658702850342, "step": 17985 }, { "epoch": 1.5717281146251967, "grad_norm": 0.7457884031266897, "learning_rate": 5.454791685709886e-06, "loss": 0.16990244388580322, "step": 17990 }, { "epoch": 1.5721649484536082, "grad_norm": 0.5727419760768098, "learning_rate": 5.452260749348386e-06, "loss": 0.17206146717071533, "step": 17995 }, { "epoch": 1.57260178228202, "grad_norm": 0.6036016368378475, "learning_rate": 5.4497296961448145e-06, "loss": 0.17822844982147218, "step": 18000 }, { "epoch": 1.5730386161104315, "grad_norm": 0.5852678910721623, "learning_rate": 5.447198526753072e-06, "loss": 0.19080497026443483, "step": 18005 }, { "epoch": 1.5734754499388433, "grad_norm": 0.6208024162977175, "learning_rate": 5.444667241827085e-06, "loss": 0.1891645908355713, "step": 18010 }, { "epoch": 1.573912283767255, "grad_norm": 0.6985767762702061, "learning_rate": 5.442135842020817e-06, "loss": 0.1944675326347351, "step": 18015 }, { "epoch": 1.5743491175956668, "grad_norm": 0.5253964383017076, "learning_rate": 5.4396043279882584e-06, "loss": 0.18757920265197753, "step": 18020 }, { "epoch": 1.5747859514240783, "grad_norm": 0.5356050520806153, "learning_rate": 5.437072700383429e-06, "loss": 0.20270876884460448, "step": 18025 }, { "epoch": 1.5752227852524898, "grad_norm": 0.5295649466281857, "learning_rate": 5.434540959860375e-06, "loss": 0.20485868453979492, "step": 18030 }, { "epoch": 1.5756596190809016, "grad_norm": 0.5350454929501283, "learning_rate": 5.432009107073176e-06, "loss": 0.1776043176651001, "step": 18035 }, { "epoch": 1.5760964529093133, "grad_norm": 0.6320325776034396, "learning_rate": 5.42947714267594e-06, "loss": 0.16850982904434203, "step": 18040 }, { "epoch": 1.576533286737725, "grad_norm": 0.5625116543315642, "learning_rate": 5.4269450673228e-06, "loss": 0.16939170360565187, "step": 18045 }, { "epoch": 1.5769701205661366, "grad_norm": 0.6063918043493866, "learning_rate": 5.424412881667922e-06, "loss": 0.18104240894317628, "step": 18050 }, { "epoch": 1.5774069543945484, "grad_norm": 0.6663673295858015, "learning_rate": 5.421880586365501e-06, "loss": 0.19045549631118774, "step": 18055 }, { "epoch": 1.57784378822296, "grad_norm": 0.47509286903655856, "learning_rate": 5.4193481820697544e-06, "loss": 0.20027871131896974, "step": 18060 }, { "epoch": 1.5782806220513717, "grad_norm": 0.562933274978198, "learning_rate": 5.416815669434935e-06, "loss": 0.1985496997833252, "step": 18065 }, { "epoch": 1.5787174558797834, "grad_norm": 0.7437517967949234, "learning_rate": 5.41428304911532e-06, "loss": 0.18241333961486816, "step": 18070 }, { "epoch": 1.5791542897081952, "grad_norm": 0.5173939395769882, "learning_rate": 5.411750321765212e-06, "loss": 0.1665891647338867, "step": 18075 }, { "epoch": 1.5795911235366067, "grad_norm": 0.6483341553222043, "learning_rate": 5.409217488038947e-06, "loss": 0.16834917068481445, "step": 18080 }, { "epoch": 1.5800279573650182, "grad_norm": 0.5706509454481463, "learning_rate": 5.406684548590885e-06, "loss": 0.1942469835281372, "step": 18085 }, { "epoch": 1.58046479119343, "grad_norm": 0.519502785955837, "learning_rate": 5.404151504075412e-06, "loss": 0.1755385160446167, "step": 18090 }, { "epoch": 1.5809016250218417, "grad_norm": 0.5924208702645022, "learning_rate": 5.401618355146944e-06, "loss": 0.17333288192749025, "step": 18095 }, { "epoch": 1.5813384588502535, "grad_norm": 0.523852631633863, "learning_rate": 5.399085102459921e-06, "loss": 0.17891849279403688, "step": 18100 }, { "epoch": 1.581775292678665, "grad_norm": 0.5509963702452958, "learning_rate": 5.396551746668817e-06, "loss": 0.16482434272766114, "step": 18105 }, { "epoch": 1.5822121265070765, "grad_norm": 0.5745676849598693, "learning_rate": 5.39401828842812e-06, "loss": 0.19421985149383544, "step": 18110 }, { "epoch": 1.5826489603354883, "grad_norm": 0.6176746625800218, "learning_rate": 5.391484728392358e-06, "loss": 0.16799232959747315, "step": 18115 }, { "epoch": 1.5830857941639, "grad_norm": 0.5231831800636096, "learning_rate": 5.3889510672160765e-06, "loss": 0.17911254167556762, "step": 18120 }, { "epoch": 1.5835226279923118, "grad_norm": 0.5450035016575336, "learning_rate": 5.386417305553848e-06, "loss": 0.1563943862915039, "step": 18125 }, { "epoch": 1.5839594618207236, "grad_norm": 0.6022698068710346, "learning_rate": 5.383883444060275e-06, "loss": 0.19372597932815552, "step": 18130 }, { "epoch": 1.584396295649135, "grad_norm": 0.5457979295331664, "learning_rate": 5.381349483389985e-06, "loss": 0.17335526943206786, "step": 18135 }, { "epoch": 1.5848331294775466, "grad_norm": 0.5309341869067898, "learning_rate": 5.3788154241976265e-06, "loss": 0.18926165103912354, "step": 18140 }, { "epoch": 1.5852699633059584, "grad_norm": 0.6496501319569514, "learning_rate": 5.376281267137877e-06, "loss": 0.1576543927192688, "step": 18145 }, { "epoch": 1.5857067971343701, "grad_norm": 0.5273683237817749, "learning_rate": 5.37374701286544e-06, "loss": 0.1729048013687134, "step": 18150 }, { "epoch": 1.5861436309627819, "grad_norm": 0.6741450240302467, "learning_rate": 5.371212662035042e-06, "loss": 0.18926509618759155, "step": 18155 }, { "epoch": 1.5865804647911934, "grad_norm": 1.2443636377912006, "learning_rate": 5.368678215301435e-06, "loss": 0.20459260940551757, "step": 18160 }, { "epoch": 1.587017298619605, "grad_norm": 0.5704962720967132, "learning_rate": 5.366143673319398e-06, "loss": 0.1963687777519226, "step": 18165 }, { "epoch": 1.5874541324480167, "grad_norm": 0.5322015449471601, "learning_rate": 5.363609036743731e-06, "loss": 0.18278086185455322, "step": 18170 }, { "epoch": 1.5878909662764285, "grad_norm": 0.6272184478193756, "learning_rate": 5.3610743062292605e-06, "loss": 0.18844888210296631, "step": 18175 }, { "epoch": 1.5883278001048402, "grad_norm": 0.5398562605877414, "learning_rate": 5.358539482430838e-06, "loss": 0.1703731894493103, "step": 18180 }, { "epoch": 1.588764633933252, "grad_norm": 0.6325712762114183, "learning_rate": 5.356004566003338e-06, "loss": 0.17367252111434936, "step": 18185 }, { "epoch": 1.5892014677616635, "grad_norm": 0.5923930036543449, "learning_rate": 5.353469557601659e-06, "loss": 0.16749875545501708, "step": 18190 }, { "epoch": 1.589638301590075, "grad_norm": 0.570815140181774, "learning_rate": 5.35093445788072e-06, "loss": 0.19100003242492675, "step": 18195 }, { "epoch": 1.5900751354184868, "grad_norm": 0.5725071580248247, "learning_rate": 5.348399267495472e-06, "loss": 0.17206964492797852, "step": 18200 }, { "epoch": 1.5905119692468985, "grad_norm": 0.5160364808260709, "learning_rate": 5.34586398710088e-06, "loss": 0.17200396060943604, "step": 18205 }, { "epoch": 1.5909488030753103, "grad_norm": 0.7272496520508687, "learning_rate": 5.34332861735194e-06, "loss": 0.16266953945159912, "step": 18210 }, { "epoch": 1.5913856369037218, "grad_norm": 0.7191746998497037, "learning_rate": 5.340793158903665e-06, "loss": 0.17811815738677977, "step": 18215 }, { "epoch": 1.5918224707321333, "grad_norm": 0.5674973171545418, "learning_rate": 5.338257612411094e-06, "loss": 0.15953603982925416, "step": 18220 }, { "epoch": 1.592259304560545, "grad_norm": 0.596582764433435, "learning_rate": 5.335721978529289e-06, "loss": 0.16823335886001586, "step": 18225 }, { "epoch": 1.5926961383889569, "grad_norm": 0.5797913145183027, "learning_rate": 5.333186257913335e-06, "loss": 0.16440006494522094, "step": 18230 }, { "epoch": 1.5931329722173686, "grad_norm": 0.5535716156745044, "learning_rate": 5.330650451218337e-06, "loss": 0.19270741939544678, "step": 18235 }, { "epoch": 1.5935698060457801, "grad_norm": 0.5461337954437399, "learning_rate": 5.328114559099422e-06, "loss": 0.20667786598205568, "step": 18240 }, { "epoch": 1.5940066398741919, "grad_norm": 0.6201138381455676, "learning_rate": 5.325578582211742e-06, "loss": 0.17687052488327026, "step": 18245 }, { "epoch": 1.5944434737026034, "grad_norm": 0.6279515493764858, "learning_rate": 5.323042521210472e-06, "loss": 0.21825528144836426, "step": 18250 }, { "epoch": 1.5948803075310152, "grad_norm": 0.5411301265188982, "learning_rate": 5.320506376750801e-06, "loss": 0.16324621438980103, "step": 18255 }, { "epoch": 1.595317141359427, "grad_norm": 0.5774593114143646, "learning_rate": 5.317970149487948e-06, "loss": 0.17832272052764891, "step": 18260 }, { "epoch": 1.5957539751878387, "grad_norm": 0.5574328891735657, "learning_rate": 5.315433840077151e-06, "loss": 0.18637921810150146, "step": 18265 }, { "epoch": 1.5961908090162502, "grad_norm": 0.6010700794948807, "learning_rate": 5.312897449173665e-06, "loss": 0.1801734447479248, "step": 18270 }, { "epoch": 1.5966276428446617, "grad_norm": 0.6265132559657229, "learning_rate": 5.310360977432772e-06, "loss": 0.18793056011199952, "step": 18275 }, { "epoch": 1.5970644766730735, "grad_norm": 0.6166733588807818, "learning_rate": 5.3078244255097734e-06, "loss": 0.19323889017105103, "step": 18280 }, { "epoch": 1.5975013105014852, "grad_norm": 0.5837154930841486, "learning_rate": 5.305287794059988e-06, "loss": 0.1741802930831909, "step": 18285 }, { "epoch": 1.597938144329897, "grad_norm": 0.5614120232373333, "learning_rate": 5.3027510837387576e-06, "loss": 0.1854259967803955, "step": 18290 }, { "epoch": 1.5983749781583085, "grad_norm": 0.5453415197424415, "learning_rate": 5.300214295201444e-06, "loss": 0.15866644382476808, "step": 18295 }, { "epoch": 1.5988118119867203, "grad_norm": 0.5012709781203403, "learning_rate": 5.2976774291034315e-06, "loss": 0.17303727865219115, "step": 18300 }, { "epoch": 1.5992486458151318, "grad_norm": 0.6155799092877564, "learning_rate": 5.29514048610012e-06, "loss": 0.1742408037185669, "step": 18305 }, { "epoch": 1.5996854796435436, "grad_norm": 0.6208571953892571, "learning_rate": 5.292603466846933e-06, "loss": 0.19144251346588134, "step": 18310 }, { "epoch": 1.6001223134719553, "grad_norm": 0.687158413744566, "learning_rate": 5.29006637199931e-06, "loss": 0.2057422161102295, "step": 18315 }, { "epoch": 1.600559147300367, "grad_norm": 0.5453249083237541, "learning_rate": 5.287529202212714e-06, "loss": 0.16535422801971436, "step": 18320 }, { "epoch": 1.6009959811287786, "grad_norm": 0.6870870938571602, "learning_rate": 5.284991958142627e-06, "loss": 0.19777345657348633, "step": 18325 }, { "epoch": 1.6014328149571901, "grad_norm": 0.5873288581339888, "learning_rate": 5.282454640444546e-06, "loss": 0.20940713882446288, "step": 18330 }, { "epoch": 1.601869648785602, "grad_norm": 0.49316851053683197, "learning_rate": 5.279917249773993e-06, "loss": 0.19180219173431395, "step": 18335 }, { "epoch": 1.6023064826140136, "grad_norm": 0.6228370993298589, "learning_rate": 5.277379786786503e-06, "loss": 0.19557477235794068, "step": 18340 }, { "epoch": 1.6027433164424254, "grad_norm": 0.6193998914381196, "learning_rate": 5.274842252137633e-06, "loss": 0.16756395101547242, "step": 18345 }, { "epoch": 1.603180150270837, "grad_norm": 0.7075493545542465, "learning_rate": 5.27230464648296e-06, "loss": 0.18209010362625122, "step": 18350 }, { "epoch": 1.6036169840992487, "grad_norm": 0.5793632492698153, "learning_rate": 5.269766970478074e-06, "loss": 0.19079346656799318, "step": 18355 }, { "epoch": 1.6040538179276602, "grad_norm": 0.7327136182313596, "learning_rate": 5.26722922477859e-06, "loss": 0.18492062091827394, "step": 18360 }, { "epoch": 1.604490651756072, "grad_norm": 0.4739756335998127, "learning_rate": 5.264691410040134e-06, "loss": 0.19506187438964845, "step": 18365 }, { "epoch": 1.6049274855844837, "grad_norm": 0.5672420501448558, "learning_rate": 5.262153526918354e-06, "loss": 0.21404919624328614, "step": 18370 }, { "epoch": 1.6053643194128955, "grad_norm": 0.49243206028248615, "learning_rate": 5.259615576068917e-06, "loss": 0.1606461763381958, "step": 18375 }, { "epoch": 1.605801153241307, "grad_norm": 0.7027930744237014, "learning_rate": 5.257077558147504e-06, "loss": 0.198431396484375, "step": 18380 }, { "epoch": 1.6062379870697185, "grad_norm": 0.6212362518721558, "learning_rate": 5.254539473809816e-06, "loss": 0.19861071109771727, "step": 18385 }, { "epoch": 1.6066748208981303, "grad_norm": 0.5179801465123934, "learning_rate": 5.252001323711568e-06, "loss": 0.18414618968963622, "step": 18390 }, { "epoch": 1.607111654726542, "grad_norm": 0.6488278097505945, "learning_rate": 5.249463108508495e-06, "loss": 0.22538979053497316, "step": 18395 }, { "epoch": 1.6075484885549538, "grad_norm": 0.6035143125871428, "learning_rate": 5.246924828856348e-06, "loss": 0.17991442680358888, "step": 18400 }, { "epoch": 1.6079853223833653, "grad_norm": 0.5732697708270392, "learning_rate": 5.244386485410895e-06, "loss": 0.1884100079536438, "step": 18405 }, { "epoch": 1.608422156211777, "grad_norm": 0.5523636744253428, "learning_rate": 5.241848078827917e-06, "loss": 0.17439024448394774, "step": 18410 }, { "epoch": 1.6088589900401886, "grad_norm": 0.6405366225020018, "learning_rate": 5.239309609763217e-06, "loss": 0.211348819732666, "step": 18415 }, { "epoch": 1.6092958238686004, "grad_norm": 0.5784711485001556, "learning_rate": 5.23677107887261e-06, "loss": 0.18774027824401857, "step": 18420 }, { "epoch": 1.6097326576970121, "grad_norm": 0.5826425275274927, "learning_rate": 5.2342324868119295e-06, "loss": 0.1630805492401123, "step": 18425 }, { "epoch": 1.6101694915254239, "grad_norm": 0.5747138768897354, "learning_rate": 5.2316938342370215e-06, "loss": 0.17217692136764526, "step": 18430 }, { "epoch": 1.6106063253538354, "grad_norm": 0.6191059377098684, "learning_rate": 5.229155121803753e-06, "loss": 0.19852330684661865, "step": 18435 }, { "epoch": 1.611043159182247, "grad_norm": 0.5611751796183594, "learning_rate": 5.226616350168e-06, "loss": 0.18645509481430053, "step": 18440 }, { "epoch": 1.6114799930106587, "grad_norm": 0.5897364714020821, "learning_rate": 5.224077519985658e-06, "loss": 0.17060558795928954, "step": 18445 }, { "epoch": 1.6119168268390704, "grad_norm": 0.7323457003467545, "learning_rate": 5.221538631912639e-06, "loss": 0.1734040141105652, "step": 18450 }, { "epoch": 1.6123536606674822, "grad_norm": 0.5544419404150017, "learning_rate": 5.218999686604866e-06, "loss": 0.18635571002960205, "step": 18455 }, { "epoch": 1.6127904944958937, "grad_norm": 0.6646900441324256, "learning_rate": 5.2164606847182755e-06, "loss": 0.1813650369644165, "step": 18460 }, { "epoch": 1.6132273283243055, "grad_norm": 0.5254393252008969, "learning_rate": 5.2139216269088245e-06, "loss": 0.16091110706329345, "step": 18465 }, { "epoch": 1.613664162152717, "grad_norm": 0.5417006383700483, "learning_rate": 5.211382513832481e-06, "loss": 0.20087320804595948, "step": 18470 }, { "epoch": 1.6141009959811288, "grad_norm": 0.568938104653238, "learning_rate": 5.208843346145227e-06, "loss": 0.15045320987701416, "step": 18475 }, { "epoch": 1.6145378298095405, "grad_norm": 0.6822745685452178, "learning_rate": 5.206304124503061e-06, "loss": 0.19673213958740235, "step": 18480 }, { "epoch": 1.6149746636379523, "grad_norm": 0.6943571593670895, "learning_rate": 5.203764849561992e-06, "loss": 0.15607714653015137, "step": 18485 }, { "epoch": 1.6154114974663638, "grad_norm": 0.5603925434274031, "learning_rate": 5.201225521978045e-06, "loss": 0.18560028076171875, "step": 18490 }, { "epoch": 1.6158483312947753, "grad_norm": 0.6285610380930629, "learning_rate": 5.198686142407259e-06, "loss": 0.1744853138923645, "step": 18495 }, { "epoch": 1.616285165123187, "grad_norm": 0.5442461494545334, "learning_rate": 5.196146711505685e-06, "loss": 0.17425512075424193, "step": 18500 }, { "epoch": 1.6167219989515988, "grad_norm": 0.7310346449930061, "learning_rate": 5.193607229929389e-06, "loss": 0.1985979676246643, "step": 18505 }, { "epoch": 1.6171588327800106, "grad_norm": 0.635670617559612, "learning_rate": 5.191067698334445e-06, "loss": 0.2004024028778076, "step": 18510 }, { "epoch": 1.6175956666084221, "grad_norm": 0.5533367447379504, "learning_rate": 5.188528117376947e-06, "loss": 0.21651341915130615, "step": 18515 }, { "epoch": 1.6180325004368337, "grad_norm": 0.685261110740863, "learning_rate": 5.185988487712999e-06, "loss": 0.19772083759307862, "step": 18520 }, { "epoch": 1.6184693342652454, "grad_norm": 0.6119777316520755, "learning_rate": 5.183448809998714e-06, "loss": 0.1986724853515625, "step": 18525 }, { "epoch": 1.6189061680936572, "grad_norm": 0.5572379305009414, "learning_rate": 5.180909084890226e-06, "loss": 0.1973886728286743, "step": 18530 }, { "epoch": 1.619343001922069, "grad_norm": 0.5305724432786212, "learning_rate": 5.1783693130436715e-06, "loss": 0.19964791536331178, "step": 18535 }, { "epoch": 1.6197798357504807, "grad_norm": 0.6013257099967942, "learning_rate": 5.175829495115204e-06, "loss": 0.18685294389724733, "step": 18540 }, { "epoch": 1.6202166695788922, "grad_norm": 0.6651110396895096, "learning_rate": 5.173289631760988e-06, "loss": 0.19025362730026246, "step": 18545 }, { "epoch": 1.6206535034073037, "grad_norm": 0.7105100646908843, "learning_rate": 5.170749723637204e-06, "loss": 0.18401397466659547, "step": 18550 }, { "epoch": 1.6210903372357155, "grad_norm": 0.7000823003418157, "learning_rate": 5.168209771400036e-06, "loss": 0.15873336791992188, "step": 18555 }, { "epoch": 1.6215271710641272, "grad_norm": 0.5745660387730245, "learning_rate": 5.165669775705683e-06, "loss": 0.16842100620269776, "step": 18560 }, { "epoch": 1.621964004892539, "grad_norm": 0.5925573609226233, "learning_rate": 5.1631297372103585e-06, "loss": 0.21554183959960938, "step": 18565 }, { "epoch": 1.6224008387209505, "grad_norm": 0.5855643390685416, "learning_rate": 5.160589656570283e-06, "loss": 0.21620769500732423, "step": 18570 }, { "epoch": 1.622837672549362, "grad_norm": 0.6244544974512609, "learning_rate": 5.158049534441688e-06, "loss": 0.19979232549667358, "step": 18575 }, { "epoch": 1.6232745063777738, "grad_norm": 0.5983739577647275, "learning_rate": 5.15550937148082e-06, "loss": 0.1435101866722107, "step": 18580 }, { "epoch": 1.6237113402061856, "grad_norm": 0.6274749868437893, "learning_rate": 5.1529691683439305e-06, "loss": 0.20623471736907958, "step": 18585 }, { "epoch": 1.6241481740345973, "grad_norm": 0.5108132817041895, "learning_rate": 5.150428925687284e-06, "loss": 0.18980605602264405, "step": 18590 }, { "epoch": 1.6245850078630089, "grad_norm": 0.5527765512676927, "learning_rate": 5.147888644167157e-06, "loss": 0.2125952959060669, "step": 18595 }, { "epoch": 1.6250218416914206, "grad_norm": 0.6082684836650177, "learning_rate": 5.145348324439832e-06, "loss": 0.1829596996307373, "step": 18600 }, { "epoch": 1.6254586755198321, "grad_norm": 0.6910617091472625, "learning_rate": 5.142807967161604e-06, "loss": 0.2081758499145508, "step": 18605 }, { "epoch": 1.625895509348244, "grad_norm": 0.5740383060260665, "learning_rate": 5.1402675729887765e-06, "loss": 0.18324809074401854, "step": 18610 }, { "epoch": 1.6263323431766556, "grad_norm": 0.6304110886208836, "learning_rate": 5.137727142577665e-06, "loss": 0.17036099433898927, "step": 18615 }, { "epoch": 1.6267691770050674, "grad_norm": 0.5758378224426092, "learning_rate": 5.135186676584591e-06, "loss": 0.18351333141326903, "step": 18620 }, { "epoch": 1.627206010833479, "grad_norm": 0.5577206177527694, "learning_rate": 5.132646175665887e-06, "loss": 0.20150012969970704, "step": 18625 }, { "epoch": 1.6276428446618905, "grad_norm": 0.49892042448139773, "learning_rate": 5.130105640477895e-06, "loss": 0.18507883548736573, "step": 18630 }, { "epoch": 1.6280796784903022, "grad_norm": 0.6722485985802332, "learning_rate": 5.127565071676965e-06, "loss": 0.19234765768051149, "step": 18635 }, { "epoch": 1.628516512318714, "grad_norm": 0.6596160767535773, "learning_rate": 5.125024469919453e-06, "loss": 0.15877227783203124, "step": 18640 }, { "epoch": 1.6289533461471257, "grad_norm": 0.49009192416539715, "learning_rate": 5.122483835861731e-06, "loss": 0.22377367019653321, "step": 18645 }, { "epoch": 1.6293901799755373, "grad_norm": 0.6171935506513727, "learning_rate": 5.119943170160172e-06, "loss": 0.1792771816253662, "step": 18650 }, { "epoch": 1.629827013803949, "grad_norm": 0.6215422540768095, "learning_rate": 5.11740247347116e-06, "loss": 0.18234107494354249, "step": 18655 }, { "epoch": 1.6302638476323605, "grad_norm": 0.5330666933968279, "learning_rate": 5.114861746451089e-06, "loss": 0.21297531127929686, "step": 18660 }, { "epoch": 1.6307006814607723, "grad_norm": 0.7445535247421272, "learning_rate": 5.112320989756354e-06, "loss": 0.18897022008895875, "step": 18665 }, { "epoch": 1.631137515289184, "grad_norm": 0.5622165520783023, "learning_rate": 5.109780204043365e-06, "loss": 0.20126035213470458, "step": 18670 }, { "epoch": 1.6315743491175958, "grad_norm": 0.5061812530287508, "learning_rate": 5.1072393899685394e-06, "loss": 0.18462808132171632, "step": 18675 }, { "epoch": 1.6320111829460073, "grad_norm": 0.5684694275004766, "learning_rate": 5.104698548188295e-06, "loss": 0.193247652053833, "step": 18680 }, { "epoch": 1.6324480167744189, "grad_norm": 0.6315433046445315, "learning_rate": 5.1021576793590636e-06, "loss": 0.206982421875, "step": 18685 }, { "epoch": 1.6328848506028306, "grad_norm": 0.5699147245084957, "learning_rate": 5.099616784137281e-06, "loss": 0.18004536628723145, "step": 18690 }, { "epoch": 1.6333216844312424, "grad_norm": 0.6820735575628855, "learning_rate": 5.097075863179391e-06, "loss": 0.18673219680786132, "step": 18695 }, { "epoch": 1.6337585182596541, "grad_norm": 0.5467133427987521, "learning_rate": 5.09453491714184e-06, "loss": 0.17148449420928955, "step": 18700 }, { "epoch": 1.6341953520880657, "grad_norm": 0.6344603645728191, "learning_rate": 5.091993946681089e-06, "loss": 0.17187049388885497, "step": 18705 }, { "epoch": 1.6346321859164774, "grad_norm": 0.6246189541124535, "learning_rate": 5.089452952453601e-06, "loss": 0.19455485343933104, "step": 18710 }, { "epoch": 1.635069019744889, "grad_norm": 0.6111007122557085, "learning_rate": 5.086911935115838e-06, "loss": 0.17365130186080932, "step": 18715 }, { "epoch": 1.6355058535733007, "grad_norm": 0.5443786275366058, "learning_rate": 5.084370895324281e-06, "loss": 0.17804449796676636, "step": 18720 }, { "epoch": 1.6359426874017124, "grad_norm": 0.6569971558980907, "learning_rate": 5.081829833735408e-06, "loss": 0.18420966863632202, "step": 18725 }, { "epoch": 1.6363795212301242, "grad_norm": 0.5950713341454963, "learning_rate": 5.079288751005706e-06, "loss": 0.17394076585769652, "step": 18730 }, { "epoch": 1.6368163550585357, "grad_norm": 0.49977871888670256, "learning_rate": 5.076747647791665e-06, "loss": 0.17072439193725586, "step": 18735 }, { "epoch": 1.6372531888869473, "grad_norm": 0.5122516163683826, "learning_rate": 5.074206524749784e-06, "loss": 0.18050333261489868, "step": 18740 }, { "epoch": 1.637690022715359, "grad_norm": 0.6614786722101673, "learning_rate": 5.071665382536562e-06, "loss": 0.19634668827056884, "step": 18745 }, { "epoch": 1.6381268565437708, "grad_norm": 0.6045342226665755, "learning_rate": 5.06912422180851e-06, "loss": 0.18131824731826782, "step": 18750 }, { "epoch": 1.6385636903721825, "grad_norm": 0.5065450881314382, "learning_rate": 5.066583043222137e-06, "loss": 0.17188475131988526, "step": 18755 }, { "epoch": 1.639000524200594, "grad_norm": 0.6146828040983976, "learning_rate": 5.064041847433961e-06, "loss": 0.17933282852172852, "step": 18760 }, { "epoch": 1.6394373580290058, "grad_norm": 0.7562122835517024, "learning_rate": 5.061500635100502e-06, "loss": 0.16959255933761597, "step": 18765 }, { "epoch": 1.6398741918574173, "grad_norm": 0.5520498205892163, "learning_rate": 5.058959406878283e-06, "loss": 0.20441758632659912, "step": 18770 }, { "epoch": 1.640311025685829, "grad_norm": 0.6518952930959158, "learning_rate": 5.056418163423835e-06, "loss": 0.1927473783493042, "step": 18775 }, { "epoch": 1.6407478595142408, "grad_norm": 0.543509597977552, "learning_rate": 5.053876905393692e-06, "loss": 0.18393406867980958, "step": 18780 }, { "epoch": 1.6411846933426526, "grad_norm": 0.5698917910424002, "learning_rate": 5.05133563344439e-06, "loss": 0.18315517902374268, "step": 18785 }, { "epoch": 1.6416215271710641, "grad_norm": 0.5410419990876726, "learning_rate": 5.048794348232468e-06, "loss": 0.17521060705184938, "step": 18790 }, { "epoch": 1.6420583609994757, "grad_norm": 0.51196848814098, "learning_rate": 5.0462530504144736e-06, "loss": 0.2109306812286377, "step": 18795 }, { "epoch": 1.6424951948278874, "grad_norm": 0.5925021428173457, "learning_rate": 5.04371174064695e-06, "loss": 0.16661441326141357, "step": 18800 }, { "epoch": 1.6429320286562992, "grad_norm": 0.6955686753659948, "learning_rate": 5.04117041958645e-06, "loss": 0.16345744132995604, "step": 18805 }, { "epoch": 1.643368862484711, "grad_norm": 0.6153838210742942, "learning_rate": 5.038629087889526e-06, "loss": 0.18283448219299317, "step": 18810 }, { "epoch": 1.6438056963131225, "grad_norm": 0.5659038116814676, "learning_rate": 5.036087746212733e-06, "loss": 0.18001577854156495, "step": 18815 }, { "epoch": 1.6442425301415342, "grad_norm": 0.5762571347770156, "learning_rate": 5.033546395212631e-06, "loss": 0.19949510097503662, "step": 18820 }, { "epoch": 1.6446793639699457, "grad_norm": 0.5535697245395443, "learning_rate": 5.031005035545779e-06, "loss": 0.18514300584793092, "step": 18825 }, { "epoch": 1.6451161977983575, "grad_norm": 0.6733155070142195, "learning_rate": 5.028463667868741e-06, "loss": 0.18928589820861816, "step": 18830 }, { "epoch": 1.6455530316267692, "grad_norm": 0.5524579762456034, "learning_rate": 5.0259222928380845e-06, "loss": 0.16582136154174804, "step": 18835 }, { "epoch": 1.645989865455181, "grad_norm": 0.6452863735462662, "learning_rate": 5.0233809111103726e-06, "loss": 0.1636986255645752, "step": 18840 }, { "epoch": 1.6464266992835925, "grad_norm": 0.7528950815874808, "learning_rate": 5.020839523342176e-06, "loss": 0.19821022748947142, "step": 18845 }, { "epoch": 1.646863533112004, "grad_norm": 0.597443266485653, "learning_rate": 5.018298130190067e-06, "loss": 0.17372329235076905, "step": 18850 }, { "epoch": 1.6473003669404158, "grad_norm": 0.5788648039246639, "learning_rate": 5.015756732310613e-06, "loss": 0.16962018013000488, "step": 18855 }, { "epoch": 1.6477372007688276, "grad_norm": 0.6470856834525749, "learning_rate": 5.013215330360392e-06, "loss": 0.16701793670654297, "step": 18860 }, { "epoch": 1.6481740345972393, "grad_norm": 0.6372974538382319, "learning_rate": 5.010673924995974e-06, "loss": 0.1510587215423584, "step": 18865 }, { "epoch": 1.6486108684256509, "grad_norm": 0.639661239374068, "learning_rate": 5.008132516873937e-06, "loss": 0.1968867301940918, "step": 18870 }, { "epoch": 1.6490477022540626, "grad_norm": 0.5922563930117825, "learning_rate": 5.005591106650853e-06, "loss": 0.1762837529182434, "step": 18875 }, { "epoch": 1.6494845360824741, "grad_norm": 0.5520962049332471, "learning_rate": 5.0030496949833005e-06, "loss": 0.17595698833465576, "step": 18880 }, { "epoch": 1.649921369910886, "grad_norm": 0.5970736556208754, "learning_rate": 5.000508282527858e-06, "loss": 0.192523193359375, "step": 18885 }, { "epoch": 1.6503582037392976, "grad_norm": 0.56867234285355, "learning_rate": 4.9979668699411e-06, "loss": 0.18884446620941162, "step": 18890 }, { "epoch": 1.6507950375677094, "grad_norm": 0.6066426732646127, "learning_rate": 4.995425457879601e-06, "loss": 0.1907481074333191, "step": 18895 }, { "epoch": 1.651231871396121, "grad_norm": 0.5244248362650004, "learning_rate": 4.992884046999941e-06, "loss": 0.20933156013488768, "step": 18900 }, { "epoch": 1.6516687052245325, "grad_norm": 0.6833411134405153, "learning_rate": 4.9903426379586955e-06, "loss": 0.1719886302947998, "step": 18905 }, { "epoch": 1.6521055390529442, "grad_norm": 0.6697637110737086, "learning_rate": 4.987801231412441e-06, "loss": 0.16221948862075805, "step": 18910 }, { "epoch": 1.652542372881356, "grad_norm": 0.6036126509468613, "learning_rate": 4.985259828017752e-06, "loss": 0.19586079120635985, "step": 18915 }, { "epoch": 1.6529792067097677, "grad_norm": 0.6555805153009593, "learning_rate": 4.982718428431202e-06, "loss": 0.20539286136627197, "step": 18920 }, { "epoch": 1.6534160405381793, "grad_norm": 0.5314723066333156, "learning_rate": 4.980177033309368e-06, "loss": 0.17212742567062378, "step": 18925 }, { "epoch": 1.6538528743665908, "grad_norm": 0.6162615035395302, "learning_rate": 4.9776356433088174e-06, "loss": 0.1805957317352295, "step": 18930 }, { "epoch": 1.6542897081950025, "grad_norm": 0.528601628448861, "learning_rate": 4.975094259086123e-06, "loss": 0.18778634071350098, "step": 18935 }, { "epoch": 1.6547265420234143, "grad_norm": 0.5788982528146241, "learning_rate": 4.972552881297856e-06, "loss": 0.1855933666229248, "step": 18940 }, { "epoch": 1.655163375851826, "grad_norm": 0.5668280329624451, "learning_rate": 4.970011510600582e-06, "loss": 0.16966745853424073, "step": 18945 }, { "epoch": 1.6556002096802378, "grad_norm": 0.6686325864863717, "learning_rate": 4.967470147650868e-06, "loss": 0.1841935396194458, "step": 18950 }, { "epoch": 1.6560370435086493, "grad_norm": 0.7106452275684538, "learning_rate": 4.9649287931052805e-06, "loss": 0.2031322956085205, "step": 18955 }, { "epoch": 1.6564738773370609, "grad_norm": 0.6822684949746797, "learning_rate": 4.962387447620376e-06, "loss": 0.19194443225860597, "step": 18960 }, { "epoch": 1.6569107111654726, "grad_norm": 0.7816624918503333, "learning_rate": 4.959846111852717e-06, "loss": 0.17193599939346313, "step": 18965 }, { "epoch": 1.6573475449938844, "grad_norm": 0.5638575748397217, "learning_rate": 4.957304786458862e-06, "loss": 0.17769451141357423, "step": 18970 }, { "epoch": 1.6577843788222961, "grad_norm": 0.4701785593833299, "learning_rate": 4.954763472095361e-06, "loss": 0.15657511949539185, "step": 18975 }, { "epoch": 1.6582212126507077, "grad_norm": 0.5655887409649335, "learning_rate": 4.952222169418768e-06, "loss": 0.21501355171203612, "step": 18980 }, { "epoch": 1.6586580464791192, "grad_norm": 0.5148207583241476, "learning_rate": 4.949680879085633e-06, "loss": 0.15707941055297853, "step": 18985 }, { "epoch": 1.659094880307531, "grad_norm": 0.5285630668941234, "learning_rate": 4.947139601752498e-06, "loss": 0.207318115234375, "step": 18990 }, { "epoch": 1.6595317141359427, "grad_norm": 0.5845047435460312, "learning_rate": 4.944598338075906e-06, "loss": 0.18138349056243896, "step": 18995 }, { "epoch": 1.6599685479643544, "grad_norm": 0.6625428800177346, "learning_rate": 4.942057088712396e-06, "loss": 0.1900537371635437, "step": 19000 }, { "epoch": 1.660405381792766, "grad_norm": 0.5949246093655702, "learning_rate": 4.939515854318502e-06, "loss": 0.18883283138275148, "step": 19005 }, { "epoch": 1.6608422156211777, "grad_norm": 0.6157134082109887, "learning_rate": 4.936974635550756e-06, "loss": 0.20247273445129393, "step": 19010 }, { "epoch": 1.6612790494495893, "grad_norm": 0.5923872185535827, "learning_rate": 4.9344334330656825e-06, "loss": 0.17386834621429442, "step": 19015 }, { "epoch": 1.661715883278001, "grad_norm": 0.634553635785359, "learning_rate": 4.931892247519807e-06, "loss": 0.17566931247711182, "step": 19020 }, { "epoch": 1.6621527171064128, "grad_norm": 0.847545278121666, "learning_rate": 4.929351079569645e-06, "loss": 0.1466444969177246, "step": 19025 }, { "epoch": 1.6625895509348245, "grad_norm": 0.632202729989257, "learning_rate": 4.9268099298717095e-06, "loss": 0.15893517732620238, "step": 19030 }, { "epoch": 1.663026384763236, "grad_norm": 0.9457884201541922, "learning_rate": 4.9242687990825125e-06, "loss": 0.17479753494262695, "step": 19035 }, { "epoch": 1.6634632185916476, "grad_norm": 0.5331504286903284, "learning_rate": 4.921727687858555e-06, "loss": 0.1719387412071228, "step": 19040 }, { "epoch": 1.6639000524200593, "grad_norm": 0.5773298081201412, "learning_rate": 4.919186596856337e-06, "loss": 0.1631285548210144, "step": 19045 }, { "epoch": 1.664336886248471, "grad_norm": 0.6783175473654507, "learning_rate": 4.9166455267323555e-06, "loss": 0.17949156761169432, "step": 19050 }, { "epoch": 1.6647737200768828, "grad_norm": 0.6599282557780288, "learning_rate": 4.914104478143094e-06, "loss": 0.18700668811798096, "step": 19055 }, { "epoch": 1.6652105539052944, "grad_norm": 0.6198687528415002, "learning_rate": 4.911563451745036e-06, "loss": 0.1907437801361084, "step": 19060 }, { "epoch": 1.6656473877337061, "grad_norm": 0.6357140861229393, "learning_rate": 4.90902244819466e-06, "loss": 0.17084312438964844, "step": 19065 }, { "epoch": 1.6660842215621177, "grad_norm": 0.6661453905672643, "learning_rate": 4.906481468148439e-06, "loss": 0.19010574817657472, "step": 19070 }, { "epoch": 1.6665210553905294, "grad_norm": 0.6591825104788053, "learning_rate": 4.903940512262833e-06, "loss": 0.1655358076095581, "step": 19075 }, { "epoch": 1.6669578892189412, "grad_norm": 0.6076584187363134, "learning_rate": 4.9013995811943045e-06, "loss": 0.16678400039672853, "step": 19080 }, { "epoch": 1.667394723047353, "grad_norm": 0.599691048236573, "learning_rate": 4.898858675599307e-06, "loss": 0.20220799446105958, "step": 19085 }, { "epoch": 1.6678315568757645, "grad_norm": 0.5414376159269062, "learning_rate": 4.896317796134282e-06, "loss": 0.1958179235458374, "step": 19090 }, { "epoch": 1.668268390704176, "grad_norm": 0.6619028984255457, "learning_rate": 4.89377694345567e-06, "loss": 0.1998779296875, "step": 19095 }, { "epoch": 1.6687052245325877, "grad_norm": 0.5615615009688266, "learning_rate": 4.891236118219905e-06, "loss": 0.167061448097229, "step": 19100 }, { "epoch": 1.6691420583609995, "grad_norm": 0.5827830400257861, "learning_rate": 4.888695321083411e-06, "loss": 0.16627166271209717, "step": 19105 }, { "epoch": 1.6695788921894112, "grad_norm": 0.5461901272663956, "learning_rate": 4.886154552702606e-06, "loss": 0.16356393098831176, "step": 19110 }, { "epoch": 1.6700157260178228, "grad_norm": 0.8283521322531183, "learning_rate": 4.883613813733902e-06, "loss": 0.17584431171417236, "step": 19115 }, { "epoch": 1.6704525598462345, "grad_norm": 1.5872710653836357, "learning_rate": 4.881073104833698e-06, "loss": 0.20749390125274658, "step": 19120 }, { "epoch": 1.670889393674646, "grad_norm": 0.6470554238379457, "learning_rate": 4.878532426658391e-06, "loss": 0.1827142596244812, "step": 19125 }, { "epoch": 1.6713262275030578, "grad_norm": 0.5785524406267601, "learning_rate": 4.875991779864368e-06, "loss": 0.15887112617492677, "step": 19130 }, { "epoch": 1.6717630613314696, "grad_norm": 0.5484006193765605, "learning_rate": 4.873451165108008e-06, "loss": 0.16214207410812378, "step": 19135 }, { "epoch": 1.6721998951598813, "grad_norm": 0.6191064447259824, "learning_rate": 4.8709105830456815e-06, "loss": 0.19807882308959962, "step": 19140 }, { "epoch": 1.6726367289882929, "grad_norm": 0.646405916804554, "learning_rate": 4.868370034333753e-06, "loss": 0.16854584217071533, "step": 19145 }, { "epoch": 1.6730735628167044, "grad_norm": 0.6754338254994403, "learning_rate": 4.865829519628572e-06, "loss": 0.1750631809234619, "step": 19150 }, { "epoch": 1.6735103966451161, "grad_norm": 0.4862385091924122, "learning_rate": 4.863289039586486e-06, "loss": 0.20278007984161378, "step": 19155 }, { "epoch": 1.673947230473528, "grad_norm": 0.48551063576888076, "learning_rate": 4.8607485948638285e-06, "loss": 0.1638711452484131, "step": 19160 }, { "epoch": 1.6743840643019396, "grad_norm": 0.5809899564182687, "learning_rate": 4.85820818611693e-06, "loss": 0.1830535888671875, "step": 19165 }, { "epoch": 1.6748208981303512, "grad_norm": 0.5735514877295455, "learning_rate": 4.855667814002105e-06, "loss": 0.1839381694793701, "step": 19170 }, { "epoch": 1.675257731958763, "grad_norm": 0.6196926182607608, "learning_rate": 4.853127479175662e-06, "loss": 0.18332645893096924, "step": 19175 }, { "epoch": 1.6756945657871745, "grad_norm": 0.6310185730042125, "learning_rate": 4.850587182293903e-06, "loss": 0.18289127349853515, "step": 19180 }, { "epoch": 1.6761313996155862, "grad_norm": 0.6021643387767224, "learning_rate": 4.84804692401311e-06, "loss": 0.1810214877128601, "step": 19185 }, { "epoch": 1.676568233443998, "grad_norm": 0.5941577778160808, "learning_rate": 4.845506704989565e-06, "loss": 0.18072144985198973, "step": 19190 }, { "epoch": 1.6770050672724097, "grad_norm": 0.5217024149247843, "learning_rate": 4.842966525879537e-06, "loss": 0.18068124055862428, "step": 19195 }, { "epoch": 1.6774419011008213, "grad_norm": 0.5189010946397229, "learning_rate": 4.840426387339283e-06, "loss": 0.14314372539520265, "step": 19200 }, { "epoch": 1.6778787349292328, "grad_norm": 0.5455620804401328, "learning_rate": 4.837886290025051e-06, "loss": 0.16266745328903198, "step": 19205 }, { "epoch": 1.6783155687576445, "grad_norm": 0.5364431961726319, "learning_rate": 4.8353462345930805e-06, "loss": 0.18031975030899047, "step": 19210 }, { "epoch": 1.6787524025860563, "grad_norm": 0.7918847807228151, "learning_rate": 4.832806221699593e-06, "loss": 0.20365045070648194, "step": 19215 }, { "epoch": 1.679189236414468, "grad_norm": 0.5771268703995965, "learning_rate": 4.830266252000806e-06, "loss": 0.16543941497802733, "step": 19220 }, { "epoch": 1.6796260702428796, "grad_norm": 0.6012324104195245, "learning_rate": 4.8277263261529246e-06, "loss": 0.16598122119903563, "step": 19225 }, { "epoch": 1.6800629040712913, "grad_norm": 0.8012321455143955, "learning_rate": 4.825186444812139e-06, "loss": 0.14886083602905273, "step": 19230 }, { "epoch": 1.6804997378997029, "grad_norm": 0.6125425377556004, "learning_rate": 4.822646608634632e-06, "loss": 0.14890068769454956, "step": 19235 }, { "epoch": 1.6809365717281146, "grad_norm": 0.6054273055009861, "learning_rate": 4.8201068182765755e-06, "loss": 0.17375054359436035, "step": 19240 }, { "epoch": 1.6813734055565264, "grad_norm": 0.5642439476191218, "learning_rate": 4.817567074394122e-06, "loss": 0.16429462432861328, "step": 19245 }, { "epoch": 1.6818102393849381, "grad_norm": 0.6263387559470831, "learning_rate": 4.815027377643419e-06, "loss": 0.1942681312561035, "step": 19250 }, { "epoch": 1.6822470732133497, "grad_norm": 0.5427562764696469, "learning_rate": 4.8124877286806024e-06, "loss": 0.19231035709381103, "step": 19255 }, { "epoch": 1.6826839070417612, "grad_norm": 0.6105584775333833, "learning_rate": 4.8099481281617905e-06, "loss": 0.17660582065582275, "step": 19260 }, { "epoch": 1.683120740870173, "grad_norm": 0.5524551866988621, "learning_rate": 4.807408576743093e-06, "loss": 0.1772691249847412, "step": 19265 }, { "epoch": 1.6835575746985847, "grad_norm": 1.1939248418982549, "learning_rate": 4.804869075080608e-06, "loss": 0.19207994937896727, "step": 19270 }, { "epoch": 1.6839944085269964, "grad_norm": 0.5620075565680203, "learning_rate": 4.802329623830417e-06, "loss": 0.18117644786834716, "step": 19275 }, { "epoch": 1.684431242355408, "grad_norm": 0.5648481890016411, "learning_rate": 4.799790223648589e-06, "loss": 0.17697142362594603, "step": 19280 }, { "epoch": 1.6848680761838197, "grad_norm": 0.5641336062717383, "learning_rate": 4.797250875191181e-06, "loss": 0.17574074268341064, "step": 19285 }, { "epoch": 1.6853049100122313, "grad_norm": 0.7564345178799593, "learning_rate": 4.7947115791142366e-06, "loss": 0.16017602682113646, "step": 19290 }, { "epoch": 1.685741743840643, "grad_norm": 0.5836765556449336, "learning_rate": 4.792172336073787e-06, "loss": 0.186086106300354, "step": 19295 }, { "epoch": 1.6861785776690548, "grad_norm": 0.6160384355423797, "learning_rate": 4.7896331467258475e-06, "loss": 0.16727272272109986, "step": 19300 }, { "epoch": 1.6866154114974665, "grad_norm": 0.6090072780686924, "learning_rate": 4.787094011726423e-06, "loss": 0.20385642051696778, "step": 19305 }, { "epoch": 1.687052245325878, "grad_norm": 0.683859818362616, "learning_rate": 4.784554931731498e-06, "loss": 0.1940992593765259, "step": 19310 }, { "epoch": 1.6874890791542896, "grad_norm": 0.591850718286683, "learning_rate": 4.782015907397048e-06, "loss": 0.14139242172241212, "step": 19315 }, { "epoch": 1.6879259129827013, "grad_norm": 0.6493028748309289, "learning_rate": 4.779476939379034e-06, "loss": 0.20231022834777831, "step": 19320 }, { "epoch": 1.688362746811113, "grad_norm": 0.5757154594521559, "learning_rate": 4.776938028333401e-06, "loss": 0.1690185546875, "step": 19325 }, { "epoch": 1.6887995806395248, "grad_norm": 0.5310286108219124, "learning_rate": 4.774399174916077e-06, "loss": 0.17625283002853392, "step": 19330 }, { "epoch": 1.6892364144679364, "grad_norm": 0.6266940459343845, "learning_rate": 4.771860379782981e-06, "loss": 0.1866253137588501, "step": 19335 }, { "epoch": 1.689673248296348, "grad_norm": 0.582754005025786, "learning_rate": 4.769321643590013e-06, "loss": 0.19446396827697754, "step": 19340 }, { "epoch": 1.6901100821247597, "grad_norm": 0.5998037544151845, "learning_rate": 4.7667829669930564e-06, "loss": 0.17702720165252686, "step": 19345 }, { "epoch": 1.6905469159531714, "grad_norm": 0.5863126379601694, "learning_rate": 4.764244350647982e-06, "loss": 0.187822163105011, "step": 19350 }, { "epoch": 1.6909837497815832, "grad_norm": 0.569333770350483, "learning_rate": 4.761705795210644e-06, "loss": 0.1599108934402466, "step": 19355 }, { "epoch": 1.691420583609995, "grad_norm": 0.5208545527787752, "learning_rate": 4.759167301336883e-06, "loss": 0.17463113069534303, "step": 19360 }, { "epoch": 1.6918574174384065, "grad_norm": 0.6628318271669514, "learning_rate": 4.756628869682519e-06, "loss": 0.16262117624282837, "step": 19365 }, { "epoch": 1.692294251266818, "grad_norm": 0.5792311108274556, "learning_rate": 4.754090500903362e-06, "loss": 0.18021258115768432, "step": 19370 }, { "epoch": 1.6927310850952297, "grad_norm": 0.6671758204096875, "learning_rate": 4.751552195655199e-06, "loss": 0.16042641401290894, "step": 19375 }, { "epoch": 1.6931679189236415, "grad_norm": 0.5963141629359092, "learning_rate": 4.749013954593806e-06, "loss": 0.16910145282745362, "step": 19380 }, { "epoch": 1.6936047527520532, "grad_norm": 0.6341296533546676, "learning_rate": 4.7464757783749385e-06, "loss": 0.19528024196624755, "step": 19385 }, { "epoch": 1.6940415865804648, "grad_norm": 0.5479768009310013, "learning_rate": 4.743937667654339e-06, "loss": 0.19535797834396362, "step": 19390 }, { "epoch": 1.6944784204088763, "grad_norm": 0.5415971662730609, "learning_rate": 4.74139962308773e-06, "loss": 0.16639493703842162, "step": 19395 }, { "epoch": 1.694915254237288, "grad_norm": 0.6748459285708396, "learning_rate": 4.738861645330822e-06, "loss": 0.2041701316833496, "step": 19400 }, { "epoch": 1.6953520880656998, "grad_norm": 0.625571063505882, "learning_rate": 4.736323735039299e-06, "loss": 0.1776095986366272, "step": 19405 }, { "epoch": 1.6957889218941116, "grad_norm": 0.49446958688026244, "learning_rate": 4.733785892868834e-06, "loss": 0.168043315410614, "step": 19410 }, { "epoch": 1.696225755722523, "grad_norm": 0.6192537857530683, "learning_rate": 4.731248119475084e-06, "loss": 0.1715185523033142, "step": 19415 }, { "epoch": 1.6966625895509349, "grad_norm": 0.7106700116031618, "learning_rate": 4.7287104155136835e-06, "loss": 0.1833965539932251, "step": 19420 }, { "epoch": 1.6970994233793464, "grad_norm": 0.5986980824581933, "learning_rate": 4.7261727816402536e-06, "loss": 0.16760267019271852, "step": 19425 }, { "epoch": 1.6975362572077581, "grad_norm": 0.6410445583013201, "learning_rate": 4.723635218510392e-06, "loss": 0.175468909740448, "step": 19430 }, { "epoch": 1.69797309103617, "grad_norm": 0.6159806641016761, "learning_rate": 4.721097726779683e-06, "loss": 0.15514070987701417, "step": 19435 }, { "epoch": 1.6984099248645816, "grad_norm": 0.6179859492749605, "learning_rate": 4.71856030710369e-06, "loss": 0.17931100130081176, "step": 19440 }, { "epoch": 1.6988467586929932, "grad_norm": 0.6256834200897459, "learning_rate": 4.716022960137957e-06, "loss": 0.17748911380767823, "step": 19445 }, { "epoch": 1.6992835925214047, "grad_norm": 0.6976425943448614, "learning_rate": 4.7134856865380106e-06, "loss": 0.18380630016326904, "step": 19450 }, { "epoch": 1.6997204263498165, "grad_norm": 0.5998525661721099, "learning_rate": 4.710948486959359e-06, "loss": 0.20163311958312988, "step": 19455 }, { "epoch": 1.7001572601782282, "grad_norm": 0.5004622445185871, "learning_rate": 4.708411362057491e-06, "loss": 0.17498404979705812, "step": 19460 }, { "epoch": 1.70059409400664, "grad_norm": 0.5702313039236988, "learning_rate": 4.705874312487877e-06, "loss": 0.16713014841079712, "step": 19465 }, { "epoch": 1.7010309278350515, "grad_norm": 0.7026072406488952, "learning_rate": 4.703337338905963e-06, "loss": 0.17482031583786012, "step": 19470 }, { "epoch": 1.7014677616634633, "grad_norm": 0.6115288746690106, "learning_rate": 4.70080044196718e-06, "loss": 0.19157130718231202, "step": 19475 }, { "epoch": 1.7019045954918748, "grad_norm": 0.6044505353121654, "learning_rate": 4.69826362232694e-06, "loss": 0.17268983125686646, "step": 19480 }, { "epoch": 1.7023414293202865, "grad_norm": 0.4999256274247592, "learning_rate": 4.695726880640632e-06, "loss": 0.16932857036590576, "step": 19485 }, { "epoch": 1.7027782631486983, "grad_norm": 0.5274487148675513, "learning_rate": 4.6931902175636265e-06, "loss": 0.17116239070892333, "step": 19490 }, { "epoch": 1.70321509697711, "grad_norm": 0.5931835673373703, "learning_rate": 4.690653633751274e-06, "loss": 0.16879417896270751, "step": 19495 }, { "epoch": 1.7036519308055216, "grad_norm": 0.6155507578933245, "learning_rate": 4.688117129858901e-06, "loss": 0.1635751962661743, "step": 19500 }, { "epoch": 1.704088764633933, "grad_norm": 0.558754870094852, "learning_rate": 4.685580706541818e-06, "loss": 0.15595991611480714, "step": 19505 }, { "epoch": 1.7045255984623449, "grad_norm": 0.6439864415789515, "learning_rate": 4.683044364455313e-06, "loss": 0.1817157745361328, "step": 19510 }, { "epoch": 1.7049624322907566, "grad_norm": 0.6042091261601711, "learning_rate": 4.680508104254652e-06, "loss": 0.184955370426178, "step": 19515 }, { "epoch": 1.7053992661191684, "grad_norm": 0.7176639928264655, "learning_rate": 4.677971926595081e-06, "loss": 0.19779753684997559, "step": 19520 }, { "epoch": 1.70583609994758, "grad_norm": 0.5536457561174699, "learning_rate": 4.675435832131826e-06, "loss": 0.2048640489578247, "step": 19525 }, { "epoch": 1.7062729337759917, "grad_norm": 0.5285982384319959, "learning_rate": 4.672899821520089e-06, "loss": 0.17108216285705566, "step": 19530 }, { "epoch": 1.7067097676044032, "grad_norm": 0.6526382057986191, "learning_rate": 4.670363895415049e-06, "loss": 0.1857698917388916, "step": 19535 }, { "epoch": 1.707146601432815, "grad_norm": 0.5820057128143211, "learning_rate": 4.667828054471868e-06, "loss": 0.17224717140197754, "step": 19540 }, { "epoch": 1.7075834352612267, "grad_norm": 0.636089542481102, "learning_rate": 4.66529229934568e-06, "loss": 0.1865911602973938, "step": 19545 }, { "epoch": 1.7080202690896384, "grad_norm": 0.7084272453685797, "learning_rate": 4.662756630691604e-06, "loss": 0.180906343460083, "step": 19550 }, { "epoch": 1.70845710291805, "grad_norm": 0.5589331138746862, "learning_rate": 4.660221049164731e-06, "loss": 0.15644609928131104, "step": 19555 }, { "epoch": 1.7088939367464615, "grad_norm": 0.6072822613271283, "learning_rate": 4.657685555420135e-06, "loss": 0.1402282476425171, "step": 19560 }, { "epoch": 1.7093307705748733, "grad_norm": 0.6234994186182674, "learning_rate": 4.655150150112857e-06, "loss": 0.17993576526641847, "step": 19565 }, { "epoch": 1.709767604403285, "grad_norm": 0.5122083638118591, "learning_rate": 4.6526148338979244e-06, "loss": 0.17151449918746947, "step": 19570 }, { "epoch": 1.7102044382316968, "grad_norm": 0.7076918178180237, "learning_rate": 4.650079607430341e-06, "loss": 0.19115021228790283, "step": 19575 }, { "epoch": 1.7106412720601083, "grad_norm": 0.6040042466665192, "learning_rate": 4.647544471365084e-06, "loss": 0.15677433013916015, "step": 19580 }, { "epoch": 1.71107810588852, "grad_norm": 0.5966485035479046, "learning_rate": 4.645009426357109e-06, "loss": 0.17903788089752198, "step": 19585 }, { "epoch": 1.7115149397169316, "grad_norm": 0.6256748739582093, "learning_rate": 4.642474473061347e-06, "loss": 0.18188307285308838, "step": 19590 }, { "epoch": 1.7119517735453433, "grad_norm": 0.5916177855213729, "learning_rate": 4.639939612132708e-06, "loss": 0.19603149890899657, "step": 19595 }, { "epoch": 1.712388607373755, "grad_norm": 0.5067529045872141, "learning_rate": 4.637404844226074e-06, "loss": 0.172773277759552, "step": 19600 }, { "epoch": 1.7128254412021668, "grad_norm": 0.5821078634841764, "learning_rate": 4.634870169996305e-06, "loss": 0.18144543170928956, "step": 19605 }, { "epoch": 1.7132622750305784, "grad_norm": 0.6025994144519502, "learning_rate": 4.6323355900982374e-06, "loss": 0.16673436164855956, "step": 19610 }, { "epoch": 1.71369910885899, "grad_norm": 0.6357060336526069, "learning_rate": 4.629801105186684e-06, "loss": 0.16550554037094117, "step": 19615 }, { "epoch": 1.7141359426874017, "grad_norm": 0.6390686135115385, "learning_rate": 4.6272667159164306e-06, "loss": 0.17495558261871338, "step": 19620 }, { "epoch": 1.7145727765158134, "grad_norm": 0.6107861714189348, "learning_rate": 4.624732422942242e-06, "loss": 0.18138309717178344, "step": 19625 }, { "epoch": 1.7150096103442252, "grad_norm": 0.6007854804244973, "learning_rate": 4.622198226918852e-06, "loss": 0.1677001953125, "step": 19630 }, { "epoch": 1.7154464441726367, "grad_norm": 0.6220532212975398, "learning_rate": 4.619664128500973e-06, "loss": 0.18843464851379393, "step": 19635 }, { "epoch": 1.7158832780010485, "grad_norm": 0.7768429227165378, "learning_rate": 4.617130128343296e-06, "loss": 0.1748513698577881, "step": 19640 }, { "epoch": 1.71632011182946, "grad_norm": 0.5496197246620576, "learning_rate": 4.614596227100479e-06, "loss": 0.17603795528411864, "step": 19645 }, { "epoch": 1.7167569456578717, "grad_norm": 0.7725205727021744, "learning_rate": 4.612062425427159e-06, "loss": 0.16131024360656737, "step": 19650 }, { "epoch": 1.7171937794862835, "grad_norm": 0.5886142778312081, "learning_rate": 4.609528723977951e-06, "loss": 0.1794520854949951, "step": 19655 }, { "epoch": 1.7176306133146952, "grad_norm": 0.6038217002748785, "learning_rate": 4.6069951234074325e-06, "loss": 0.1993764281272888, "step": 19660 }, { "epoch": 1.7180674471431068, "grad_norm": 0.6984879171292152, "learning_rate": 4.6044616243701655e-06, "loss": 0.17967225313186647, "step": 19665 }, { "epoch": 1.7185042809715183, "grad_norm": 0.5929808787933737, "learning_rate": 4.601928227520683e-06, "loss": 0.17966129779815673, "step": 19670 }, { "epoch": 1.71894111479993, "grad_norm": 0.6038274967163877, "learning_rate": 4.599394933513489e-06, "loss": 0.17036974430084229, "step": 19675 }, { "epoch": 1.7193779486283418, "grad_norm": 0.6619307978383057, "learning_rate": 4.596861743003066e-06, "loss": 0.19767483472824096, "step": 19680 }, { "epoch": 1.7198147824567536, "grad_norm": 0.6482956493684525, "learning_rate": 4.594328656643864e-06, "loss": 0.14982024431228638, "step": 19685 }, { "epoch": 1.720251616285165, "grad_norm": 0.6282161790456926, "learning_rate": 4.5917956750903084e-06, "loss": 0.14917802810668945, "step": 19690 }, { "epoch": 1.7206884501135769, "grad_norm": 0.6439977815843368, "learning_rate": 4.5892627989968004e-06, "loss": 0.17273736000061035, "step": 19695 }, { "epoch": 1.7211252839419884, "grad_norm": 0.6735244560049635, "learning_rate": 4.586730029017708e-06, "loss": 0.20054211616516113, "step": 19700 }, { "epoch": 1.7215621177704001, "grad_norm": 0.5281863269622031, "learning_rate": 4.584197365807377e-06, "loss": 0.18372178077697754, "step": 19705 }, { "epoch": 1.7219989515988119, "grad_norm": 0.5665957374848072, "learning_rate": 4.5816648100201235e-06, "loss": 0.18101965188980101, "step": 19710 }, { "epoch": 1.7224357854272236, "grad_norm": 0.6858729051155057, "learning_rate": 4.579132362310237e-06, "loss": 0.15398283004760743, "step": 19715 }, { "epoch": 1.7228726192556352, "grad_norm": 0.7240310852180675, "learning_rate": 4.5766000233319794e-06, "loss": 0.18175678253173827, "step": 19720 }, { "epoch": 1.7233094530840467, "grad_norm": 0.598435773360199, "learning_rate": 4.574067793739579e-06, "loss": 0.19659287929534913, "step": 19725 }, { "epoch": 1.7237462869124585, "grad_norm": 0.504373041915018, "learning_rate": 4.571535674187242e-06, "loss": 0.16379776000976562, "step": 19730 }, { "epoch": 1.7241831207408702, "grad_norm": 0.5571605525680569, "learning_rate": 4.569003665329146e-06, "loss": 0.1449829339981079, "step": 19735 }, { "epoch": 1.724619954569282, "grad_norm": 0.5439173666751261, "learning_rate": 4.566471767819438e-06, "loss": 0.14862596988677979, "step": 19740 }, { "epoch": 1.7250567883976935, "grad_norm": 0.5920989750941534, "learning_rate": 4.563939982312235e-06, "loss": 0.19144500494003297, "step": 19745 }, { "epoch": 1.725493622226105, "grad_norm": 0.5833594404451077, "learning_rate": 4.561408309461629e-06, "loss": 0.20724053382873536, "step": 19750 }, { "epoch": 1.7259304560545168, "grad_norm": 0.5697261047816427, "learning_rate": 4.558876749921678e-06, "loss": 0.16778628826141356, "step": 19755 }, { "epoch": 1.7263672898829285, "grad_norm": 0.6792175166365326, "learning_rate": 4.556345304346413e-06, "loss": 0.19546427726745605, "step": 19760 }, { "epoch": 1.7268041237113403, "grad_norm": 0.5508119403416027, "learning_rate": 4.553813973389838e-06, "loss": 0.19635381698608398, "step": 19765 }, { "epoch": 1.727240957539752, "grad_norm": 0.638022927762407, "learning_rate": 4.551282757705922e-06, "loss": 0.15916681289672852, "step": 19770 }, { "epoch": 1.7276777913681636, "grad_norm": 0.6170107925605289, "learning_rate": 4.548751657948612e-06, "loss": 0.14979398250579834, "step": 19775 }, { "epoch": 1.728114625196575, "grad_norm": 0.6962550019726564, "learning_rate": 4.546220674771819e-06, "loss": 0.16285388469696044, "step": 19780 }, { "epoch": 1.7285514590249869, "grad_norm": 0.5248455604763207, "learning_rate": 4.543689808829424e-06, "loss": 0.17522581815719604, "step": 19785 }, { "epoch": 1.7289882928533986, "grad_norm": 0.5790327753738862, "learning_rate": 4.541159060775279e-06, "loss": 0.18026931285858155, "step": 19790 }, { "epoch": 1.7294251266818104, "grad_norm": 0.5363514830375975, "learning_rate": 4.5386284312632065e-06, "loss": 0.17350648641586303, "step": 19795 }, { "epoch": 1.729861960510222, "grad_norm": 0.648629102482749, "learning_rate": 4.536097920946996e-06, "loss": 0.18087779283523558, "step": 19800 }, { "epoch": 1.7302987943386334, "grad_norm": 0.6384902182918702, "learning_rate": 4.533567530480409e-06, "loss": 0.20166492462158203, "step": 19805 }, { "epoch": 1.7307356281670452, "grad_norm": 0.5256085292682705, "learning_rate": 4.531037260517174e-06, "loss": 0.18446699380874634, "step": 19810 }, { "epoch": 1.731172461995457, "grad_norm": 0.5172820608021124, "learning_rate": 4.528507111710993e-06, "loss": 0.19069366455078124, "step": 19815 }, { "epoch": 1.7316092958238687, "grad_norm": 0.5725090480621093, "learning_rate": 4.525977084715526e-06, "loss": 0.1469299912452698, "step": 19820 }, { "epoch": 1.7320461296522802, "grad_norm": 0.6801846686829984, "learning_rate": 4.523447180184413e-06, "loss": 0.20072486400604247, "step": 19825 }, { "epoch": 1.732482963480692, "grad_norm": 0.5027179801791636, "learning_rate": 4.520917398771255e-06, "loss": 0.17971196174621581, "step": 19830 }, { "epoch": 1.7329197973091035, "grad_norm": 0.5731256733472416, "learning_rate": 4.518387741129626e-06, "loss": 0.16354256868362427, "step": 19835 }, { "epoch": 1.7333566311375153, "grad_norm": 0.6086233857234209, "learning_rate": 4.515858207913067e-06, "loss": 0.2129818916320801, "step": 19840 }, { "epoch": 1.733793464965927, "grad_norm": 0.6514526946172939, "learning_rate": 4.513328799775081e-06, "loss": 0.15921753644943237, "step": 19845 }, { "epoch": 1.7342302987943388, "grad_norm": 0.5507217195470218, "learning_rate": 4.51079951736915e-06, "loss": 0.15784744024276734, "step": 19850 }, { "epoch": 1.7346671326227503, "grad_norm": 0.6768043511697277, "learning_rate": 4.50827036134871e-06, "loss": 0.18449969291687013, "step": 19855 }, { "epoch": 1.7351039664511618, "grad_norm": 0.58309931293882, "learning_rate": 4.505741332367175e-06, "loss": 0.18213267326354982, "step": 19860 }, { "epoch": 1.7355408002795736, "grad_norm": 0.6179076758309741, "learning_rate": 4.503212431077922e-06, "loss": 0.18243502378463744, "step": 19865 }, { "epoch": 1.7359776341079853, "grad_norm": 0.7485641648024584, "learning_rate": 4.500683658134296e-06, "loss": 0.17085258960723876, "step": 19870 }, { "epoch": 1.736414467936397, "grad_norm": 0.716330534504643, "learning_rate": 4.498155014189608e-06, "loss": 0.18973127603530884, "step": 19875 }, { "epoch": 1.7368513017648086, "grad_norm": 0.5916436002368851, "learning_rate": 4.495626499897137e-06, "loss": 0.1565786600112915, "step": 19880 }, { "epoch": 1.7372881355932204, "grad_norm": 0.5945436243438945, "learning_rate": 4.493098115910125e-06, "loss": 0.17804157733917236, "step": 19885 }, { "epoch": 1.737724969421632, "grad_norm": 0.6148343336625646, "learning_rate": 4.490569862881784e-06, "loss": 0.187382709980011, "step": 19890 }, { "epoch": 1.7381618032500437, "grad_norm": 0.6809268702114681, "learning_rate": 4.488041741465293e-06, "loss": 0.18655531406402587, "step": 19895 }, { "epoch": 1.7385986370784554, "grad_norm": 0.5903593931689002, "learning_rate": 4.485513752313791e-06, "loss": 0.17752169370651244, "step": 19900 }, { "epoch": 1.7390354709068672, "grad_norm": 0.5832612472493053, "learning_rate": 4.48298589608039e-06, "loss": 0.18077127933502196, "step": 19905 }, { "epoch": 1.7394723047352787, "grad_norm": 0.6239851662089114, "learning_rate": 4.480458173418166e-06, "loss": 0.1751827836036682, "step": 19910 }, { "epoch": 1.7399091385636902, "grad_norm": 0.7471636570261369, "learning_rate": 4.477930584980153e-06, "loss": 0.18318099975585939, "step": 19915 }, { "epoch": 1.740345972392102, "grad_norm": 0.6163383886788283, "learning_rate": 4.475403131419361e-06, "loss": 0.17543070316314696, "step": 19920 }, { "epoch": 1.7407828062205137, "grad_norm": 0.5927981091253289, "learning_rate": 4.4728758133887596e-06, "loss": 0.17161753177642822, "step": 19925 }, { "epoch": 1.7412196400489255, "grad_norm": 0.7174223703627447, "learning_rate": 4.470348631541283e-06, "loss": 0.16486233472824097, "step": 19930 }, { "epoch": 1.741656473877337, "grad_norm": 0.7582566793320731, "learning_rate": 4.467821586529834e-06, "loss": 0.13073662519454957, "step": 19935 }, { "epoch": 1.7420933077057488, "grad_norm": 0.6769123520318173, "learning_rate": 4.465294679007275e-06, "loss": 0.17765398025512696, "step": 19940 }, { "epoch": 1.7425301415341603, "grad_norm": 0.6550358008471316, "learning_rate": 4.462767909626438e-06, "loss": 0.19966169595718383, "step": 19945 }, { "epoch": 1.742966975362572, "grad_norm": 0.5459851592789152, "learning_rate": 4.460241279040114e-06, "loss": 0.16847480535507203, "step": 19950 }, { "epoch": 1.7434038091909838, "grad_norm": 0.6413839754871811, "learning_rate": 4.457714787901062e-06, "loss": 0.20467519760131836, "step": 19955 }, { "epoch": 1.7438406430193956, "grad_norm": 0.6701854537650911, "learning_rate": 4.455188436862003e-06, "loss": 0.16987786293029786, "step": 19960 }, { "epoch": 1.744277476847807, "grad_norm": 0.6690866018876294, "learning_rate": 4.452662226575624e-06, "loss": 0.1845890164375305, "step": 19965 }, { "epoch": 1.7447143106762186, "grad_norm": 0.6633540392522566, "learning_rate": 4.4501361576945735e-06, "loss": 0.14122753143310546, "step": 19970 }, { "epoch": 1.7451511445046304, "grad_norm": 0.6586795016587733, "learning_rate": 4.447610230871466e-06, "loss": 0.19457796812057496, "step": 19975 }, { "epoch": 1.7455879783330421, "grad_norm": 0.5651303021705673, "learning_rate": 4.445084446758874e-06, "loss": 0.16650800704956054, "step": 19980 }, { "epoch": 1.7460248121614539, "grad_norm": 0.6279956382803014, "learning_rate": 4.442558806009339e-06, "loss": 0.17655171155929567, "step": 19985 }, { "epoch": 1.7464616459898654, "grad_norm": 0.59156861034099, "learning_rate": 4.440033309275362e-06, "loss": 0.1793489933013916, "step": 19990 }, { "epoch": 1.7468984798182772, "grad_norm": 0.6757526700795435, "learning_rate": 4.4375079572094105e-06, "loss": 0.16336650848388673, "step": 19995 }, { "epoch": 1.7473353136466887, "grad_norm": 0.6104901820990988, "learning_rate": 4.434982750463908e-06, "loss": 0.15299515724182128, "step": 20000 }, { "epoch": 1.7477721474751005, "grad_norm": 0.595908012147378, "learning_rate": 4.4324576896912505e-06, "loss": 0.17431492805480958, "step": 20005 }, { "epoch": 1.7482089813035122, "grad_norm": 0.5816947376976025, "learning_rate": 4.429932775543784e-06, "loss": 0.18522677421569825, "step": 20010 }, { "epoch": 1.748645815131924, "grad_norm": 0.6135018018601098, "learning_rate": 4.4274080086738244e-06, "loss": 0.20302019119262696, "step": 20015 }, { "epoch": 1.7490826489603355, "grad_norm": 0.66315725152238, "learning_rate": 4.4248833897336495e-06, "loss": 0.20196094512939453, "step": 20020 }, { "epoch": 1.749519482788747, "grad_norm": 0.6229805982341853, "learning_rate": 4.4223589193754964e-06, "loss": 0.18332602977752685, "step": 20025 }, { "epoch": 1.7499563166171588, "grad_norm": 0.6243305809900407, "learning_rate": 4.419834598251567e-06, "loss": 0.1590060591697693, "step": 20030 }, { "epoch": 1.7503931504455705, "grad_norm": 0.7180058153465698, "learning_rate": 4.417310427014021e-06, "loss": 0.1797865390777588, "step": 20035 }, { "epoch": 1.7508299842739823, "grad_norm": 0.7221966303737534, "learning_rate": 4.414786406314981e-06, "loss": 0.16842224597930908, "step": 20040 }, { "epoch": 1.7512668181023938, "grad_norm": 0.71319878017858, "learning_rate": 4.412262536806528e-06, "loss": 0.1693312406539917, "step": 20045 }, { "epoch": 1.7517036519308056, "grad_norm": 0.4794620593364455, "learning_rate": 4.409738819140712e-06, "loss": 0.16138641834259032, "step": 20050 }, { "epoch": 1.752140485759217, "grad_norm": 0.583093280334774, "learning_rate": 4.407215253969533e-06, "loss": 0.16250984668731688, "step": 20055 }, { "epoch": 1.7525773195876289, "grad_norm": 0.6424350068886179, "learning_rate": 4.404691841944959e-06, "loss": 0.1811502456665039, "step": 20060 }, { "epoch": 1.7530141534160406, "grad_norm": 0.5637801735661647, "learning_rate": 4.402168583718917e-06, "loss": 0.15619440078735353, "step": 20065 }, { "epoch": 1.7534509872444524, "grad_norm": 0.6880900720773032, "learning_rate": 4.399645479943295e-06, "loss": 0.1618585467338562, "step": 20070 }, { "epoch": 1.753887821072864, "grad_norm": 0.6170516161243987, "learning_rate": 4.397122531269935e-06, "loss": 0.16856353282928466, "step": 20075 }, { "epoch": 1.7543246549012754, "grad_norm": 0.6648777796724571, "learning_rate": 4.394599738350648e-06, "loss": 0.17894654273986815, "step": 20080 }, { "epoch": 1.7547614887296872, "grad_norm": 0.7356050209023655, "learning_rate": 4.392077101837197e-06, "loss": 0.17335898876190187, "step": 20085 }, { "epoch": 1.755198322558099, "grad_norm": 0.5379908792501084, "learning_rate": 4.389554622381311e-06, "loss": 0.1813305139541626, "step": 20090 }, { "epoch": 1.7556351563865107, "grad_norm": 1.1594342402080506, "learning_rate": 4.3870323006346745e-06, "loss": 0.17799580097198486, "step": 20095 }, { "epoch": 1.7560719902149222, "grad_norm": 0.6106148533528606, "learning_rate": 4.38451013724893e-06, "loss": 0.1455399751663208, "step": 20100 }, { "epoch": 1.7565088240433337, "grad_norm": 0.7890604357746451, "learning_rate": 4.381988132875687e-06, "loss": 0.15925378799438478, "step": 20105 }, { "epoch": 1.7569456578717455, "grad_norm": 0.6248720727557348, "learning_rate": 4.3794662881665e-06, "loss": 0.1866814136505127, "step": 20110 }, { "epoch": 1.7573824917001573, "grad_norm": 0.7030135592015211, "learning_rate": 4.376944603772896e-06, "loss": 0.18287017345428466, "step": 20115 }, { "epoch": 1.757819325528569, "grad_norm": 0.5125003363940731, "learning_rate": 4.3744230803463525e-06, "loss": 0.16498709917068483, "step": 20120 }, { "epoch": 1.7582561593569808, "grad_norm": 0.7228271767801205, "learning_rate": 4.37190171853831e-06, "loss": 0.17065644264221191, "step": 20125 }, { "epoch": 1.7586929931853923, "grad_norm": 0.6001024683871833, "learning_rate": 4.369380519000164e-06, "loss": 0.18214515447616578, "step": 20130 }, { "epoch": 1.7591298270138038, "grad_norm": 0.5946039997443722, "learning_rate": 4.366859482383271e-06, "loss": 0.1440805196762085, "step": 20135 }, { "epoch": 1.7595666608422156, "grad_norm": 0.8135039549656833, "learning_rate": 4.36433860933894e-06, "loss": 0.2047485113143921, "step": 20140 }, { "epoch": 1.7600034946706273, "grad_norm": 0.6063115980734104, "learning_rate": 4.361817900518444e-06, "loss": 0.17397065162658693, "step": 20145 }, { "epoch": 1.760440328499039, "grad_norm": 0.6526674208793788, "learning_rate": 4.359297356573011e-06, "loss": 0.1513993740081787, "step": 20150 }, { "epoch": 1.7608771623274506, "grad_norm": 0.5891547565776343, "learning_rate": 4.356776978153824e-06, "loss": 0.170151948928833, "step": 20155 }, { "epoch": 1.7613139961558621, "grad_norm": 0.6116605760818937, "learning_rate": 4.354256765912028e-06, "loss": 0.16755841970443724, "step": 20160 }, { "epoch": 1.761750829984274, "grad_norm": 0.7320417050594779, "learning_rate": 4.351736720498725e-06, "loss": 0.1996365785598755, "step": 20165 }, { "epoch": 1.7621876638126857, "grad_norm": 0.5917416103448816, "learning_rate": 4.349216842564965e-06, "loss": 0.18787981271743776, "step": 20170 }, { "epoch": 1.7626244976410974, "grad_norm": 0.5460531640408665, "learning_rate": 4.346697132761766e-06, "loss": 0.18028898239135743, "step": 20175 }, { "epoch": 1.7630613314695092, "grad_norm": 0.5799145952070212, "learning_rate": 4.3441775917400954e-06, "loss": 0.15970361232757568, "step": 20180 }, { "epoch": 1.7634981652979207, "grad_norm": 0.5840337157833959, "learning_rate": 4.341658220150882e-06, "loss": 0.18258785009384154, "step": 20185 }, { "epoch": 1.7639349991263322, "grad_norm": 0.5094027910010694, "learning_rate": 4.339139018645007e-06, "loss": 0.16638256311416627, "step": 20190 }, { "epoch": 1.764371832954744, "grad_norm": 0.5346072255053892, "learning_rate": 4.336619987873309e-06, "loss": 0.19100072383880615, "step": 20195 }, { "epoch": 1.7648086667831557, "grad_norm": 0.6789649056583799, "learning_rate": 4.334101128486583e-06, "loss": 0.18165762424468995, "step": 20200 }, { "epoch": 1.7652455006115675, "grad_norm": 0.6633915020617073, "learning_rate": 4.331582441135578e-06, "loss": 0.18634629249572754, "step": 20205 }, { "epoch": 1.765682334439979, "grad_norm": 0.6829775631202236, "learning_rate": 4.329063926471e-06, "loss": 0.17097940444946289, "step": 20210 }, { "epoch": 1.7661191682683905, "grad_norm": 0.6093035425168928, "learning_rate": 4.32654558514351e-06, "loss": 0.1795312285423279, "step": 20215 }, { "epoch": 1.7665560020968023, "grad_norm": 0.5753593399889612, "learning_rate": 4.324027417803725e-06, "loss": 0.17949742078781128, "step": 20220 }, { "epoch": 1.766992835925214, "grad_norm": 0.5720968106311214, "learning_rate": 4.321509425102216e-06, "loss": 0.17798991203308107, "step": 20225 }, { "epoch": 1.7674296697536258, "grad_norm": 0.5316897765629458, "learning_rate": 4.318991607689511e-06, "loss": 0.163788104057312, "step": 20230 }, { "epoch": 1.7678665035820373, "grad_norm": 0.6254295569327368, "learning_rate": 4.316473966216087e-06, "loss": 0.16868382692337036, "step": 20235 }, { "epoch": 1.768303337410449, "grad_norm": 0.6059335889706522, "learning_rate": 4.313956501332383e-06, "loss": 0.18055074214935302, "step": 20240 }, { "epoch": 1.7687401712388606, "grad_norm": 0.579334695559203, "learning_rate": 4.311439213688786e-06, "loss": 0.1928791284561157, "step": 20245 }, { "epoch": 1.7691770050672724, "grad_norm": 0.6131920516425354, "learning_rate": 4.308922103935643e-06, "loss": 0.18324716091156007, "step": 20250 }, { "epoch": 1.7696138388956841, "grad_norm": 0.5312578863489024, "learning_rate": 4.306405172723251e-06, "loss": 0.15808384418487548, "step": 20255 }, { "epoch": 1.7700506727240959, "grad_norm": 0.8164714651904136, "learning_rate": 4.303888420701862e-06, "loss": 0.1630309820175171, "step": 20260 }, { "epoch": 1.7704875065525074, "grad_norm": 0.6161534125064186, "learning_rate": 4.301371848521681e-06, "loss": 0.19202433824539183, "step": 20265 }, { "epoch": 1.770924340380919, "grad_norm": 0.696787508949583, "learning_rate": 4.298855456832868e-06, "loss": 0.20099430084228515, "step": 20270 }, { "epoch": 1.7713611742093307, "grad_norm": 0.5692910772494594, "learning_rate": 4.296339246285535e-06, "loss": 0.13787081241607665, "step": 20275 }, { "epoch": 1.7717980080377425, "grad_norm": 0.6836877068043828, "learning_rate": 4.293823217529748e-06, "loss": 0.15474355220794678, "step": 20280 }, { "epoch": 1.7722348418661542, "grad_norm": 0.6076980390870114, "learning_rate": 4.291307371215526e-06, "loss": 0.16370487213134766, "step": 20285 }, { "epoch": 1.7726716756945657, "grad_norm": 0.7487781614630167, "learning_rate": 4.288791707992841e-06, "loss": 0.1621667504310608, "step": 20290 }, { "epoch": 1.7731085095229775, "grad_norm": 0.6672822746901119, "learning_rate": 4.2862762285116194e-06, "loss": 0.16334712505340576, "step": 20295 }, { "epoch": 1.773545343351389, "grad_norm": 0.5290431621950769, "learning_rate": 4.283760933421734e-06, "loss": 0.19384412765502929, "step": 20300 }, { "epoch": 1.7739821771798008, "grad_norm": 0.5718570175710077, "learning_rate": 4.281245823373017e-06, "loss": 0.18158409595489503, "step": 20305 }, { "epoch": 1.7744190110082125, "grad_norm": 0.5657214587768606, "learning_rate": 4.278730899015248e-06, "loss": 0.17949318885803223, "step": 20310 }, { "epoch": 1.7748558448366243, "grad_norm": 0.6661153698915433, "learning_rate": 4.276216160998161e-06, "loss": 0.17450375556945802, "step": 20315 }, { "epoch": 1.7752926786650358, "grad_norm": 0.737505463094826, "learning_rate": 4.273701609971443e-06, "loss": 0.18407120704650878, "step": 20320 }, { "epoch": 1.7757295124934473, "grad_norm": 0.6879201615048898, "learning_rate": 4.271187246584732e-06, "loss": 0.17638111114501953, "step": 20325 }, { "epoch": 1.776166346321859, "grad_norm": 0.7952742990940821, "learning_rate": 4.268673071487612e-06, "loss": 0.17280633449554444, "step": 20330 }, { "epoch": 1.7766031801502709, "grad_norm": 0.6860764402152583, "learning_rate": 4.266159085329626e-06, "loss": 0.152028226852417, "step": 20335 }, { "epoch": 1.7770400139786826, "grad_norm": 0.6167111127913754, "learning_rate": 4.263645288760264e-06, "loss": 0.16226956844329835, "step": 20340 }, { "epoch": 1.7774768478070941, "grad_norm": 0.5823899464029364, "learning_rate": 4.26113168242897e-06, "loss": 0.1582628846168518, "step": 20345 }, { "epoch": 1.777913681635506, "grad_norm": 0.6118554093045738, "learning_rate": 4.258618266985136e-06, "loss": 0.1703661322593689, "step": 20350 }, { "epoch": 1.7783505154639174, "grad_norm": 0.6114429544742476, "learning_rate": 4.256105043078105e-06, "loss": 0.15278153419494628, "step": 20355 }, { "epoch": 1.7787873492923292, "grad_norm": 0.665514856574033, "learning_rate": 4.253592011357174e-06, "loss": 0.16145018339157105, "step": 20360 }, { "epoch": 1.779224183120741, "grad_norm": 0.6063849582483647, "learning_rate": 4.251079172471583e-06, "loss": 0.17374430894851683, "step": 20365 }, { "epoch": 1.7796610169491527, "grad_norm": 0.7350823593431246, "learning_rate": 4.24856652707053e-06, "loss": 0.18251967430114746, "step": 20370 }, { "epoch": 1.7800978507775642, "grad_norm": 0.6170275487504783, "learning_rate": 4.246054075803159e-06, "loss": 0.17930977344512938, "step": 20375 }, { "epoch": 1.7805346846059757, "grad_norm": 0.6232912763197369, "learning_rate": 4.243541819318563e-06, "loss": 0.14960694313049316, "step": 20380 }, { "epoch": 1.7809715184343875, "grad_norm": 0.511828722353086, "learning_rate": 4.241029758265789e-06, "loss": 0.18583400249481202, "step": 20385 }, { "epoch": 1.7814083522627993, "grad_norm": 0.5294900351511221, "learning_rate": 4.238517893293831e-06, "loss": 0.18944849967956542, "step": 20390 }, { "epoch": 1.781845186091211, "grad_norm": 0.8000194411578129, "learning_rate": 4.236006225051629e-06, "loss": 0.16046221256256105, "step": 20395 }, { "epoch": 1.7822820199196225, "grad_norm": 0.6445752392050965, "learning_rate": 4.233494754188077e-06, "loss": 0.1920769214630127, "step": 20400 }, { "epoch": 1.7827188537480343, "grad_norm": 0.5574580895295539, "learning_rate": 4.230983481352018e-06, "loss": 0.14900060892105102, "step": 20405 }, { "epoch": 1.7831556875764458, "grad_norm": 0.6380861991977116, "learning_rate": 4.228472407192239e-06, "loss": 0.15697388648986815, "step": 20410 }, { "epoch": 1.7835925214048576, "grad_norm": 0.5547606606481653, "learning_rate": 4.225961532357481e-06, "loss": 0.17038872241973876, "step": 20415 }, { "epoch": 1.7840293552332693, "grad_norm": 0.7940461567999103, "learning_rate": 4.223450857496432e-06, "loss": 0.16303856372833253, "step": 20420 }, { "epoch": 1.784466189061681, "grad_norm": 0.7124317540136843, "learning_rate": 4.220940383257724e-06, "loss": 0.18161816596984864, "step": 20425 }, { "epoch": 1.7849030228900926, "grad_norm": 0.5852530202865228, "learning_rate": 4.218430110289945e-06, "loss": 0.15498044490814208, "step": 20430 }, { "epoch": 1.7853398567185041, "grad_norm": 0.6508753389236162, "learning_rate": 4.215920039241623e-06, "loss": 0.17288477420806886, "step": 20435 }, { "epoch": 1.785776690546916, "grad_norm": 0.6514940995311488, "learning_rate": 4.213410170761241e-06, "loss": 0.1663733720779419, "step": 20440 }, { "epoch": 1.7862135243753277, "grad_norm": 2.12608144230922, "learning_rate": 4.210900505497224e-06, "loss": 0.1631217122077942, "step": 20445 }, { "epoch": 1.7866503582037394, "grad_norm": 0.7246813514283705, "learning_rate": 4.20839104409795e-06, "loss": 0.1870573043823242, "step": 20450 }, { "epoch": 1.787087192032151, "grad_norm": 0.6288268921988919, "learning_rate": 4.205881787211739e-06, "loss": 0.17855350971221923, "step": 20455 }, { "epoch": 1.7875240258605627, "grad_norm": 0.5985058176230174, "learning_rate": 4.2033727354868595e-06, "loss": 0.16183335781097413, "step": 20460 }, { "epoch": 1.7879608596889742, "grad_norm": 0.5949995570019104, "learning_rate": 4.200863889571528e-06, "loss": 0.19752506017684937, "step": 20465 }, { "epoch": 1.788397693517386, "grad_norm": 0.6485234611135433, "learning_rate": 4.198355250113909e-06, "loss": 0.1659022569656372, "step": 20470 }, { "epoch": 1.7888345273457977, "grad_norm": 0.5788458930915041, "learning_rate": 4.195846817762112e-06, "loss": 0.1698642134666443, "step": 20475 }, { "epoch": 1.7892713611742095, "grad_norm": 0.6225303841276613, "learning_rate": 4.193338593164192e-06, "loss": 0.16670531034469604, "step": 20480 }, { "epoch": 1.789708195002621, "grad_norm": 0.693478335213642, "learning_rate": 4.190830576968156e-06, "loss": 0.2065061092376709, "step": 20485 }, { "epoch": 1.7901450288310325, "grad_norm": 0.6259349197063491, "learning_rate": 4.188322769821947e-06, "loss": 0.17828453779220582, "step": 20490 }, { "epoch": 1.7905818626594443, "grad_norm": 0.5655403369704071, "learning_rate": 4.185815172373462e-06, "loss": 0.1939892053604126, "step": 20495 }, { "epoch": 1.791018696487856, "grad_norm": 0.5404799684295861, "learning_rate": 4.1833077852705415e-06, "loss": 0.15989983081817627, "step": 20500 }, { "epoch": 1.7914555303162678, "grad_norm": 0.6521091895190678, "learning_rate": 4.180800609160974e-06, "loss": 0.1595292568206787, "step": 20505 }, { "epoch": 1.7918923641446793, "grad_norm": 0.5615293312488526, "learning_rate": 4.178293644692488e-06, "loss": 0.16629859209060668, "step": 20510 }, { "epoch": 1.7923291979730909, "grad_norm": 0.5605783595541787, "learning_rate": 4.1757868925127635e-06, "loss": 0.16840200424194335, "step": 20515 }, { "epoch": 1.7927660318015026, "grad_norm": 0.6000627237083568, "learning_rate": 4.173280353269421e-06, "loss": 0.19857317209243774, "step": 20520 }, { "epoch": 1.7932028656299144, "grad_norm": 0.614725061574172, "learning_rate": 4.170774027610027e-06, "loss": 0.17723457813262938, "step": 20525 }, { "epoch": 1.7936396994583261, "grad_norm": 0.5989865953356195, "learning_rate": 4.168267916182095e-06, "loss": 0.19481703042984008, "step": 20530 }, { "epoch": 1.7940765332867379, "grad_norm": 0.5683252776918779, "learning_rate": 4.1657620196330804e-06, "loss": 0.18474048376083374, "step": 20535 }, { "epoch": 1.7945133671151494, "grad_norm": 0.5391891170660014, "learning_rate": 4.163256338610386e-06, "loss": 0.17378157377243042, "step": 20540 }, { "epoch": 1.794950200943561, "grad_norm": 0.5895223444733066, "learning_rate": 4.1607508737613564e-06, "loss": 0.17105934619903565, "step": 20545 }, { "epoch": 1.7953870347719727, "grad_norm": 0.6330814001235572, "learning_rate": 4.158245625733283e-06, "loss": 0.16453425884246825, "step": 20550 }, { "epoch": 1.7958238686003845, "grad_norm": 0.6004611222147026, "learning_rate": 4.155740595173397e-06, "loss": 0.17301614284515382, "step": 20555 }, { "epoch": 1.7962607024287962, "grad_norm": 0.6215934319049671, "learning_rate": 4.153235782728875e-06, "loss": 0.19891791343688964, "step": 20560 }, { "epoch": 1.7966975362572077, "grad_norm": 0.6401675127870121, "learning_rate": 4.150731189046841e-06, "loss": 0.17023518085479736, "step": 20565 }, { "epoch": 1.7971343700856193, "grad_norm": 0.5340389185375468, "learning_rate": 4.148226814774359e-06, "loss": 0.19458544254302979, "step": 20570 }, { "epoch": 1.797571203914031, "grad_norm": 0.5890297810478585, "learning_rate": 4.145722660558436e-06, "loss": 0.18782784938812255, "step": 20575 }, { "epoch": 1.7980080377424428, "grad_norm": 0.5364604254418218, "learning_rate": 4.143218727046025e-06, "loss": 0.16843736171722412, "step": 20580 }, { "epoch": 1.7984448715708545, "grad_norm": 0.5109616215090593, "learning_rate": 4.140715014884015e-06, "loss": 0.18555841445922852, "step": 20585 }, { "epoch": 1.7988817053992663, "grad_norm": 0.5929864749099228, "learning_rate": 4.138211524719248e-06, "loss": 0.16028411388397218, "step": 20590 }, { "epoch": 1.7993185392276778, "grad_norm": 0.5780612088067933, "learning_rate": 4.135708257198501e-06, "loss": 0.14462974071502685, "step": 20595 }, { "epoch": 1.7997553730560893, "grad_norm": 0.6070176660195372, "learning_rate": 4.133205212968498e-06, "loss": 0.18923721313476563, "step": 20600 }, { "epoch": 1.800192206884501, "grad_norm": 0.5569933986982387, "learning_rate": 4.130702392675901e-06, "loss": 0.17329646348953248, "step": 20605 }, { "epoch": 1.8006290407129129, "grad_norm": 0.573157685353202, "learning_rate": 4.128199796967318e-06, "loss": 0.15193796157836914, "step": 20610 }, { "epoch": 1.8010658745413246, "grad_norm": 0.6513448317975156, "learning_rate": 4.125697426489297e-06, "loss": 0.15433400869369507, "step": 20615 }, { "epoch": 1.8015027083697361, "grad_norm": 0.63638143967001, "learning_rate": 4.123195281888329e-06, "loss": 0.1860910415649414, "step": 20620 }, { "epoch": 1.8019395421981477, "grad_norm": 0.5153023373852188, "learning_rate": 4.120693363810843e-06, "loss": 0.18043171167373656, "step": 20625 }, { "epoch": 1.8023763760265594, "grad_norm": 0.5770053021417677, "learning_rate": 4.118191672903215e-06, "loss": 0.16664886474609375, "step": 20630 }, { "epoch": 1.8028132098549712, "grad_norm": 0.5477308010898967, "learning_rate": 4.115690209811759e-06, "loss": 0.17491743564605713, "step": 20635 }, { "epoch": 1.803250043683383, "grad_norm": 0.6255949070048724, "learning_rate": 4.113188975182731e-06, "loss": 0.1777498483657837, "step": 20640 }, { "epoch": 1.8036868775117945, "grad_norm": 0.74249175117638, "learning_rate": 4.110687969662328e-06, "loss": 0.16744039058685303, "step": 20645 }, { "epoch": 1.8041237113402062, "grad_norm": 0.6564090244898082, "learning_rate": 4.108187193896686e-06, "loss": 0.16847460269927977, "step": 20650 }, { "epoch": 1.8045605451686177, "grad_norm": 0.5614736548098869, "learning_rate": 4.105686648531886e-06, "loss": 0.1895839810371399, "step": 20655 }, { "epoch": 1.8049973789970295, "grad_norm": 0.6281781266490021, "learning_rate": 4.103186334213943e-06, "loss": 0.16535890102386475, "step": 20660 }, { "epoch": 1.8054342128254413, "grad_norm": 0.5685807317698909, "learning_rate": 4.10068625158882e-06, "loss": 0.1879391551017761, "step": 20665 }, { "epoch": 1.805871046653853, "grad_norm": 0.6578532087528586, "learning_rate": 4.098186401302413e-06, "loss": 0.1620584487915039, "step": 20670 }, { "epoch": 1.8063078804822645, "grad_norm": 0.7208795932212255, "learning_rate": 4.095686784000564e-06, "loss": 0.1944655656814575, "step": 20675 }, { "epoch": 1.806744714310676, "grad_norm": 0.5786528782018063, "learning_rate": 4.093187400329049e-06, "loss": 0.18137046098709106, "step": 20680 }, { "epoch": 1.8071815481390878, "grad_norm": 0.5889752877251105, "learning_rate": 4.090688250933588e-06, "loss": 0.1699803352355957, "step": 20685 }, { "epoch": 1.8076183819674996, "grad_norm": 0.594228838891628, "learning_rate": 4.088189336459838e-06, "loss": 0.1661755323410034, "step": 20690 }, { "epoch": 1.8080552157959113, "grad_norm": 0.713877748208737, "learning_rate": 4.085690657553398e-06, "loss": 0.17498563528060912, "step": 20695 }, { "epoch": 1.8084920496243229, "grad_norm": 0.8444186800770308, "learning_rate": 4.083192214859804e-06, "loss": 0.1700606107711792, "step": 20700 }, { "epoch": 1.8089288834527346, "grad_norm": 0.5375247919569004, "learning_rate": 4.080694009024532e-06, "loss": 0.17109702825546264, "step": 20705 }, { "epoch": 1.8093657172811461, "grad_norm": 0.6381454584114261, "learning_rate": 4.078196040692995e-06, "loss": 0.17739249467849733, "step": 20710 }, { "epoch": 1.809802551109558, "grad_norm": 0.5260015022025173, "learning_rate": 4.0756983105105455e-06, "loss": 0.18754653930664061, "step": 20715 }, { "epoch": 1.8102393849379697, "grad_norm": 0.5997400759221707, "learning_rate": 4.0732008191224775e-06, "loss": 0.16480510234832763, "step": 20720 }, { "epoch": 1.8106762187663814, "grad_norm": 0.6047324206870105, "learning_rate": 4.0707035671740184e-06, "loss": 0.19551315307617187, "step": 20725 }, { "epoch": 1.811113052594793, "grad_norm": 0.5760057178113579, "learning_rate": 4.068206555310336e-06, "loss": 0.18915877342224122, "step": 20730 }, { "epoch": 1.8115498864232045, "grad_norm": 0.520972334480672, "learning_rate": 4.065709784176537e-06, "loss": 0.18440690040588378, "step": 20735 }, { "epoch": 1.8119867202516162, "grad_norm": 0.6037772150500902, "learning_rate": 4.063213254417667e-06, "loss": 0.1707984447479248, "step": 20740 }, { "epoch": 1.812423554080028, "grad_norm": 0.5867943368641158, "learning_rate": 4.060716966678704e-06, "loss": 0.176552414894104, "step": 20745 }, { "epoch": 1.8128603879084397, "grad_norm": 0.4649239250237411, "learning_rate": 4.058220921604567e-06, "loss": 0.18592002391815185, "step": 20750 }, { "epoch": 1.8132972217368513, "grad_norm": 0.6382106489535883, "learning_rate": 4.055725119840113e-06, "loss": 0.1801377058029175, "step": 20755 }, { "epoch": 1.813734055565263, "grad_norm": 0.6268487790164331, "learning_rate": 4.053229562030134e-06, "loss": 0.17083755731582642, "step": 20760 }, { "epoch": 1.8141708893936745, "grad_norm": 0.6200955362635537, "learning_rate": 4.050734248819364e-06, "loss": 0.14290934801101685, "step": 20765 }, { "epoch": 1.8146077232220863, "grad_norm": 0.5758991227278012, "learning_rate": 4.0482391808524665e-06, "loss": 0.15952121019363402, "step": 20770 }, { "epoch": 1.815044557050498, "grad_norm": 0.6149103043435724, "learning_rate": 4.045744358774046e-06, "loss": 0.1804661512374878, "step": 20775 }, { "epoch": 1.8154813908789098, "grad_norm": 0.7890409599779274, "learning_rate": 4.0432497832286425e-06, "loss": 0.1532357931137085, "step": 20780 }, { "epoch": 1.8159182247073213, "grad_norm": 0.5441981292101931, "learning_rate": 4.040755454860731e-06, "loss": 0.18094545602798462, "step": 20785 }, { "epoch": 1.8163550585357329, "grad_norm": 0.6848215626445541, "learning_rate": 4.038261374314726e-06, "loss": 0.19270654916763305, "step": 20790 }, { "epoch": 1.8167918923641446, "grad_norm": 0.6565488897497587, "learning_rate": 4.035767542234975e-06, "loss": 0.1640916109085083, "step": 20795 }, { "epoch": 1.8172287261925564, "grad_norm": 0.5721805845284542, "learning_rate": 4.0332739592657634e-06, "loss": 0.15394768714904786, "step": 20800 }, { "epoch": 1.8176655600209681, "grad_norm": 0.6204025231640673, "learning_rate": 4.030780626051312e-06, "loss": 0.18050237894058227, "step": 20805 }, { "epoch": 1.8181023938493797, "grad_norm": 0.6320950301311574, "learning_rate": 4.028287543235773e-06, "loss": 0.14826507568359376, "step": 20810 }, { "epoch": 1.8185392276777914, "grad_norm": 0.6722030211778486, "learning_rate": 4.0257947114632385e-06, "loss": 0.18753609657287598, "step": 20815 }, { "epoch": 1.818976061506203, "grad_norm": 0.5574598263617461, "learning_rate": 4.023302131377736e-06, "loss": 0.18350749015808104, "step": 20820 }, { "epoch": 1.8194128953346147, "grad_norm": 0.543158144048061, "learning_rate": 4.020809803623224e-06, "loss": 0.15612609386444093, "step": 20825 }, { "epoch": 1.8198497291630265, "grad_norm": 0.762359120653912, "learning_rate": 4.018317728843599e-06, "loss": 0.18845994472503663, "step": 20830 }, { "epoch": 1.8202865629914382, "grad_norm": 0.5695773753760122, "learning_rate": 4.015825907682695e-06, "loss": 0.16470530033111572, "step": 20835 }, { "epoch": 1.8207233968198497, "grad_norm": 0.5806463998410163, "learning_rate": 4.01333434078427e-06, "loss": 0.16734097003936768, "step": 20840 }, { "epoch": 1.8211602306482613, "grad_norm": 0.6504253941135113, "learning_rate": 4.010843028792027e-06, "loss": 0.16115731000900269, "step": 20845 }, { "epoch": 1.821597064476673, "grad_norm": 0.6276032643921731, "learning_rate": 4.008351972349599e-06, "loss": 0.17980680465698243, "step": 20850 }, { "epoch": 1.8220338983050848, "grad_norm": 0.6399964881071535, "learning_rate": 4.005861172100552e-06, "loss": 0.17356407642364502, "step": 20855 }, { "epoch": 1.8224707321334965, "grad_norm": 0.64402891854697, "learning_rate": 4.00337062868839e-06, "loss": 0.17623038291931153, "step": 20860 }, { "epoch": 1.822907565961908, "grad_norm": 0.7006213468854465, "learning_rate": 4.000880342756545e-06, "loss": 0.18208687305450438, "step": 20865 }, { "epoch": 1.8233443997903198, "grad_norm": 0.7665196716782692, "learning_rate": 3.998390314948386e-06, "loss": 0.18644846677780152, "step": 20870 }, { "epoch": 1.8237812336187313, "grad_norm": 0.6558474082965612, "learning_rate": 3.995900545907215e-06, "loss": 0.17687851190567017, "step": 20875 }, { "epoch": 1.824218067447143, "grad_norm": 0.6717635389416488, "learning_rate": 3.993411036276266e-06, "loss": 0.17903952598571776, "step": 20880 }, { "epoch": 1.8246549012755549, "grad_norm": 0.5477222148705949, "learning_rate": 3.990921786698705e-06, "loss": 0.1761917233467102, "step": 20885 }, { "epoch": 1.8250917351039666, "grad_norm": 0.5958133380360638, "learning_rate": 3.988432797817634e-06, "loss": 0.16571928262710572, "step": 20890 }, { "epoch": 1.8255285689323781, "grad_norm": 0.5867085333879959, "learning_rate": 3.985944070276087e-06, "loss": 0.1771962523460388, "step": 20895 }, { "epoch": 1.8259654027607897, "grad_norm": 0.559822112327838, "learning_rate": 3.983455604717031e-06, "loss": 0.165957510471344, "step": 20900 }, { "epoch": 1.8264022365892014, "grad_norm": 0.58950169994691, "learning_rate": 3.980967401783359e-06, "loss": 0.15697768926620484, "step": 20905 }, { "epoch": 1.8268390704176132, "grad_norm": 0.6258907495543314, "learning_rate": 3.978479462117903e-06, "loss": 0.20092692375183105, "step": 20910 }, { "epoch": 1.827275904246025, "grad_norm": 0.6146374306676178, "learning_rate": 3.9759917863634265e-06, "loss": 0.15518004894256593, "step": 20915 }, { "epoch": 1.8277127380744365, "grad_norm": 0.5893854962031488, "learning_rate": 3.973504375162623e-06, "loss": 0.15559873580932618, "step": 20920 }, { "epoch": 1.828149571902848, "grad_norm": 0.666534291821481, "learning_rate": 3.971017229158117e-06, "loss": 0.17457327842712403, "step": 20925 }, { "epoch": 1.8285864057312597, "grad_norm": 0.6120707830940907, "learning_rate": 3.968530348992466e-06, "loss": 0.14225486516952515, "step": 20930 }, { "epoch": 1.8290232395596715, "grad_norm": 0.7014308893400363, "learning_rate": 3.966043735308159e-06, "loss": 0.17831478118896485, "step": 20935 }, { "epoch": 1.8294600733880833, "grad_norm": 0.5713014878221662, "learning_rate": 3.963557388747613e-06, "loss": 0.14387588500976561, "step": 20940 }, { "epoch": 1.829896907216495, "grad_norm": 0.5757002574411809, "learning_rate": 3.96107130995318e-06, "loss": 0.1701136350631714, "step": 20945 }, { "epoch": 1.8303337410449065, "grad_norm": 0.6386287636596987, "learning_rate": 3.958585499567142e-06, "loss": 0.1796770691871643, "step": 20950 }, { "epoch": 1.830770574873318, "grad_norm": 0.7219229317472137, "learning_rate": 3.956099958231709e-06, "loss": 0.15280346870422362, "step": 20955 }, { "epoch": 1.8312074087017298, "grad_norm": 0.6870528701843175, "learning_rate": 3.953614686589025e-06, "loss": 0.14749616384506226, "step": 20960 }, { "epoch": 1.8316442425301416, "grad_norm": 0.6067253382163286, "learning_rate": 3.951129685281164e-06, "loss": 0.14147350788116456, "step": 20965 }, { "epoch": 1.8320810763585533, "grad_norm": 0.5921717577293596, "learning_rate": 3.948644954950125e-06, "loss": 0.17588133811950685, "step": 20970 }, { "epoch": 1.8325179101869649, "grad_norm": 0.5799368123110643, "learning_rate": 3.9461604962378454e-06, "loss": 0.15978596210479737, "step": 20975 }, { "epoch": 1.8329547440153764, "grad_norm": 0.7355134289327366, "learning_rate": 3.943676309786183e-06, "loss": 0.15925030708312987, "step": 20980 }, { "epoch": 1.8333915778437881, "grad_norm": 0.7752040735475407, "learning_rate": 3.941192396236933e-06, "loss": 0.17099182605743407, "step": 20985 }, { "epoch": 1.8338284116722, "grad_norm": 0.631306840001767, "learning_rate": 3.938708756231819e-06, "loss": 0.16926395893096924, "step": 20990 }, { "epoch": 1.8342652455006117, "grad_norm": 0.572905750558375, "learning_rate": 3.9362253904124895e-06, "loss": 0.17227426767349244, "step": 20995 }, { "epoch": 1.8347020793290234, "grad_norm": 0.7302747522163853, "learning_rate": 3.933742299420525e-06, "loss": 0.18012149333953859, "step": 21000 }, { "epoch": 1.835138913157435, "grad_norm": 0.6665784922173648, "learning_rate": 3.931259483897434e-06, "loss": 0.17764681577682495, "step": 21005 }, { "epoch": 1.8355757469858465, "grad_norm": 0.6427512496801926, "learning_rate": 3.928776944484658e-06, "loss": 0.1754917860031128, "step": 21010 }, { "epoch": 1.8360125808142582, "grad_norm": 0.5709870705556189, "learning_rate": 3.92629468182356e-06, "loss": 0.1714940071105957, "step": 21015 }, { "epoch": 1.83644941464267, "grad_norm": 0.680946560827524, "learning_rate": 3.923812696555439e-06, "loss": 0.1693643569946289, "step": 21020 }, { "epoch": 1.8368862484710817, "grad_norm": 0.5926532032033937, "learning_rate": 3.9213309893215165e-06, "loss": 0.15336346626281738, "step": 21025 }, { "epoch": 1.8373230822994933, "grad_norm": 0.8681842441134515, "learning_rate": 3.918849560762944e-06, "loss": 0.18120217323303223, "step": 21030 }, { "epoch": 1.8377599161279048, "grad_norm": 0.7056142715945386, "learning_rate": 3.9163684115208035e-06, "loss": 0.1363450527191162, "step": 21035 }, { "epoch": 1.8381967499563165, "grad_norm": 0.5732087412834993, "learning_rate": 3.9138875422361e-06, "loss": 0.15081506967544556, "step": 21040 }, { "epoch": 1.8386335837847283, "grad_norm": 0.5787875649904813, "learning_rate": 3.91140695354977e-06, "loss": 0.16810765266418456, "step": 21045 }, { "epoch": 1.83907041761314, "grad_norm": 0.671012230254573, "learning_rate": 3.908926646102677e-06, "loss": 0.16939665079116822, "step": 21050 }, { "epoch": 1.8395072514415516, "grad_norm": 0.7507385015716492, "learning_rate": 3.90644662053561e-06, "loss": 0.16227707862854004, "step": 21055 }, { "epoch": 1.8399440852699633, "grad_norm": 0.700877288894378, "learning_rate": 3.903966877489289e-06, "loss": 0.1478442668914795, "step": 21060 }, { "epoch": 1.8403809190983749, "grad_norm": 0.6732020804123818, "learning_rate": 3.901487417604356e-06, "loss": 0.1741977095603943, "step": 21065 }, { "epoch": 1.8408177529267866, "grad_norm": 0.5375646569241291, "learning_rate": 3.899008241521382e-06, "loss": 0.1685330390930176, "step": 21070 }, { "epoch": 1.8412545867551984, "grad_norm": 0.658988636116431, "learning_rate": 3.8965293498808655e-06, "loss": 0.1354980707168579, "step": 21075 }, { "epoch": 1.8416914205836101, "grad_norm": 0.6056137329008654, "learning_rate": 3.8940507433232314e-06, "loss": 0.15302095413208008, "step": 21080 }, { "epoch": 1.8421282544120217, "grad_norm": 0.5899122327794335, "learning_rate": 3.89157242248883e-06, "loss": 0.1541649580001831, "step": 21085 }, { "epoch": 1.8425650882404332, "grad_norm": 0.5675021542410671, "learning_rate": 3.889094388017941e-06, "loss": 0.15899438858032228, "step": 21090 }, { "epoch": 1.843001922068845, "grad_norm": 0.7412881896457667, "learning_rate": 3.886616640550762e-06, "loss": 0.15161395072937012, "step": 21095 }, { "epoch": 1.8434387558972567, "grad_norm": 0.5999102428117582, "learning_rate": 3.884139180727425e-06, "loss": 0.16492223739624023, "step": 21100 }, { "epoch": 1.8438755897256685, "grad_norm": 0.6810451912902767, "learning_rate": 3.881662009187985e-06, "loss": 0.1829410195350647, "step": 21105 }, { "epoch": 1.84431242355408, "grad_norm": 0.6277242482184245, "learning_rate": 3.87918512657242e-06, "loss": 0.1487516164779663, "step": 21110 }, { "epoch": 1.8447492573824917, "grad_norm": 0.5470089126270654, "learning_rate": 3.876708533520638e-06, "loss": 0.14890958070755006, "step": 21115 }, { "epoch": 1.8451860912109033, "grad_norm": 0.6575456477841211, "learning_rate": 3.874232230672469e-06, "loss": 0.17063645124435425, "step": 21120 }, { "epoch": 1.845622925039315, "grad_norm": 0.8304122967834418, "learning_rate": 3.871756218667668e-06, "loss": 0.1730485200881958, "step": 21125 }, { "epoch": 1.8460597588677268, "grad_norm": 0.5893799778723042, "learning_rate": 3.869280498145914e-06, "loss": 0.1575087070465088, "step": 21130 }, { "epoch": 1.8464965926961385, "grad_norm": 0.5398087582168911, "learning_rate": 3.866805069746815e-06, "loss": 0.1490776300430298, "step": 21135 }, { "epoch": 1.84693342652455, "grad_norm": 0.5990452427497118, "learning_rate": 3.864329934109898e-06, "loss": 0.18563350439071655, "step": 21140 }, { "epoch": 1.8473702603529616, "grad_norm": 0.5605347832377449, "learning_rate": 3.861855091874619e-06, "loss": 0.16970467567443848, "step": 21145 }, { "epoch": 1.8478070941813733, "grad_norm": 0.6875366641206628, "learning_rate": 3.859380543680355e-06, "loss": 0.16274760961532592, "step": 21150 }, { "epoch": 1.848243928009785, "grad_norm": 0.5458711355194348, "learning_rate": 3.8569062901664115e-06, "loss": 0.16027923822402954, "step": 21155 }, { "epoch": 1.8486807618381969, "grad_norm": 0.5720245862807594, "learning_rate": 3.85443233197201e-06, "loss": 0.16543383598327638, "step": 21160 }, { "epoch": 1.8491175956666084, "grad_norm": 0.529927422149225, "learning_rate": 3.8519586697363025e-06, "loss": 0.1573905110359192, "step": 21165 }, { "epoch": 1.8495544294950201, "grad_norm": 0.49309817749837687, "learning_rate": 3.849485304098362e-06, "loss": 0.17415144443511962, "step": 21170 }, { "epoch": 1.8499912633234317, "grad_norm": 0.6358372422880616, "learning_rate": 3.847012235697187e-06, "loss": 0.1739483118057251, "step": 21175 }, { "epoch": 1.8504280971518434, "grad_norm": 0.6282122316743339, "learning_rate": 3.844539465171695e-06, "loss": 0.1713305354118347, "step": 21180 }, { "epoch": 1.8508649309802552, "grad_norm": 0.6639971499100388, "learning_rate": 3.842066993160732e-06, "loss": 0.17019951343536377, "step": 21185 }, { "epoch": 1.851301764808667, "grad_norm": 0.592720381173531, "learning_rate": 3.839594820303062e-06, "loss": 0.15152206420898437, "step": 21190 }, { "epoch": 1.8517385986370785, "grad_norm": 0.5536505141932392, "learning_rate": 3.837122947237372e-06, "loss": 0.1631099224090576, "step": 21195 }, { "epoch": 1.85217543246549, "grad_norm": 0.6404188103211751, "learning_rate": 3.834651374602276e-06, "loss": 0.1682591199874878, "step": 21200 }, { "epoch": 1.8526122662939017, "grad_norm": 0.612883834921616, "learning_rate": 3.832180103036306e-06, "loss": 0.1753248691558838, "step": 21205 }, { "epoch": 1.8530491001223135, "grad_norm": 0.7429237620344633, "learning_rate": 3.829709133177919e-06, "loss": 0.19410371780395508, "step": 21210 }, { "epoch": 1.8534859339507253, "grad_norm": 0.6218271878049138, "learning_rate": 3.827238465665491e-06, "loss": 0.15405575037002564, "step": 21215 }, { "epoch": 1.8539227677791368, "grad_norm": 0.6053125510318988, "learning_rate": 3.8247681011373265e-06, "loss": 0.1503197431564331, "step": 21220 }, { "epoch": 1.8543596016075485, "grad_norm": 0.7389792609263373, "learning_rate": 3.822298040231641e-06, "loss": 0.17696528434753417, "step": 21225 }, { "epoch": 1.85479643543596, "grad_norm": 0.5853193474457571, "learning_rate": 3.819828283586579e-06, "loss": 0.16926392316818237, "step": 21230 }, { "epoch": 1.8552332692643718, "grad_norm": 0.5406631353422641, "learning_rate": 3.817358831840208e-06, "loss": 0.12934224605560302, "step": 21235 }, { "epoch": 1.8556701030927836, "grad_norm": 0.7094541779062498, "learning_rate": 3.814889685630509e-06, "loss": 0.15740362405776978, "step": 21240 }, { "epoch": 1.8561069369211953, "grad_norm": 0.5406382876588754, "learning_rate": 3.812420845595392e-06, "loss": 0.17384786605834962, "step": 21245 }, { "epoch": 1.8565437707496069, "grad_norm": 0.6334819278448254, "learning_rate": 3.8099523123726858e-06, "loss": 0.14104046821594238, "step": 21250 }, { "epoch": 1.8569806045780184, "grad_norm": 0.5679687996971471, "learning_rate": 3.8074840866001343e-06, "loss": 0.1685829520225525, "step": 21255 }, { "epoch": 1.8574174384064301, "grad_norm": 0.6297602056678725, "learning_rate": 3.805016168915409e-06, "loss": 0.1645420551300049, "step": 21260 }, { "epoch": 1.857854272234842, "grad_norm": 0.6802947451051911, "learning_rate": 3.802548559956099e-06, "loss": 0.18835773468017578, "step": 21265 }, { "epoch": 1.8582911060632537, "grad_norm": 0.591337049260892, "learning_rate": 3.8000812603597138e-06, "loss": 0.18603651523590087, "step": 21270 }, { "epoch": 1.8587279398916652, "grad_norm": 0.6085736130650767, "learning_rate": 3.797614270763684e-06, "loss": 0.16759011745452881, "step": 21275 }, { "epoch": 1.859164773720077, "grad_norm": 0.6073289057736961, "learning_rate": 3.7951475918053583e-06, "loss": 0.14411834478378296, "step": 21280 }, { "epoch": 1.8596016075484885, "grad_norm": 0.5187445778742004, "learning_rate": 3.7926812241220047e-06, "loss": 0.16293206214904785, "step": 21285 }, { "epoch": 1.8600384413769002, "grad_norm": 0.6023484387367782, "learning_rate": 3.7902151683508136e-06, "loss": 0.1727057933807373, "step": 21290 }, { "epoch": 1.860475275205312, "grad_norm": 0.7149323242989445, "learning_rate": 3.787749425128893e-06, "loss": 0.17562427520751953, "step": 21295 }, { "epoch": 1.8609121090337237, "grad_norm": 0.5905946736367674, "learning_rate": 3.7852839950932694e-06, "loss": 0.1602012872695923, "step": 21300 }, { "epoch": 1.8613489428621353, "grad_norm": 0.6108229340566853, "learning_rate": 3.782818878880891e-06, "loss": 0.1562696099281311, "step": 21305 }, { "epoch": 1.8617857766905468, "grad_norm": 0.8314090674223444, "learning_rate": 3.7803540771286224e-06, "loss": 0.1604790449142456, "step": 21310 }, { "epoch": 1.8622226105189585, "grad_norm": 0.5367234984191449, "learning_rate": 3.7778895904732504e-06, "loss": 0.16221067905426026, "step": 21315 }, { "epoch": 1.8626594443473703, "grad_norm": 0.5998977807523465, "learning_rate": 3.775425419551474e-06, "loss": 0.15801138877868653, "step": 21320 }, { "epoch": 1.863096278175782, "grad_norm": 0.6339541526346802, "learning_rate": 3.7729615649999152e-06, "loss": 0.16312999725341798, "step": 21325 }, { "epoch": 1.8635331120041936, "grad_norm": 0.5531862127701739, "learning_rate": 3.770498027455115e-06, "loss": 0.17111880779266359, "step": 21330 }, { "epoch": 1.8639699458326051, "grad_norm": 0.6469425981578008, "learning_rate": 3.768034807553532e-06, "loss": 0.16568665504455565, "step": 21335 }, { "epoch": 1.8644067796610169, "grad_norm": 0.5537599443138054, "learning_rate": 3.7655719059315394e-06, "loss": 0.15856246948242186, "step": 21340 }, { "epoch": 1.8648436134894286, "grad_norm": 0.6235806681757626, "learning_rate": 3.763109323225434e-06, "loss": 0.14041624069213868, "step": 21345 }, { "epoch": 1.8652804473178404, "grad_norm": 0.723194493721875, "learning_rate": 3.760647060071422e-06, "loss": 0.18087280988693238, "step": 21350 }, { "epoch": 1.8657172811462521, "grad_norm": 0.6118937497106632, "learning_rate": 3.7581851171056347e-06, "loss": 0.16992757320404053, "step": 21355 }, { "epoch": 1.8661541149746637, "grad_norm": 0.7430404333901647, "learning_rate": 3.7557234949641177e-06, "loss": 0.15839014053344727, "step": 21360 }, { "epoch": 1.8665909488030752, "grad_norm": 0.5169764977332628, "learning_rate": 3.7532621942828328e-06, "loss": 0.16143163442611694, "step": 21365 }, { "epoch": 1.867027782631487, "grad_norm": 0.6225544836707353, "learning_rate": 3.7508012156976612e-06, "loss": 0.14067889451980592, "step": 21370 }, { "epoch": 1.8674646164598987, "grad_norm": 0.6386511762114233, "learning_rate": 3.7483405598444e-06, "loss": 0.18545979261398315, "step": 21375 }, { "epoch": 1.8679014502883104, "grad_norm": 0.550290707314749, "learning_rate": 3.745880227358761e-06, "loss": 0.17416715621948242, "step": 21380 }, { "epoch": 1.868338284116722, "grad_norm": 0.6664248371259569, "learning_rate": 3.743420218876374e-06, "loss": 0.15244654417037964, "step": 21385 }, { "epoch": 1.8687751179451335, "grad_norm": 0.5852600443002806, "learning_rate": 3.740960535032786e-06, "loss": 0.16163988113403321, "step": 21390 }, { "epoch": 1.8692119517735453, "grad_norm": 0.5383556441941912, "learning_rate": 3.738501176463457e-06, "loss": 0.19843339920043945, "step": 21395 }, { "epoch": 1.869648785601957, "grad_norm": 0.7190441540709183, "learning_rate": 3.7360421438037656e-06, "loss": 0.166220760345459, "step": 21400 }, { "epoch": 1.8700856194303688, "grad_norm": 0.6186289723814967, "learning_rate": 3.733583437689007e-06, "loss": 0.14744763374328612, "step": 21405 }, { "epoch": 1.8705224532587805, "grad_norm": 0.7431301698890342, "learning_rate": 3.7311250587543923e-06, "loss": 0.16241462230682374, "step": 21410 }, { "epoch": 1.870959287087192, "grad_norm": 0.5405015060909913, "learning_rate": 3.7286670076350416e-06, "loss": 0.18123779296875, "step": 21415 }, { "epoch": 1.8713961209156036, "grad_norm": 0.6164244659316986, "learning_rate": 3.726209284965998e-06, "loss": 0.17925028800964354, "step": 21420 }, { "epoch": 1.8718329547440153, "grad_norm": 0.564652413021687, "learning_rate": 3.7237518913822167e-06, "loss": 0.16953482627868652, "step": 21425 }, { "epoch": 1.872269788572427, "grad_norm": 0.5543238339849791, "learning_rate": 3.7212948275185682e-06, "loss": 0.17102041244506835, "step": 21430 }, { "epoch": 1.8727066224008388, "grad_norm": 0.6226727631917175, "learning_rate": 3.718838094009839e-06, "loss": 0.16850088834762572, "step": 21435 }, { "epoch": 1.8731434562292504, "grad_norm": 0.6354420489990066, "learning_rate": 3.7163816914907267e-06, "loss": 0.15917707681655885, "step": 21440 }, { "epoch": 1.873580290057662, "grad_norm": 0.6276430513775053, "learning_rate": 3.7139256205958473e-06, "loss": 0.17351549863815308, "step": 21445 }, { "epoch": 1.8740171238860737, "grad_norm": 0.6465777096630521, "learning_rate": 3.711469881959727e-06, "loss": 0.1709856629371643, "step": 21450 }, { "epoch": 1.8744539577144854, "grad_norm": 0.5207523712559223, "learning_rate": 3.70901447621681e-06, "loss": 0.15322768688201904, "step": 21455 }, { "epoch": 1.8748907915428972, "grad_norm": 0.6143594260936258, "learning_rate": 3.7065594040014543e-06, "loss": 0.18679581880569457, "step": 21460 }, { "epoch": 1.8753276253713087, "grad_norm": 0.502972579537679, "learning_rate": 3.704104665947929e-06, "loss": 0.15431766510009765, "step": 21465 }, { "epoch": 1.8757644591997205, "grad_norm": 0.5869616072275593, "learning_rate": 3.7016502626904194e-06, "loss": 0.165742027759552, "step": 21470 }, { "epoch": 1.876201293028132, "grad_norm": 0.6041834429184733, "learning_rate": 3.699196194863025e-06, "loss": 0.16509621143341063, "step": 21475 }, { "epoch": 1.8766381268565437, "grad_norm": 0.5736336240826597, "learning_rate": 3.6967424630997535e-06, "loss": 0.17118825912475585, "step": 21480 }, { "epoch": 1.8770749606849555, "grad_norm": 0.6531541973967537, "learning_rate": 3.6942890680345302e-06, "loss": 0.173235023021698, "step": 21485 }, { "epoch": 1.8775117945133672, "grad_norm": 0.7270085660358928, "learning_rate": 3.6918360103011947e-06, "loss": 0.17562265396118165, "step": 21490 }, { "epoch": 1.8779486283417788, "grad_norm": 0.7231242801485256, "learning_rate": 3.6893832905334935e-06, "loss": 0.1737847089767456, "step": 21495 }, { "epoch": 1.8783854621701903, "grad_norm": 0.5781187454805007, "learning_rate": 3.686930909365093e-06, "loss": 0.1681879162788391, "step": 21500 }, { "epoch": 1.878822295998602, "grad_norm": 0.6564042197136063, "learning_rate": 3.6844788674295682e-06, "loss": 0.15910590887069703, "step": 21505 }, { "epoch": 1.8792591298270138, "grad_norm": 0.633325821477405, "learning_rate": 3.6820271653604043e-06, "loss": 0.16969348192214967, "step": 21510 }, { "epoch": 1.8796959636554256, "grad_norm": 0.6784316295898556, "learning_rate": 3.6795758037910033e-06, "loss": 0.15578408241271974, "step": 21515 }, { "epoch": 1.880132797483837, "grad_norm": 0.5594230954997791, "learning_rate": 3.6771247833546765e-06, "loss": 0.1689704418182373, "step": 21520 }, { "epoch": 1.8805696313122489, "grad_norm": 0.5647418192996455, "learning_rate": 3.674674104684648e-06, "loss": 0.1683258295059204, "step": 21525 }, { "epoch": 1.8810064651406604, "grad_norm": 0.7758236587596893, "learning_rate": 3.6722237684140534e-06, "loss": 0.1531239151954651, "step": 21530 }, { "epoch": 1.8814432989690721, "grad_norm": 0.5581073130805946, "learning_rate": 3.669773775175942e-06, "loss": 0.18606057167053222, "step": 21535 }, { "epoch": 1.881880132797484, "grad_norm": 0.602084151564045, "learning_rate": 3.667324125603268e-06, "loss": 0.18242855072021485, "step": 21540 }, { "epoch": 1.8823169666258956, "grad_norm": 0.5461524897442178, "learning_rate": 3.6648748203289046e-06, "loss": 0.181107234954834, "step": 21545 }, { "epoch": 1.8827538004543072, "grad_norm": 0.5630736696164945, "learning_rate": 3.6624258599856306e-06, "loss": 0.1825602173805237, "step": 21550 }, { "epoch": 1.8831906342827187, "grad_norm": 0.6113897766434271, "learning_rate": 3.659977245206138e-06, "loss": 0.14063434600830077, "step": 21555 }, { "epoch": 1.8836274681111305, "grad_norm": 0.6737246988141852, "learning_rate": 3.6575289766230294e-06, "loss": 0.16846556663513185, "step": 21560 }, { "epoch": 1.8840643019395422, "grad_norm": 0.6187813494441429, "learning_rate": 3.6550810548688187e-06, "loss": 0.1713954448699951, "step": 21565 }, { "epoch": 1.884501135767954, "grad_norm": 0.6347282736831551, "learning_rate": 3.65263348057593e-06, "loss": 0.17681785821914672, "step": 21570 }, { "epoch": 1.8849379695963655, "grad_norm": 0.6394534438559215, "learning_rate": 3.650186254376693e-06, "loss": 0.17670276165008544, "step": 21575 }, { "epoch": 1.8853748034247773, "grad_norm": 0.5557642022886892, "learning_rate": 3.647739376903354e-06, "loss": 0.14735703468322753, "step": 21580 }, { "epoch": 1.8858116372531888, "grad_norm": 0.6504601545698558, "learning_rate": 3.645292848788066e-06, "loss": 0.1718476414680481, "step": 21585 }, { "epoch": 1.8862484710816005, "grad_norm": 0.6725780043399544, "learning_rate": 3.642846670662893e-06, "loss": 0.1389382004737854, "step": 21590 }, { "epoch": 1.8866853049100123, "grad_norm": 0.6888105133376774, "learning_rate": 3.640400843159807e-06, "loss": 0.17586050033569336, "step": 21595 }, { "epoch": 1.887122138738424, "grad_norm": 0.5745870230904749, "learning_rate": 3.6379553669106915e-06, "loss": 0.18373658657073974, "step": 21600 }, { "epoch": 1.8875589725668356, "grad_norm": 0.6559299019051207, "learning_rate": 3.6355102425473373e-06, "loss": 0.19443278312683104, "step": 21605 }, { "epoch": 1.8879958063952471, "grad_norm": 0.5496624619241713, "learning_rate": 3.6330654707014435e-06, "loss": 0.1742579698562622, "step": 21610 }, { "epoch": 1.8884326402236589, "grad_norm": 0.5998784025989468, "learning_rate": 3.6306210520046207e-06, "loss": 0.14801533222198487, "step": 21615 }, { "epoch": 1.8888694740520706, "grad_norm": 0.5912645833628518, "learning_rate": 3.628176987088388e-06, "loss": 0.17211291790008545, "step": 21620 }, { "epoch": 1.8893063078804824, "grad_norm": 0.6090060804355881, "learning_rate": 3.6257332765841722e-06, "loss": 0.1564424991607666, "step": 21625 }, { "epoch": 1.889743141708894, "grad_norm": 0.6395917845597784, "learning_rate": 3.6232899211233075e-06, "loss": 0.16048583984375, "step": 21630 }, { "epoch": 1.8901799755373057, "grad_norm": 0.6004353964984968, "learning_rate": 3.620846921337041e-06, "loss": 0.16942760944366456, "step": 21635 }, { "epoch": 1.8906168093657172, "grad_norm": 0.6308498484548607, "learning_rate": 3.61840427785652e-06, "loss": 0.15314643383026122, "step": 21640 }, { "epoch": 1.891053643194129, "grad_norm": 0.5812398915925818, "learning_rate": 3.6159619913128064e-06, "loss": 0.17517123222351075, "step": 21645 }, { "epoch": 1.8914904770225407, "grad_norm": 0.5787808319914384, "learning_rate": 3.6135200623368666e-06, "loss": 0.17914642095565797, "step": 21650 }, { "epoch": 1.8919273108509524, "grad_norm": 0.5357846753752052, "learning_rate": 3.6110784915595763e-06, "loss": 0.16435229778289795, "step": 21655 }, { "epoch": 1.892364144679364, "grad_norm": 0.6135003456589052, "learning_rate": 3.6086372796117173e-06, "loss": 0.1621830940246582, "step": 21660 }, { "epoch": 1.8928009785077755, "grad_norm": 0.6204553568715093, "learning_rate": 3.6061964271239838e-06, "loss": 0.1680171012878418, "step": 21665 }, { "epoch": 1.8932378123361873, "grad_norm": 0.5949823278332174, "learning_rate": 3.6037559347269658e-06, "loss": 0.16730120182037353, "step": 21670 }, { "epoch": 1.893674646164599, "grad_norm": 0.506991738472741, "learning_rate": 3.6013158030511714e-06, "loss": 0.14358441829681395, "step": 21675 }, { "epoch": 1.8941114799930108, "grad_norm": 0.6122414965406919, "learning_rate": 3.59887603272701e-06, "loss": 0.15281095504760742, "step": 21680 }, { "epoch": 1.8945483138214223, "grad_norm": 0.5825741586853703, "learning_rate": 3.5964366243848e-06, "loss": 0.1747403025627136, "step": 21685 }, { "epoch": 1.894985147649834, "grad_norm": 0.5630991413142113, "learning_rate": 3.5939975786547663e-06, "loss": 0.14061825275421141, "step": 21690 }, { "epoch": 1.8954219814782456, "grad_norm": 0.69486380559463, "learning_rate": 3.591558896167038e-06, "loss": 0.17156292200088502, "step": 21695 }, { "epoch": 1.8958588153066573, "grad_norm": 0.5479714408912303, "learning_rate": 3.589120577551649e-06, "loss": 0.17806023359298706, "step": 21700 }, { "epoch": 1.896295649135069, "grad_norm": 0.525122775450299, "learning_rate": 3.586682623438546e-06, "loss": 0.1473916530609131, "step": 21705 }, { "epoch": 1.8967324829634808, "grad_norm": 0.5815201261383296, "learning_rate": 3.5842450344575736e-06, "loss": 0.15296446084976195, "step": 21710 }, { "epoch": 1.8971693167918924, "grad_norm": 0.6512836246850301, "learning_rate": 3.581807811238487e-06, "loss": 0.1817716598510742, "step": 21715 }, { "epoch": 1.897606150620304, "grad_norm": 0.6153797658261891, "learning_rate": 3.5793709544109455e-06, "loss": 0.15275059938430785, "step": 21720 }, { "epoch": 1.8980429844487157, "grad_norm": 0.5817788559657886, "learning_rate": 3.576934464604514e-06, "loss": 0.1572357177734375, "step": 21725 }, { "epoch": 1.8984798182771274, "grad_norm": 0.5397069135086224, "learning_rate": 3.5744983424486644e-06, "loss": 0.16430935859680176, "step": 21730 }, { "epoch": 1.8989166521055392, "grad_norm": 0.6112156058247826, "learning_rate": 3.572062588572768e-06, "loss": 0.15699925422668456, "step": 21735 }, { "epoch": 1.8993534859339507, "grad_norm": 0.7041576972872281, "learning_rate": 3.5696272036061052e-06, "loss": 0.14791886806488036, "step": 21740 }, { "epoch": 1.8997903197623622, "grad_norm": 0.6579400260564566, "learning_rate": 3.5671921881778625e-06, "loss": 0.1465585708618164, "step": 21745 }, { "epoch": 1.900227153590774, "grad_norm": 0.5855809713530322, "learning_rate": 3.5647575429171267e-06, "loss": 0.15558518171310426, "step": 21750 }, { "epoch": 1.9006639874191857, "grad_norm": 0.5844429316877513, "learning_rate": 3.5623232684528923e-06, "loss": 0.18859264850616456, "step": 21755 }, { "epoch": 1.9011008212475975, "grad_norm": 0.7017341170778661, "learning_rate": 3.5598893654140588e-06, "loss": 0.161739718914032, "step": 21760 }, { "epoch": 1.9015376550760092, "grad_norm": 0.6666044489336223, "learning_rate": 3.5574558344294236e-06, "loss": 0.1498163104057312, "step": 21765 }, { "epoch": 1.9019744889044208, "grad_norm": 0.7297966065661902, "learning_rate": 3.555022676127694e-06, "loss": 0.16981639862060546, "step": 21770 }, { "epoch": 1.9024113227328323, "grad_norm": 0.6321069026757883, "learning_rate": 3.5525898911374797e-06, "loss": 0.16243257522583007, "step": 21775 }, { "epoch": 1.902848156561244, "grad_norm": 0.5792537779122746, "learning_rate": 3.550157480087293e-06, "loss": 0.15306441783905028, "step": 21780 }, { "epoch": 1.9032849903896558, "grad_norm": 0.8344073504184809, "learning_rate": 3.5477254436055498e-06, "loss": 0.15498298406600952, "step": 21785 }, { "epoch": 1.9037218242180676, "grad_norm": 0.9480679875001821, "learning_rate": 3.5452937823205718e-06, "loss": 0.1575535535812378, "step": 21790 }, { "epoch": 1.904158658046479, "grad_norm": 0.5714379375236452, "learning_rate": 3.5428624968605773e-06, "loss": 0.1485598564147949, "step": 21795 }, { "epoch": 1.9045954918748906, "grad_norm": 0.5562356336848059, "learning_rate": 3.5404315878536956e-06, "loss": 0.1699885129928589, "step": 21800 }, { "epoch": 1.9050323257033024, "grad_norm": 0.7021089242432844, "learning_rate": 3.5380010559279517e-06, "loss": 0.17592296600341797, "step": 21805 }, { "epoch": 1.9054691595317141, "grad_norm": 0.5191336431428647, "learning_rate": 3.535570901711277e-06, "loss": 0.16515324115753174, "step": 21810 }, { "epoch": 1.905905993360126, "grad_norm": 0.6602243663024828, "learning_rate": 3.5331411258315052e-06, "loss": 0.15394985675811768, "step": 21815 }, { "epoch": 1.9063428271885374, "grad_norm": 0.7574579662111269, "learning_rate": 3.5307117289163716e-06, "loss": 0.1789012670516968, "step": 21820 }, { "epoch": 1.9067796610169492, "grad_norm": 0.6293777421564392, "learning_rate": 3.528282711593516e-06, "loss": 0.16950687170028686, "step": 21825 }, { "epoch": 1.9072164948453607, "grad_norm": 0.667532772156081, "learning_rate": 3.5258540744904724e-06, "loss": 0.17257425785064698, "step": 21830 }, { "epoch": 1.9076533286737725, "grad_norm": 0.7960227836619593, "learning_rate": 3.5234258182346847e-06, "loss": 0.16473456621170043, "step": 21835 }, { "epoch": 1.9080901625021842, "grad_norm": 0.6066074183880605, "learning_rate": 3.520997943453497e-06, "loss": 0.17364401817321778, "step": 21840 }, { "epoch": 1.908526996330596, "grad_norm": 0.6924281629109448, "learning_rate": 3.518570450774152e-06, "loss": 0.16547176837921143, "step": 21845 }, { "epoch": 1.9089638301590075, "grad_norm": 0.6972504443685763, "learning_rate": 3.5161433408237944e-06, "loss": 0.13641974925994874, "step": 21850 }, { "epoch": 1.909400663987419, "grad_norm": 0.5053634169150487, "learning_rate": 3.5137166142294733e-06, "loss": 0.1829700231552124, "step": 21855 }, { "epoch": 1.9098374978158308, "grad_norm": 0.5629956472299185, "learning_rate": 3.5112902716181347e-06, "loss": 0.17729566097259522, "step": 21860 }, { "epoch": 1.9102743316442425, "grad_norm": 0.5089736067863678, "learning_rate": 3.5088643136166256e-06, "loss": 0.16881835460662842, "step": 21865 }, { "epoch": 1.9107111654726543, "grad_norm": 0.6024354635462903, "learning_rate": 3.5064387408516966e-06, "loss": 0.17944324016571045, "step": 21870 }, { "epoch": 1.9111479993010658, "grad_norm": 0.6018359662641, "learning_rate": 3.5040135539499963e-06, "loss": 0.16797977685928345, "step": 21875 }, { "epoch": 1.9115848331294776, "grad_norm": 0.610223716982571, "learning_rate": 3.5015887535380754e-06, "loss": 0.15370416641235352, "step": 21880 }, { "epoch": 1.912021666957889, "grad_norm": 0.7101327462977866, "learning_rate": 3.4991643402423837e-06, "loss": 0.16322617530822753, "step": 21885 }, { "epoch": 1.9124585007863009, "grad_norm": 0.6787578765396105, "learning_rate": 3.496740314689272e-06, "loss": 0.156163227558136, "step": 21890 }, { "epoch": 1.9128953346147126, "grad_norm": 0.958037959861128, "learning_rate": 3.4943166775049875e-06, "loss": 0.1512606382369995, "step": 21895 }, { "epoch": 1.9133321684431244, "grad_norm": 0.6042137395767796, "learning_rate": 3.4918934293156813e-06, "loss": 0.1635913848876953, "step": 21900 }, { "epoch": 1.913769002271536, "grad_norm": 0.6023969840320296, "learning_rate": 3.489470570747402e-06, "loss": 0.14554762840270996, "step": 21905 }, { "epoch": 1.9142058360999474, "grad_norm": 0.5291111665774264, "learning_rate": 3.487048102426097e-06, "loss": 0.1764204740524292, "step": 21910 }, { "epoch": 1.9146426699283592, "grad_norm": 0.6277357600296711, "learning_rate": 3.484626024977614e-06, "loss": 0.14353102445602417, "step": 21915 }, { "epoch": 1.915079503756771, "grad_norm": 0.8556649000928497, "learning_rate": 3.4822043390277027e-06, "loss": 0.137744140625, "step": 21920 }, { "epoch": 1.9155163375851827, "grad_norm": 0.7185216937386109, "learning_rate": 3.4797830452020033e-06, "loss": 0.19484131336212157, "step": 21925 }, { "epoch": 1.9159531714135942, "grad_norm": 0.5360616767304232, "learning_rate": 3.4773621441260625e-06, "loss": 0.16819077730178833, "step": 21930 }, { "epoch": 1.916390005242006, "grad_norm": 0.7390391158537062, "learning_rate": 3.4749416364253217e-06, "loss": 0.1582978367805481, "step": 21935 }, { "epoch": 1.9168268390704175, "grad_norm": 0.7068818873804867, "learning_rate": 3.472521522725123e-06, "loss": 0.144190514087677, "step": 21940 }, { "epoch": 1.9172636728988293, "grad_norm": 0.5817738073924109, "learning_rate": 3.4701018036507054e-06, "loss": 0.14467964172363282, "step": 21945 }, { "epoch": 1.917700506727241, "grad_norm": 0.5720392625033989, "learning_rate": 3.4676824798272064e-06, "loss": 0.15582828521728515, "step": 21950 }, { "epoch": 1.9181373405556528, "grad_norm": 0.5619226667434225, "learning_rate": 3.4652635518796587e-06, "loss": 0.16764936447143555, "step": 21955 }, { "epoch": 1.9185741743840643, "grad_norm": 0.5537980750619983, "learning_rate": 3.4628450204329977e-06, "loss": 0.12666962146759034, "step": 21960 }, { "epoch": 1.9190110082124758, "grad_norm": 0.8264672822919209, "learning_rate": 3.4604268861120517e-06, "loss": 0.14892339706420898, "step": 21965 }, { "epoch": 1.9194478420408876, "grad_norm": 0.7041757734092686, "learning_rate": 3.4580091495415486e-06, "loss": 0.16577144861221313, "step": 21970 }, { "epoch": 1.9198846758692993, "grad_norm": 0.6175981923832713, "learning_rate": 3.4555918113461146e-06, "loss": 0.17703018188476563, "step": 21975 }, { "epoch": 1.920321509697711, "grad_norm": 0.6439018808328341, "learning_rate": 3.453174872150271e-06, "loss": 0.17183306217193603, "step": 21980 }, { "epoch": 1.9207583435261226, "grad_norm": 0.5968770409586892, "learning_rate": 3.4507583325784393e-06, "loss": 0.1714152216911316, "step": 21985 }, { "epoch": 1.9211951773545344, "grad_norm": 0.5663438277904347, "learning_rate": 3.4483421932549312e-06, "loss": 0.15149240493774413, "step": 21990 }, { "epoch": 1.921632011182946, "grad_norm": 0.7445950492491143, "learning_rate": 3.4459264548039596e-06, "loss": 0.16178785562515258, "step": 21995 }, { "epoch": 1.9220688450113577, "grad_norm": 0.5729846626136884, "learning_rate": 3.443511117849636e-06, "loss": 0.1806644916534424, "step": 22000 }, { "epoch": 1.9225056788397694, "grad_norm": 0.613708116304657, "learning_rate": 3.441096183015963e-06, "loss": 0.13948730230331421, "step": 22005 }, { "epoch": 1.9229425126681812, "grad_norm": 0.6039231722133351, "learning_rate": 3.4386816509268416e-06, "loss": 0.17061116695404052, "step": 22010 }, { "epoch": 1.9233793464965927, "grad_norm": 0.7352522145432456, "learning_rate": 3.4362675222060727e-06, "loss": 0.15437371730804444, "step": 22015 }, { "epoch": 1.9238161803250042, "grad_norm": 0.6166751572326844, "learning_rate": 3.433853797477344e-06, "loss": 0.18437588214874268, "step": 22020 }, { "epoch": 1.924253014153416, "grad_norm": 0.5080917746463027, "learning_rate": 3.4314404773642463e-06, "loss": 0.17181649208068847, "step": 22025 }, { "epoch": 1.9246898479818277, "grad_norm": 0.6441664931213682, "learning_rate": 3.4290275624902626e-06, "loss": 0.14783982038497925, "step": 22030 }, { "epoch": 1.9251266818102395, "grad_norm": 0.5877238458022584, "learning_rate": 3.426615053478774e-06, "loss": 0.17724709510803222, "step": 22035 }, { "epoch": 1.925563515638651, "grad_norm": 0.6513610143989433, "learning_rate": 3.424202950953053e-06, "loss": 0.1698674440383911, "step": 22040 }, { "epoch": 1.9260003494670628, "grad_norm": 0.6302347333829761, "learning_rate": 3.4217912555362732e-06, "loss": 0.1710637927055359, "step": 22045 }, { "epoch": 1.9264371832954743, "grad_norm": 0.6011416008167884, "learning_rate": 3.4193799678514927e-06, "loss": 0.1460557222366333, "step": 22050 }, { "epoch": 1.926874017123886, "grad_norm": 0.514245217203095, "learning_rate": 3.4169690885216734e-06, "loss": 0.17514100074768066, "step": 22055 }, { "epoch": 1.9273108509522978, "grad_norm": 0.6287011478077724, "learning_rate": 3.41455861816967e-06, "loss": 0.17968379259109496, "step": 22060 }, { "epoch": 1.9277476847807096, "grad_norm": 0.5837571381052692, "learning_rate": 3.4121485574182274e-06, "loss": 0.1701976776123047, "step": 22065 }, { "epoch": 1.928184518609121, "grad_norm": 0.6582936511277356, "learning_rate": 3.409738906889989e-06, "loss": 0.15706117153167726, "step": 22070 }, { "epoch": 1.9286213524375326, "grad_norm": 0.6634017535207367, "learning_rate": 3.40732966720749e-06, "loss": 0.14441173076629638, "step": 22075 }, { "epoch": 1.9290581862659444, "grad_norm": 0.5811043467733322, "learning_rate": 3.4049208389931633e-06, "loss": 0.1790938138961792, "step": 22080 }, { "epoch": 1.9294950200943561, "grad_norm": 0.5117845726960346, "learning_rate": 3.402512422869328e-06, "loss": 0.1725807785987854, "step": 22085 }, { "epoch": 1.929931853922768, "grad_norm": 0.6843400171057025, "learning_rate": 3.400104419458203e-06, "loss": 0.15652542114257811, "step": 22090 }, { "epoch": 1.9303686877511794, "grad_norm": 0.7027373090209673, "learning_rate": 3.397696829381898e-06, "loss": 0.16328737735748292, "step": 22095 }, { "epoch": 1.9308055215795912, "grad_norm": 0.6303526264407411, "learning_rate": 3.3952896532624192e-06, "loss": 0.1624543309211731, "step": 22100 }, { "epoch": 1.9312423554080027, "grad_norm": 0.6022751142720946, "learning_rate": 3.3928828917216594e-06, "loss": 0.15086947679519652, "step": 22105 }, { "epoch": 1.9316791892364145, "grad_norm": 0.732199549103123, "learning_rate": 3.3904765453814117e-06, "loss": 0.16525129079818726, "step": 22110 }, { "epoch": 1.9321160230648262, "grad_norm": 0.6352743084659792, "learning_rate": 3.388070614863356e-06, "loss": 0.152136492729187, "step": 22115 }, { "epoch": 1.932552856893238, "grad_norm": 0.637842938401472, "learning_rate": 3.385665100789067e-06, "loss": 0.14226140975952148, "step": 22120 }, { "epoch": 1.9329896907216495, "grad_norm": 0.5325480874222951, "learning_rate": 3.3832600037800133e-06, "loss": 0.17364127635955812, "step": 22125 }, { "epoch": 1.933426524550061, "grad_norm": 0.5490486237478748, "learning_rate": 3.380855324457554e-06, "loss": 0.18858195543289186, "step": 22130 }, { "epoch": 1.9338633583784728, "grad_norm": 0.5856018674640069, "learning_rate": 3.37845106344294e-06, "loss": 0.19150121212005616, "step": 22135 }, { "epoch": 1.9343001922068845, "grad_norm": 0.7352066216927777, "learning_rate": 3.376047221357316e-06, "loss": 0.15223731994628906, "step": 22140 }, { "epoch": 1.9347370260352963, "grad_norm": 0.6703133673957611, "learning_rate": 3.3736437988217186e-06, "loss": 0.16976866722106934, "step": 22145 }, { "epoch": 1.9351738598637078, "grad_norm": 0.5506752596796585, "learning_rate": 3.371240796457071e-06, "loss": 0.11704328060150146, "step": 22150 }, { "epoch": 1.9356106936921194, "grad_norm": 0.5970656631280546, "learning_rate": 3.3688382148841937e-06, "loss": 0.16052677631378173, "step": 22155 }, { "epoch": 1.936047527520531, "grad_norm": 0.621979658883386, "learning_rate": 3.3664360547237966e-06, "loss": 0.1451920747756958, "step": 22160 }, { "epoch": 1.9364843613489429, "grad_norm": 0.6869221336804061, "learning_rate": 3.3640343165964796e-06, "loss": 0.14784512519836426, "step": 22165 }, { "epoch": 1.9369211951773546, "grad_norm": 0.5741901102534622, "learning_rate": 3.3616330011227337e-06, "loss": 0.16815619468688964, "step": 22170 }, { "epoch": 1.9373580290057664, "grad_norm": 0.5639531887553565, "learning_rate": 3.359232108922946e-06, "loss": 0.14443888664245605, "step": 22175 }, { "epoch": 1.937794862834178, "grad_norm": 0.5939526974370668, "learning_rate": 3.356831640617384e-06, "loss": 0.15665829181671143, "step": 22180 }, { "epoch": 1.9382316966625894, "grad_norm": 0.6343964561661027, "learning_rate": 3.354431596826213e-06, "loss": 0.14468252658843994, "step": 22185 }, { "epoch": 1.9386685304910012, "grad_norm": 0.7289754292073228, "learning_rate": 3.3520319781694875e-06, "loss": 0.1577589988708496, "step": 22190 }, { "epoch": 1.939105364319413, "grad_norm": 0.5957244645704879, "learning_rate": 3.349632785267153e-06, "loss": 0.17715322971343994, "step": 22195 }, { "epoch": 1.9395421981478247, "grad_norm": 0.5926363269986687, "learning_rate": 3.347234018739043e-06, "loss": 0.1633281111717224, "step": 22200 }, { "epoch": 1.9399790319762362, "grad_norm": 0.5541028753813239, "learning_rate": 3.344835679204881e-06, "loss": 0.1435903549194336, "step": 22205 }, { "epoch": 1.9404158658046478, "grad_norm": 0.6254415860867025, "learning_rate": 3.3424377672842794e-06, "loss": 0.1751558780670166, "step": 22210 }, { "epoch": 1.9408526996330595, "grad_norm": 0.591755343820062, "learning_rate": 3.3400402835967444e-06, "loss": 0.17205314636230468, "step": 22215 }, { "epoch": 1.9412895334614713, "grad_norm": 0.6512945065087368, "learning_rate": 3.3376432287616657e-06, "loss": 0.15806227922439575, "step": 22220 }, { "epoch": 1.941726367289883, "grad_norm": 0.6835759591310402, "learning_rate": 3.3352466033983266e-06, "loss": 0.14698656797409057, "step": 22225 }, { "epoch": 1.9421632011182945, "grad_norm": 0.6595817920916164, "learning_rate": 3.332850408125898e-06, "loss": 0.17414417266845703, "step": 22230 }, { "epoch": 1.9426000349467063, "grad_norm": 0.6687233974704971, "learning_rate": 3.3304546435634387e-06, "loss": 0.17348477840423585, "step": 22235 }, { "epoch": 1.9430368687751178, "grad_norm": 0.610762462135762, "learning_rate": 3.3280593103299006e-06, "loss": 0.16252777576446534, "step": 22240 }, { "epoch": 1.9434737026035296, "grad_norm": 0.5508850103042564, "learning_rate": 3.3256644090441157e-06, "loss": 0.18761296272277833, "step": 22245 }, { "epoch": 1.9439105364319413, "grad_norm": 0.5904823943720252, "learning_rate": 3.3232699403248115e-06, "loss": 0.1653764247894287, "step": 22250 }, { "epoch": 1.944347370260353, "grad_norm": 0.5958457146697712, "learning_rate": 3.3208759047906024e-06, "loss": 0.1491810917854309, "step": 22255 }, { "epoch": 1.9447842040887646, "grad_norm": 0.578014750576731, "learning_rate": 3.318482303059991e-06, "loss": 0.17702988386154175, "step": 22260 }, { "epoch": 1.9452210379171762, "grad_norm": 0.6104061660781086, "learning_rate": 3.3160891357513635e-06, "loss": 0.16514378786087036, "step": 22265 }, { "epoch": 1.945657871745588, "grad_norm": 0.5371442253905114, "learning_rate": 3.313696403483003e-06, "loss": 0.15137858390808107, "step": 22270 }, { "epoch": 1.9460947055739997, "grad_norm": 0.5708627338169352, "learning_rate": 3.3113041068730684e-06, "loss": 0.15255916118621826, "step": 22275 }, { "epoch": 1.9465315394024114, "grad_norm": 0.5457706824124648, "learning_rate": 3.308912246539615e-06, "loss": 0.14764583110809326, "step": 22280 }, { "epoch": 1.946968373230823, "grad_norm": 0.583461156110773, "learning_rate": 3.3065208231005825e-06, "loss": 0.15797038078308107, "step": 22285 }, { "epoch": 1.9474052070592347, "grad_norm": 0.596570624117495, "learning_rate": 3.3041298371737984e-06, "loss": 0.15281150341033936, "step": 22290 }, { "epoch": 1.9478420408876462, "grad_norm": 0.5814821959322121, "learning_rate": 3.301739289376975e-06, "loss": 0.18469394445419313, "step": 22295 }, { "epoch": 1.948278874716058, "grad_norm": 0.8155239755258462, "learning_rate": 3.299349180327717e-06, "loss": 0.1573349952697754, "step": 22300 }, { "epoch": 1.9487157085444697, "grad_norm": 0.617335553580072, "learning_rate": 3.2969595106435073e-06, "loss": 0.16070859432220458, "step": 22305 }, { "epoch": 1.9491525423728815, "grad_norm": 0.6677667956974803, "learning_rate": 3.29457028094172e-06, "loss": 0.1609650135040283, "step": 22310 }, { "epoch": 1.949589376201293, "grad_norm": 0.571944081316835, "learning_rate": 3.2921814918396188e-06, "loss": 0.142200767993927, "step": 22315 }, { "epoch": 1.9500262100297046, "grad_norm": 0.5682883727328019, "learning_rate": 3.289793143954346e-06, "loss": 0.19523919820785524, "step": 22320 }, { "epoch": 1.9504630438581163, "grad_norm": 0.600071441289296, "learning_rate": 3.2874052379029366e-06, "loss": 0.15084363222122193, "step": 22325 }, { "epoch": 1.950899877686528, "grad_norm": 0.6415139404711397, "learning_rate": 3.285017774302307e-06, "loss": 0.1451693058013916, "step": 22330 }, { "epoch": 1.9513367115149398, "grad_norm": 0.5824356510439661, "learning_rate": 3.282630753769265e-06, "loss": 0.15827172994613647, "step": 22335 }, { "epoch": 1.9517735453433513, "grad_norm": 0.676281189354814, "learning_rate": 3.280244176920494e-06, "loss": 0.17091819047927856, "step": 22340 }, { "epoch": 1.952210379171763, "grad_norm": 0.6258237923981563, "learning_rate": 3.277858044372573e-06, "loss": 0.1671261191368103, "step": 22345 }, { "epoch": 1.9526472130001746, "grad_norm": 0.6197118799430382, "learning_rate": 3.275472356741961e-06, "loss": 0.1721677303314209, "step": 22350 }, { "epoch": 1.9530840468285864, "grad_norm": 0.5899067999864388, "learning_rate": 3.2730871146450026e-06, "loss": 0.1655658006668091, "step": 22355 }, { "epoch": 1.9535208806569981, "grad_norm": 0.6452982221300206, "learning_rate": 3.2707023186979293e-06, "loss": 0.16762919425964357, "step": 22360 }, { "epoch": 1.95395771448541, "grad_norm": 0.6762716817896623, "learning_rate": 3.268317969516855e-06, "loss": 0.16785571575164795, "step": 22365 }, { "epoch": 1.9543945483138214, "grad_norm": 0.7417180093042886, "learning_rate": 3.2659340677177786e-06, "loss": 0.14892780780792236, "step": 22370 }, { "epoch": 1.954831382142233, "grad_norm": 0.7265697840441332, "learning_rate": 3.2635506139165827e-06, "loss": 0.16469278335571289, "step": 22375 }, { "epoch": 1.9552682159706447, "grad_norm": 0.607333194543124, "learning_rate": 3.2611676087290366e-06, "loss": 0.16236066818237305, "step": 22380 }, { "epoch": 1.9557050497990565, "grad_norm": 0.5558830492400807, "learning_rate": 3.258785052770793e-06, "loss": 0.1752820134162903, "step": 22385 }, { "epoch": 1.9561418836274682, "grad_norm": 0.6455368766310475, "learning_rate": 3.2564029466573867e-06, "loss": 0.16885874271392823, "step": 22390 }, { "epoch": 1.9565787174558797, "grad_norm": 0.5469930015537098, "learning_rate": 3.254021291004238e-06, "loss": 0.15234851837158203, "step": 22395 }, { "epoch": 1.9570155512842915, "grad_norm": 0.6278469268349294, "learning_rate": 3.2516400864266516e-06, "loss": 0.15179078578948973, "step": 22400 }, { "epoch": 1.957452385112703, "grad_norm": 0.5271886527607895, "learning_rate": 3.2492593335398117e-06, "loss": 0.13610327243804932, "step": 22405 }, { "epoch": 1.9578892189411148, "grad_norm": 0.6133798426390072, "learning_rate": 3.2468790329587896e-06, "loss": 0.15717024803161622, "step": 22410 }, { "epoch": 1.9583260527695265, "grad_norm": 0.5479697685267642, "learning_rate": 3.244499185298541e-06, "loss": 0.1843034505844116, "step": 22415 }, { "epoch": 1.9587628865979383, "grad_norm": 0.6282480027479074, "learning_rate": 3.2421197911738977e-06, "loss": 0.143998122215271, "step": 22420 }, { "epoch": 1.9591997204263498, "grad_norm": 0.636484925544635, "learning_rate": 3.239740851199582e-06, "loss": 0.14251984357833863, "step": 22425 }, { "epoch": 1.9596365542547614, "grad_norm": 0.57743609718893, "learning_rate": 3.2373623659901966e-06, "loss": 0.16196906566619873, "step": 22430 }, { "epoch": 1.960073388083173, "grad_norm": 0.7759997161850227, "learning_rate": 3.2349843361602217e-06, "loss": 0.1663263201713562, "step": 22435 }, { "epoch": 1.9605102219115849, "grad_norm": 0.6715307497982591, "learning_rate": 3.2326067623240277e-06, "loss": 0.17854660749435425, "step": 22440 }, { "epoch": 1.9609470557399966, "grad_norm": 0.5878226500930617, "learning_rate": 3.2302296450958615e-06, "loss": 0.1893526554107666, "step": 22445 }, { "epoch": 1.9613838895684081, "grad_norm": 0.6549954626026442, "learning_rate": 3.227852985089854e-06, "loss": 0.16770784854888915, "step": 22450 }, { "epoch": 1.96182072339682, "grad_norm": 0.5946527756980338, "learning_rate": 3.2254767829200202e-06, "loss": 0.14676235914230346, "step": 22455 }, { "epoch": 1.9622575572252314, "grad_norm": 0.5866678001137178, "learning_rate": 3.223101039200255e-06, "loss": 0.16641695499420167, "step": 22460 }, { "epoch": 1.9626943910536432, "grad_norm": 0.6774839305151755, "learning_rate": 3.220725754544329e-06, "loss": 0.1492924451828003, "step": 22465 }, { "epoch": 1.963131224882055, "grad_norm": 0.5461511428226955, "learning_rate": 3.218350929565906e-06, "loss": 0.15616016387939452, "step": 22470 }, { "epoch": 1.9635680587104667, "grad_norm": 0.5782941879611153, "learning_rate": 3.21597656487852e-06, "loss": 0.15624589920043946, "step": 22475 }, { "epoch": 1.9640048925388782, "grad_norm": 0.5252612939819689, "learning_rate": 3.213602661095593e-06, "loss": 0.1769200086593628, "step": 22480 }, { "epoch": 1.9644417263672898, "grad_norm": 0.5616719474623155, "learning_rate": 3.2112292188304258e-06, "loss": 0.15448285341262818, "step": 22485 }, { "epoch": 1.9648785601957015, "grad_norm": 0.6399475681413633, "learning_rate": 3.2088562386961986e-06, "loss": 0.18072580099105834, "step": 22490 }, { "epoch": 1.9653153940241133, "grad_norm": 0.6969554069880989, "learning_rate": 3.2064837213059774e-06, "loss": 0.1404830813407898, "step": 22495 }, { "epoch": 1.965752227852525, "grad_norm": 0.6059874455651446, "learning_rate": 3.204111667272699e-06, "loss": 0.17451941967010498, "step": 22500 }, { "epoch": 1.9661890616809365, "grad_norm": 0.5646332059763001, "learning_rate": 3.201740077209189e-06, "loss": 0.16014268398284912, "step": 22505 }, { "epoch": 1.9666258955093483, "grad_norm": 0.6233307426197523, "learning_rate": 3.1993689517281514e-06, "loss": 0.16111910343170166, "step": 22510 }, { "epoch": 1.9670627293377598, "grad_norm": 0.6192912859442773, "learning_rate": 3.1969982914421676e-06, "loss": 0.1799247980117798, "step": 22515 }, { "epoch": 1.9674995631661716, "grad_norm": 0.6129709912231078, "learning_rate": 3.1946280969637e-06, "loss": 0.1472531795501709, "step": 22520 }, { "epoch": 1.9679363969945833, "grad_norm": 0.5241238011107776, "learning_rate": 3.1922583689050933e-06, "loss": 0.15854833126068116, "step": 22525 }, { "epoch": 1.968373230822995, "grad_norm": 0.6518801035039494, "learning_rate": 3.189889107878568e-06, "loss": 0.17631123065948487, "step": 22530 }, { "epoch": 1.9688100646514066, "grad_norm": 0.6003476524602765, "learning_rate": 3.1875203144962243e-06, "loss": 0.17799015045166017, "step": 22535 }, { "epoch": 1.9692468984798182, "grad_norm": 0.5561741266704976, "learning_rate": 3.1851519893700435e-06, "loss": 0.17558107376098633, "step": 22540 }, { "epoch": 1.96968373230823, "grad_norm": 0.6414977982186211, "learning_rate": 3.1827841331118848e-06, "loss": 0.15124356746673584, "step": 22545 }, { "epoch": 1.9701205661366417, "grad_norm": 0.6932869825386452, "learning_rate": 3.1804167463334867e-06, "loss": 0.15797791481018067, "step": 22550 }, { "epoch": 1.9705573999650534, "grad_norm": 0.594145817170175, "learning_rate": 3.1780498296464692e-06, "loss": 0.15295836925506592, "step": 22555 }, { "epoch": 1.970994233793465, "grad_norm": 0.6602036507815998, "learning_rate": 3.1756833836623225e-06, "loss": 0.1916021227836609, "step": 22560 }, { "epoch": 1.9714310676218765, "grad_norm": 0.5675849454278505, "learning_rate": 3.173317408992423e-06, "loss": 0.17232913970947267, "step": 22565 }, { "epoch": 1.9718679014502882, "grad_norm": 0.5377220874551604, "learning_rate": 3.1709519062480244e-06, "loss": 0.1532061815261841, "step": 22570 }, { "epoch": 1.9723047352787, "grad_norm": 0.7651747170970893, "learning_rate": 3.168586876040255e-06, "loss": 0.15885369777679442, "step": 22575 }, { "epoch": 1.9727415691071117, "grad_norm": 0.5427255628789474, "learning_rate": 3.1662223189801227e-06, "loss": 0.15314650535583496, "step": 22580 }, { "epoch": 1.9731784029355235, "grad_norm": 0.7370773579188088, "learning_rate": 3.1638582356785154e-06, "loss": 0.14255304336547853, "step": 22585 }, { "epoch": 1.973615236763935, "grad_norm": 0.564272665241683, "learning_rate": 3.161494626746197e-06, "loss": 0.1357431411743164, "step": 22590 }, { "epoch": 1.9740520705923466, "grad_norm": 0.6348305550760903, "learning_rate": 3.1591314927938057e-06, "loss": 0.15979009866714478, "step": 22595 }, { "epoch": 1.9744889044207583, "grad_norm": 0.6059836485810528, "learning_rate": 3.1567688344318613e-06, "loss": 0.15584468841552734, "step": 22600 }, { "epoch": 1.97492573824917, "grad_norm": 0.6344324655490626, "learning_rate": 3.1544066522707594e-06, "loss": 0.1509345293045044, "step": 22605 }, { "epoch": 1.9753625720775818, "grad_norm": 0.6570176759520265, "learning_rate": 3.1520449469207725e-06, "loss": 0.16239585876464843, "step": 22610 }, { "epoch": 1.9757994059059933, "grad_norm": 0.583485792521779, "learning_rate": 3.14968371899205e-06, "loss": 0.14441232681274413, "step": 22615 }, { "epoch": 1.9762362397344049, "grad_norm": 0.7098852900480953, "learning_rate": 3.1473229690946184e-06, "loss": 0.17399215698242188, "step": 22620 }, { "epoch": 1.9766730735628166, "grad_norm": 0.7179977951525114, "learning_rate": 3.144962697838377e-06, "loss": 0.16174404621124266, "step": 22625 }, { "epoch": 1.9771099073912284, "grad_norm": 0.773516882196305, "learning_rate": 3.1426029058331086e-06, "loss": 0.14877986907958984, "step": 22630 }, { "epoch": 1.9775467412196401, "grad_norm": 0.7049534106937587, "learning_rate": 3.1402435936884646e-06, "loss": 0.1606074333190918, "step": 22635 }, { "epoch": 1.9779835750480517, "grad_norm": 0.6616345853608339, "learning_rate": 3.1378847620139774e-06, "loss": 0.1696804165840149, "step": 22640 }, { "epoch": 1.9784204088764634, "grad_norm": 0.654586279229337, "learning_rate": 3.1355264114190544e-06, "loss": 0.1648119568824768, "step": 22645 }, { "epoch": 1.978857242704875, "grad_norm": 0.5610954993813668, "learning_rate": 3.1331685425129775e-06, "loss": 0.13767566680908203, "step": 22650 }, { "epoch": 1.9792940765332867, "grad_norm": 0.6158514163784108, "learning_rate": 3.130811155904907e-06, "loss": 0.1286121129989624, "step": 22655 }, { "epoch": 1.9797309103616985, "grad_norm": 0.5368508721924049, "learning_rate": 3.1284542522038726e-06, "loss": 0.18243459463119507, "step": 22660 }, { "epoch": 1.9801677441901102, "grad_norm": 0.6177745340048832, "learning_rate": 3.126097832018784e-06, "loss": 0.19688031673431397, "step": 22665 }, { "epoch": 1.9806045780185217, "grad_norm": 0.6099843389127144, "learning_rate": 3.1237418959584274e-06, "loss": 0.13636809587478638, "step": 22670 }, { "epoch": 1.9810414118469333, "grad_norm": 0.6687459081659161, "learning_rate": 3.1213864446314587e-06, "loss": 0.16747562885284423, "step": 22675 }, { "epoch": 1.981478245675345, "grad_norm": 0.6116364485874336, "learning_rate": 3.1190314786464125e-06, "loss": 0.14944770336151122, "step": 22680 }, { "epoch": 1.9819150795037568, "grad_norm": 0.5500941369944755, "learning_rate": 3.1166769986116983e-06, "loss": 0.12909675836563111, "step": 22685 }, { "epoch": 1.9823519133321685, "grad_norm": 0.6751606154684582, "learning_rate": 3.1143230051355967e-06, "loss": 0.16761661767959596, "step": 22690 }, { "epoch": 1.98278874716058, "grad_norm": 0.5864323394483725, "learning_rate": 3.111969498826264e-06, "loss": 0.15803091526031493, "step": 22695 }, { "epoch": 1.9832255809889918, "grad_norm": 0.7821641615449705, "learning_rate": 3.109616480291733e-06, "loss": 0.16302800178527832, "step": 22700 }, { "epoch": 1.9836624148174034, "grad_norm": 0.6228505218515631, "learning_rate": 3.1072639501399084e-06, "loss": 0.18508026599884034, "step": 22705 }, { "epoch": 1.984099248645815, "grad_norm": 0.6247524103070335, "learning_rate": 3.104911908978567e-06, "loss": 0.1693345785140991, "step": 22710 }, { "epoch": 1.9845360824742269, "grad_norm": 0.6088207651867635, "learning_rate": 3.1025603574153666e-06, "loss": 0.14888348579406738, "step": 22715 }, { "epoch": 1.9849729163026386, "grad_norm": 0.5948995845017668, "learning_rate": 3.100209296057827e-06, "loss": 0.14125614166259765, "step": 22720 }, { "epoch": 1.9854097501310501, "grad_norm": 0.6645514046393199, "learning_rate": 3.0978587255133508e-06, "loss": 0.16472455263137817, "step": 22725 }, { "epoch": 1.9858465839594617, "grad_norm": 0.6644077079211904, "learning_rate": 3.0955086463892104e-06, "loss": 0.14440610408782958, "step": 22730 }, { "epoch": 1.9862834177878734, "grad_norm": 0.6916841983045261, "learning_rate": 3.093159059292549e-06, "loss": 0.1545469880104065, "step": 22735 }, { "epoch": 1.9867202516162852, "grad_norm": 0.6377768377485724, "learning_rate": 3.090809964830388e-06, "loss": 0.17191797494888306, "step": 22740 }, { "epoch": 1.987157085444697, "grad_norm": 0.7630399145596243, "learning_rate": 3.0884613636096167e-06, "loss": 0.15224016904830934, "step": 22745 }, { "epoch": 1.9875939192731085, "grad_norm": 0.5785021884602486, "learning_rate": 3.086113256237002e-06, "loss": 0.16505110263824463, "step": 22750 }, { "epoch": 1.9880307531015202, "grad_norm": 0.6285019157433769, "learning_rate": 3.0837656433191756e-06, "loss": 0.16168522834777832, "step": 22755 }, { "epoch": 1.9884675869299318, "grad_norm": 0.6434091148407698, "learning_rate": 3.081418525462648e-06, "loss": 0.14748876094818114, "step": 22760 }, { "epoch": 1.9889044207583435, "grad_norm": 0.5757936198433137, "learning_rate": 3.0790719032737997e-06, "loss": 0.14773232936859132, "step": 22765 }, { "epoch": 1.9893412545867553, "grad_norm": 0.5517214284870743, "learning_rate": 3.0767257773588844e-06, "loss": 0.15363599061965943, "step": 22770 }, { "epoch": 1.989778088415167, "grad_norm": 0.6260110413364274, "learning_rate": 3.074380148324024e-06, "loss": 0.14929554462432862, "step": 22775 }, { "epoch": 1.9902149222435785, "grad_norm": 0.609937271936069, "learning_rate": 3.072035016775217e-06, "loss": 0.19470996856689454, "step": 22780 }, { "epoch": 1.99065175607199, "grad_norm": 0.5459281623195564, "learning_rate": 3.0696903833183293e-06, "loss": 0.15368459224700928, "step": 22785 }, { "epoch": 1.9910885899004018, "grad_norm": 0.5802960236721361, "learning_rate": 3.067346248559099e-06, "loss": 0.1755683422088623, "step": 22790 }, { "epoch": 1.9915254237288136, "grad_norm": 0.6807786550985033, "learning_rate": 3.0650026131031364e-06, "loss": 0.14851763248443603, "step": 22795 }, { "epoch": 1.9919622575572253, "grad_norm": 0.610538130971805, "learning_rate": 3.062659477555922e-06, "loss": 0.16962629556655884, "step": 22800 }, { "epoch": 1.9923990913856369, "grad_norm": 0.7195453185071882, "learning_rate": 3.060316842522809e-06, "loss": 0.1610076308250427, "step": 22805 }, { "epoch": 1.9928359252140486, "grad_norm": 0.6647842496496706, "learning_rate": 3.0579747086090204e-06, "loss": 0.15148210525512695, "step": 22810 }, { "epoch": 1.9932727590424602, "grad_norm": 0.6023261733477574, "learning_rate": 3.0556330764196453e-06, "loss": 0.14853672981262206, "step": 22815 }, { "epoch": 1.993709592870872, "grad_norm": 0.6612264259057657, "learning_rate": 3.05329194655965e-06, "loss": 0.1536550998687744, "step": 22820 }, { "epoch": 1.9941464266992837, "grad_norm": 0.6238352333044158, "learning_rate": 3.0509513196338667e-06, "loss": 0.15675727128982545, "step": 22825 }, { "epoch": 1.9945832605276954, "grad_norm": 0.6178553717890737, "learning_rate": 3.048611196247001e-06, "loss": 0.15964479446411134, "step": 22830 }, { "epoch": 1.995020094356107, "grad_norm": 0.6343837870272635, "learning_rate": 3.0462715770036237e-06, "loss": 0.162871253490448, "step": 22835 }, { "epoch": 1.9954569281845185, "grad_norm": 0.7587478803109443, "learning_rate": 3.04393246250818e-06, "loss": 0.17335420846939087, "step": 22840 }, { "epoch": 1.9958937620129302, "grad_norm": 0.5814367362190277, "learning_rate": 3.0415938533649834e-06, "loss": 0.1952645182609558, "step": 22845 }, { "epoch": 1.996330595841342, "grad_norm": 0.7046522048886832, "learning_rate": 3.0392557501782137e-06, "loss": 0.13504383563995362, "step": 22850 }, { "epoch": 1.9967674296697537, "grad_norm": 0.5578126623715061, "learning_rate": 3.036918153551923e-06, "loss": 0.14544538259506226, "step": 22855 }, { "epoch": 1.9972042634981653, "grad_norm": 0.6263740219338896, "learning_rate": 3.034581064090033e-06, "loss": 0.181548547744751, "step": 22860 }, { "epoch": 1.997641097326577, "grad_norm": 0.6356255063588198, "learning_rate": 3.0322444823963337e-06, "loss": 0.15818792581558228, "step": 22865 }, { "epoch": 1.9980779311549886, "grad_norm": 0.590077589376541, "learning_rate": 3.0299084090744833e-06, "loss": 0.18042876720428466, "step": 22870 }, { "epoch": 1.9985147649834003, "grad_norm": 0.8259520203817334, "learning_rate": 3.0275728447280094e-06, "loss": 0.1535089135169983, "step": 22875 }, { "epoch": 1.998951598811812, "grad_norm": 0.6843748879380464, "learning_rate": 3.025237789960306e-06, "loss": 0.16518454551696776, "step": 22880 }, { "epoch": 1.9993884326402238, "grad_norm": 0.6857795549478366, "learning_rate": 3.0229032453746386e-06, "loss": 0.162964928150177, "step": 22885 }, { "epoch": 1.9998252664686353, "grad_norm": 0.5558359721042788, "learning_rate": 3.020569211574138e-06, "loss": 0.15648910999298096, "step": 22890 }, { "epoch": 2.0, "eval_loss": 0.16540029644966125, "eval_runtime": 0.7701, "eval_samples_per_second": 11.686, "eval_steps_per_second": 2.597, "eval_token_acc": 0.9286882230306994, "step": 22892 }, { "epoch": 2.000262100297047, "grad_norm": 0.5261813352437504, "learning_rate": 3.0182356891618057e-06, "loss": 0.12756998538970948, "step": 22895 }, { "epoch": 2.0006989341254586, "grad_norm": 0.5749352051519355, "learning_rate": 3.015902678740509e-06, "loss": 0.12399563789367676, "step": 22900 }, { "epoch": 2.0011357679538704, "grad_norm": 0.5253228121537813, "learning_rate": 3.0135701809129847e-06, "loss": 0.11504902839660644, "step": 22905 }, { "epoch": 2.001572601782282, "grad_norm": 0.5880463921917379, "learning_rate": 3.011238196281838e-06, "loss": 0.10246338844299316, "step": 22910 }, { "epoch": 2.002009435610694, "grad_norm": 0.5405825359781475, "learning_rate": 3.008906725449535e-06, "loss": 0.09645956158638, "step": 22915 }, { "epoch": 2.002446269439105, "grad_norm": 0.6166248596391697, "learning_rate": 3.0065757690184173e-06, "loss": 0.12231173515319824, "step": 22920 }, { "epoch": 2.002883103267517, "grad_norm": 0.6160366941589687, "learning_rate": 3.004245327590688e-06, "loss": 0.09520388245582581, "step": 22925 }, { "epoch": 2.0033199370959287, "grad_norm": 0.5198072409871543, "learning_rate": 3.0019154017684226e-06, "loss": 0.09301517009735108, "step": 22930 }, { "epoch": 2.0037567709243405, "grad_norm": 0.5582656314574629, "learning_rate": 2.9995859921535565e-06, "loss": 0.1054847240447998, "step": 22935 }, { "epoch": 2.004193604752752, "grad_norm": 0.5803751775082594, "learning_rate": 2.9972570993478985e-06, "loss": 0.10201892852783204, "step": 22940 }, { "epoch": 2.0046304385811635, "grad_norm": 0.4990925631620582, "learning_rate": 2.9949287239531166e-06, "loss": 0.08850365281105041, "step": 22945 }, { "epoch": 2.0050672724095753, "grad_norm": 0.4779664959802794, "learning_rate": 2.992600866570752e-06, "loss": 0.11190325021743774, "step": 22950 }, { "epoch": 2.005504106237987, "grad_norm": 0.6355422055408786, "learning_rate": 2.990273527802208e-06, "loss": 0.0882355034351349, "step": 22955 }, { "epoch": 2.005940940066399, "grad_norm": 0.542357455193059, "learning_rate": 2.987946708248757e-06, "loss": 0.1186147928237915, "step": 22960 }, { "epoch": 2.0063777738948105, "grad_norm": 0.6421226840505183, "learning_rate": 2.9856204085115335e-06, "loss": 0.10416316986083984, "step": 22965 }, { "epoch": 2.0068146077232223, "grad_norm": 0.5307977533460337, "learning_rate": 2.983294629191543e-06, "loss": 0.10613983869552612, "step": 22970 }, { "epoch": 2.0072514415516336, "grad_norm": 0.6092995480631661, "learning_rate": 2.980969370889648e-06, "loss": 0.09661349654197693, "step": 22975 }, { "epoch": 2.0076882753800454, "grad_norm": 0.83142582747689, "learning_rate": 2.9786446342065855e-06, "loss": 0.09055800437927246, "step": 22980 }, { "epoch": 2.008125109208457, "grad_norm": 0.5216070430696715, "learning_rate": 2.9763204197429528e-06, "loss": 0.09853509664535523, "step": 22985 }, { "epoch": 2.008561943036869, "grad_norm": 0.5791314537863805, "learning_rate": 2.973996728099212e-06, "loss": 0.11237692832946777, "step": 22990 }, { "epoch": 2.0089987768652806, "grad_norm": 0.6274049988632476, "learning_rate": 2.9716735598756933e-06, "loss": 0.10565013885498047, "step": 22995 }, { "epoch": 2.009435610693692, "grad_norm": 0.4973262965517525, "learning_rate": 2.9693509156725893e-06, "loss": 0.10190975666046143, "step": 23000 }, { "epoch": 2.0098724445221037, "grad_norm": 0.5518374014368391, "learning_rate": 2.967028796089959e-06, "loss": 0.10402029752731323, "step": 23005 }, { "epoch": 2.0103092783505154, "grad_norm": 0.5433027804222913, "learning_rate": 2.964707201727721e-06, "loss": 0.0908292293548584, "step": 23010 }, { "epoch": 2.010746112178927, "grad_norm": 0.5405806829036165, "learning_rate": 2.9623861331856647e-06, "loss": 0.098782879114151, "step": 23015 }, { "epoch": 2.011182946007339, "grad_norm": 0.5724172970510449, "learning_rate": 2.960065591063439e-06, "loss": 0.11230220794677734, "step": 23020 }, { "epoch": 2.0116197798357507, "grad_norm": 0.7183847899892059, "learning_rate": 2.9577455759605607e-06, "loss": 0.11740505695343018, "step": 23025 }, { "epoch": 2.012056613664162, "grad_norm": 0.709464791562877, "learning_rate": 2.9554260884764076e-06, "loss": 0.08936095237731934, "step": 23030 }, { "epoch": 2.0124934474925738, "grad_norm": 0.518247264591073, "learning_rate": 2.9531071292102222e-06, "loss": 0.12025045156478882, "step": 23035 }, { "epoch": 2.0129302813209855, "grad_norm": 0.5505204091289553, "learning_rate": 2.950788698761109e-06, "loss": 0.13448379039764405, "step": 23040 }, { "epoch": 2.0133671151493973, "grad_norm": 0.5981170469691962, "learning_rate": 2.9484707977280363e-06, "loss": 0.1255386710166931, "step": 23045 }, { "epoch": 2.013803948977809, "grad_norm": 0.7266659975186541, "learning_rate": 2.9461534267098387e-06, "loss": 0.10655444860458374, "step": 23050 }, { "epoch": 2.0142407828062203, "grad_norm": 0.5937906721268454, "learning_rate": 2.9438365863052097e-06, "loss": 0.12039000988006592, "step": 23055 }, { "epoch": 2.014677616634632, "grad_norm": 0.623360085861244, "learning_rate": 2.9415202771127095e-06, "loss": 0.11843798160552979, "step": 23060 }, { "epoch": 2.015114450463044, "grad_norm": 0.5345357310505207, "learning_rate": 2.93920449973076e-06, "loss": 0.11466350555419921, "step": 23065 }, { "epoch": 2.0155512842914556, "grad_norm": 0.736791446629654, "learning_rate": 2.936889254757641e-06, "loss": 0.08339978456497192, "step": 23070 }, { "epoch": 2.0159881181198673, "grad_norm": 0.5990522296110796, "learning_rate": 2.934574542791501e-06, "loss": 0.12412501573562622, "step": 23075 }, { "epoch": 2.016424951948279, "grad_norm": 0.5917431380809353, "learning_rate": 2.9322603644303483e-06, "loss": 0.10893933773040772, "step": 23080 }, { "epoch": 2.0168617857766904, "grad_norm": 0.6572500367867666, "learning_rate": 2.9299467202720545e-06, "loss": 0.11042981147766114, "step": 23085 }, { "epoch": 2.017298619605102, "grad_norm": 0.5302962306040674, "learning_rate": 2.92763361091435e-06, "loss": 0.12196552753448486, "step": 23090 }, { "epoch": 2.017735453433514, "grad_norm": 0.5330864321915835, "learning_rate": 2.92532103695483e-06, "loss": 0.10539977550506592, "step": 23095 }, { "epoch": 2.0181722872619257, "grad_norm": 0.6264755637212733, "learning_rate": 2.9230089989909536e-06, "loss": 0.1113588809967041, "step": 23100 }, { "epoch": 2.0186091210903374, "grad_norm": 0.5904911601891611, "learning_rate": 2.9206974976200336e-06, "loss": 0.11704354286193848, "step": 23105 }, { "epoch": 2.0190459549187487, "grad_norm": 0.5964015729825424, "learning_rate": 2.9183865334392514e-06, "loss": 0.09761815667152404, "step": 23110 }, { "epoch": 2.0194827887471605, "grad_norm": 0.5244143882452527, "learning_rate": 2.9160761070456468e-06, "loss": 0.103961181640625, "step": 23115 }, { "epoch": 2.0199196225755722, "grad_norm": 0.6685242808496129, "learning_rate": 2.91376621903612e-06, "loss": 0.09267948865890503, "step": 23120 }, { "epoch": 2.020356456403984, "grad_norm": 0.5284206207100682, "learning_rate": 2.9114568700074366e-06, "loss": 0.11497263908386231, "step": 23125 }, { "epoch": 2.0207932902323957, "grad_norm": 0.5480554949871588, "learning_rate": 2.909148060556216e-06, "loss": 0.10150643587112426, "step": 23130 }, { "epoch": 2.0212301240608075, "grad_norm": 0.4767048570601427, "learning_rate": 2.9068397912789435e-06, "loss": 0.09120336771011353, "step": 23135 }, { "epoch": 2.021666957889219, "grad_norm": 0.5616169810765876, "learning_rate": 2.9045320627719608e-06, "loss": 0.11234836578369141, "step": 23140 }, { "epoch": 2.0221037917176305, "grad_norm": 0.5407319281655657, "learning_rate": 2.9022248756314754e-06, "loss": 0.10189470052719116, "step": 23145 }, { "epoch": 2.0225406255460423, "grad_norm": 0.633442615541974, "learning_rate": 2.8999182304535483e-06, "loss": 0.10845392942428589, "step": 23150 }, { "epoch": 2.022977459374454, "grad_norm": 0.5287338292118806, "learning_rate": 2.897612127834108e-06, "loss": 0.0976359486579895, "step": 23155 }, { "epoch": 2.023414293202866, "grad_norm": 0.6087426830037309, "learning_rate": 2.8953065683689364e-06, "loss": 0.08558486700057984, "step": 23160 }, { "epoch": 2.023851127031277, "grad_norm": 0.6229726285323448, "learning_rate": 2.893001552653677e-06, "loss": 0.0878714680671692, "step": 23165 }, { "epoch": 2.024287960859689, "grad_norm": 0.6341336497085871, "learning_rate": 2.890697081283833e-06, "loss": 0.11775074005126954, "step": 23170 }, { "epoch": 2.0247247946881006, "grad_norm": 0.5417315302620457, "learning_rate": 2.8883931548547673e-06, "loss": 0.09316912889480591, "step": 23175 }, { "epoch": 2.0251616285165124, "grad_norm": 0.7439637407311595, "learning_rate": 2.8860897739617033e-06, "loss": 0.09661529660224914, "step": 23180 }, { "epoch": 2.025598462344924, "grad_norm": 0.5929375377650444, "learning_rate": 2.8837869391997193e-06, "loss": 0.11979650259017945, "step": 23185 }, { "epoch": 2.026035296173336, "grad_norm": 0.5634703919366606, "learning_rate": 2.8814846511637583e-06, "loss": 0.11452643871307373, "step": 23190 }, { "epoch": 2.026472130001747, "grad_norm": 0.7054132007639896, "learning_rate": 2.879182910448619e-06, "loss": 0.10714571475982666, "step": 23195 }, { "epoch": 2.026908963830159, "grad_norm": 0.5567035387244778, "learning_rate": 2.876881717648957e-06, "loss": 0.12541890144348145, "step": 23200 }, { "epoch": 2.0273457976585707, "grad_norm": 0.602859275625075, "learning_rate": 2.874581073359286e-06, "loss": 0.09831387996673584, "step": 23205 }, { "epoch": 2.0277826314869825, "grad_norm": 0.6018663428207178, "learning_rate": 2.8722809781739846e-06, "loss": 0.10521895885467529, "step": 23210 }, { "epoch": 2.028219465315394, "grad_norm": 0.5604137803290183, "learning_rate": 2.8699814326872834e-06, "loss": 0.10787166357040405, "step": 23215 }, { "epoch": 2.0286562991438055, "grad_norm": 0.5720601630066393, "learning_rate": 2.86768243749327e-06, "loss": 0.11446259021759034, "step": 23220 }, { "epoch": 2.0290931329722173, "grad_norm": 0.6171647229086528, "learning_rate": 2.865383993185898e-06, "loss": 0.09630392789840699, "step": 23225 }, { "epoch": 2.029529966800629, "grad_norm": 0.6107542887073084, "learning_rate": 2.8630861003589655e-06, "loss": 0.09071939587593078, "step": 23230 }, { "epoch": 2.029966800629041, "grad_norm": 0.6422148071333215, "learning_rate": 2.8607887596061416e-06, "loss": 0.10882506370544434, "step": 23235 }, { "epoch": 2.0304036344574525, "grad_norm": 0.5560873511429616, "learning_rate": 2.8584919715209435e-06, "loss": 0.12962684631347657, "step": 23240 }, { "epoch": 2.030840468285864, "grad_norm": 0.6040361260312185, "learning_rate": 2.856195736696752e-06, "loss": 0.13559935092926026, "step": 23245 }, { "epoch": 2.0312773021142756, "grad_norm": 0.6359262176230345, "learning_rate": 2.853900055726799e-06, "loss": 0.10713691711425781, "step": 23250 }, { "epoch": 2.0317141359426873, "grad_norm": 0.591363882093481, "learning_rate": 2.8516049292041793e-06, "loss": 0.08708414435386658, "step": 23255 }, { "epoch": 2.032150969771099, "grad_norm": 0.7305977415613994, "learning_rate": 2.8493103577218395e-06, "loss": 0.09311496019363404, "step": 23260 }, { "epoch": 2.032587803599511, "grad_norm": 0.6101410449459519, "learning_rate": 2.847016341872585e-06, "loss": 0.09334295988082886, "step": 23265 }, { "epoch": 2.0330246374279226, "grad_norm": 0.5473895190862483, "learning_rate": 2.8447228822490775e-06, "loss": 0.10915788412094116, "step": 23270 }, { "epoch": 2.033461471256334, "grad_norm": 0.6026812256321171, "learning_rate": 2.8424299794438327e-06, "loss": 0.10490741729736328, "step": 23275 }, { "epoch": 2.0338983050847457, "grad_norm": 0.5409432792150064, "learning_rate": 2.8401376340492284e-06, "loss": 0.12289714813232422, "step": 23280 }, { "epoch": 2.0343351389131574, "grad_norm": 0.6567826244378219, "learning_rate": 2.837845846657491e-06, "loss": 0.10747953653335571, "step": 23285 }, { "epoch": 2.034771972741569, "grad_norm": 0.4805143132861098, "learning_rate": 2.8355546178607126e-06, "loss": 0.087081778049469, "step": 23290 }, { "epoch": 2.035208806569981, "grad_norm": 0.5845313638925299, "learning_rate": 2.833263948250826e-06, "loss": 0.10507900714874267, "step": 23295 }, { "epoch": 2.0356456403983922, "grad_norm": 0.7864350168575936, "learning_rate": 2.8309738384196344e-06, "loss": 0.08638870716094971, "step": 23300 }, { "epoch": 2.036082474226804, "grad_norm": 0.5551178582025033, "learning_rate": 2.828684288958787e-06, "loss": 0.11021085977554321, "step": 23305 }, { "epoch": 2.0365193080552157, "grad_norm": 0.5676567130975, "learning_rate": 2.8263953004597945e-06, "loss": 0.10170928239822388, "step": 23310 }, { "epoch": 2.0369561418836275, "grad_norm": 0.5400216202260948, "learning_rate": 2.8241068735140177e-06, "loss": 0.09476673603057861, "step": 23315 }, { "epoch": 2.0373929757120393, "grad_norm": 0.6176391673428592, "learning_rate": 2.821819008712675e-06, "loss": 0.11763588190078736, "step": 23320 }, { "epoch": 2.037829809540451, "grad_norm": 0.6830970140263983, "learning_rate": 2.8195317066468387e-06, "loss": 0.11553521156311035, "step": 23325 }, { "epoch": 2.0382666433688623, "grad_norm": 0.6690984680028927, "learning_rate": 2.8172449679074337e-06, "loss": 0.09959610104560852, "step": 23330 }, { "epoch": 2.038703477197274, "grad_norm": 0.5697129874593887, "learning_rate": 2.814958793085245e-06, "loss": 0.11556491851806641, "step": 23335 }, { "epoch": 2.039140311025686, "grad_norm": 0.5470360215850792, "learning_rate": 2.812673182770905e-06, "loss": 0.09536057710647583, "step": 23340 }, { "epoch": 2.0395771448540976, "grad_norm": 0.5592710088780435, "learning_rate": 2.810388137554908e-06, "loss": 0.11219334602355957, "step": 23345 }, { "epoch": 2.0400139786825093, "grad_norm": 0.5898692657249344, "learning_rate": 2.808103658027593e-06, "loss": 0.12032902240753174, "step": 23350 }, { "epoch": 2.0404508125109206, "grad_norm": 0.7381341369352814, "learning_rate": 2.8058197447791652e-06, "loss": 0.07971521615982055, "step": 23355 }, { "epoch": 2.0408876463393324, "grad_norm": 0.5563954907547588, "learning_rate": 2.803536398399667e-06, "loss": 0.09040963649749756, "step": 23360 }, { "epoch": 2.041324480167744, "grad_norm": 0.5860759998193519, "learning_rate": 2.8012536194790096e-06, "loss": 0.1113624095916748, "step": 23365 }, { "epoch": 2.041761313996156, "grad_norm": 0.5220192325227321, "learning_rate": 2.79897140860695e-06, "loss": 0.08621495962142944, "step": 23370 }, { "epoch": 2.0421981478245677, "grad_norm": 0.6141957035476268, "learning_rate": 2.7966897663730987e-06, "loss": 0.10592038631439209, "step": 23375 }, { "epoch": 2.0426349816529794, "grad_norm": 0.6168410565275526, "learning_rate": 2.7944086933669224e-06, "loss": 0.09578025937080384, "step": 23380 }, { "epoch": 2.0430718154813907, "grad_norm": 0.5577388448032137, "learning_rate": 2.792128190177738e-06, "loss": 0.08584630489349365, "step": 23385 }, { "epoch": 2.0435086493098025, "grad_norm": 0.8479249928309162, "learning_rate": 2.789848257394716e-06, "loss": 0.13488707542419434, "step": 23390 }, { "epoch": 2.0439454831382142, "grad_norm": 0.5368893101678918, "learning_rate": 2.7875688956068785e-06, "loss": 0.09853640794754029, "step": 23395 }, { "epoch": 2.044382316966626, "grad_norm": 0.6918034792753557, "learning_rate": 2.7852901054031045e-06, "loss": 0.10470041036605834, "step": 23400 }, { "epoch": 2.0448191507950377, "grad_norm": 0.5394445348330675, "learning_rate": 2.7830118873721167e-06, "loss": 0.09577251672744751, "step": 23405 }, { "epoch": 2.045255984623449, "grad_norm": 0.6144258065754087, "learning_rate": 2.780734242102502e-06, "loss": 0.1177226185798645, "step": 23410 }, { "epoch": 2.045692818451861, "grad_norm": 0.5021323359998001, "learning_rate": 2.778457170182688e-06, "loss": 0.11776037216186523, "step": 23415 }, { "epoch": 2.0461296522802725, "grad_norm": 0.539981496294493, "learning_rate": 2.77618067220096e-06, "loss": 0.12846639156341552, "step": 23420 }, { "epoch": 2.0465664861086843, "grad_norm": 0.7090883350355048, "learning_rate": 2.773904748745454e-06, "loss": 0.10514893531799316, "step": 23425 }, { "epoch": 2.047003319937096, "grad_norm": 0.6165263965380677, "learning_rate": 2.7716294004041544e-06, "loss": 0.09189494848251342, "step": 23430 }, { "epoch": 2.047440153765508, "grad_norm": 0.6359288081183812, "learning_rate": 2.7693546277649054e-06, "loss": 0.11634536981582641, "step": 23435 }, { "epoch": 2.047876987593919, "grad_norm": 0.6854372317386174, "learning_rate": 2.767080431415392e-06, "loss": 0.11411793231964111, "step": 23440 }, { "epoch": 2.048313821422331, "grad_norm": 0.6609884451496676, "learning_rate": 2.76480681194316e-06, "loss": 0.08311567306518555, "step": 23445 }, { "epoch": 2.0487506552507426, "grad_norm": 0.7108388397196715, "learning_rate": 2.7625337699355994e-06, "loss": 0.10011985301971435, "step": 23450 }, { "epoch": 2.0491874890791544, "grad_norm": 0.5768564268229545, "learning_rate": 2.760261305979952e-06, "loss": 0.13793606758117677, "step": 23455 }, { "epoch": 2.049624322907566, "grad_norm": 0.5670853603365118, "learning_rate": 2.7579894206633118e-06, "loss": 0.09930203557014465, "step": 23460 }, { "epoch": 2.0500611567359774, "grad_norm": 0.5844540086720428, "learning_rate": 2.755718114572625e-06, "loss": 0.10374394655227662, "step": 23465 }, { "epoch": 2.050497990564389, "grad_norm": 0.6915515137536026, "learning_rate": 2.7534473882946846e-06, "loss": 0.10896220207214355, "step": 23470 }, { "epoch": 2.050934824392801, "grad_norm": 0.5947916688462134, "learning_rate": 2.751177242416133e-06, "loss": 0.10886590480804444, "step": 23475 }, { "epoch": 2.0513716582212127, "grad_norm": 0.5732252891281404, "learning_rate": 2.7489076775234696e-06, "loss": 0.10525738000869751, "step": 23480 }, { "epoch": 2.0518084920496245, "grad_norm": 0.5608616188591246, "learning_rate": 2.7466386942030364e-06, "loss": 0.1003885269165039, "step": 23485 }, { "epoch": 2.052245325878036, "grad_norm": 0.5891074019001376, "learning_rate": 2.7443702930410286e-06, "loss": 0.10965249538421631, "step": 23490 }, { "epoch": 2.0526821597064475, "grad_norm": 0.5615206964534158, "learning_rate": 2.742102474623488e-06, "loss": 0.11088407039642334, "step": 23495 }, { "epoch": 2.0531189935348593, "grad_norm": 0.5078563586134602, "learning_rate": 2.739835239536311e-06, "loss": 0.1061320185661316, "step": 23500 }, { "epoch": 2.053555827363271, "grad_norm": 0.6488279699298396, "learning_rate": 2.737568588365237e-06, "loss": 0.07655634880065917, "step": 23505 }, { "epoch": 2.053992661191683, "grad_norm": 0.633275115984815, "learning_rate": 2.735302521695862e-06, "loss": 0.09049627780914307, "step": 23510 }, { "epoch": 2.0544294950200945, "grad_norm": 0.6399275480879903, "learning_rate": 2.7330370401136253e-06, "loss": 0.10173243284225464, "step": 23515 }, { "epoch": 2.054866328848506, "grad_norm": 0.5752214568845405, "learning_rate": 2.730772144203816e-06, "loss": 0.10484609603881836, "step": 23520 }, { "epoch": 2.0553031626769176, "grad_norm": 0.5999301829315099, "learning_rate": 2.7285078345515727e-06, "loss": 0.12077550888061524, "step": 23525 }, { "epoch": 2.0557399965053293, "grad_norm": 0.5953814109449658, "learning_rate": 2.7262441117418796e-06, "loss": 0.10987956523895263, "step": 23530 }, { "epoch": 2.056176830333741, "grad_norm": 0.5996300128365993, "learning_rate": 2.723980976359577e-06, "loss": 0.09030292630195617, "step": 23535 }, { "epoch": 2.056613664162153, "grad_norm": 0.5130085705595573, "learning_rate": 2.721718428989344e-06, "loss": 0.0891103744506836, "step": 23540 }, { "epoch": 2.0570504979905646, "grad_norm": 0.6567857536245236, "learning_rate": 2.7194564702157188e-06, "loss": 0.10518544912338257, "step": 23545 }, { "epoch": 2.057487331818976, "grad_norm": 0.5132758450456782, "learning_rate": 2.7171951006230724e-06, "loss": 0.1105615735054016, "step": 23550 }, { "epoch": 2.0579241656473877, "grad_norm": 0.5496950854394266, "learning_rate": 2.7149343207956384e-06, "loss": 0.11645989418029785, "step": 23555 }, { "epoch": 2.0583609994757994, "grad_norm": 0.5847338360577327, "learning_rate": 2.7126741313174876e-06, "loss": 0.10588327646255494, "step": 23560 }, { "epoch": 2.058797833304211, "grad_norm": 0.6912004955412392, "learning_rate": 2.710414532772547e-06, "loss": 0.1094393253326416, "step": 23565 }, { "epoch": 2.059234667132623, "grad_norm": 0.5369073716802629, "learning_rate": 2.708155525744585e-06, "loss": 0.1021947979927063, "step": 23570 }, { "epoch": 2.0596715009610342, "grad_norm": 0.5656839811895538, "learning_rate": 2.7058971108172162e-06, "loss": 0.10777812004089356, "step": 23575 }, { "epoch": 2.060108334789446, "grad_norm": 0.5688965226379233, "learning_rate": 2.7036392885739043e-06, "loss": 0.09600555896759033, "step": 23580 }, { "epoch": 2.0605451686178577, "grad_norm": 0.5855217022673286, "learning_rate": 2.7013820595979645e-06, "loss": 0.11991328001022339, "step": 23585 }, { "epoch": 2.0609820024462695, "grad_norm": 0.5487364959426343, "learning_rate": 2.6991254244725517e-06, "loss": 0.11416889429092407, "step": 23590 }, { "epoch": 2.0614188362746813, "grad_norm": 0.5619299866716925, "learning_rate": 2.6968693837806687e-06, "loss": 0.10409318208694458, "step": 23595 }, { "epoch": 2.0618556701030926, "grad_norm": 0.579767134451589, "learning_rate": 2.6946139381051697e-06, "loss": 0.08195001482963563, "step": 23600 }, { "epoch": 2.0622925039315043, "grad_norm": 0.548458898838054, "learning_rate": 2.6923590880287475e-06, "loss": 0.1310875177383423, "step": 23605 }, { "epoch": 2.062729337759916, "grad_norm": 0.6048139666927884, "learning_rate": 2.690104834133952e-06, "loss": 0.11082017421722412, "step": 23610 }, { "epoch": 2.063166171588328, "grad_norm": 0.5406864914394243, "learning_rate": 2.6878511770031636e-06, "loss": 0.11815627813339233, "step": 23615 }, { "epoch": 2.0636030054167396, "grad_norm": 0.6479302429941255, "learning_rate": 2.685598117218623e-06, "loss": 0.14318721294403075, "step": 23620 }, { "epoch": 2.0640398392451513, "grad_norm": 0.5776745890433482, "learning_rate": 2.683345655362409e-06, "loss": 0.0948929488658905, "step": 23625 }, { "epoch": 2.0644766730735626, "grad_norm": 0.5736238977127917, "learning_rate": 2.6810937920164447e-06, "loss": 0.08323868513107299, "step": 23630 }, { "epoch": 2.0649135069019744, "grad_norm": 0.6098630821509518, "learning_rate": 2.6788425277625074e-06, "loss": 0.10395655632019044, "step": 23635 }, { "epoch": 2.065350340730386, "grad_norm": 0.5642036752980221, "learning_rate": 2.676591863182211e-06, "loss": 0.12345914840698242, "step": 23640 }, { "epoch": 2.065787174558798, "grad_norm": 0.6002985559010232, "learning_rate": 2.6743417988570163e-06, "loss": 0.11725265979766845, "step": 23645 }, { "epoch": 2.0662240083872097, "grad_norm": 0.6481900452484456, "learning_rate": 2.6720923353682294e-06, "loss": 0.11391892433166503, "step": 23650 }, { "epoch": 2.066660842215621, "grad_norm": 0.6243786969985172, "learning_rate": 2.6698434732970048e-06, "loss": 0.12414678335189819, "step": 23655 }, { "epoch": 2.0670976760440327, "grad_norm": 0.672749744925954, "learning_rate": 2.6675952132243337e-06, "loss": 0.0924500048160553, "step": 23660 }, { "epoch": 2.0675345098724445, "grad_norm": 0.5366447767383259, "learning_rate": 2.6653475557310626e-06, "loss": 0.08658527731895446, "step": 23665 }, { "epoch": 2.0679713437008562, "grad_norm": 0.626744532282855, "learning_rate": 2.663100501397873e-06, "loss": 0.12308403253555297, "step": 23670 }, { "epoch": 2.068408177529268, "grad_norm": 0.6092646831014146, "learning_rate": 2.660854050805294e-06, "loss": 0.12111152410507202, "step": 23675 }, { "epoch": 2.0688450113576797, "grad_norm": 0.6139662582947585, "learning_rate": 2.6586082045336968e-06, "loss": 0.09464104175567627, "step": 23680 }, { "epoch": 2.069281845186091, "grad_norm": 0.5991513739118798, "learning_rate": 2.656362963163304e-06, "loss": 0.10939681529998779, "step": 23685 }, { "epoch": 2.069718679014503, "grad_norm": 0.5930640431919513, "learning_rate": 2.6541183272741712e-06, "loss": 0.12609738111495972, "step": 23690 }, { "epoch": 2.0701555128429145, "grad_norm": 0.6222667947630178, "learning_rate": 2.651874297446202e-06, "loss": 0.1300173282623291, "step": 23695 }, { "epoch": 2.0705923466713263, "grad_norm": 0.6296544252148866, "learning_rate": 2.649630874259148e-06, "loss": 0.1014215111732483, "step": 23700 }, { "epoch": 2.071029180499738, "grad_norm": 0.6419840133133536, "learning_rate": 2.647388058292598e-06, "loss": 0.08574545383453369, "step": 23705 }, { "epoch": 2.0714660143281494, "grad_norm": 0.5636171318134688, "learning_rate": 2.6451458501259877e-06, "loss": 0.08012605905532837, "step": 23710 }, { "epoch": 2.071902848156561, "grad_norm": 0.5012074533930609, "learning_rate": 2.6429042503385892e-06, "loss": 0.1025885820388794, "step": 23715 }, { "epoch": 2.072339681984973, "grad_norm": 0.4993946088912319, "learning_rate": 2.6406632595095282e-06, "loss": 0.10142446756362915, "step": 23720 }, { "epoch": 2.0727765158133846, "grad_norm": 0.5336388951120432, "learning_rate": 2.6384228782177648e-06, "loss": 0.09970412254333497, "step": 23725 }, { "epoch": 2.0732133496417964, "grad_norm": 0.6088106442160727, "learning_rate": 2.6361831070421014e-06, "loss": 0.09611108303070068, "step": 23730 }, { "epoch": 2.073650183470208, "grad_norm": 0.4872967578922801, "learning_rate": 2.6339439465611905e-06, "loss": 0.0944484829902649, "step": 23735 }, { "epoch": 2.0740870172986194, "grad_norm": 0.609453772141893, "learning_rate": 2.6317053973535182e-06, "loss": 0.09695102572441101, "step": 23740 }, { "epoch": 2.074523851127031, "grad_norm": 0.6235572205091354, "learning_rate": 2.629467459997418e-06, "loss": 0.10059155225753784, "step": 23745 }, { "epoch": 2.074960684955443, "grad_norm": 0.7140115825063756, "learning_rate": 2.6272301350710603e-06, "loss": 0.09487636089324951, "step": 23750 }, { "epoch": 2.0753975187838547, "grad_norm": 0.5538857268682577, "learning_rate": 2.6249934231524653e-06, "loss": 0.12142037153244019, "step": 23755 }, { "epoch": 2.0758343526122665, "grad_norm": 0.5944696330814001, "learning_rate": 2.622757324819486e-06, "loss": 0.11892987489700317, "step": 23760 }, { "epoch": 2.0762711864406778, "grad_norm": 0.6078663278786076, "learning_rate": 2.6205218406498235e-06, "loss": 0.12104096412658691, "step": 23765 }, { "epoch": 2.0767080202690895, "grad_norm": 0.60447106203391, "learning_rate": 2.6182869712210174e-06, "loss": 0.08917327523231507, "step": 23770 }, { "epoch": 2.0771448540975013, "grad_norm": 0.5726672241472583, "learning_rate": 2.6160527171104477e-06, "loss": 0.08831095695495605, "step": 23775 }, { "epoch": 2.077581687925913, "grad_norm": 0.5129525654446844, "learning_rate": 2.6138190788953357e-06, "loss": 0.1192859411239624, "step": 23780 }, { "epoch": 2.0780185217543248, "grad_norm": 0.5612398079502591, "learning_rate": 2.611586057152746e-06, "loss": 0.0954541802406311, "step": 23785 }, { "epoch": 2.0784553555827365, "grad_norm": 0.6680226127097826, "learning_rate": 2.6093536524595827e-06, "loss": 0.10789961814880371, "step": 23790 }, { "epoch": 2.078892189411148, "grad_norm": 0.5963780945679268, "learning_rate": 2.6071218653925866e-06, "loss": 0.11430168151855469, "step": 23795 }, { "epoch": 2.0793290232395596, "grad_norm": 0.5782189780611835, "learning_rate": 2.6048906965283486e-06, "loss": 0.08906333446502686, "step": 23800 }, { "epoch": 2.0797658570679713, "grad_norm": 0.6949358168178099, "learning_rate": 2.6026601464432864e-06, "loss": 0.08287997245788574, "step": 23805 }, { "epoch": 2.080202690896383, "grad_norm": 0.5419288711348289, "learning_rate": 2.6004302157136707e-06, "loss": 0.12186161279678345, "step": 23810 }, { "epoch": 2.080639524724795, "grad_norm": 0.6352375704465866, "learning_rate": 2.5982009049156027e-06, "loss": 0.08600041866302491, "step": 23815 }, { "epoch": 2.081076358553206, "grad_norm": 0.5817959186076668, "learning_rate": 2.595972214625031e-06, "loss": 0.1122360348701477, "step": 23820 }, { "epoch": 2.081513192381618, "grad_norm": 0.634660498680361, "learning_rate": 2.593744145417738e-06, "loss": 0.10108567476272583, "step": 23825 }, { "epoch": 2.0819500262100297, "grad_norm": 0.4971591513519351, "learning_rate": 2.5915166978693497e-06, "loss": 0.10314596891403198, "step": 23830 }, { "epoch": 2.0823868600384414, "grad_norm": 0.5733060569379796, "learning_rate": 2.589289872555325e-06, "loss": 0.10289714336395264, "step": 23835 }, { "epoch": 2.082823693866853, "grad_norm": 0.49137065598428276, "learning_rate": 2.5870636700509722e-06, "loss": 0.09487130045890808, "step": 23840 }, { "epoch": 2.083260527695265, "grad_norm": 0.5559229844098497, "learning_rate": 2.584838090931432e-06, "loss": 0.11870708465576171, "step": 23845 }, { "epoch": 2.0836973615236762, "grad_norm": 0.6242878031005075, "learning_rate": 2.582613135771681e-06, "loss": 0.10367673635482788, "step": 23850 }, { "epoch": 2.084134195352088, "grad_norm": 0.6762703271242901, "learning_rate": 2.5803888051465454e-06, "loss": 0.12066631317138672, "step": 23855 }, { "epoch": 2.0845710291804997, "grad_norm": 0.5343654166954076, "learning_rate": 2.5781650996306773e-06, "loss": 0.08465087413787842, "step": 23860 }, { "epoch": 2.0850078630089115, "grad_norm": 0.5229487048240821, "learning_rate": 2.5759420197985812e-06, "loss": 0.08199135661125183, "step": 23865 }, { "epoch": 2.0854446968373233, "grad_norm": 0.6308440161431914, "learning_rate": 2.5737195662245832e-06, "loss": 0.12407577037811279, "step": 23870 }, { "epoch": 2.0858815306657346, "grad_norm": 0.634351635731687, "learning_rate": 2.5714977394828632e-06, "loss": 0.11674383878707886, "step": 23875 }, { "epoch": 2.0863183644941463, "grad_norm": 0.5367965771585782, "learning_rate": 2.569276540147429e-06, "loss": 0.11663961410522461, "step": 23880 }, { "epoch": 2.086755198322558, "grad_norm": 0.5411733509988862, "learning_rate": 2.5670559687921324e-06, "loss": 0.11718357801437378, "step": 23885 }, { "epoch": 2.08719203215097, "grad_norm": 0.5818998794335103, "learning_rate": 2.5648360259906606e-06, "loss": 0.10018959045410156, "step": 23890 }, { "epoch": 2.0876288659793816, "grad_norm": 0.7264797319106748, "learning_rate": 2.562616712316537e-06, "loss": 0.10455362796783448, "step": 23895 }, { "epoch": 2.0880656998077933, "grad_norm": 0.6431695215868126, "learning_rate": 2.560398028343124e-06, "loss": 0.09821063280105591, "step": 23900 }, { "epoch": 2.0885025336362046, "grad_norm": 0.6184222066988004, "learning_rate": 2.5581799746436187e-06, "loss": 0.1129607915878296, "step": 23905 }, { "epoch": 2.0889393674646164, "grad_norm": 0.49337148773862577, "learning_rate": 2.555962551791063e-06, "loss": 0.08500324487686158, "step": 23910 }, { "epoch": 2.089376201293028, "grad_norm": 0.5364661633396135, "learning_rate": 2.5537457603583256e-06, "loss": 0.1051000714302063, "step": 23915 }, { "epoch": 2.08981303512144, "grad_norm": 0.6136082589086486, "learning_rate": 2.5515296009181213e-06, "loss": 0.0995818018913269, "step": 23920 }, { "epoch": 2.0902498689498517, "grad_norm": 0.5834705521093412, "learning_rate": 2.5493140740429944e-06, "loss": 0.10680444240570068, "step": 23925 }, { "epoch": 2.090686702778263, "grad_norm": 0.5997962041636865, "learning_rate": 2.547099180305331e-06, "loss": 0.10656116008758545, "step": 23930 }, { "epoch": 2.0911235366066747, "grad_norm": 0.5096037689903469, "learning_rate": 2.5448849202773473e-06, "loss": 0.10308369398117065, "step": 23935 }, { "epoch": 2.0915603704350865, "grad_norm": 0.5595913552401943, "learning_rate": 2.542671294531105e-06, "loss": 0.08416385650634765, "step": 23940 }, { "epoch": 2.0919972042634982, "grad_norm": 0.5881700263778714, "learning_rate": 2.5404583036384944e-06, "loss": 0.1065673828125, "step": 23945 }, { "epoch": 2.09243403809191, "grad_norm": 0.487726417703519, "learning_rate": 2.5382459481712416e-06, "loss": 0.10181784629821777, "step": 23950 }, { "epoch": 2.0928708719203213, "grad_norm": 0.6176292288686608, "learning_rate": 2.5360342287009164e-06, "loss": 0.11199024915695191, "step": 23955 }, { "epoch": 2.093307705748733, "grad_norm": 0.5588331307919148, "learning_rate": 2.533823145798916e-06, "loss": 0.11918057203292846, "step": 23960 }, { "epoch": 2.093744539577145, "grad_norm": 0.5919516708127387, "learning_rate": 2.531612700036477e-06, "loss": 0.09519531726837158, "step": 23965 }, { "epoch": 2.0941813734055565, "grad_norm": 0.6162765444323108, "learning_rate": 2.529402891984668e-06, "loss": 0.08544681668281555, "step": 23970 }, { "epoch": 2.0946182072339683, "grad_norm": 0.5883178085301869, "learning_rate": 2.5271937222143993e-06, "loss": 0.10663201808929443, "step": 23975 }, { "epoch": 2.09505504106238, "grad_norm": 0.5710936628705772, "learning_rate": 2.5249851912964083e-06, "loss": 0.10685498714447021, "step": 23980 }, { "epoch": 2.0954918748907914, "grad_norm": 0.6529871991017708, "learning_rate": 2.5227772998012764e-06, "loss": 0.11516215801239013, "step": 23985 }, { "epoch": 2.095928708719203, "grad_norm": 0.5404769078439863, "learning_rate": 2.520570048299411e-06, "loss": 0.09666407704353333, "step": 23990 }, { "epoch": 2.096365542547615, "grad_norm": 0.5769755822297108, "learning_rate": 2.51836343736106e-06, "loss": 0.10529153347015381, "step": 23995 }, { "epoch": 2.0968023763760266, "grad_norm": 0.7176134337931532, "learning_rate": 2.5161574675563026e-06, "loss": 0.10820114612579346, "step": 24000 }, { "epoch": 2.0972392102044384, "grad_norm": 0.6890535341130097, "learning_rate": 2.5139521394550525e-06, "loss": 0.09633040428161621, "step": 24005 }, { "epoch": 2.09767604403285, "grad_norm": 0.6428237466014386, "learning_rate": 2.5117474536270616e-06, "loss": 0.13175071477890016, "step": 24010 }, { "epoch": 2.0981128778612614, "grad_norm": 0.6411531117496407, "learning_rate": 2.50954341064191e-06, "loss": 0.09861589670181274, "step": 24015 }, { "epoch": 2.098549711689673, "grad_norm": 0.5399908824938813, "learning_rate": 2.5073400110690176e-06, "loss": 0.11887274980545044, "step": 24020 }, { "epoch": 2.098986545518085, "grad_norm": 0.6601563367566481, "learning_rate": 2.5051372554776344e-06, "loss": 0.10277974605560303, "step": 24025 }, { "epoch": 2.0994233793464967, "grad_norm": 0.5099041130260614, "learning_rate": 2.502935144436843e-06, "loss": 0.09531445503234863, "step": 24030 }, { "epoch": 2.0998602131749085, "grad_norm": 0.5371938116899698, "learning_rate": 2.500733678515561e-06, "loss": 0.10883870124816894, "step": 24035 }, { "epoch": 2.1002970470033198, "grad_norm": 0.5520037849480375, "learning_rate": 2.498532858282543e-06, "loss": 0.08734181523323059, "step": 24040 }, { "epoch": 2.1007338808317315, "grad_norm": 0.4774300897301643, "learning_rate": 2.496332684306371e-06, "loss": 0.10940136909484863, "step": 24045 }, { "epoch": 2.1011707146601433, "grad_norm": 0.6648256159143249, "learning_rate": 2.4941331571554618e-06, "loss": 0.10055344104766846, "step": 24050 }, { "epoch": 2.101607548488555, "grad_norm": 0.7231503898586533, "learning_rate": 2.4919342773980694e-06, "loss": 0.1041487216949463, "step": 24055 }, { "epoch": 2.1020443823169668, "grad_norm": 0.5823202389599988, "learning_rate": 2.4897360456022708e-06, "loss": 0.13080835342407227, "step": 24060 }, { "epoch": 2.102481216145378, "grad_norm": 0.5101642671178938, "learning_rate": 2.4875384623359865e-06, "loss": 0.09291789531707764, "step": 24065 }, { "epoch": 2.10291804997379, "grad_norm": 0.5575559810113367, "learning_rate": 2.4853415281669617e-06, "loss": 0.10355942249298096, "step": 24070 }, { "epoch": 2.1033548838022016, "grad_norm": 0.5930717076721107, "learning_rate": 2.48314524366278e-06, "loss": 0.14059605598449706, "step": 24075 }, { "epoch": 2.1037917176306133, "grad_norm": 0.5704883454459553, "learning_rate": 2.4809496093908503e-06, "loss": 0.10012269020080566, "step": 24080 }, { "epoch": 2.104228551459025, "grad_norm": 0.535058263201712, "learning_rate": 2.4787546259184225e-06, "loss": 0.10461987257003784, "step": 24085 }, { "epoch": 2.104665385287437, "grad_norm": 0.5647723181079544, "learning_rate": 2.476560293812566e-06, "loss": 0.10196095705032349, "step": 24090 }, { "epoch": 2.105102219115848, "grad_norm": 0.5508998888208428, "learning_rate": 2.4743666136401946e-06, "loss": 0.09474964141845703, "step": 24095 }, { "epoch": 2.10553905294426, "grad_norm": 0.4822682346565355, "learning_rate": 2.4721735859680456e-06, "loss": 0.09942309856414795, "step": 24100 }, { "epoch": 2.1059758867726717, "grad_norm": 0.8067831868714647, "learning_rate": 2.469981211362689e-06, "loss": 0.09224688410758972, "step": 24105 }, { "epoch": 2.1064127206010834, "grad_norm": 0.6808570541327135, "learning_rate": 2.467789490390531e-06, "loss": 0.1121795654296875, "step": 24110 }, { "epoch": 2.106849554429495, "grad_norm": 0.5914747321014591, "learning_rate": 2.4655984236178005e-06, "loss": 0.11266125440597534, "step": 24115 }, { "epoch": 2.1072863882579065, "grad_norm": 0.6761265977888963, "learning_rate": 2.4634080116105684e-06, "loss": 0.09482908844947815, "step": 24120 }, { "epoch": 2.1077232220863182, "grad_norm": 0.6184828824027162, "learning_rate": 2.4612182549347226e-06, "loss": 0.10495682954788207, "step": 24125 }, { "epoch": 2.10816005591473, "grad_norm": 0.5914750956937062, "learning_rate": 2.459029154155995e-06, "loss": 0.10835227966308594, "step": 24130 }, { "epoch": 2.1085968897431417, "grad_norm": 0.4557000702566556, "learning_rate": 2.456840709839938e-06, "loss": 0.09487277269363403, "step": 24135 }, { "epoch": 2.1090337235715535, "grad_norm": 0.6859051582407143, "learning_rate": 2.454652922551942e-06, "loss": 0.10898729562759399, "step": 24140 }, { "epoch": 2.1094705573999653, "grad_norm": 0.6270521868509656, "learning_rate": 2.452465792857224e-06, "loss": 0.06503729820251465, "step": 24145 }, { "epoch": 2.1099073912283766, "grad_norm": 0.6662041933174485, "learning_rate": 2.4502793213208297e-06, "loss": 0.09564886093139649, "step": 24150 }, { "epoch": 2.1103442250567883, "grad_norm": 0.567626658112942, "learning_rate": 2.448093508507638e-06, "loss": 0.10800819396972657, "step": 24155 }, { "epoch": 2.1107810588852, "grad_norm": 0.5225053723046036, "learning_rate": 2.4459083549823526e-06, "loss": 0.11070795059204101, "step": 24160 }, { "epoch": 2.111217892713612, "grad_norm": 0.7525591689086281, "learning_rate": 2.4437238613095156e-06, "loss": 0.103550124168396, "step": 24165 }, { "epoch": 2.1116547265420236, "grad_norm": 0.6745564065388608, "learning_rate": 2.4415400280534886e-06, "loss": 0.11953785419464111, "step": 24170 }, { "epoch": 2.112091560370435, "grad_norm": 0.500116859185242, "learning_rate": 2.4393568557784713e-06, "loss": 0.1053160309791565, "step": 24175 }, { "epoch": 2.1125283941988466, "grad_norm": 0.5878829261322698, "learning_rate": 2.437174345048486e-06, "loss": 0.06773263216018677, "step": 24180 }, { "epoch": 2.1129652280272584, "grad_norm": 0.54034967192505, "learning_rate": 2.4349924964273915e-06, "loss": 0.10332314968109131, "step": 24185 }, { "epoch": 2.11340206185567, "grad_norm": 0.5680499687498314, "learning_rate": 2.4328113104788624e-06, "loss": 0.10481107234954834, "step": 24190 }, { "epoch": 2.113838895684082, "grad_norm": 0.6326103433408147, "learning_rate": 2.4306307877664183e-06, "loss": 0.11044145822525024, "step": 24195 }, { "epoch": 2.1142757295124937, "grad_norm": 0.5858547284703935, "learning_rate": 2.4284509288533963e-06, "loss": 0.0967056155204773, "step": 24200 }, { "epoch": 2.114712563340905, "grad_norm": 0.49113596429269046, "learning_rate": 2.4262717343029648e-06, "loss": 0.10629587173461914, "step": 24205 }, { "epoch": 2.1151493971693167, "grad_norm": 0.5652066802559084, "learning_rate": 2.424093204678123e-06, "loss": 0.09102309942245483, "step": 24210 }, { "epoch": 2.1155862309977285, "grad_norm": 0.5635677721692534, "learning_rate": 2.421915340541696e-06, "loss": 0.08455086946487426, "step": 24215 }, { "epoch": 2.11602306482614, "grad_norm": 0.5932048618991846, "learning_rate": 2.4197381424563373e-06, "loss": 0.10936121940612793, "step": 24220 }, { "epoch": 2.116459898654552, "grad_norm": 0.4494172094503723, "learning_rate": 2.417561610984526e-06, "loss": 0.08342878818511963, "step": 24225 }, { "epoch": 2.1168967324829633, "grad_norm": 0.48380031721166705, "learning_rate": 2.415385746688576e-06, "loss": 0.08635416030883789, "step": 24230 }, { "epoch": 2.117333566311375, "grad_norm": 0.4729681665537686, "learning_rate": 2.41321055013062e-06, "loss": 0.10776059627532959, "step": 24235 }, { "epoch": 2.117770400139787, "grad_norm": 0.4757795847418096, "learning_rate": 2.411036021872627e-06, "loss": 0.11540029048919678, "step": 24240 }, { "epoch": 2.1182072339681985, "grad_norm": 0.5749942591824281, "learning_rate": 2.408862162476386e-06, "loss": 0.10236918926239014, "step": 24245 }, { "epoch": 2.1186440677966103, "grad_norm": 0.5703672270977925, "learning_rate": 2.4066889725035158e-06, "loss": 0.09011005163192749, "step": 24250 }, { "epoch": 2.119080901625022, "grad_norm": 0.5207785564815284, "learning_rate": 2.4045164525154636e-06, "loss": 0.10947642326354981, "step": 24255 }, { "epoch": 2.1195177354534334, "grad_norm": 0.5961638626917671, "learning_rate": 2.402344603073499e-06, "loss": 0.10909440517425537, "step": 24260 }, { "epoch": 2.119954569281845, "grad_norm": 0.7057939605413801, "learning_rate": 2.4001734247387276e-06, "loss": 0.10518696308135986, "step": 24265 }, { "epoch": 2.120391403110257, "grad_norm": 0.6859389742264534, "learning_rate": 2.3980029180720706e-06, "loss": 0.11112478971481324, "step": 24270 }, { "epoch": 2.1208282369386686, "grad_norm": 0.5322992171028518, "learning_rate": 2.395833083634284e-06, "loss": 0.10030264854431152, "step": 24275 }, { "epoch": 2.1212650707670804, "grad_norm": 0.5614976251055261, "learning_rate": 2.393663921985947e-06, "loss": 0.10918545722961426, "step": 24280 }, { "epoch": 2.1217019045954917, "grad_norm": 0.5958337987726078, "learning_rate": 2.391495433687463e-06, "loss": 0.0835929811000824, "step": 24285 }, { "epoch": 2.1221387384239034, "grad_norm": 0.6906069809468335, "learning_rate": 2.389327619299063e-06, "loss": 0.09858324527740478, "step": 24290 }, { "epoch": 2.122575572252315, "grad_norm": 0.5693001107916219, "learning_rate": 2.3871604793808066e-06, "loss": 0.09284213781356812, "step": 24295 }, { "epoch": 2.123012406080727, "grad_norm": 0.5990181650408372, "learning_rate": 2.3849940144925765e-06, "loss": 0.08615120053291321, "step": 24300 }, { "epoch": 2.1234492399091387, "grad_norm": 0.5959647123262849, "learning_rate": 2.382828225194078e-06, "loss": 0.08610000610351562, "step": 24305 }, { "epoch": 2.1238860737375505, "grad_norm": 0.6206815286027234, "learning_rate": 2.3806631120448525e-06, "loss": 0.10680925846099854, "step": 24310 }, { "epoch": 2.1243229075659618, "grad_norm": 0.5360904659579944, "learning_rate": 2.37849867560425e-06, "loss": 0.07254769802093505, "step": 24315 }, { "epoch": 2.1247597413943735, "grad_norm": 0.6588301011659894, "learning_rate": 2.3763349164314625e-06, "loss": 0.10245064496994019, "step": 24320 }, { "epoch": 2.1251965752227853, "grad_norm": 0.6151121942441037, "learning_rate": 2.374171835085494e-06, "loss": 0.13547608852386475, "step": 24325 }, { "epoch": 2.125633409051197, "grad_norm": 0.6962365235218889, "learning_rate": 2.3720094321251845e-06, "loss": 0.10406254529953003, "step": 24330 }, { "epoch": 2.1260702428796088, "grad_norm": 0.6279107808721462, "learning_rate": 2.3698477081091893e-06, "loss": 0.10953847169876099, "step": 24335 }, { "epoch": 2.12650707670802, "grad_norm": 0.6048376798067774, "learning_rate": 2.367686663595996e-06, "loss": 0.09708452224731445, "step": 24340 }, { "epoch": 2.126943910536432, "grad_norm": 0.6140186281929859, "learning_rate": 2.3655262991439078e-06, "loss": 0.10901376008987426, "step": 24345 }, { "epoch": 2.1273807443648436, "grad_norm": 0.7144464673180286, "learning_rate": 2.3633666153110612e-06, "loss": 0.11925383806228637, "step": 24350 }, { "epoch": 2.1278175781932553, "grad_norm": 0.6657344314750574, "learning_rate": 2.3612076126554117e-06, "loss": 0.0745121955871582, "step": 24355 }, { "epoch": 2.128254412021667, "grad_norm": 0.6640960846933812, "learning_rate": 2.3590492917347384e-06, "loss": 0.12631188631057738, "step": 24360 }, { "epoch": 2.128691245850079, "grad_norm": 0.6273228095538187, "learning_rate": 2.3568916531066494e-06, "loss": 0.10911188125610352, "step": 24365 }, { "epoch": 2.12912807967849, "grad_norm": 0.5259395272212343, "learning_rate": 2.354734697328569e-06, "loss": 0.0888043463230133, "step": 24370 }, { "epoch": 2.129564913506902, "grad_norm": 0.6917470263335175, "learning_rate": 2.3525784249577556e-06, "loss": 0.098366379737854, "step": 24375 }, { "epoch": 2.1300017473353137, "grad_norm": 0.6101984813794469, "learning_rate": 2.3504228365512764e-06, "loss": 0.09061444997787475, "step": 24380 }, { "epoch": 2.1304385811637254, "grad_norm": 0.6503156012582643, "learning_rate": 2.3482679326660357e-06, "loss": 0.09642111659049987, "step": 24385 }, { "epoch": 2.130875414992137, "grad_norm": 0.6280277054164719, "learning_rate": 2.346113713858752e-06, "loss": 0.11252657175064087, "step": 24390 }, { "epoch": 2.1313122488205485, "grad_norm": 0.6014990501919814, "learning_rate": 2.343960180685974e-06, "loss": 0.09666962623596191, "step": 24395 }, { "epoch": 2.1317490826489602, "grad_norm": 0.6015116150652409, "learning_rate": 2.341807333704067e-06, "loss": 0.11526757478713989, "step": 24400 }, { "epoch": 2.132185916477372, "grad_norm": 0.6478058346321378, "learning_rate": 2.339655173469222e-06, "loss": 0.08238476514816284, "step": 24405 }, { "epoch": 2.1326227503057837, "grad_norm": 0.7300714964148864, "learning_rate": 2.3375037005374512e-06, "loss": 0.11162023544311524, "step": 24410 }, { "epoch": 2.1330595841341955, "grad_norm": 0.5320802856164804, "learning_rate": 2.3353529154645888e-06, "loss": 0.09575132131576539, "step": 24415 }, { "epoch": 2.133496417962607, "grad_norm": 0.5308847784440269, "learning_rate": 2.333202818806296e-06, "loss": 0.10540512800216675, "step": 24420 }, { "epoch": 2.1339332517910186, "grad_norm": 0.636228876543849, "learning_rate": 2.3310534111180494e-06, "loss": 0.09536590576171874, "step": 24425 }, { "epoch": 2.1343700856194303, "grad_norm": 0.5989289987271411, "learning_rate": 2.3289046929551545e-06, "loss": 0.0728880763053894, "step": 24430 }, { "epoch": 2.134806919447842, "grad_norm": 0.57567625308961, "learning_rate": 2.326756664872731e-06, "loss": 0.11741831302642822, "step": 24435 }, { "epoch": 2.135243753276254, "grad_norm": 0.5681177590653161, "learning_rate": 2.3246093274257304e-06, "loss": 0.08447689414024354, "step": 24440 }, { "epoch": 2.1356805871046656, "grad_norm": 0.5718396591276521, "learning_rate": 2.3224626811689118e-06, "loss": 0.10798635482788085, "step": 24445 }, { "epoch": 2.136117420933077, "grad_norm": 0.5126899674380366, "learning_rate": 2.320316726656869e-06, "loss": 0.10484750270843506, "step": 24450 }, { "epoch": 2.1365542547614886, "grad_norm": 1.3633106652392182, "learning_rate": 2.3181714644440108e-06, "loss": 0.11020424365997314, "step": 24455 }, { "epoch": 2.1369910885899004, "grad_norm": 0.5283027717380879, "learning_rate": 2.3160268950845654e-06, "loss": 0.09334160089492798, "step": 24460 }, { "epoch": 2.137427922418312, "grad_norm": 0.5242556992938596, "learning_rate": 2.3138830191325894e-06, "loss": 0.09242002964019776, "step": 24465 }, { "epoch": 2.137864756246724, "grad_norm": 0.6488411864075386, "learning_rate": 2.311739837141953e-06, "loss": 0.13069876432418823, "step": 24470 }, { "epoch": 2.1383015900751356, "grad_norm": 0.601096400600432, "learning_rate": 2.3095973496663493e-06, "loss": 0.11169124841690063, "step": 24475 }, { "epoch": 2.138738423903547, "grad_norm": 0.570757503518543, "learning_rate": 2.3074555572592916e-06, "loss": 0.10154494047164916, "step": 24480 }, { "epoch": 2.1391752577319587, "grad_norm": 0.6224516440868465, "learning_rate": 2.3053144604741175e-06, "loss": 0.11059060096740722, "step": 24485 }, { "epoch": 2.1396120915603705, "grad_norm": 0.6701843573313374, "learning_rate": 2.303174059863978e-06, "loss": 0.09250322580337525, "step": 24490 }, { "epoch": 2.140048925388782, "grad_norm": 0.550535380731928, "learning_rate": 2.301034355981851e-06, "loss": 0.12053937911987304, "step": 24495 }, { "epoch": 2.140485759217194, "grad_norm": 0.6650720832349812, "learning_rate": 2.298895349380531e-06, "loss": 0.10346848964691162, "step": 24500 }, { "epoch": 2.1409225930456053, "grad_norm": 0.6464738249086173, "learning_rate": 2.296757040612632e-06, "loss": 0.09568496942520141, "step": 24505 }, { "epoch": 2.141359426874017, "grad_norm": 0.5589454846038469, "learning_rate": 2.2946194302305886e-06, "loss": 0.09835878014564514, "step": 24510 }, { "epoch": 2.141796260702429, "grad_norm": 0.6874059863837028, "learning_rate": 2.292482518786653e-06, "loss": 0.09275643825531006, "step": 24515 }, { "epoch": 2.1422330945308405, "grad_norm": 0.632019726343895, "learning_rate": 2.290346306832902e-06, "loss": 0.1048797607421875, "step": 24520 }, { "epoch": 2.1426699283592523, "grad_norm": 0.5437306130395401, "learning_rate": 2.2882107949212245e-06, "loss": 0.09438481330871581, "step": 24525 }, { "epoch": 2.1431067621876636, "grad_norm": 0.5840109907963058, "learning_rate": 2.2860759836033367e-06, "loss": 0.12345732450485229, "step": 24530 }, { "epoch": 2.1435435960160754, "grad_norm": 0.6419696983040766, "learning_rate": 2.2839418734307684e-06, "loss": 0.12233438491821289, "step": 24535 }, { "epoch": 2.143980429844487, "grad_norm": 0.5232312764128058, "learning_rate": 2.2818084649548676e-06, "loss": 0.1105086326599121, "step": 24540 }, { "epoch": 2.144417263672899, "grad_norm": 0.7205919778239193, "learning_rate": 2.279675758726802e-06, "loss": 0.10148658752441406, "step": 24545 }, { "epoch": 2.1448540975013106, "grad_norm": 0.5841848882654032, "learning_rate": 2.2775437552975625e-06, "loss": 0.10951859951019287, "step": 24550 }, { "epoch": 2.1452909313297224, "grad_norm": 0.6181610808255557, "learning_rate": 2.275412455217952e-06, "loss": 0.09255237579345703, "step": 24555 }, { "epoch": 2.1457277651581337, "grad_norm": 0.656263682263283, "learning_rate": 2.273281859038593e-06, "loss": 0.11906564235687256, "step": 24560 }, { "epoch": 2.1461645989865454, "grad_norm": 0.6711126898517122, "learning_rate": 2.2711519673099326e-06, "loss": 0.10389041900634766, "step": 24565 }, { "epoch": 2.146601432814957, "grad_norm": 0.6765706313070825, "learning_rate": 2.2690227805822236e-06, "loss": 0.12021036148071289, "step": 24570 }, { "epoch": 2.147038266643369, "grad_norm": 0.46581045212629235, "learning_rate": 2.266894299405549e-06, "loss": 0.10118122100830078, "step": 24575 }, { "epoch": 2.1474751004717807, "grad_norm": 0.6885801794773067, "learning_rate": 2.264766524329801e-06, "loss": 0.09076406955718994, "step": 24580 }, { "epoch": 2.147911934300192, "grad_norm": 0.49878738599381245, "learning_rate": 2.2626394559046962e-06, "loss": 0.07085105776786804, "step": 24585 }, { "epoch": 2.1483487681286038, "grad_norm": 0.627610832344059, "learning_rate": 2.260513094679761e-06, "loss": 0.0905504047870636, "step": 24590 }, { "epoch": 2.1487856019570155, "grad_norm": 0.6070450261434911, "learning_rate": 2.25838744120435e-06, "loss": 0.09422853589057922, "step": 24595 }, { "epoch": 2.1492224357854273, "grad_norm": 0.6679859436028749, "learning_rate": 2.256262496027619e-06, "loss": 0.10398433208465577, "step": 24600 }, { "epoch": 2.149659269613839, "grad_norm": 0.5788345541090293, "learning_rate": 2.254138259698557e-06, "loss": 0.09634686708450317, "step": 24605 }, { "epoch": 2.1500961034422508, "grad_norm": 0.7316420212752047, "learning_rate": 2.2520147327659603e-06, "loss": 0.09567574262619019, "step": 24610 }, { "epoch": 2.150532937270662, "grad_norm": 0.6247597998503118, "learning_rate": 2.2498919157784426e-06, "loss": 0.10027302503585815, "step": 24615 }, { "epoch": 2.150969771099074, "grad_norm": 0.5930364206361985, "learning_rate": 2.247769809284441e-06, "loss": 0.07721484303474427, "step": 24620 }, { "epoch": 2.1514066049274856, "grad_norm": 0.650485024770609, "learning_rate": 2.2456484138321983e-06, "loss": 0.09702215194702149, "step": 24625 }, { "epoch": 2.1518434387558973, "grad_norm": 0.6012647295270196, "learning_rate": 2.243527729969786e-06, "loss": 0.1046860933303833, "step": 24630 }, { "epoch": 2.152280272584309, "grad_norm": 0.5357784938282067, "learning_rate": 2.2414077582450778e-06, "loss": 0.09487709403038025, "step": 24635 }, { "epoch": 2.1527171064127204, "grad_norm": 0.5414357442590616, "learning_rate": 2.2392884992057763e-06, "loss": 0.11593608856201172, "step": 24640 }, { "epoch": 2.153153940241132, "grad_norm": 0.5891087248445926, "learning_rate": 2.2371699533993905e-06, "loss": 0.10308423042297363, "step": 24645 }, { "epoch": 2.153590774069544, "grad_norm": 0.6440237116738576, "learning_rate": 2.2350521213732534e-06, "loss": 0.11396591663360596, "step": 24650 }, { "epoch": 2.1540276078979557, "grad_norm": 0.7032827674832298, "learning_rate": 2.2329350036745066e-06, "loss": 0.10457456111907959, "step": 24655 }, { "epoch": 2.1544644417263674, "grad_norm": 0.562979712224205, "learning_rate": 2.230818600850111e-06, "loss": 0.10907928943634033, "step": 24660 }, { "epoch": 2.1549012755547787, "grad_norm": 0.5525253085621057, "learning_rate": 2.22870291344684e-06, "loss": 0.1061869740486145, "step": 24665 }, { "epoch": 2.1553381093831905, "grad_norm": 0.5912172897712074, "learning_rate": 2.2265879420112834e-06, "loss": 0.09039252400398254, "step": 24670 }, { "epoch": 2.1557749432116022, "grad_norm": 0.5766994543761151, "learning_rate": 2.22447368708985e-06, "loss": 0.11556155681610107, "step": 24675 }, { "epoch": 2.156211777040014, "grad_norm": 0.5979947453750183, "learning_rate": 2.222360149228755e-06, "loss": 0.09915058016777038, "step": 24680 }, { "epoch": 2.1566486108684257, "grad_norm": 0.8197878656175477, "learning_rate": 2.2202473289740384e-06, "loss": 0.08032609224319458, "step": 24685 }, { "epoch": 2.1570854446968375, "grad_norm": 0.5560305900157176, "learning_rate": 2.2181352268715457e-06, "loss": 0.10075275897979737, "step": 24690 }, { "epoch": 2.157522278525249, "grad_norm": 0.5221878969540507, "learning_rate": 2.216023843466946e-06, "loss": 0.10057559013366699, "step": 24695 }, { "epoch": 2.1579591123536606, "grad_norm": 0.5851894907064046, "learning_rate": 2.2139131793057096e-06, "loss": 0.10678530931472778, "step": 24700 }, { "epoch": 2.1583959461820723, "grad_norm": 0.5966153067400397, "learning_rate": 2.2118032349331354e-06, "loss": 0.09885045886039734, "step": 24705 }, { "epoch": 2.158832780010484, "grad_norm": 0.6356236601752863, "learning_rate": 2.209694010894328e-06, "loss": 0.10065877437591553, "step": 24710 }, { "epoch": 2.159269613838896, "grad_norm": 0.6376460252594491, "learning_rate": 2.2075855077342063e-06, "loss": 0.1139909267425537, "step": 24715 }, { "epoch": 2.1597064476673076, "grad_norm": 0.5548776267330016, "learning_rate": 2.205477725997507e-06, "loss": 0.12046699523925782, "step": 24720 }, { "epoch": 2.160143281495719, "grad_norm": 0.5725392937522233, "learning_rate": 2.2033706662287773e-06, "loss": 0.09933221936225892, "step": 24725 }, { "epoch": 2.1605801153241306, "grad_norm": 0.6740656528706958, "learning_rate": 2.2012643289723775e-06, "loss": 0.09802476167678834, "step": 24730 }, { "epoch": 2.1610169491525424, "grad_norm": 0.5892211601762493, "learning_rate": 2.199158714772481e-06, "loss": 0.11760056018829346, "step": 24735 }, { "epoch": 2.161453782980954, "grad_norm": 0.7338266850651189, "learning_rate": 2.1970538241730784e-06, "loss": 0.09539583325386047, "step": 24740 }, { "epoch": 2.161890616809366, "grad_norm": 0.5515914938957958, "learning_rate": 2.194949657717968e-06, "loss": 0.09956985116004943, "step": 24745 }, { "epoch": 2.162327450637777, "grad_norm": 0.5363902350923435, "learning_rate": 2.192846215950766e-06, "loss": 0.10291393995285034, "step": 24750 }, { "epoch": 2.162764284466189, "grad_norm": 0.6878902087478357, "learning_rate": 2.1907434994148983e-06, "loss": 0.10209915637969971, "step": 24755 }, { "epoch": 2.1632011182946007, "grad_norm": 0.6395343158972129, "learning_rate": 2.188641508653604e-06, "loss": 0.12184382677078247, "step": 24760 }, { "epoch": 2.1636379521230125, "grad_norm": 0.5037502017605833, "learning_rate": 2.186540244209934e-06, "loss": 0.0956826388835907, "step": 24765 }, { "epoch": 2.164074785951424, "grad_norm": 0.5267362843384321, "learning_rate": 2.184439706626751e-06, "loss": 0.07777873277664185, "step": 24770 }, { "epoch": 2.1645116197798355, "grad_norm": 0.541424558216691, "learning_rate": 2.1823398964467346e-06, "loss": 0.09777028560638427, "step": 24775 }, { "epoch": 2.1649484536082473, "grad_norm": 0.5467141102993027, "learning_rate": 2.1802408142123697e-06, "loss": 0.09700496792793274, "step": 24780 }, { "epoch": 2.165385287436659, "grad_norm": 0.6195847694608932, "learning_rate": 2.1781424604659603e-06, "loss": 0.1039900541305542, "step": 24785 }, { "epoch": 2.165822121265071, "grad_norm": 0.5458951892672562, "learning_rate": 2.1760448357496162e-06, "loss": 0.10737448930740356, "step": 24790 }, { "epoch": 2.1662589550934825, "grad_norm": 0.5927175550156127, "learning_rate": 2.173947940605261e-06, "loss": 0.0744154393672943, "step": 24795 }, { "epoch": 2.1666957889218943, "grad_norm": 0.5427867043267008, "learning_rate": 2.1718517755746293e-06, "loss": 0.09790436029434205, "step": 24800 }, { "epoch": 2.1671326227503056, "grad_norm": 0.5786570268092508, "learning_rate": 2.16975634119927e-06, "loss": 0.11010257005691529, "step": 24805 }, { "epoch": 2.1675694565787174, "grad_norm": 0.5494738616996706, "learning_rate": 2.1676616380205397e-06, "loss": 0.11065542697906494, "step": 24810 }, { "epoch": 2.168006290407129, "grad_norm": 0.6655870608552066, "learning_rate": 2.1655676665796055e-06, "loss": 0.09415597915649414, "step": 24815 }, { "epoch": 2.168443124235541, "grad_norm": 0.5717155933460826, "learning_rate": 2.1634744274174524e-06, "loss": 0.10176808834075927, "step": 24820 }, { "epoch": 2.1688799580639526, "grad_norm": 0.6766454235210764, "learning_rate": 2.1613819210748643e-06, "loss": 0.09652300477027893, "step": 24825 }, { "epoch": 2.1693167918923644, "grad_norm": 0.6840035256623906, "learning_rate": 2.1592901480924474e-06, "loss": 0.118549382686615, "step": 24830 }, { "epoch": 2.1697536257207757, "grad_norm": 0.5409911201047781, "learning_rate": 2.1571991090106103e-06, "loss": 0.10467805862426757, "step": 24835 }, { "epoch": 2.1701904595491874, "grad_norm": 0.5838438602335692, "learning_rate": 2.1551088043695796e-06, "loss": 0.09585970640182495, "step": 24840 }, { "epoch": 2.170627293377599, "grad_norm": 0.6165999656586854, "learning_rate": 2.153019234709383e-06, "loss": 0.09381188750267029, "step": 24845 }, { "epoch": 2.171064127206011, "grad_norm": 0.5714157694853778, "learning_rate": 2.1509304005698696e-06, "loss": 0.09491169452667236, "step": 24850 }, { "epoch": 2.1715009610344227, "grad_norm": 0.5683324051948843, "learning_rate": 2.148842302490684e-06, "loss": 0.10169003009796143, "step": 24855 }, { "epoch": 2.171937794862834, "grad_norm": 0.545698129299422, "learning_rate": 2.1467549410112946e-06, "loss": 0.12007704973220826, "step": 24860 }, { "epoch": 2.1723746286912458, "grad_norm": 0.6438195582983479, "learning_rate": 2.1446683166709715e-06, "loss": 0.09696427583694459, "step": 24865 }, { "epoch": 2.1728114625196575, "grad_norm": 0.763795696384586, "learning_rate": 2.1425824300087948e-06, "loss": 0.09047033190727234, "step": 24870 }, { "epoch": 2.1732482963480693, "grad_norm": 0.6687799436442332, "learning_rate": 2.14049728156366e-06, "loss": 0.10420247316360473, "step": 24875 }, { "epoch": 2.173685130176481, "grad_norm": 0.540622915399784, "learning_rate": 2.1384128718742624e-06, "loss": 0.09662153720855712, "step": 24880 }, { "epoch": 2.1741219640048923, "grad_norm": 0.724830859365674, "learning_rate": 2.136329201479118e-06, "loss": 0.11938238143920898, "step": 24885 }, { "epoch": 2.174558797833304, "grad_norm": 0.6924179345878211, "learning_rate": 2.1342462709165385e-06, "loss": 0.11499121189117431, "step": 24890 }, { "epoch": 2.174995631661716, "grad_norm": 0.5616660414029725, "learning_rate": 2.1321640807246556e-06, "loss": 0.09780192375183105, "step": 24895 }, { "epoch": 2.1754324654901276, "grad_norm": 0.6484955904171228, "learning_rate": 2.130082631441403e-06, "loss": 0.1385960817337036, "step": 24900 }, { "epoch": 2.1758692993185393, "grad_norm": 0.5587939287039516, "learning_rate": 2.1280019236045284e-06, "loss": 0.09362821578979492, "step": 24905 }, { "epoch": 2.176306133146951, "grad_norm": 0.775640846013579, "learning_rate": 2.125921957751583e-06, "loss": 0.10480926036834717, "step": 24910 }, { "epoch": 2.1767429669753624, "grad_norm": 0.5309663705033197, "learning_rate": 2.12384273441993e-06, "loss": 0.08871890306472778, "step": 24915 }, { "epoch": 2.177179800803774, "grad_norm": 0.5878437849778413, "learning_rate": 2.1217642541467365e-06, "loss": 0.12266027927398682, "step": 24920 }, { "epoch": 2.177616634632186, "grad_norm": 0.5069795284898312, "learning_rate": 2.11968651746898e-06, "loss": 0.10704700946807862, "step": 24925 }, { "epoch": 2.1780534684605977, "grad_norm": 0.5645239690325828, "learning_rate": 2.11760952492345e-06, "loss": 0.10317325592041016, "step": 24930 }, { "epoch": 2.1784903022890094, "grad_norm": 0.6193198799356426, "learning_rate": 2.1155332770467344e-06, "loss": 0.10710396766662597, "step": 24935 }, { "epoch": 2.1789271361174207, "grad_norm": 0.5541956020003874, "learning_rate": 2.11345777437524e-06, "loss": 0.10486452579498291, "step": 24940 }, { "epoch": 2.1793639699458325, "grad_norm": 0.5151291456434814, "learning_rate": 2.1113830174451704e-06, "loss": 0.1203848123550415, "step": 24945 }, { "epoch": 2.1798008037742442, "grad_norm": 0.6641386329738921, "learning_rate": 2.1093090067925477e-06, "loss": 0.11490740776062011, "step": 24950 }, { "epoch": 2.180237637602656, "grad_norm": 0.5812226220265734, "learning_rate": 2.107235742953186e-06, "loss": 0.10025219917297364, "step": 24955 }, { "epoch": 2.1806744714310677, "grad_norm": 0.6245946117986805, "learning_rate": 2.1051632264627227e-06, "loss": 0.10597786903381348, "step": 24960 }, { "epoch": 2.1811113052594795, "grad_norm": 0.5058946184176376, "learning_rate": 2.1030914578565914e-06, "loss": 0.11474318504333496, "step": 24965 }, { "epoch": 2.181548139087891, "grad_norm": 0.571893478122722, "learning_rate": 2.101020437670035e-06, "loss": 0.09377167224884034, "step": 24970 }, { "epoch": 2.1819849729163026, "grad_norm": 0.589103085024124, "learning_rate": 2.0989501664381066e-06, "loss": 0.08235151767730713, "step": 24975 }, { "epoch": 2.1824218067447143, "grad_norm": 0.5513450358986772, "learning_rate": 2.096880644695663e-06, "loss": 0.09528096914291381, "step": 24980 }, { "epoch": 2.182858640573126, "grad_norm": 0.6008392940308597, "learning_rate": 2.094811872977366e-06, "loss": 0.0933325707912445, "step": 24985 }, { "epoch": 2.183295474401538, "grad_norm": 0.676992772537264, "learning_rate": 2.092743851817683e-06, "loss": 0.10712305307388306, "step": 24990 }, { "epoch": 2.183732308229949, "grad_norm": 0.6089291150945045, "learning_rate": 2.0906765817508945e-06, "loss": 0.12157456874847412, "step": 24995 }, { "epoch": 2.184169142058361, "grad_norm": 0.5553585006527371, "learning_rate": 2.0886100633110775e-06, "loss": 0.11185663938522339, "step": 25000 }, { "epoch": 2.1846059758867726, "grad_norm": 0.5792791320267817, "learning_rate": 2.0865442970321232e-06, "loss": 0.09081492424011231, "step": 25005 }, { "epoch": 2.1850428097151844, "grad_norm": 0.5768538118256475, "learning_rate": 2.084479283447723e-06, "loss": 0.09784960746765137, "step": 25010 }, { "epoch": 2.185479643543596, "grad_norm": 0.5640233724472512, "learning_rate": 2.0824150230913755e-06, "loss": 0.13656824827194214, "step": 25015 }, { "epoch": 2.185916477372008, "grad_norm": 0.49241564450028424, "learning_rate": 2.0803515164963845e-06, "loss": 0.09660797715187072, "step": 25020 }, { "epoch": 2.186353311200419, "grad_norm": 0.5540422588995164, "learning_rate": 2.0782887641958573e-06, "loss": 0.10859930515289307, "step": 25025 }, { "epoch": 2.186790145028831, "grad_norm": 0.6744057167965066, "learning_rate": 2.0762267667227123e-06, "loss": 0.11398870944976806, "step": 25030 }, { "epoch": 2.1872269788572427, "grad_norm": 0.5798625813950838, "learning_rate": 2.074165524609665e-06, "loss": 0.11483118534088135, "step": 25035 }, { "epoch": 2.1876638126856545, "grad_norm": 0.5596344852563749, "learning_rate": 2.0721050383892433e-06, "loss": 0.12845126390457154, "step": 25040 }, { "epoch": 2.188100646514066, "grad_norm": 0.6096425178776517, "learning_rate": 2.070045308593774e-06, "loss": 0.10128130912780761, "step": 25045 }, { "epoch": 2.1885374803424775, "grad_norm": 0.6808253738811542, "learning_rate": 2.0679863357553916e-06, "loss": 0.10229876041412353, "step": 25050 }, { "epoch": 2.1889743141708893, "grad_norm": 0.5811488112114592, "learning_rate": 2.0659281204060307e-06, "loss": 0.1117236852645874, "step": 25055 }, { "epoch": 2.189411147999301, "grad_norm": 0.5476311433875631, "learning_rate": 2.063870663077438e-06, "loss": 0.10790126323699951, "step": 25060 }, { "epoch": 2.189847981827713, "grad_norm": 0.6696259182758788, "learning_rate": 2.0618139643011585e-06, "loss": 0.0994056761264801, "step": 25065 }, { "epoch": 2.1902848156561245, "grad_norm": 0.5427469395460034, "learning_rate": 2.05975802460854e-06, "loss": 0.10088905096054077, "step": 25070 }, { "epoch": 2.1907216494845363, "grad_norm": 0.6101542313869446, "learning_rate": 2.057702844530742e-06, "loss": 0.12089147567749023, "step": 25075 }, { "epoch": 2.1911584833129476, "grad_norm": 0.5793239812293649, "learning_rate": 2.0556484245987186e-06, "loss": 0.11649754047393798, "step": 25080 }, { "epoch": 2.1915953171413594, "grad_norm": 0.5807908691822122, "learning_rate": 2.0535947653432327e-06, "loss": 0.10319392681121826, "step": 25085 }, { "epoch": 2.192032150969771, "grad_norm": 0.6511147518324576, "learning_rate": 2.0515418672948476e-06, "loss": 0.09919264912605286, "step": 25090 }, { "epoch": 2.192468984798183, "grad_norm": 0.7057589763132157, "learning_rate": 2.049489730983935e-06, "loss": 0.11523658037185669, "step": 25095 }, { "epoch": 2.1929058186265946, "grad_norm": 0.7003433094282183, "learning_rate": 2.0474383569406643e-06, "loss": 0.08555428385734558, "step": 25100 }, { "epoch": 2.193342652455006, "grad_norm": 0.5966480649019151, "learning_rate": 2.0453877456950137e-06, "loss": 0.08259962797164917, "step": 25105 }, { "epoch": 2.1937794862834177, "grad_norm": 0.60053565311585, "learning_rate": 2.0433378977767556e-06, "loss": 0.07750371098518372, "step": 25110 }, { "epoch": 2.1942163201118294, "grad_norm": 0.6634669869554276, "learning_rate": 2.0412888137154746e-06, "loss": 0.10674920082092285, "step": 25115 }, { "epoch": 2.194653153940241, "grad_norm": 0.5588547210170834, "learning_rate": 2.0392404940405526e-06, "loss": 0.09270354509353637, "step": 25120 }, { "epoch": 2.195089987768653, "grad_norm": 0.6002583299321144, "learning_rate": 2.0371929392811735e-06, "loss": 0.10619113445281983, "step": 25125 }, { "epoch": 2.1955268215970642, "grad_norm": 0.5366649111311482, "learning_rate": 2.035146149966329e-06, "loss": 0.08921153545379638, "step": 25130 }, { "epoch": 2.195963655425476, "grad_norm": 0.5676677992300632, "learning_rate": 2.0331001266248057e-06, "loss": 0.07637863159179688, "step": 25135 }, { "epoch": 2.1964004892538878, "grad_norm": 0.5712960790972864, "learning_rate": 2.0310548697852012e-06, "loss": 0.12344443798065186, "step": 25140 }, { "epoch": 2.1968373230822995, "grad_norm": 0.6174446757710582, "learning_rate": 2.029010379975903e-06, "loss": 0.09346777200698853, "step": 25145 }, { "epoch": 2.1972741569107113, "grad_norm": 0.46650747748149896, "learning_rate": 2.026966657725113e-06, "loss": 0.10430383682250977, "step": 25150 }, { "epoch": 2.197710990739123, "grad_norm": 0.5196031941595993, "learning_rate": 2.0249237035608247e-06, "loss": 0.08281511068344116, "step": 25155 }, { "epoch": 2.1981478245675343, "grad_norm": 0.5923062694474005, "learning_rate": 2.0228815180108423e-06, "loss": 0.1186383843421936, "step": 25160 }, { "epoch": 2.198584658395946, "grad_norm": 0.7116468376390275, "learning_rate": 2.0208401016027646e-06, "loss": 0.08461248874664307, "step": 25165 }, { "epoch": 2.199021492224358, "grad_norm": 0.6501394076818985, "learning_rate": 2.0187994548639935e-06, "loss": 0.09014836549758912, "step": 25170 }, { "epoch": 2.1994583260527696, "grad_norm": 0.5152888076007074, "learning_rate": 2.01675957832173e-06, "loss": 0.09434365034103394, "step": 25175 }, { "epoch": 2.1998951598811813, "grad_norm": 0.7525244708580147, "learning_rate": 2.0147204725029827e-06, "loss": 0.09198572039604187, "step": 25180 }, { "epoch": 2.200331993709593, "grad_norm": 0.5029096751512802, "learning_rate": 2.0126821379345557e-06, "loss": 0.08103966116905212, "step": 25185 }, { "epoch": 2.2007688275380044, "grad_norm": 0.7053259899765082, "learning_rate": 2.0106445751430527e-06, "loss": 0.1012431263923645, "step": 25190 }, { "epoch": 2.201205661366416, "grad_norm": 0.7649189194588679, "learning_rate": 2.008607784654884e-06, "loss": 0.101718008518219, "step": 25195 }, { "epoch": 2.201642495194828, "grad_norm": 0.6137715543825893, "learning_rate": 2.0065717669962525e-06, "loss": 0.08608781695365905, "step": 25200 }, { "epoch": 2.2020793290232397, "grad_norm": 0.5587418756306798, "learning_rate": 2.0045365226931717e-06, "loss": 0.10246944427490234, "step": 25205 }, { "epoch": 2.2025161628516514, "grad_norm": 0.5643495881106789, "learning_rate": 2.0025020522714424e-06, "loss": 0.11482185125350952, "step": 25210 }, { "epoch": 2.2029529966800627, "grad_norm": 0.47340069043913424, "learning_rate": 2.000468356256677e-06, "loss": 0.10643970966339111, "step": 25215 }, { "epoch": 2.2033898305084745, "grad_norm": 0.6225719625783449, "learning_rate": 1.9984354351742814e-06, "loss": 0.09831629991531372, "step": 25220 }, { "epoch": 2.2038266643368862, "grad_norm": 0.4994582708553021, "learning_rate": 1.996403289549462e-06, "loss": 0.09618560075759888, "step": 25225 }, { "epoch": 2.204263498165298, "grad_norm": 0.5857858979080685, "learning_rate": 1.994371919907228e-06, "loss": 0.08449429273605347, "step": 25230 }, { "epoch": 2.2047003319937097, "grad_norm": 0.5635815583571335, "learning_rate": 1.992341326772386e-06, "loss": 0.08342351913452148, "step": 25235 }, { "epoch": 2.205137165822121, "grad_norm": 0.6124750596075756, "learning_rate": 1.990311510669541e-06, "loss": 0.09384479522705078, "step": 25240 }, { "epoch": 2.205573999650533, "grad_norm": 0.5947302665330131, "learning_rate": 1.9882824721230964e-06, "loss": 0.09692924022674561, "step": 25245 }, { "epoch": 2.2060108334789446, "grad_norm": 0.5492957678465334, "learning_rate": 1.9862542116572603e-06, "loss": 0.08570476770401, "step": 25250 }, { "epoch": 2.2064476673073563, "grad_norm": 0.559306311691863, "learning_rate": 1.9842267297960334e-06, "loss": 0.09643207788467408, "step": 25255 }, { "epoch": 2.206884501135768, "grad_norm": 0.5430070446214269, "learning_rate": 1.9822000270632196e-06, "loss": 0.1083017110824585, "step": 25260 }, { "epoch": 2.20732133496418, "grad_norm": 0.5190488869178205, "learning_rate": 1.98017410398242e-06, "loss": 0.09604323506355286, "step": 25265 }, { "epoch": 2.207758168792591, "grad_norm": 0.6669216304283203, "learning_rate": 1.978148961077033e-06, "loss": 0.11777969598770141, "step": 25270 }, { "epoch": 2.208195002621003, "grad_norm": 0.7700815389856949, "learning_rate": 1.9761245988702553e-06, "loss": 0.08178225755691529, "step": 25275 }, { "epoch": 2.2086318364494146, "grad_norm": 0.6319493156977943, "learning_rate": 1.974101017885086e-06, "loss": 0.12446668148040771, "step": 25280 }, { "epoch": 2.2090686702778264, "grad_norm": 0.7648318922324475, "learning_rate": 1.972078218644319e-06, "loss": 0.08616147637367248, "step": 25285 }, { "epoch": 2.209505504106238, "grad_norm": 0.6439040148352558, "learning_rate": 1.9700562016705433e-06, "loss": 0.07422285079956055, "step": 25290 }, { "epoch": 2.20994233793465, "grad_norm": 0.6125533485956872, "learning_rate": 1.9680349674861537e-06, "loss": 0.1045256495475769, "step": 25295 }, { "epoch": 2.210379171763061, "grad_norm": 0.48734733757313875, "learning_rate": 1.966014516613337e-06, "loss": 0.10437607765197754, "step": 25300 }, { "epoch": 2.210816005591473, "grad_norm": 0.6407153505604314, "learning_rate": 1.963994849574078e-06, "loss": 0.1219254493713379, "step": 25305 }, { "epoch": 2.2112528394198847, "grad_norm": 0.5816531751464912, "learning_rate": 1.961975966890159e-06, "loss": 0.10155820846557617, "step": 25310 }, { "epoch": 2.2116896732482965, "grad_norm": 0.5313154351596628, "learning_rate": 1.959957869083164e-06, "loss": 0.06927446126937867, "step": 25315 }, { "epoch": 2.212126507076708, "grad_norm": 0.4903867428093122, "learning_rate": 1.957940556674468e-06, "loss": 0.08987390398979186, "step": 25320 }, { "epoch": 2.2125633409051195, "grad_norm": 0.5684929619805262, "learning_rate": 1.9559240301852456e-06, "loss": 0.10770751237869262, "step": 25325 }, { "epoch": 2.2130001747335313, "grad_norm": 0.4953577510958072, "learning_rate": 1.953908290136472e-06, "loss": 0.08735828399658203, "step": 25330 }, { "epoch": 2.213437008561943, "grad_norm": 0.5096074234335107, "learning_rate": 1.9518933370489134e-06, "loss": 0.08060235381126404, "step": 25335 }, { "epoch": 2.213873842390355, "grad_norm": 0.613105938364285, "learning_rate": 1.949879171443135e-06, "loss": 0.11972825527191162, "step": 25340 }, { "epoch": 2.2143106762187665, "grad_norm": 0.6674444237146765, "learning_rate": 1.9478657938394985e-06, "loss": 0.09454243183135987, "step": 25345 }, { "epoch": 2.214747510047178, "grad_norm": 0.5954395442067977, "learning_rate": 1.9458532047581647e-06, "loss": 0.08327102661132812, "step": 25350 }, { "epoch": 2.2151843438755896, "grad_norm": 0.6538193767576904, "learning_rate": 1.9438414047190846e-06, "loss": 0.0971758484840393, "step": 25355 }, { "epoch": 2.2156211777040014, "grad_norm": 0.6059688154749224, "learning_rate": 1.941830394242015e-06, "loss": 0.10273683071136475, "step": 25360 }, { "epoch": 2.216058011532413, "grad_norm": 0.6348139673034143, "learning_rate": 1.9398201738464945e-06, "loss": 0.07301058769226074, "step": 25365 }, { "epoch": 2.216494845360825, "grad_norm": 0.5555218574629842, "learning_rate": 1.937810744051872e-06, "loss": 0.1036328911781311, "step": 25370 }, { "epoch": 2.2169316791892366, "grad_norm": 0.5980689299484176, "learning_rate": 1.9358021053772825e-06, "loss": 0.11000131368637085, "step": 25375 }, { "epoch": 2.217368513017648, "grad_norm": 0.6121580414373613, "learning_rate": 1.9337942583416636e-06, "loss": 0.11490790843963623, "step": 25380 }, { "epoch": 2.2178053468460597, "grad_norm": 0.5490243947069191, "learning_rate": 1.9317872034637418e-06, "loss": 0.10220968723297119, "step": 25385 }, { "epoch": 2.2182421806744714, "grad_norm": 0.5775725743072821, "learning_rate": 1.9297809412620423e-06, "loss": 0.09052207469940185, "step": 25390 }, { "epoch": 2.218679014502883, "grad_norm": 0.6723133998181915, "learning_rate": 1.9277754722548887e-06, "loss": 0.09951840043067932, "step": 25395 }, { "epoch": 2.219115848331295, "grad_norm": 0.5529387745702075, "learning_rate": 1.92577079696039e-06, "loss": 0.09180142283439637, "step": 25400 }, { "epoch": 2.2195526821597062, "grad_norm": 0.6429619331158826, "learning_rate": 1.9237669158964616e-06, "loss": 0.09701069593429565, "step": 25405 }, { "epoch": 2.219989515988118, "grad_norm": 0.695144747207671, "learning_rate": 1.921763829580805e-06, "loss": 0.1172421932220459, "step": 25410 }, { "epoch": 2.2204263498165298, "grad_norm": 0.6993669714821623, "learning_rate": 1.9197615385309226e-06, "loss": 0.0992282509803772, "step": 25415 }, { "epoch": 2.2208631836449415, "grad_norm": 0.7025240890515743, "learning_rate": 1.917760043264107e-06, "loss": 0.07968604564666748, "step": 25420 }, { "epoch": 2.2213000174733533, "grad_norm": 0.5621865715849924, "learning_rate": 1.915759344297447e-06, "loss": 0.1012681007385254, "step": 25425 }, { "epoch": 2.221736851301765, "grad_norm": 0.5213145037946405, "learning_rate": 1.913759442147824e-06, "loss": 0.11236586570739746, "step": 25430 }, { "epoch": 2.2221736851301763, "grad_norm": 0.5378027772410694, "learning_rate": 1.9117603373319184e-06, "loss": 0.11978604793548583, "step": 25435 }, { "epoch": 2.222610518958588, "grad_norm": 0.5434056706327623, "learning_rate": 1.9097620303661986e-06, "loss": 0.09284324645996093, "step": 25440 }, { "epoch": 2.223047352787, "grad_norm": 0.5551332477560402, "learning_rate": 1.9077645217669283e-06, "loss": 0.10307478904724121, "step": 25445 }, { "epoch": 2.2234841866154116, "grad_norm": 0.663258885323604, "learning_rate": 1.9057678120501693e-06, "loss": 0.10319960117340088, "step": 25450 }, { "epoch": 2.2239210204438233, "grad_norm": 0.5437998648399885, "learning_rate": 1.9037719017317714e-06, "loss": 0.10178968906402588, "step": 25455 }, { "epoch": 2.2243578542722346, "grad_norm": 0.5349709659961646, "learning_rate": 1.9017767913273839e-06, "loss": 0.0954288125038147, "step": 25460 }, { "epoch": 2.2247946881006464, "grad_norm": 0.6132871361406425, "learning_rate": 1.8997824813524395e-06, "loss": 0.09350591897964478, "step": 25465 }, { "epoch": 2.225231521929058, "grad_norm": 0.6083365740856763, "learning_rate": 1.897788972322176e-06, "loss": 0.08534803390502929, "step": 25470 }, { "epoch": 2.22566835575747, "grad_norm": 0.5450333230530585, "learning_rate": 1.8957962647516154e-06, "loss": 0.0903246521949768, "step": 25475 }, { "epoch": 2.2261051895858817, "grad_norm": 0.7939160482027288, "learning_rate": 1.8938043591555782e-06, "loss": 0.09498226642608643, "step": 25480 }, { "epoch": 2.226542023414293, "grad_norm": 0.6409314273579167, "learning_rate": 1.8918132560486746e-06, "loss": 0.08808136582374573, "step": 25485 }, { "epoch": 2.2269788572427047, "grad_norm": 0.6402900332363148, "learning_rate": 1.8898229559453085e-06, "loss": 0.11796989440917968, "step": 25490 }, { "epoch": 2.2274156910711165, "grad_norm": 0.6268113580206834, "learning_rate": 1.887833459359676e-06, "loss": 0.10342390537261963, "step": 25495 }, { "epoch": 2.2278525248995282, "grad_norm": 0.5368878005999601, "learning_rate": 1.8858447668057634e-06, "loss": 0.08810331225395203, "step": 25500 }, { "epoch": 2.22828935872794, "grad_norm": 0.6067320399226204, "learning_rate": 1.883856878797356e-06, "loss": 0.12748115062713622, "step": 25505 }, { "epoch": 2.2287261925563517, "grad_norm": 0.7424240018012592, "learning_rate": 1.881869795848023e-06, "loss": 0.07817830443382263, "step": 25510 }, { "epoch": 2.229163026384763, "grad_norm": 0.6409454346970798, "learning_rate": 1.8798835184711328e-06, "loss": 0.10979187488555908, "step": 25515 }, { "epoch": 2.229599860213175, "grad_norm": 0.5706308714538497, "learning_rate": 1.8778980471798415e-06, "loss": 0.08780092597007752, "step": 25520 }, { "epoch": 2.2300366940415866, "grad_norm": 0.6661746510566771, "learning_rate": 1.8759133824870968e-06, "loss": 0.10880851745605469, "step": 25525 }, { "epoch": 2.2304735278699983, "grad_norm": 0.6967889160022921, "learning_rate": 1.8739295249056378e-06, "loss": 0.09512379169464111, "step": 25530 }, { "epoch": 2.23091036169841, "grad_norm": 0.5279716397006219, "learning_rate": 1.871946474948e-06, "loss": 0.09304124116897583, "step": 25535 }, { "epoch": 2.231347195526822, "grad_norm": 0.6591792111275164, "learning_rate": 1.8699642331265044e-06, "loss": 0.0894158124923706, "step": 25540 }, { "epoch": 2.231784029355233, "grad_norm": 0.6432712823590162, "learning_rate": 1.8679827999532635e-06, "loss": 0.12142393589019776, "step": 25545 }, { "epoch": 2.232220863183645, "grad_norm": 0.5962356048389101, "learning_rate": 1.866002175940187e-06, "loss": 0.09469056129455566, "step": 25550 }, { "epoch": 2.2326576970120566, "grad_norm": 0.5767719202630056, "learning_rate": 1.8640223615989694e-06, "loss": 0.0954801857471466, "step": 25555 }, { "epoch": 2.2330945308404684, "grad_norm": 0.6867153645445299, "learning_rate": 1.8620433574410979e-06, "loss": 0.09925345182418824, "step": 25560 }, { "epoch": 2.23353136466888, "grad_norm": 0.5826123894973769, "learning_rate": 1.8600651639778482e-06, "loss": 0.09090602397918701, "step": 25565 }, { "epoch": 2.2339681984972914, "grad_norm": 0.5984680971155761, "learning_rate": 1.858087781720293e-06, "loss": 0.09111249446868896, "step": 25570 }, { "epoch": 2.234405032325703, "grad_norm": 0.5015999026175036, "learning_rate": 1.8561112111792872e-06, "loss": 0.07378507852554321, "step": 25575 }, { "epoch": 2.234841866154115, "grad_norm": 0.5886760788476033, "learning_rate": 1.8541354528654843e-06, "loss": 0.11439615488052368, "step": 25580 }, { "epoch": 2.2352786999825267, "grad_norm": 0.6269356955012175, "learning_rate": 1.8521605072893206e-06, "loss": 0.11849111318588257, "step": 25585 }, { "epoch": 2.2357155338109385, "grad_norm": 0.5782852781844808, "learning_rate": 1.8501863749610266e-06, "loss": 0.09388524293899536, "step": 25590 }, { "epoch": 2.2361523676393498, "grad_norm": 0.6113827458567952, "learning_rate": 1.8482130563906214e-06, "loss": 0.10508666038513184, "step": 25595 }, { "epoch": 2.2365892014677615, "grad_norm": 0.5149805704156453, "learning_rate": 1.8462405520879122e-06, "loss": 0.0980189561843872, "step": 25600 }, { "epoch": 2.2370260352961733, "grad_norm": 0.5452957336507779, "learning_rate": 1.844268862562501e-06, "loss": 0.09802748560905457, "step": 25605 }, { "epoch": 2.237462869124585, "grad_norm": 0.6222032619179544, "learning_rate": 1.8422979883237729e-06, "loss": 0.10398188829421998, "step": 25610 }, { "epoch": 2.237899702952997, "grad_norm": 0.6247815351296118, "learning_rate": 1.8403279298809106e-06, "loss": 0.10092840194702149, "step": 25615 }, { "epoch": 2.2383365367814085, "grad_norm": 0.5492411180591149, "learning_rate": 1.838358687742874e-06, "loss": 0.10417760610580444, "step": 25620 }, { "epoch": 2.23877337060982, "grad_norm": 0.6123708628263459, "learning_rate": 1.8363902624184238e-06, "loss": 0.09944286346435546, "step": 25625 }, { "epoch": 2.2392102044382316, "grad_norm": 0.48461588816885476, "learning_rate": 1.834422654416102e-06, "loss": 0.08717916011810303, "step": 25630 }, { "epoch": 2.2396470382666434, "grad_norm": 0.5127734191988451, "learning_rate": 1.8324558642442454e-06, "loss": 0.11375279426574707, "step": 25635 }, { "epoch": 2.240083872095055, "grad_norm": 0.6901497186041545, "learning_rate": 1.830489892410975e-06, "loss": 0.11138720512390136, "step": 25640 }, { "epoch": 2.240520705923467, "grad_norm": 0.5186538430825828, "learning_rate": 1.8285247394241994e-06, "loss": 0.08912276625633239, "step": 25645 }, { "epoch": 2.2409575397518786, "grad_norm": 0.856400969921532, "learning_rate": 1.8265604057916248e-06, "loss": 0.09263890981674194, "step": 25650 }, { "epoch": 2.24139437358029, "grad_norm": 0.6968983194073127, "learning_rate": 1.8245968920207303e-06, "loss": 0.08933271169662475, "step": 25655 }, { "epoch": 2.2418312074087017, "grad_norm": 0.526497872330407, "learning_rate": 1.822634198618798e-06, "loss": 0.11771726608276367, "step": 25660 }, { "epoch": 2.2422680412371134, "grad_norm": 0.5946029186027724, "learning_rate": 1.8206723260928888e-06, "loss": 0.0905335009098053, "step": 25665 }, { "epoch": 2.242704875065525, "grad_norm": 0.551294920831035, "learning_rate": 1.8187112749498575e-06, "loss": 0.08746429681777954, "step": 25670 }, { "epoch": 2.243141708893937, "grad_norm": 0.5918824151852684, "learning_rate": 1.8167510456963406e-06, "loss": 0.0967401385307312, "step": 25675 }, { "epoch": 2.2435785427223482, "grad_norm": 0.5768084524018242, "learning_rate": 1.8147916388387705e-06, "loss": 0.09931275844573975, "step": 25680 }, { "epoch": 2.24401537655076, "grad_norm": 0.5066992555835805, "learning_rate": 1.8128330548833555e-06, "loss": 0.11248601675033569, "step": 25685 }, { "epoch": 2.2444522103791718, "grad_norm": 0.6358978505957165, "learning_rate": 1.8108752943361036e-06, "loss": 0.0839966893196106, "step": 25690 }, { "epoch": 2.2448890442075835, "grad_norm": 0.5275678924328071, "learning_rate": 1.8089183577028018e-06, "loss": 0.09605120420455933, "step": 25695 }, { "epoch": 2.2453258780359953, "grad_norm": 0.5723891214301324, "learning_rate": 1.8069622454890258e-06, "loss": 0.11651129722595215, "step": 25700 }, { "epoch": 2.2457627118644066, "grad_norm": 0.5931993388842318, "learning_rate": 1.8050069582001424e-06, "loss": 0.0950339674949646, "step": 25705 }, { "epoch": 2.2461995456928183, "grad_norm": 0.6023349196093778, "learning_rate": 1.803052496341301e-06, "loss": 0.10137258768081665, "step": 25710 }, { "epoch": 2.24663637952123, "grad_norm": 0.6524542951284741, "learning_rate": 1.8010988604174385e-06, "loss": 0.09570370316505432, "step": 25715 }, { "epoch": 2.247073213349642, "grad_norm": 0.5853282595174683, "learning_rate": 1.799146050933278e-06, "loss": 0.1147912859916687, "step": 25720 }, { "epoch": 2.2475100471780536, "grad_norm": 0.6770845161080534, "learning_rate": 1.7971940683933325e-06, "loss": 0.11893773078918457, "step": 25725 }, { "epoch": 2.2479468810064653, "grad_norm": 0.5402950899816481, "learning_rate": 1.7952429133018962e-06, "loss": 0.09676564931869507, "step": 25730 }, { "epoch": 2.2483837148348766, "grad_norm": 0.5615000451731389, "learning_rate": 1.7932925861630552e-06, "loss": 0.08548897504806519, "step": 25735 }, { "epoch": 2.2488205486632884, "grad_norm": 0.5610161200283651, "learning_rate": 1.791343087480677e-06, "loss": 0.1096470832824707, "step": 25740 }, { "epoch": 2.2492573824917, "grad_norm": 0.6018570988636934, "learning_rate": 1.7893944177584171e-06, "loss": 0.10317034721374511, "step": 25745 }, { "epoch": 2.249694216320112, "grad_norm": 0.5184383882461702, "learning_rate": 1.787446577499717e-06, "loss": 0.09901168346405029, "step": 25750 }, { "epoch": 2.2501310501485237, "grad_norm": 0.528668681337734, "learning_rate": 1.7854995672078002e-06, "loss": 0.0948137879371643, "step": 25755 }, { "epoch": 2.2505678839769354, "grad_norm": 0.6859579460792574, "learning_rate": 1.783553387385683e-06, "loss": 0.10401947498321533, "step": 25760 }, { "epoch": 2.2510047178053467, "grad_norm": 0.5634883242540994, "learning_rate": 1.7816080385361606e-06, "loss": 0.0968406617641449, "step": 25765 }, { "epoch": 2.2514415516337585, "grad_norm": 0.5900278780727802, "learning_rate": 1.779663521161819e-06, "loss": 0.12265233993530274, "step": 25770 }, { "epoch": 2.2518783854621702, "grad_norm": 0.5544555100972043, "learning_rate": 1.7777198357650245e-06, "loss": 0.09128131866455078, "step": 25775 }, { "epoch": 2.252315219290582, "grad_norm": 0.6477582926367605, "learning_rate": 1.77577698284793e-06, "loss": 0.09271761178970336, "step": 25780 }, { "epoch": 2.2527520531189937, "grad_norm": 0.49063520943940464, "learning_rate": 1.7738349629124729e-06, "loss": 0.10531257390975952, "step": 25785 }, { "epoch": 2.253188886947405, "grad_norm": 0.7841256074098678, "learning_rate": 1.7718937764603794e-06, "loss": 0.09394519329071045, "step": 25790 }, { "epoch": 2.253625720775817, "grad_norm": 0.5053966729113808, "learning_rate": 1.769953423993155e-06, "loss": 0.09750458598136902, "step": 25795 }, { "epoch": 2.2540625546042286, "grad_norm": 0.5468764820862195, "learning_rate": 1.768013906012091e-06, "loss": 0.08598516583442688, "step": 25800 }, { "epoch": 2.2544993884326403, "grad_norm": 0.5746393693856051, "learning_rate": 1.7660752230182664e-06, "loss": 0.09415554404258727, "step": 25805 }, { "epoch": 2.254936222261052, "grad_norm": 0.5990435036447368, "learning_rate": 1.7641373755125419e-06, "loss": 0.10865252017974854, "step": 25810 }, { "epoch": 2.2553730560894634, "grad_norm": 0.5121130823605561, "learning_rate": 1.7622003639955614e-06, "loss": 0.09892199039459229, "step": 25815 }, { "epoch": 2.255809889917875, "grad_norm": 0.5453864369246973, "learning_rate": 1.760264188967753e-06, "loss": 0.11037232875823974, "step": 25820 }, { "epoch": 2.256246723746287, "grad_norm": 0.60884331902299, "learning_rate": 1.7583288509293322e-06, "loss": 0.08560248613357543, "step": 25825 }, { "epoch": 2.2566835575746986, "grad_norm": 0.474846818985951, "learning_rate": 1.7563943503802933e-06, "loss": 0.10270501375198364, "step": 25830 }, { "epoch": 2.2571203914031104, "grad_norm": 0.633595538376617, "learning_rate": 1.7544606878204195e-06, "loss": 0.08589074611663819, "step": 25835 }, { "epoch": 2.2575572252315217, "grad_norm": 0.5863866181552851, "learning_rate": 1.7525278637492738e-06, "loss": 0.09797589778900147, "step": 25840 }, { "epoch": 2.2579940590599334, "grad_norm": 0.6850696763458326, "learning_rate": 1.7505958786662025e-06, "loss": 0.09826447367668152, "step": 25845 }, { "epoch": 2.258430892888345, "grad_norm": 0.5516664655299764, "learning_rate": 1.748664733070336e-06, "loss": 0.08605314493179321, "step": 25850 }, { "epoch": 2.258867726716757, "grad_norm": 0.6904410589760732, "learning_rate": 1.7467344274605875e-06, "loss": 0.10842640399932861, "step": 25855 }, { "epoch": 2.2593045605451687, "grad_norm": 0.46821469005956706, "learning_rate": 1.744804962335656e-06, "loss": 0.08359603881835938, "step": 25860 }, { "epoch": 2.2597413943735805, "grad_norm": 0.5286166366807203, "learning_rate": 1.7428763381940178e-06, "loss": 0.10493004322052002, "step": 25865 }, { "epoch": 2.2601782282019918, "grad_norm": 0.5767085263636195, "learning_rate": 1.7409485555339401e-06, "loss": 0.0833477258682251, "step": 25870 }, { "epoch": 2.2606150620304035, "grad_norm": 0.6422635004411708, "learning_rate": 1.7390216148534611e-06, "loss": 0.09814896583557128, "step": 25875 }, { "epoch": 2.2610518958588153, "grad_norm": 0.6409564001827124, "learning_rate": 1.7370955166504134e-06, "loss": 0.10251818895339966, "step": 25880 }, { "epoch": 2.261488729687227, "grad_norm": 0.6160542650990971, "learning_rate": 1.735170261422403e-06, "loss": 0.0905477225780487, "step": 25885 }, { "epoch": 2.261925563515639, "grad_norm": 0.6541041969155471, "learning_rate": 1.7332458496668247e-06, "loss": 0.10138517618179321, "step": 25890 }, { "epoch": 2.2623623973440505, "grad_norm": 0.5402713740017502, "learning_rate": 1.7313222818808517e-06, "loss": 0.10536036491394044, "step": 25895 }, { "epoch": 2.262799231172462, "grad_norm": 0.71619727719148, "learning_rate": 1.729399558561437e-06, "loss": 0.09961557984352112, "step": 25900 }, { "epoch": 2.2632360650008736, "grad_norm": 0.8155513503444692, "learning_rate": 1.727477680205325e-06, "loss": 0.09370858669281006, "step": 25905 }, { "epoch": 2.2636728988292854, "grad_norm": 0.6109147486983916, "learning_rate": 1.7255566473090269e-06, "loss": 0.10559810400009155, "step": 25910 }, { "epoch": 2.264109732657697, "grad_norm": 0.5499572868452093, "learning_rate": 1.7236364603688494e-06, "loss": 0.07675835490226746, "step": 25915 }, { "epoch": 2.264546566486109, "grad_norm": 0.5091132599126567, "learning_rate": 1.7217171198808714e-06, "loss": 0.07948188781738282, "step": 25920 }, { "epoch": 2.26498340031452, "grad_norm": 0.7447580102858689, "learning_rate": 1.7197986263409605e-06, "loss": 0.09505658149719239, "step": 25925 }, { "epoch": 2.265420234142932, "grad_norm": 0.5849710287127744, "learning_rate": 1.7178809802447577e-06, "loss": 0.1266505241394043, "step": 25930 }, { "epoch": 2.2658570679713437, "grad_norm": 0.6367883964738917, "learning_rate": 1.7159641820876944e-06, "loss": 0.08956615924835205, "step": 25935 }, { "epoch": 2.2662939017997554, "grad_norm": 0.5896809397202143, "learning_rate": 1.714048232364971e-06, "loss": 0.08554489612579345, "step": 25940 }, { "epoch": 2.266730735628167, "grad_norm": 0.6021385128479093, "learning_rate": 1.7121331315715794e-06, "loss": 0.09400533437728882, "step": 25945 }, { "epoch": 2.2671675694565785, "grad_norm": 0.6984411321514388, "learning_rate": 1.7102188802022878e-06, "loss": 0.09724124670028686, "step": 25950 }, { "epoch": 2.2676044032849902, "grad_norm": 0.6597157617046987, "learning_rate": 1.7083054787516424e-06, "loss": 0.08067021369934083, "step": 25955 }, { "epoch": 2.268041237113402, "grad_norm": 0.5164792729279454, "learning_rate": 1.7063929277139763e-06, "loss": 0.11255403757095336, "step": 25960 }, { "epoch": 2.2684780709418138, "grad_norm": 0.6185487214713765, "learning_rate": 1.7044812275833973e-06, "loss": 0.11905089616775513, "step": 25965 }, { "epoch": 2.2689149047702255, "grad_norm": 0.7050207621241305, "learning_rate": 1.7025703788537956e-06, "loss": 0.1136662244796753, "step": 25970 }, { "epoch": 2.2693517385986373, "grad_norm": 0.6472454052890686, "learning_rate": 1.7006603820188394e-06, "loss": 0.09163299202919006, "step": 25975 }, { "epoch": 2.2697885724270486, "grad_norm": 0.6211201049429479, "learning_rate": 1.6987512375719823e-06, "loss": 0.09880961775779724, "step": 25980 }, { "epoch": 2.2702254062554603, "grad_norm": 0.5900185995484265, "learning_rate": 1.6968429460064495e-06, "loss": 0.11602897644042968, "step": 25985 }, { "epoch": 2.270662240083872, "grad_norm": 0.5180993487282223, "learning_rate": 1.6949355078152546e-06, "loss": 0.0876339852809906, "step": 25990 }, { "epoch": 2.271099073912284, "grad_norm": 0.6256827302650552, "learning_rate": 1.693028923491184e-06, "loss": 0.11466097831726074, "step": 25995 }, { "epoch": 2.2715359077406956, "grad_norm": 0.6780203667997988, "learning_rate": 1.6911231935268064e-06, "loss": 0.097004234790802, "step": 26000 }, { "epoch": 2.2719727415691073, "grad_norm": 0.8110603066722046, "learning_rate": 1.689218318414469e-06, "loss": 0.08917895555496216, "step": 26005 }, { "epoch": 2.2724095753975186, "grad_norm": 0.5261800332166525, "learning_rate": 1.687314298646297e-06, "loss": 0.08336880207061767, "step": 26010 }, { "epoch": 2.2728464092259304, "grad_norm": 0.4945050453102902, "learning_rate": 1.6854111347141993e-06, "loss": 0.09626657962799072, "step": 26015 }, { "epoch": 2.273283243054342, "grad_norm": 0.5413168021069817, "learning_rate": 1.683508827109857e-06, "loss": 0.09393607378005982, "step": 26020 }, { "epoch": 2.273720076882754, "grad_norm": 0.5507265453723342, "learning_rate": 1.681607376324737e-06, "loss": 0.10519479513168335, "step": 26025 }, { "epoch": 2.2741569107111657, "grad_norm": 0.5321900269237595, "learning_rate": 1.6797067828500791e-06, "loss": 0.08705440759658814, "step": 26030 }, { "epoch": 2.274593744539577, "grad_norm": 0.6607038416417571, "learning_rate": 1.6778070471769042e-06, "loss": 0.11110343933105468, "step": 26035 }, { "epoch": 2.2750305783679887, "grad_norm": 0.6075534502291386, "learning_rate": 1.6759081697960094e-06, "loss": 0.06999058723449707, "step": 26040 }, { "epoch": 2.2754674121964005, "grad_norm": 0.5462700948622544, "learning_rate": 1.6740101511979745e-06, "loss": 0.10292387008666992, "step": 26045 }, { "epoch": 2.2759042460248122, "grad_norm": 0.6054202471508238, "learning_rate": 1.6721129918731544e-06, "loss": 0.11502063274383545, "step": 26050 }, { "epoch": 2.276341079853224, "grad_norm": 0.6444052208026649, "learning_rate": 1.6702166923116792e-06, "loss": 0.09720508456230163, "step": 26055 }, { "epoch": 2.2767779136816353, "grad_norm": 0.8215246947617344, "learning_rate": 1.6683212530034649e-06, "loss": 0.11160421371459961, "step": 26060 }, { "epoch": 2.277214747510047, "grad_norm": 0.5444668288009901, "learning_rate": 1.6664266744381975e-06, "loss": 0.10136594772338867, "step": 26065 }, { "epoch": 2.277651581338459, "grad_norm": 0.5738080304654666, "learning_rate": 1.664532957105343e-06, "loss": 0.10037211179733277, "step": 26070 }, { "epoch": 2.2780884151668706, "grad_norm": 0.5025995127661641, "learning_rate": 1.6626401014941457e-06, "loss": 0.09595444202423095, "step": 26075 }, { "epoch": 2.2785252489952823, "grad_norm": 0.5015569790006422, "learning_rate": 1.6607481080936288e-06, "loss": 0.08589547872543335, "step": 26080 }, { "epoch": 2.278962082823694, "grad_norm": 0.4764396398938767, "learning_rate": 1.6588569773925883e-06, "loss": 0.08457965850830078, "step": 26085 }, { "epoch": 2.2793989166521054, "grad_norm": 0.6267330832379349, "learning_rate": 1.6569667098796027e-06, "loss": 0.10050861835479737, "step": 26090 }, { "epoch": 2.279835750480517, "grad_norm": 0.5249319005961304, "learning_rate": 1.655077306043023e-06, "loss": 0.10305507183074951, "step": 26095 }, { "epoch": 2.280272584308929, "grad_norm": 0.49823822719604494, "learning_rate": 1.6531887663709801e-06, "loss": 0.10061519145965576, "step": 26100 }, { "epoch": 2.2807094181373406, "grad_norm": 0.5788069007318155, "learning_rate": 1.651301091351379e-06, "loss": 0.10439541339874267, "step": 26105 }, { "epoch": 2.2811462519657524, "grad_norm": 0.48025704769401506, "learning_rate": 1.6494142814719005e-06, "loss": 0.11870685815811158, "step": 26110 }, { "epoch": 2.281583085794164, "grad_norm": 0.5473685698677707, "learning_rate": 1.647528337220009e-06, "loss": 0.09275057315826415, "step": 26115 }, { "epoch": 2.2820199196225754, "grad_norm": 0.6114140813034221, "learning_rate": 1.6456432590829363e-06, "loss": 0.11280251741409301, "step": 26120 }, { "epoch": 2.282456753450987, "grad_norm": 0.5650335230867096, "learning_rate": 1.6437590475476994e-06, "loss": 0.11938828229904175, "step": 26125 }, { "epoch": 2.282893587279399, "grad_norm": 0.7767901215035705, "learning_rate": 1.6418757031010802e-06, "loss": 0.10971196889877319, "step": 26130 }, { "epoch": 2.2833304211078107, "grad_norm": 0.5977566634286948, "learning_rate": 1.639993226229647e-06, "loss": 0.07547000646591187, "step": 26135 }, { "epoch": 2.2837672549362225, "grad_norm": 0.5807224476015436, "learning_rate": 1.638111617419737e-06, "loss": 0.10158746242523194, "step": 26140 }, { "epoch": 2.2842040887646338, "grad_norm": 0.49641314933723685, "learning_rate": 1.6362308771574691e-06, "loss": 0.0893831729888916, "step": 26145 }, { "epoch": 2.2846409225930455, "grad_norm": 0.6049336609616565, "learning_rate": 1.634351005928733e-06, "loss": 0.11741176843643189, "step": 26150 }, { "epoch": 2.2850777564214573, "grad_norm": 0.6059629693334457, "learning_rate": 1.6324720042191945e-06, "loss": 0.11772670745849609, "step": 26155 }, { "epoch": 2.285514590249869, "grad_norm": 0.6423630286367183, "learning_rate": 1.6305938725143e-06, "loss": 0.11428959369659424, "step": 26160 }, { "epoch": 2.285951424078281, "grad_norm": 0.5434099082164452, "learning_rate": 1.6287166112992603e-06, "loss": 0.09416592717170716, "step": 26165 }, { "epoch": 2.286388257906692, "grad_norm": 0.5615628160553794, "learning_rate": 1.6268402210590733e-06, "loss": 0.10776177644729615, "step": 26170 }, { "epoch": 2.286825091735104, "grad_norm": 0.5725630167400217, "learning_rate": 1.6249647022785038e-06, "loss": 0.11843836307525635, "step": 26175 }, { "epoch": 2.2872619255635156, "grad_norm": 0.6580270281189674, "learning_rate": 1.6230900554420964e-06, "loss": 0.10635437965393066, "step": 26180 }, { "epoch": 2.2876987593919274, "grad_norm": 0.6417727571060167, "learning_rate": 1.6212162810341652e-06, "loss": 0.10355625152587891, "step": 26185 }, { "epoch": 2.288135593220339, "grad_norm": 0.5934657599093238, "learning_rate": 1.619343379538807e-06, "loss": 0.08687974214553833, "step": 26190 }, { "epoch": 2.2885724270487504, "grad_norm": 0.7029676379504666, "learning_rate": 1.6174713514398815e-06, "loss": 0.08383414745330811, "step": 26195 }, { "epoch": 2.289009260877162, "grad_norm": 0.568906704136069, "learning_rate": 1.615600197221034e-06, "loss": 0.0908517837524414, "step": 26200 }, { "epoch": 2.289446094705574, "grad_norm": 0.6448208347495132, "learning_rate": 1.6137299173656778e-06, "loss": 0.10395021438598633, "step": 26205 }, { "epoch": 2.2898829285339857, "grad_norm": 0.7881145798216392, "learning_rate": 1.6118605123570003e-06, "loss": 0.11266669034957885, "step": 26210 }, { "epoch": 2.2903197623623974, "grad_norm": 0.6038969169978828, "learning_rate": 1.609991982677967e-06, "loss": 0.11470448970794678, "step": 26215 }, { "epoch": 2.290756596190809, "grad_norm": 0.5158954861810007, "learning_rate": 1.608124328811314e-06, "loss": 0.10482852458953858, "step": 26220 }, { "epoch": 2.291193430019221, "grad_norm": 0.5721610395642669, "learning_rate": 1.6062575512395511e-06, "loss": 0.09857804775238037, "step": 26225 }, { "epoch": 2.2916302638476322, "grad_norm": 0.6187247028994854, "learning_rate": 1.6043916504449608e-06, "loss": 0.13441344499588012, "step": 26230 }, { "epoch": 2.292067097676044, "grad_norm": 0.5515369093160438, "learning_rate": 1.602526626909604e-06, "loss": 0.0953908085823059, "step": 26235 }, { "epoch": 2.2925039315044557, "grad_norm": 0.4580524002507323, "learning_rate": 1.6006624811153086e-06, "loss": 0.09605130553245544, "step": 26240 }, { "epoch": 2.2929407653328675, "grad_norm": 0.5833750384681236, "learning_rate": 1.598799213543682e-06, "loss": 0.09711296558380127, "step": 26245 }, { "epoch": 2.2933775991612793, "grad_norm": 0.657076112161604, "learning_rate": 1.5969368246760997e-06, "loss": 0.10278072357177734, "step": 26250 }, { "epoch": 2.2938144329896906, "grad_norm": 0.624367327532389, "learning_rate": 1.5950753149937115e-06, "loss": 0.08609719276428222, "step": 26255 }, { "epoch": 2.2942512668181023, "grad_norm": 0.533112034852688, "learning_rate": 1.593214684977441e-06, "loss": 0.10012686252593994, "step": 26260 }, { "epoch": 2.294688100646514, "grad_norm": 0.6639754389058349, "learning_rate": 1.5913549351079828e-06, "loss": 0.10370748043060303, "step": 26265 }, { "epoch": 2.295124934474926, "grad_norm": 0.6122163957475371, "learning_rate": 1.5894960658658076e-06, "loss": 0.09137070178985596, "step": 26270 }, { "epoch": 2.2955617683033376, "grad_norm": 0.6490791043498153, "learning_rate": 1.587638077731154e-06, "loss": 0.09501404762268066, "step": 26275 }, { "epoch": 2.295998602131749, "grad_norm": 0.5069667030258854, "learning_rate": 1.5857809711840383e-06, "loss": 0.10684937238693237, "step": 26280 }, { "epoch": 2.2964354359601606, "grad_norm": 0.5969900680157273, "learning_rate": 1.5839247467042445e-06, "loss": 0.12407811880111694, "step": 26285 }, { "epoch": 2.2968722697885724, "grad_norm": 0.6068729242544334, "learning_rate": 1.5820694047713298e-06, "loss": 0.12094517946243286, "step": 26290 }, { "epoch": 2.297309103616984, "grad_norm": 0.6374879452413496, "learning_rate": 1.5802149458646233e-06, "loss": 0.09635223746299744, "step": 26295 }, { "epoch": 2.297745937445396, "grad_norm": 0.6134152875674485, "learning_rate": 1.5783613704632284e-06, "loss": 0.07840074300765991, "step": 26300 }, { "epoch": 2.298182771273807, "grad_norm": 0.6587525227142101, "learning_rate": 1.5765086790460177e-06, "loss": 0.10340704917907714, "step": 26305 }, { "epoch": 2.298619605102219, "grad_norm": 0.6077821334581928, "learning_rate": 1.5746568720916343e-06, "loss": 0.09339604377746583, "step": 26310 }, { "epoch": 2.2990564389306307, "grad_norm": 0.7472155176044311, "learning_rate": 1.5728059500784981e-06, "loss": 0.09646941423416137, "step": 26315 }, { "epoch": 2.2994932727590425, "grad_norm": 0.6440404976012163, "learning_rate": 1.5709559134847946e-06, "loss": 0.11665084362030029, "step": 26320 }, { "epoch": 2.2999301065874542, "grad_norm": 0.6523215708960266, "learning_rate": 1.5691067627884838e-06, "loss": 0.07453806400299072, "step": 26325 }, { "epoch": 2.300366940415866, "grad_norm": 0.5634441300908555, "learning_rate": 1.5672584984672929e-06, "loss": 0.10044422149658203, "step": 26330 }, { "epoch": 2.3008037742442773, "grad_norm": 0.7126385516975408, "learning_rate": 1.5654111209987276e-06, "loss": 0.11874489784240723, "step": 26335 }, { "epoch": 2.301240608072689, "grad_norm": 0.5185662289934639, "learning_rate": 1.563564630860056e-06, "loss": 0.10655641555786133, "step": 26340 }, { "epoch": 2.301677441901101, "grad_norm": 0.5914411057287652, "learning_rate": 1.561719028528324e-06, "loss": 0.13436558246612548, "step": 26345 }, { "epoch": 2.3021142757295125, "grad_norm": 0.5864712123387884, "learning_rate": 1.559874314480344e-06, "loss": 0.09955159425735474, "step": 26350 }, { "epoch": 2.3025511095579243, "grad_norm": 0.5984846387873317, "learning_rate": 1.5580304891927001e-06, "loss": 0.09429332613945007, "step": 26355 }, { "epoch": 2.302987943386336, "grad_norm": 0.5097318302224324, "learning_rate": 1.5561875531417458e-06, "loss": 0.10507862567901612, "step": 26360 }, { "epoch": 2.3034247772147474, "grad_norm": 0.6001596199191997, "learning_rate": 1.5543455068036045e-06, "loss": 0.08594629764556885, "step": 26365 }, { "epoch": 2.303861611043159, "grad_norm": 0.5888425258699475, "learning_rate": 1.5525043506541742e-06, "loss": 0.0699865460395813, "step": 26370 }, { "epoch": 2.304298444871571, "grad_norm": 0.546820457535496, "learning_rate": 1.5506640851691164e-06, "loss": 0.08263392448425293, "step": 26375 }, { "epoch": 2.3047352786999826, "grad_norm": 0.51106304067402, "learning_rate": 1.5488247108238702e-06, "loss": 0.0802229881286621, "step": 26380 }, { "epoch": 2.3051721125283944, "grad_norm": 0.581796018121332, "learning_rate": 1.5469862280936344e-06, "loss": 0.09791723489761353, "step": 26385 }, { "epoch": 2.3056089463568057, "grad_norm": 0.49191130412699974, "learning_rate": 1.5451486374533875e-06, "loss": 0.10982743501663209, "step": 26390 }, { "epoch": 2.3060457801852174, "grad_norm": 0.58743172363755, "learning_rate": 1.5433119393778696e-06, "loss": 0.09823352098464966, "step": 26395 }, { "epoch": 2.306482614013629, "grad_norm": 0.583711590118977, "learning_rate": 1.5414761343415974e-06, "loss": 0.10075559616088867, "step": 26400 }, { "epoch": 2.306919447842041, "grad_norm": 0.6541450695924866, "learning_rate": 1.5396412228188512e-06, "loss": 0.09346700310707093, "step": 26405 }, { "epoch": 2.3073562816704527, "grad_norm": 0.7062113274438842, "learning_rate": 1.5378072052836812e-06, "loss": 0.09110714793205262, "step": 26410 }, { "epoch": 2.307793115498864, "grad_norm": 0.7270559769294397, "learning_rate": 1.5359740822099128e-06, "loss": 0.10714130401611328, "step": 26415 }, { "epoch": 2.3082299493272758, "grad_norm": 0.5851802614500824, "learning_rate": 1.5341418540711284e-06, "loss": 0.1075870156288147, "step": 26420 }, { "epoch": 2.3086667831556875, "grad_norm": 0.6552394975664408, "learning_rate": 1.5323105213406913e-06, "loss": 0.06603745222091675, "step": 26425 }, { "epoch": 2.3091036169840993, "grad_norm": 0.4949933795960961, "learning_rate": 1.530480084491725e-06, "loss": 0.06622167825698852, "step": 26430 }, { "epoch": 2.309540450812511, "grad_norm": 0.5508626960594089, "learning_rate": 1.5286505439971288e-06, "loss": 0.09916934967041016, "step": 26435 }, { "epoch": 2.309977284640923, "grad_norm": 0.7386838057485409, "learning_rate": 1.5268219003295625e-06, "loss": 0.11771892309188843, "step": 26440 }, { "epoch": 2.310414118469334, "grad_norm": 0.6158543918399936, "learning_rate": 1.5249941539614638e-06, "loss": 0.07302557229995728, "step": 26445 }, { "epoch": 2.310850952297746, "grad_norm": 0.6300667832642372, "learning_rate": 1.5231673053650258e-06, "loss": 0.10173587799072266, "step": 26450 }, { "epoch": 2.3112877861261576, "grad_norm": 0.6373474074009644, "learning_rate": 1.5213413550122224e-06, "loss": 0.0997411847114563, "step": 26455 }, { "epoch": 2.3117246199545693, "grad_norm": 0.5127663848153313, "learning_rate": 1.519516303374788e-06, "loss": 0.0873259425163269, "step": 26460 }, { "epoch": 2.312161453782981, "grad_norm": 0.57074969724071, "learning_rate": 1.5176921509242248e-06, "loss": 0.09792813062667846, "step": 26465 }, { "epoch": 2.312598287611393, "grad_norm": 0.6107477008595992, "learning_rate": 1.5158688981318087e-06, "loss": 0.0928487777709961, "step": 26470 }, { "epoch": 2.313035121439804, "grad_norm": 0.6139546656690188, "learning_rate": 1.5140465454685765e-06, "loss": 0.08637443780899048, "step": 26475 }, { "epoch": 2.313471955268216, "grad_norm": 0.7250564243929365, "learning_rate": 1.5122250934053362e-06, "loss": 0.09958702325820923, "step": 26480 }, { "epoch": 2.3139087890966277, "grad_norm": 0.5778855003779471, "learning_rate": 1.5104045424126596e-06, "loss": 0.08883192539215087, "step": 26485 }, { "epoch": 2.3143456229250394, "grad_norm": 0.537841147741975, "learning_rate": 1.508584892960891e-06, "loss": 0.09172990918159485, "step": 26490 }, { "epoch": 2.314782456753451, "grad_norm": 0.6819685285193922, "learning_rate": 1.5067661455201365e-06, "loss": 0.11414868831634521, "step": 26495 }, { "epoch": 2.3152192905818625, "grad_norm": 0.6808090576130025, "learning_rate": 1.5049483005602745e-06, "loss": 0.08690593242645264, "step": 26500 }, { "epoch": 2.3156561244102742, "grad_norm": 0.6609754598082033, "learning_rate": 1.5031313585509455e-06, "loss": 0.10777838230133056, "step": 26505 }, { "epoch": 2.316092958238686, "grad_norm": 0.5286094440472604, "learning_rate": 1.5013153199615583e-06, "loss": 0.09338884353637696, "step": 26510 }, { "epoch": 2.3165297920670977, "grad_norm": 0.5693094901613367, "learning_rate": 1.4995001852612888e-06, "loss": 0.09003621339797974, "step": 26515 }, { "epoch": 2.3169666258955095, "grad_norm": 0.5290386690687455, "learning_rate": 1.4976859549190781e-06, "loss": 0.0854374647140503, "step": 26520 }, { "epoch": 2.317403459723921, "grad_norm": 0.6006336315905823, "learning_rate": 1.495872629403637e-06, "loss": 0.10225080251693726, "step": 26525 }, { "epoch": 2.3178402935523326, "grad_norm": 0.6005317356840397, "learning_rate": 1.4940602091834366e-06, "loss": 0.1112666368484497, "step": 26530 }, { "epoch": 2.3182771273807443, "grad_norm": 0.5118169429140109, "learning_rate": 1.4922486947267222e-06, "loss": 0.10023126602172852, "step": 26535 }, { "epoch": 2.318713961209156, "grad_norm": 0.6469557087780706, "learning_rate": 1.4904380865014978e-06, "loss": 0.10970715284347535, "step": 26540 }, { "epoch": 2.319150795037568, "grad_norm": 0.5067741127102209, "learning_rate": 1.4886283849755368e-06, "loss": 0.096670001745224, "step": 26545 }, { "epoch": 2.319587628865979, "grad_norm": 0.502355305217383, "learning_rate": 1.4868195906163764e-06, "loss": 0.09955615997314453, "step": 26550 }, { "epoch": 2.320024462694391, "grad_norm": 0.6202411286319326, "learning_rate": 1.4850117038913226e-06, "loss": 0.08167816400527954, "step": 26555 }, { "epoch": 2.3204612965228026, "grad_norm": 0.706141054342546, "learning_rate": 1.4832047252674436e-06, "loss": 0.12057267427444458, "step": 26560 }, { "epoch": 2.3208981303512144, "grad_norm": 0.6319262405247206, "learning_rate": 1.4813986552115738e-06, "loss": 0.08291927576065064, "step": 26565 }, { "epoch": 2.321334964179626, "grad_norm": 0.5998173947290423, "learning_rate": 1.4795934941903161e-06, "loss": 0.09639384746551513, "step": 26570 }, { "epoch": 2.321771798008038, "grad_norm": 0.5849159003242186, "learning_rate": 1.4777892426700342e-06, "loss": 0.11228657960891723, "step": 26575 }, { "epoch": 2.3222086318364497, "grad_norm": 0.5108135478742607, "learning_rate": 1.475985901116858e-06, "loss": 0.11401927471160889, "step": 26580 }, { "epoch": 2.322645465664861, "grad_norm": 0.6161485323899681, "learning_rate": 1.4741834699966824e-06, "loss": 0.11031337976455688, "step": 26585 }, { "epoch": 2.3230822994932727, "grad_norm": 0.6627824205878489, "learning_rate": 1.4723819497751695e-06, "loss": 0.10592027902603149, "step": 26590 }, { "epoch": 2.3235191333216845, "grad_norm": 0.6247825079746712, "learning_rate": 1.4705813409177412e-06, "loss": 0.08544690608978271, "step": 26595 }, { "epoch": 2.3239559671500962, "grad_norm": 0.5794971113994742, "learning_rate": 1.4687816438895907e-06, "loss": 0.09199731349945069, "step": 26600 }, { "epoch": 2.324392800978508, "grad_norm": 0.6096504586588913, "learning_rate": 1.4669828591556695e-06, "loss": 0.08925693035125733, "step": 26605 }, { "epoch": 2.3248296348069193, "grad_norm": 0.5343865242885646, "learning_rate": 1.4651849871806957e-06, "loss": 0.0899770975112915, "step": 26610 }, { "epoch": 2.325266468635331, "grad_norm": 0.5939430510852468, "learning_rate": 1.4633880284291518e-06, "loss": 0.10916838645935059, "step": 26615 }, { "epoch": 2.325703302463743, "grad_norm": 0.507973337775745, "learning_rate": 1.4615919833652825e-06, "loss": 0.10215972661972046, "step": 26620 }, { "epoch": 2.3261401362921545, "grad_norm": 0.5286225199482943, "learning_rate": 1.4597968524531008e-06, "loss": 0.10070710182189942, "step": 26625 }, { "epoch": 2.3265769701205663, "grad_norm": 0.6144652449964199, "learning_rate": 1.4580026361563783e-06, "loss": 0.11072522401809692, "step": 26630 }, { "epoch": 2.3270138039489776, "grad_norm": 0.7305809442863995, "learning_rate": 1.4562093349386564e-06, "loss": 0.0961754560470581, "step": 26635 }, { "epoch": 2.3274506377773894, "grad_norm": 0.5864915486174263, "learning_rate": 1.4544169492632315e-06, "loss": 0.10616264343261719, "step": 26640 }, { "epoch": 2.327887471605801, "grad_norm": 0.6013606082590024, "learning_rate": 1.4526254795931726e-06, "loss": 0.09233105182647705, "step": 26645 }, { "epoch": 2.328324305434213, "grad_norm": 0.6689811940388127, "learning_rate": 1.450834926391305e-06, "loss": 0.10776035785675049, "step": 26650 }, { "epoch": 2.3287611392626246, "grad_norm": 0.5650337005765518, "learning_rate": 1.449045290120223e-06, "loss": 0.09821927547454834, "step": 26655 }, { "epoch": 2.329197973091036, "grad_norm": 0.5514765766593919, "learning_rate": 1.4472565712422792e-06, "loss": 0.1218592643737793, "step": 26660 }, { "epoch": 2.3296348069194477, "grad_norm": 0.5840195667850826, "learning_rate": 1.4454687702195907e-06, "loss": 0.09524151682853699, "step": 26665 }, { "epoch": 2.3300716407478594, "grad_norm": 0.5090727423082977, "learning_rate": 1.4436818875140396e-06, "loss": 0.09836872816085815, "step": 26670 }, { "epoch": 2.330508474576271, "grad_norm": 0.5098755557637527, "learning_rate": 1.4418959235872687e-06, "loss": 0.10092116594314575, "step": 26675 }, { "epoch": 2.330945308404683, "grad_norm": 0.5814266056528746, "learning_rate": 1.440110878900683e-06, "loss": 0.10612530708312988, "step": 26680 }, { "epoch": 2.3313821422330947, "grad_norm": 0.5416259662375356, "learning_rate": 1.43832675391545e-06, "loss": 0.10195662975311279, "step": 26685 }, { "epoch": 2.331818976061506, "grad_norm": 0.5729193769159706, "learning_rate": 1.4365435490925022e-06, "loss": 0.09707575440406799, "step": 26690 }, { "epoch": 2.3322558098899178, "grad_norm": 0.6515424917605773, "learning_rate": 1.4347612648925312e-06, "loss": 0.0844119668006897, "step": 26695 }, { "epoch": 2.3326926437183295, "grad_norm": 0.7408782143742954, "learning_rate": 1.4329799017759954e-06, "loss": 0.08152636289596557, "step": 26700 }, { "epoch": 2.3331294775467413, "grad_norm": 0.608824487453214, "learning_rate": 1.4311994602031065e-06, "loss": 0.10964748859405518, "step": 26705 }, { "epoch": 2.333566311375153, "grad_norm": 0.4959581233219465, "learning_rate": 1.4294199406338476e-06, "loss": 0.07777758240699768, "step": 26710 }, { "epoch": 2.3340031452035648, "grad_norm": 0.6416650039122533, "learning_rate": 1.427641343527959e-06, "loss": 0.11092007160186768, "step": 26715 }, { "epoch": 2.334439979031976, "grad_norm": 0.6616410625125587, "learning_rate": 1.4258636693449406e-06, "loss": 0.08407531380653381, "step": 26720 }, { "epoch": 2.334876812860388, "grad_norm": 0.6130886466443988, "learning_rate": 1.4240869185440603e-06, "loss": 0.07974908351898194, "step": 26725 }, { "epoch": 2.3353136466887996, "grad_norm": 0.6758889002580396, "learning_rate": 1.4223110915843418e-06, "loss": 0.09800799489021302, "step": 26730 }, { "epoch": 2.3357504805172113, "grad_norm": 0.5586703219308284, "learning_rate": 1.4205361889245729e-06, "loss": 0.09532495141029358, "step": 26735 }, { "epoch": 2.336187314345623, "grad_norm": 0.6204562637684767, "learning_rate": 1.4187622110232984e-06, "loss": 0.08780778646469116, "step": 26740 }, { "epoch": 2.3366241481740344, "grad_norm": 0.5538447091998405, "learning_rate": 1.4169891583388322e-06, "loss": 0.10065115690231323, "step": 26745 }, { "epoch": 2.337060982002446, "grad_norm": 0.6888764573921962, "learning_rate": 1.4152170313292407e-06, "loss": 0.09794147610664368, "step": 26750 }, { "epoch": 2.337497815830858, "grad_norm": 0.5281135232462905, "learning_rate": 1.4134458304523578e-06, "loss": 0.06952993869781494, "step": 26755 }, { "epoch": 2.3379346496592697, "grad_norm": 0.5964686652785627, "learning_rate": 1.4116755561657741e-06, "loss": 0.08139709234237671, "step": 26760 }, { "epoch": 2.3383714834876814, "grad_norm": 0.6500611332493522, "learning_rate": 1.4099062089268418e-06, "loss": 0.08266897201538086, "step": 26765 }, { "epoch": 2.3388083173160927, "grad_norm": 0.5919032487296889, "learning_rate": 1.4081377891926723e-06, "loss": 0.12433792352676391, "step": 26770 }, { "epoch": 2.3392451511445045, "grad_norm": 0.6165503323023093, "learning_rate": 1.4063702974201416e-06, "loss": 0.10798243284225464, "step": 26775 }, { "epoch": 2.3396819849729162, "grad_norm": 0.5999365088394248, "learning_rate": 1.4046037340658819e-06, "loss": 0.09529628753662109, "step": 26780 }, { "epoch": 2.340118818801328, "grad_norm": 0.6963243916523082, "learning_rate": 1.402838099586285e-06, "loss": 0.11470694541931152, "step": 26785 }, { "epoch": 2.3405556526297397, "grad_norm": 0.6364753620384009, "learning_rate": 1.4010733944375083e-06, "loss": 0.13268531560897828, "step": 26790 }, { "epoch": 2.3409924864581515, "grad_norm": 0.6568226616305319, "learning_rate": 1.399309619075463e-06, "loss": 0.08903255462646484, "step": 26795 }, { "epoch": 2.341429320286563, "grad_norm": 0.5728320305664026, "learning_rate": 1.3975467739558224e-06, "loss": 0.08274394869804383, "step": 26800 }, { "epoch": 2.3418661541149746, "grad_norm": 0.5235451087514574, "learning_rate": 1.395784859534019e-06, "loss": 0.1030418872833252, "step": 26805 }, { "epoch": 2.3423029879433863, "grad_norm": 0.5106619365535066, "learning_rate": 1.394023876265248e-06, "loss": 0.09257320761680603, "step": 26810 }, { "epoch": 2.342739821771798, "grad_norm": 0.6624464793024843, "learning_rate": 1.3922638246044595e-06, "loss": 0.09036102890968323, "step": 26815 }, { "epoch": 2.34317665560021, "grad_norm": 0.5171826194647217, "learning_rate": 1.390504705006363e-06, "loss": 0.07608287334442139, "step": 26820 }, { "epoch": 2.3436134894286216, "grad_norm": 0.7282640179923899, "learning_rate": 1.3887465179254333e-06, "loss": 0.0971988558769226, "step": 26825 }, { "epoch": 2.344050323257033, "grad_norm": 0.5057151215611445, "learning_rate": 1.3869892638158976e-06, "loss": 0.0931062400341034, "step": 26830 }, { "epoch": 2.3444871570854446, "grad_norm": 0.5693273868076086, "learning_rate": 1.3852329431317446e-06, "loss": 0.11284608840942383, "step": 26835 }, { "epoch": 2.3449239909138564, "grad_norm": 0.7018822479726019, "learning_rate": 1.38347755632672e-06, "loss": 0.09791985750198365, "step": 26840 }, { "epoch": 2.345360824742268, "grad_norm": 0.5416669635089559, "learning_rate": 1.3817231038543332e-06, "loss": 0.11230006217956542, "step": 26845 }, { "epoch": 2.34579765857068, "grad_norm": 0.5175394805068861, "learning_rate": 1.3799695861678463e-06, "loss": 0.0884058952331543, "step": 26850 }, { "epoch": 2.346234492399091, "grad_norm": 0.5437934876031606, "learning_rate": 1.3782170037202852e-06, "loss": 0.11004221439361572, "step": 26855 }, { "epoch": 2.346671326227503, "grad_norm": 0.6366833478142543, "learning_rate": 1.3764653569644298e-06, "loss": 0.1076655387878418, "step": 26860 }, { "epoch": 2.3471081600559147, "grad_norm": 0.5609611740386323, "learning_rate": 1.3747146463528211e-06, "loss": 0.08892698287963867, "step": 26865 }, { "epoch": 2.3475449938843265, "grad_norm": 0.5903266284334912, "learning_rate": 1.3729648723377543e-06, "loss": 0.10375679731369018, "step": 26870 }, { "epoch": 2.3479818277127382, "grad_norm": 0.6827519746565828, "learning_rate": 1.371216035371289e-06, "loss": 0.09866106510162354, "step": 26875 }, { "epoch": 2.3484186615411495, "grad_norm": 0.544882747977054, "learning_rate": 1.3694681359052376e-06, "loss": 0.09550805687904358, "step": 26880 }, { "epoch": 2.3488554953695613, "grad_norm": 0.6313947668464419, "learning_rate": 1.3677211743911706e-06, "loss": 0.09249190092086793, "step": 26885 }, { "epoch": 2.349292329197973, "grad_norm": 0.6029753952287964, "learning_rate": 1.365975151280422e-06, "loss": 0.08339272737503052, "step": 26890 }, { "epoch": 2.349729163026385, "grad_norm": 0.8524646109450602, "learning_rate": 1.3642300670240726e-06, "loss": 0.10092120170593262, "step": 26895 }, { "epoch": 2.3501659968547965, "grad_norm": 0.5735755936618313, "learning_rate": 1.3624859220729713e-06, "loss": 0.11748230457305908, "step": 26900 }, { "epoch": 2.3506028306832083, "grad_norm": 0.5107563826385626, "learning_rate": 1.3607427168777177e-06, "loss": 0.09404549598693848, "step": 26905 }, { "epoch": 2.3510396645116196, "grad_norm": 0.5830816697765776, "learning_rate": 1.359000451888673e-06, "loss": 0.10167986154556274, "step": 26910 }, { "epoch": 2.3514764983400314, "grad_norm": 0.5668085564643817, "learning_rate": 1.3572591275559515e-06, "loss": 0.09358499050140381, "step": 26915 }, { "epoch": 2.351913332168443, "grad_norm": 0.5119924660531162, "learning_rate": 1.355518744329426e-06, "loss": 0.10440211296081543, "step": 26920 }, { "epoch": 2.352350165996855, "grad_norm": 0.6904473746382388, "learning_rate": 1.3537793026587288e-06, "loss": 0.0850628674030304, "step": 26925 }, { "epoch": 2.3527869998252666, "grad_norm": 0.5589844344599332, "learning_rate": 1.3520408029932447e-06, "loss": 0.07622579336166382, "step": 26930 }, { "epoch": 2.3532238336536784, "grad_norm": 0.6635346409801629, "learning_rate": 1.3503032457821174e-06, "loss": 0.11108918190002441, "step": 26935 }, { "epoch": 2.3536606674820897, "grad_norm": 0.5285561158784801, "learning_rate": 1.3485666314742458e-06, "loss": 0.11748595237731933, "step": 26940 }, { "epoch": 2.3540975013105014, "grad_norm": 0.5884094701288902, "learning_rate": 1.3468309605182883e-06, "loss": 0.10669595003128052, "step": 26945 }, { "epoch": 2.354534335138913, "grad_norm": 0.5714869299865823, "learning_rate": 1.3450962333626549e-06, "loss": 0.10507806539535522, "step": 26950 }, { "epoch": 2.354971168967325, "grad_norm": 0.5879729901198609, "learning_rate": 1.3433624504555194e-06, "loss": 0.11993310451507569, "step": 26955 }, { "epoch": 2.3554080027957367, "grad_norm": 0.7111588870641992, "learning_rate": 1.3416296122447991e-06, "loss": 0.11228338479995728, "step": 26960 }, { "epoch": 2.355844836624148, "grad_norm": 0.7179045942617649, "learning_rate": 1.3398977191781797e-06, "loss": 0.10184609889984131, "step": 26965 }, { "epoch": 2.3562816704525598, "grad_norm": 0.5056865687377231, "learning_rate": 1.3381667717030956e-06, "loss": 0.09469338655471801, "step": 26970 }, { "epoch": 2.3567185042809715, "grad_norm": 0.6355790499078595, "learning_rate": 1.3364367702667413e-06, "loss": 0.12824636697769165, "step": 26975 }, { "epoch": 2.3571553381093833, "grad_norm": 0.6021335484015542, "learning_rate": 1.3347077153160632e-06, "loss": 0.09613929986953736, "step": 26980 }, { "epoch": 2.357592171937795, "grad_norm": 0.725713117143659, "learning_rate": 1.3329796072977647e-06, "loss": 0.08969777822494507, "step": 26985 }, { "epoch": 2.3580290057662063, "grad_norm": 0.5504608435556628, "learning_rate": 1.3312524466583044e-06, "loss": 0.10941762924194336, "step": 26990 }, { "epoch": 2.358465839594618, "grad_norm": 0.7794061447468439, "learning_rate": 1.3295262338438946e-06, "loss": 0.0957135796546936, "step": 26995 }, { "epoch": 2.35890267342303, "grad_norm": 0.5134045260059473, "learning_rate": 1.3278009693005072e-06, "loss": 0.07201533317565918, "step": 27000 }, { "epoch": 2.3593395072514416, "grad_norm": 0.6592348951073432, "learning_rate": 1.326076653473864e-06, "loss": 0.09871820211410523, "step": 27005 }, { "epoch": 2.3597763410798533, "grad_norm": 0.7737882072753919, "learning_rate": 1.3243532868094456e-06, "loss": 0.09508835077285767, "step": 27010 }, { "epoch": 2.3602131749082647, "grad_norm": 0.6115643795839866, "learning_rate": 1.3226308697524853e-06, "loss": 0.09057304859161378, "step": 27015 }, { "epoch": 2.3606500087366764, "grad_norm": 0.6742288671356166, "learning_rate": 1.3209094027479713e-06, "loss": 0.11147917509078979, "step": 27020 }, { "epoch": 2.361086842565088, "grad_norm": 0.5955685877666023, "learning_rate": 1.319188886240645e-06, "loss": 0.10312285423278808, "step": 27025 }, { "epoch": 2.3615236763935, "grad_norm": 0.9534807053911345, "learning_rate": 1.317469320675006e-06, "loss": 0.1039531946182251, "step": 27030 }, { "epoch": 2.3619605102219117, "grad_norm": 0.5760634090323409, "learning_rate": 1.3157507064953058e-06, "loss": 0.09491138458251953, "step": 27035 }, { "epoch": 2.3623973440503234, "grad_norm": 0.6575996386098477, "learning_rate": 1.3140330441455478e-06, "loss": 0.11119484901428223, "step": 27040 }, { "epoch": 2.362834177878735, "grad_norm": 0.6602093553969857, "learning_rate": 1.3123163340694955e-06, "loss": 0.11131514310836792, "step": 27045 }, { "epoch": 2.3632710117071465, "grad_norm": 0.6680681517641842, "learning_rate": 1.310600576710661e-06, "loss": 0.10301082134246826, "step": 27050 }, { "epoch": 2.3637078455355582, "grad_norm": 0.573542250333346, "learning_rate": 1.3088857725123128e-06, "loss": 0.09814460873603821, "step": 27055 }, { "epoch": 2.36414467936397, "grad_norm": 0.4376959310943506, "learning_rate": 1.3071719219174706e-06, "loss": 0.08831111192703248, "step": 27060 }, { "epoch": 2.3645815131923817, "grad_norm": 0.5749968942197319, "learning_rate": 1.305459025368912e-06, "loss": 0.09990142583847046, "step": 27065 }, { "epoch": 2.3650183470207935, "grad_norm": 0.6513535927298517, "learning_rate": 1.3037470833091638e-06, "loss": 0.10576865673065186, "step": 27070 }, { "epoch": 2.365455180849205, "grad_norm": 0.5201552032828537, "learning_rate": 1.3020360961805106e-06, "loss": 0.08339734673500061, "step": 27075 }, { "epoch": 2.3658920146776166, "grad_norm": 0.6147994398657535, "learning_rate": 1.3003260644249865e-06, "loss": 0.08955110311508178, "step": 27080 }, { "epoch": 2.3663288485060283, "grad_norm": 0.5705609945038537, "learning_rate": 1.2986169884843798e-06, "loss": 0.0924769401550293, "step": 27085 }, { "epoch": 2.36676568233444, "grad_norm": 0.5942924304433355, "learning_rate": 1.296908868800233e-06, "loss": 0.08913471698760986, "step": 27090 }, { "epoch": 2.367202516162852, "grad_norm": 0.7828645393212307, "learning_rate": 1.2952017058138378e-06, "loss": 0.10716032981872559, "step": 27095 }, { "epoch": 2.367639349991263, "grad_norm": 0.5501804452066797, "learning_rate": 1.2934954999662463e-06, "loss": 0.07688337564468384, "step": 27100 }, { "epoch": 2.368076183819675, "grad_norm": 0.7063895140212404, "learning_rate": 1.291790251698255e-06, "loss": 0.09146339893341064, "step": 27105 }, { "epoch": 2.3685130176480866, "grad_norm": 0.5690580727662676, "learning_rate": 1.290085961450419e-06, "loss": 0.08473104238510132, "step": 27110 }, { "epoch": 2.3689498514764984, "grad_norm": 0.6110719949007395, "learning_rate": 1.2883826296630436e-06, "loss": 0.0999828040599823, "step": 27115 }, { "epoch": 2.36938668530491, "grad_norm": 0.5180655241186207, "learning_rate": 1.2866802567761848e-06, "loss": 0.10059096813201904, "step": 27120 }, { "epoch": 2.3698235191333215, "grad_norm": 0.6542997616776478, "learning_rate": 1.2849788432296529e-06, "loss": 0.10334274768829346, "step": 27125 }, { "epoch": 2.370260352961733, "grad_norm": 0.5938376873360154, "learning_rate": 1.2832783894630112e-06, "loss": 0.0854438841342926, "step": 27130 }, { "epoch": 2.370697186790145, "grad_norm": 0.5927251913133589, "learning_rate": 1.2815788959155733e-06, "loss": 0.11388964653015136, "step": 27135 }, { "epoch": 2.3711340206185567, "grad_norm": 0.5778303764208369, "learning_rate": 1.279880363026404e-06, "loss": 0.10047683715820313, "step": 27140 }, { "epoch": 2.3715708544469685, "grad_norm": 0.6304691246474934, "learning_rate": 1.2781827912343247e-06, "loss": 0.08599916696548462, "step": 27145 }, { "epoch": 2.37200768827538, "grad_norm": 0.5500506088216569, "learning_rate": 1.2764861809779e-06, "loss": 0.09209025502204896, "step": 27150 }, { "epoch": 2.3724445221037915, "grad_norm": 0.6451897758889892, "learning_rate": 1.2747905326954545e-06, "loss": 0.10548183917999268, "step": 27155 }, { "epoch": 2.3728813559322033, "grad_norm": 0.6747024513863779, "learning_rate": 1.2730958468250593e-06, "loss": 0.09276244044303894, "step": 27160 }, { "epoch": 2.373318189760615, "grad_norm": 0.5305264371863607, "learning_rate": 1.2714021238045405e-06, "loss": 0.0973897099494934, "step": 27165 }, { "epoch": 2.373755023589027, "grad_norm": 0.6391998451767676, "learning_rate": 1.2697093640714697e-06, "loss": 0.08614925146102906, "step": 27170 }, { "epoch": 2.3741918574174385, "grad_norm": 0.5509013144850629, "learning_rate": 1.2680175680631778e-06, "loss": 0.09815778136253357, "step": 27175 }, { "epoch": 2.3746286912458503, "grad_norm": 0.4594544495066772, "learning_rate": 1.2663267362167393e-06, "loss": 0.09020568728446961, "step": 27180 }, { "epoch": 2.3750655250742616, "grad_norm": 0.6054951759642208, "learning_rate": 1.2646368689689837e-06, "loss": 0.1204339861869812, "step": 27185 }, { "epoch": 2.3755023589026734, "grad_norm": 0.5182320502962099, "learning_rate": 1.2629479667564893e-06, "loss": 0.08339364528656006, "step": 27190 }, { "epoch": 2.375939192731085, "grad_norm": 0.5974446564511242, "learning_rate": 1.261260030015584e-06, "loss": 0.07904205322265626, "step": 27195 }, { "epoch": 2.376376026559497, "grad_norm": 0.6274111150530433, "learning_rate": 1.2595730591823524e-06, "loss": 0.0944109320640564, "step": 27200 }, { "epoch": 2.3768128603879086, "grad_norm": 0.6303388845948383, "learning_rate": 1.2578870546926215e-06, "loss": 0.10686180591583253, "step": 27205 }, { "epoch": 2.37724969421632, "grad_norm": 0.6294578355718056, "learning_rate": 1.2562020169819766e-06, "loss": 0.09449238777160644, "step": 27210 }, { "epoch": 2.3776865280447317, "grad_norm": 0.6266734462359648, "learning_rate": 1.2545179464857443e-06, "loss": 0.09440528154373169, "step": 27215 }, { "epoch": 2.3781233618731434, "grad_norm": 0.5586977377415755, "learning_rate": 1.2528348436390097e-06, "loss": 0.10859537124633789, "step": 27220 }, { "epoch": 2.378560195701555, "grad_norm": 0.5828569290279157, "learning_rate": 1.2511527088766011e-06, "loss": 0.08300014734268188, "step": 27225 }, { "epoch": 2.378997029529967, "grad_norm": 0.6047015341690143, "learning_rate": 1.249471542633104e-06, "loss": 0.09421491622924805, "step": 27230 }, { "epoch": 2.3794338633583783, "grad_norm": 0.578393895356799, "learning_rate": 1.247791345342847e-06, "loss": 0.09313257932662963, "step": 27235 }, { "epoch": 2.37987069718679, "grad_norm": 0.5730549107740895, "learning_rate": 1.2461121174399115e-06, "loss": 0.08562020063400269, "step": 27240 }, { "epoch": 2.3803075310152018, "grad_norm": 0.5718441960061913, "learning_rate": 1.2444338593581279e-06, "loss": 0.09744424819946289, "step": 27245 }, { "epoch": 2.3807443648436135, "grad_norm": 0.620730291949675, "learning_rate": 1.242756571531074e-06, "loss": 0.09872318506240844, "step": 27250 }, { "epoch": 2.3811811986720253, "grad_norm": 0.5959104380255705, "learning_rate": 1.2410802543920825e-06, "loss": 0.11390353441238403, "step": 27255 }, { "epoch": 2.381618032500437, "grad_norm": 0.7387150441479939, "learning_rate": 1.2394049083742293e-06, "loss": 0.09859432578086853, "step": 27260 }, { "epoch": 2.3820548663288483, "grad_norm": 0.6974013794852651, "learning_rate": 1.237730533910344e-06, "loss": 0.1015714168548584, "step": 27265 }, { "epoch": 2.38249170015726, "grad_norm": 0.5991026193843613, "learning_rate": 1.2360571314330006e-06, "loss": 0.09895586967468262, "step": 27270 }, { "epoch": 2.382928533985672, "grad_norm": 0.5483663821156209, "learning_rate": 1.2343847013745286e-06, "loss": 0.08293647170066834, "step": 27275 }, { "epoch": 2.3833653678140836, "grad_norm": 0.5989940925075, "learning_rate": 1.2327132441669964e-06, "loss": 0.09046610593795776, "step": 27280 }, { "epoch": 2.3838022016424953, "grad_norm": 0.6608406206595174, "learning_rate": 1.231042760242232e-06, "loss": 0.10336818695068359, "step": 27285 }, { "epoch": 2.384239035470907, "grad_norm": 0.5684044697213645, "learning_rate": 1.2293732500318046e-06, "loss": 0.11153534650802613, "step": 27290 }, { "epoch": 2.3846758692993184, "grad_norm": 0.5999796498157646, "learning_rate": 1.2277047139670323e-06, "loss": 0.10433975458145142, "step": 27295 }, { "epoch": 2.38511270312773, "grad_norm": 0.6562157872836442, "learning_rate": 1.2260371524789866e-06, "loss": 0.07653347253799439, "step": 27300 }, { "epoch": 2.385549536956142, "grad_norm": 0.6031445519954228, "learning_rate": 1.2243705659984828e-06, "loss": 0.10427500009536743, "step": 27305 }, { "epoch": 2.3859863707845537, "grad_norm": 0.7219406328985998, "learning_rate": 1.222704954956085e-06, "loss": 0.10722914934158326, "step": 27310 }, { "epoch": 2.3864232046129654, "grad_norm": 0.6022600752442069, "learning_rate": 1.2210403197821041e-06, "loss": 0.10509402751922607, "step": 27315 }, { "epoch": 2.3868600384413767, "grad_norm": 0.6464116426097053, "learning_rate": 1.2193766609066039e-06, "loss": 0.11009006500244141, "step": 27320 }, { "epoch": 2.3872968722697885, "grad_norm": 0.5974801087292689, "learning_rate": 1.2177139787593884e-06, "loss": 0.10354759693145751, "step": 27325 }, { "epoch": 2.3877337060982002, "grad_norm": 0.5577888979415592, "learning_rate": 1.216052273770018e-06, "loss": 0.10036476850509643, "step": 27330 }, { "epoch": 2.388170539926612, "grad_norm": 0.7434118949648402, "learning_rate": 1.2143915463677936e-06, "loss": 0.09106194972991943, "step": 27335 }, { "epoch": 2.3886073737550237, "grad_norm": 0.5696754838019179, "learning_rate": 1.2127317969817658e-06, "loss": 0.09285629987716675, "step": 27340 }, { "epoch": 2.389044207583435, "grad_norm": 0.5127522493039155, "learning_rate": 1.2110730260407333e-06, "loss": 0.10212194919586182, "step": 27345 }, { "epoch": 2.389481041411847, "grad_norm": 0.5752452407534219, "learning_rate": 1.2094152339732395e-06, "loss": 0.08509783744812012, "step": 27350 }, { "epoch": 2.3899178752402586, "grad_norm": 0.5040041341842953, "learning_rate": 1.2077584212075804e-06, "loss": 0.07299817204475403, "step": 27355 }, { "epoch": 2.3903547090686703, "grad_norm": 0.5108315993063944, "learning_rate": 1.2061025881717919e-06, "loss": 0.09916231632232667, "step": 27360 }, { "epoch": 2.390791542897082, "grad_norm": 0.606143631700799, "learning_rate": 1.2044477352936635e-06, "loss": 0.09610548019409179, "step": 27365 }, { "epoch": 2.3912283767254934, "grad_norm": 0.6191064375738443, "learning_rate": 1.2027938630007268e-06, "loss": 0.09186611771583557, "step": 27370 }, { "epoch": 2.391665210553905, "grad_norm": 0.5814483227872512, "learning_rate": 1.2011409717202617e-06, "loss": 0.08970087766647339, "step": 27375 }, { "epoch": 2.392102044382317, "grad_norm": 0.5863514691165879, "learning_rate": 1.1994890618792926e-06, "loss": 0.09921976327896118, "step": 27380 }, { "epoch": 2.3925388782107286, "grad_norm": 0.5422590934130064, "learning_rate": 1.1978381339045958e-06, "loss": 0.09698427319526673, "step": 27385 }, { "epoch": 2.3929757120391404, "grad_norm": 0.6456035090931656, "learning_rate": 1.1961881882226884e-06, "loss": 0.09815672636032105, "step": 27390 }, { "epoch": 2.393412545867552, "grad_norm": 0.6045383250115457, "learning_rate": 1.1945392252598348e-06, "loss": 0.10657508373260498, "step": 27395 }, { "epoch": 2.393849379695964, "grad_norm": 0.6670600840717185, "learning_rate": 1.1928912454420506e-06, "loss": 0.0956741988658905, "step": 27400 }, { "epoch": 2.394286213524375, "grad_norm": 0.6460411116299969, "learning_rate": 1.1912442491950872e-06, "loss": 0.09279995560646057, "step": 27405 }, { "epoch": 2.394723047352787, "grad_norm": 0.6226071655884449, "learning_rate": 1.189598236944452e-06, "loss": 0.0677451252937317, "step": 27410 }, { "epoch": 2.3951598811811987, "grad_norm": 0.5326667139856435, "learning_rate": 1.1879532091153922e-06, "loss": 0.10666660070419312, "step": 27415 }, { "epoch": 2.3955967150096105, "grad_norm": 0.5755800645658316, "learning_rate": 1.1863091661329045e-06, "loss": 0.08925076127052307, "step": 27420 }, { "epoch": 2.396033548838022, "grad_norm": 0.5552720087217736, "learning_rate": 1.184666108421727e-06, "loss": 0.09525283575057983, "step": 27425 }, { "epoch": 2.3964703826664335, "grad_norm": 0.5203695281307353, "learning_rate": 1.1830240364063483e-06, "loss": 0.1073268175125122, "step": 27430 }, { "epoch": 2.3969072164948453, "grad_norm": 0.5332537655519484, "learning_rate": 1.181382950510998e-06, "loss": 0.0972102403640747, "step": 27435 }, { "epoch": 2.397344050323257, "grad_norm": 0.5888170655981532, "learning_rate": 1.179742851159652e-06, "loss": 0.11594449281692505, "step": 27440 }, { "epoch": 2.397780884151669, "grad_norm": 0.6420962369544476, "learning_rate": 1.178103738776033e-06, "loss": 0.10313978195190429, "step": 27445 }, { "epoch": 2.3982177179800805, "grad_norm": 0.6583917040758697, "learning_rate": 1.176465613783605e-06, "loss": 0.09178286790847778, "step": 27450 }, { "epoch": 2.398654551808492, "grad_norm": 0.5724303304027849, "learning_rate": 1.174828476605583e-06, "loss": 0.09326913952827454, "step": 27455 }, { "epoch": 2.3990913856369036, "grad_norm": 0.6365171855824459, "learning_rate": 1.1731923276649192e-06, "loss": 0.09458954930305481, "step": 27460 }, { "epoch": 2.3995282194653154, "grad_norm": 0.5615436258791979, "learning_rate": 1.1715571673843201e-06, "loss": 0.09539632797241211, "step": 27465 }, { "epoch": 2.399965053293727, "grad_norm": 0.5783887591746684, "learning_rate": 1.1699229961862253e-06, "loss": 0.08900794386863708, "step": 27470 }, { "epoch": 2.400401887122139, "grad_norm": 0.6256065467112757, "learning_rate": 1.1682898144928283e-06, "loss": 0.10530157089233398, "step": 27475 }, { "epoch": 2.40083872095055, "grad_norm": 0.6067916859825169, "learning_rate": 1.1666576227260618e-06, "loss": 0.0818753957748413, "step": 27480 }, { "epoch": 2.401275554778962, "grad_norm": 0.5746734688764272, "learning_rate": 1.1650264213076056e-06, "loss": 0.07301689386367798, "step": 27485 }, { "epoch": 2.4017123886073737, "grad_norm": 0.5096600541667973, "learning_rate": 1.1633962106588825e-06, "loss": 0.09646118879318237, "step": 27490 }, { "epoch": 2.4021492224357854, "grad_norm": 0.4841160523569149, "learning_rate": 1.1617669912010581e-06, "loss": 0.10701429843902588, "step": 27495 }, { "epoch": 2.402586056264197, "grad_norm": 0.5945263506336151, "learning_rate": 1.1601387633550443e-06, "loss": 0.09987261295318603, "step": 27500 }, { "epoch": 2.403022890092609, "grad_norm": 0.5999813110524695, "learning_rate": 1.1585115275414927e-06, "loss": 0.10238804817199706, "step": 27505 }, { "epoch": 2.4034597239210203, "grad_norm": 0.6656912329350592, "learning_rate": 1.1568852841808054e-06, "loss": 0.07455369234085082, "step": 27510 }, { "epoch": 2.403896557749432, "grad_norm": 0.6209375545596345, "learning_rate": 1.1552600336931213e-06, "loss": 0.10562161207199097, "step": 27515 }, { "epoch": 2.4043333915778438, "grad_norm": 0.5032266711372145, "learning_rate": 1.153635776498328e-06, "loss": 0.08861311674118041, "step": 27520 }, { "epoch": 2.4047702254062555, "grad_norm": 0.5931271141573267, "learning_rate": 1.152012513016052e-06, "loss": 0.07043415307998657, "step": 27525 }, { "epoch": 2.4052070592346673, "grad_norm": 0.5782100626157134, "learning_rate": 1.150390243665669e-06, "loss": 0.11204577684402466, "step": 27530 }, { "epoch": 2.405643893063079, "grad_norm": 0.6277584809590299, "learning_rate": 1.1487689688662883e-06, "loss": 0.09645596146583557, "step": 27535 }, { "epoch": 2.4060807268914903, "grad_norm": 0.593861361986049, "learning_rate": 1.1471486890367728e-06, "loss": 0.08663063049316407, "step": 27540 }, { "epoch": 2.406517560719902, "grad_norm": 0.574167260636612, "learning_rate": 1.1455294045957217e-06, "loss": 0.08768547773361206, "step": 27545 }, { "epoch": 2.406954394548314, "grad_norm": 0.6420695519319153, "learning_rate": 1.143911115961478e-06, "loss": 0.10093027353286743, "step": 27550 }, { "epoch": 2.4073912283767256, "grad_norm": 0.586613702570237, "learning_rate": 1.1422938235521314e-06, "loss": 0.10069177150726319, "step": 27555 }, { "epoch": 2.4078280622051373, "grad_norm": 0.6170416458730263, "learning_rate": 1.1406775277855088e-06, "loss": 0.08860287070274353, "step": 27560 }, { "epoch": 2.4082648960335487, "grad_norm": 0.626181101860569, "learning_rate": 1.139062229079183e-06, "loss": 0.09160822629928589, "step": 27565 }, { "epoch": 2.4087017298619604, "grad_norm": 0.5400409971886478, "learning_rate": 1.1374479278504664e-06, "loss": 0.09039531350135803, "step": 27570 }, { "epoch": 2.409138563690372, "grad_norm": 0.5959989575061327, "learning_rate": 1.1358346245164186e-06, "loss": 0.07296168804168701, "step": 27575 }, { "epoch": 2.409575397518784, "grad_norm": 0.5011758246740396, "learning_rate": 1.134222319493835e-06, "loss": 0.11114460229873657, "step": 27580 }, { "epoch": 2.4100122313471957, "grad_norm": 0.6947456247026016, "learning_rate": 1.13261101319926e-06, "loss": 0.08309100866317749, "step": 27585 }, { "epoch": 2.410449065175607, "grad_norm": 0.6777592008001626, "learning_rate": 1.1310007060489747e-06, "loss": 0.1107419490814209, "step": 27590 }, { "epoch": 2.4108858990040187, "grad_norm": 0.6527530967796552, "learning_rate": 1.1293913984590032e-06, "loss": 0.1165863037109375, "step": 27595 }, { "epoch": 2.4113227328324305, "grad_norm": 0.5389547939272634, "learning_rate": 1.1277830908451126e-06, "loss": 0.11000297069549561, "step": 27600 }, { "epoch": 2.4117595666608422, "grad_norm": 0.646646267480768, "learning_rate": 1.1261757836228088e-06, "loss": 0.07751631736755371, "step": 27605 }, { "epoch": 2.412196400489254, "grad_norm": 0.6094001816354095, "learning_rate": 1.1245694772073451e-06, "loss": 0.09697536826133728, "step": 27610 }, { "epoch": 2.4126332343176657, "grad_norm": 0.608450268663945, "learning_rate": 1.1229641720137096e-06, "loss": 0.10271613597869873, "step": 27615 }, { "epoch": 2.413070068146077, "grad_norm": 0.5359096508378005, "learning_rate": 1.1213598684566374e-06, "loss": 0.09133671522140503, "step": 27620 }, { "epoch": 2.413506901974489, "grad_norm": 0.6514377199883974, "learning_rate": 1.1197565669506006e-06, "loss": 0.09653361439704895, "step": 27625 }, { "epoch": 2.4139437358029006, "grad_norm": 0.7063569691736032, "learning_rate": 1.118154267909814e-06, "loss": 0.11555386781692505, "step": 27630 }, { "epoch": 2.4143805696313123, "grad_norm": 0.5086536031495886, "learning_rate": 1.1165529717482314e-06, "loss": 0.10300729274749756, "step": 27635 }, { "epoch": 2.414817403459724, "grad_norm": 0.5316929599355419, "learning_rate": 1.1149526788795533e-06, "loss": 0.09006168842315673, "step": 27640 }, { "epoch": 2.415254237288136, "grad_norm": 0.5524085803294952, "learning_rate": 1.1133533897172154e-06, "loss": 0.08563013076782226, "step": 27645 }, { "epoch": 2.415691071116547, "grad_norm": 0.618441382484983, "learning_rate": 1.1117551046743942e-06, "loss": 0.08038858771324157, "step": 27650 }, { "epoch": 2.416127904944959, "grad_norm": 0.6192978850777862, "learning_rate": 1.1101578241640126e-06, "loss": 0.08011565208435059, "step": 27655 }, { "epoch": 2.4165647387733706, "grad_norm": 0.5602849528175965, "learning_rate": 1.1085615485987245e-06, "loss": 0.11703295707702636, "step": 27660 }, { "epoch": 2.4170015726017824, "grad_norm": 0.4695316136837893, "learning_rate": 1.1069662783909335e-06, "loss": 0.11947928667068482, "step": 27665 }, { "epoch": 2.417438406430194, "grad_norm": 0.6338906894609979, "learning_rate": 1.1053720139527768e-06, "loss": 0.08408350944519043, "step": 27670 }, { "epoch": 2.4178752402586055, "grad_norm": 0.5533330059846208, "learning_rate": 1.103778755696137e-06, "loss": 0.06747633814811707, "step": 27675 }, { "epoch": 2.418312074087017, "grad_norm": 0.5993458993849522, "learning_rate": 1.102186504032632e-06, "loss": 0.08678894639015197, "step": 27680 }, { "epoch": 2.418748907915429, "grad_norm": 0.7123311751959112, "learning_rate": 1.1005952593736236e-06, "loss": 0.11259196996688843, "step": 27685 }, { "epoch": 2.4191857417438407, "grad_norm": 0.5671434688936569, "learning_rate": 1.0990050221302106e-06, "loss": 0.08915749192237854, "step": 27690 }, { "epoch": 2.4196225755722525, "grad_norm": 0.5928901071919045, "learning_rate": 1.0974157927132335e-06, "loss": 0.095027494430542, "step": 27695 }, { "epoch": 2.4200594094006638, "grad_norm": 0.6932352225569977, "learning_rate": 1.09582757153327e-06, "loss": 0.07818903923034667, "step": 27700 }, { "epoch": 2.4204962432290755, "grad_norm": 0.5862112181901538, "learning_rate": 1.0942403590006378e-06, "loss": 0.09714151620864868, "step": 27705 }, { "epoch": 2.4209330770574873, "grad_norm": 0.6503916599518855, "learning_rate": 1.0926541555253988e-06, "loss": 0.07598328590393066, "step": 27710 }, { "epoch": 2.421369910885899, "grad_norm": 0.4918516407154056, "learning_rate": 1.091068961517347e-06, "loss": 0.09174538850784301, "step": 27715 }, { "epoch": 2.421806744714311, "grad_norm": 0.5100853121537322, "learning_rate": 1.0894847773860235e-06, "loss": 0.08920915126800537, "step": 27720 }, { "epoch": 2.4222435785427225, "grad_norm": 0.71814798393924, "learning_rate": 1.0879016035406982e-06, "loss": 0.07438524961471557, "step": 27725 }, { "epoch": 2.422680412371134, "grad_norm": 0.48746396269730974, "learning_rate": 1.0863194403903893e-06, "loss": 0.10258209705352783, "step": 27730 }, { "epoch": 2.4231172461995456, "grad_norm": 0.6040903761963342, "learning_rate": 1.084738288343849e-06, "loss": 0.10735454559326171, "step": 27735 }, { "epoch": 2.4235540800279574, "grad_norm": 0.6956276258393499, "learning_rate": 1.083158147809572e-06, "loss": 0.09800810813903808, "step": 27740 }, { "epoch": 2.423990913856369, "grad_norm": 0.4956683057239343, "learning_rate": 1.0815790191957876e-06, "loss": 0.1123807430267334, "step": 27745 }, { "epoch": 2.424427747684781, "grad_norm": 0.5705305563429725, "learning_rate": 1.0800009029104663e-06, "loss": 0.08594449758529663, "step": 27750 }, { "epoch": 2.4248645815131926, "grad_norm": 0.6467963814551787, "learning_rate": 1.078423799361315e-06, "loss": 0.09025561809539795, "step": 27755 }, { "epoch": 2.425301415341604, "grad_norm": 0.6164119662963885, "learning_rate": 1.07684770895578e-06, "loss": 0.10572361946105957, "step": 27760 }, { "epoch": 2.4257382491700157, "grad_norm": 0.6783420975209274, "learning_rate": 1.0752726321010482e-06, "loss": 0.08744306564331054, "step": 27765 }, { "epoch": 2.4261750829984274, "grad_norm": 0.5175038061024196, "learning_rate": 1.0736985692040392e-06, "loss": 0.11002395153045655, "step": 27770 }, { "epoch": 2.426611916826839, "grad_norm": 0.575483118648545, "learning_rate": 1.0721255206714177e-06, "loss": 0.0800854206085205, "step": 27775 }, { "epoch": 2.427048750655251, "grad_norm": 0.6838632272557265, "learning_rate": 1.0705534869095791e-06, "loss": 0.11120493412017822, "step": 27780 }, { "epoch": 2.4274855844836623, "grad_norm": 0.5708433040922453, "learning_rate": 1.0689824683246648e-06, "loss": 0.10184752941131592, "step": 27785 }, { "epoch": 2.427922418312074, "grad_norm": 0.5382169156792871, "learning_rate": 1.067412465322543e-06, "loss": 0.09486312866210937, "step": 27790 }, { "epoch": 2.4283592521404858, "grad_norm": 0.8701159034072911, "learning_rate": 1.0658434783088305e-06, "loss": 0.11670379638671875, "step": 27795 }, { "epoch": 2.4287960859688975, "grad_norm": 0.4688255087078251, "learning_rate": 1.064275507688875e-06, "loss": 0.09417620897293091, "step": 27800 }, { "epoch": 2.4292329197973093, "grad_norm": 0.7071555167557696, "learning_rate": 1.0627085538677622e-06, "loss": 0.08052328824996949, "step": 27805 }, { "epoch": 2.4296697536257206, "grad_norm": 0.5378508480591445, "learning_rate": 1.0611426172503198e-06, "loss": 0.1071918249130249, "step": 27810 }, { "epoch": 2.4301065874541323, "grad_norm": 0.5748301522865533, "learning_rate": 1.059577698241107e-06, "loss": 0.10523087978363037, "step": 27815 }, { "epoch": 2.430543421282544, "grad_norm": 0.5818302199359647, "learning_rate": 1.0580137972444226e-06, "loss": 0.11155064105987549, "step": 27820 }, { "epoch": 2.430980255110956, "grad_norm": 0.5442392038269294, "learning_rate": 1.0564509146643015e-06, "loss": 0.07767424583435059, "step": 27825 }, { "epoch": 2.4314170889393676, "grad_norm": 0.7028871969559113, "learning_rate": 1.0548890509045178e-06, "loss": 0.09996860027313233, "step": 27830 }, { "epoch": 2.431853922767779, "grad_norm": 0.547166050134093, "learning_rate": 1.0533282063685785e-06, "loss": 0.08507631421089172, "step": 27835 }, { "epoch": 2.4322907565961907, "grad_norm": 0.6350694845796384, "learning_rate": 1.0517683814597329e-06, "loss": 0.11451177597045899, "step": 27840 }, { "epoch": 2.4327275904246024, "grad_norm": 0.5715278158726315, "learning_rate": 1.0502095765809612e-06, "loss": 0.10626626014709473, "step": 27845 }, { "epoch": 2.433164424253014, "grad_norm": 0.663964554024305, "learning_rate": 1.0486517921349826e-06, "loss": 0.08268136382102967, "step": 27850 }, { "epoch": 2.433601258081426, "grad_norm": 0.6081906100591472, "learning_rate": 1.0470950285242531e-06, "loss": 0.09161261320114136, "step": 27855 }, { "epoch": 2.4340380919098377, "grad_norm": 0.5644539645902832, "learning_rate": 1.0455392861509622e-06, "loss": 0.09011242389678956, "step": 27860 }, { "epoch": 2.4344749257382494, "grad_norm": 0.5930742697081743, "learning_rate": 1.043984565417041e-06, "loss": 0.08165923953056335, "step": 27865 }, { "epoch": 2.4349117595666607, "grad_norm": 0.49852710607993767, "learning_rate": 1.0424308667241506e-06, "loss": 0.09476639032363891, "step": 27870 }, { "epoch": 2.4353485933950725, "grad_norm": 0.5360275436284375, "learning_rate": 1.0408781904736925e-06, "loss": 0.09438177347183227, "step": 27875 }, { "epoch": 2.4357854272234842, "grad_norm": 0.5452310755875827, "learning_rate": 1.0393265370668027e-06, "loss": 0.09987478852272033, "step": 27880 }, { "epoch": 2.436222261051896, "grad_norm": 0.5850252186888393, "learning_rate": 1.037775906904351e-06, "loss": 0.07265278100967407, "step": 27885 }, { "epoch": 2.4366590948803077, "grad_norm": 0.5671846130281141, "learning_rate": 1.0362263003869437e-06, "loss": 0.07400538325309754, "step": 27890 }, { "epoch": 2.437095928708719, "grad_norm": 0.5548247073525688, "learning_rate": 1.0346777179149264e-06, "loss": 0.09225984811782836, "step": 27895 }, { "epoch": 2.437532762537131, "grad_norm": 0.5976951892357694, "learning_rate": 1.033130159888376e-06, "loss": 0.09588711857795715, "step": 27900 }, { "epoch": 2.4379695963655426, "grad_norm": 0.46777942546427526, "learning_rate": 1.0315836267071034e-06, "loss": 0.09477322101593018, "step": 27905 }, { "epoch": 2.4384064301939543, "grad_norm": 0.765759706548577, "learning_rate": 1.0300381187706603e-06, "loss": 0.12023993730545043, "step": 27910 }, { "epoch": 2.438843264022366, "grad_norm": 0.634050366722205, "learning_rate": 1.0284936364783299e-06, "loss": 0.11470520496368408, "step": 27915 }, { "epoch": 2.4392800978507774, "grad_norm": 0.6414246577842795, "learning_rate": 1.0269501802291298e-06, "loss": 0.09798209071159363, "step": 27920 }, { "epoch": 2.439716931679189, "grad_norm": 0.5991289721495795, "learning_rate": 1.0254077504218134e-06, "loss": 0.09459285140037536, "step": 27925 }, { "epoch": 2.440153765507601, "grad_norm": 0.5282679626581654, "learning_rate": 1.023866347454871e-06, "loss": 0.10572261810302734, "step": 27930 }, { "epoch": 2.4405905993360126, "grad_norm": 0.510343184629431, "learning_rate": 1.0223259717265233e-06, "loss": 0.10928304195404052, "step": 27935 }, { "epoch": 2.4410274331644244, "grad_norm": 0.7245248479056351, "learning_rate": 1.0207866236347308e-06, "loss": 0.0744330108165741, "step": 27940 }, { "epoch": 2.4414642669928357, "grad_norm": 0.5676248268420738, "learning_rate": 1.0192483035771849e-06, "loss": 0.0861852526664734, "step": 27945 }, { "epoch": 2.4419011008212475, "grad_norm": 0.7276450398942728, "learning_rate": 1.0177110119513116e-06, "loss": 0.08166487216949463, "step": 27950 }, { "epoch": 2.442337934649659, "grad_norm": 0.738521324588038, "learning_rate": 1.0161747491542728e-06, "loss": 0.0994720458984375, "step": 27955 }, { "epoch": 2.442774768478071, "grad_norm": 0.6176276208352419, "learning_rate": 1.0146395155829614e-06, "loss": 0.11197385787963868, "step": 27960 }, { "epoch": 2.4432116023064827, "grad_norm": 0.5587049932034687, "learning_rate": 1.01310531163401e-06, "loss": 0.09351379871368408, "step": 27965 }, { "epoch": 2.4436484361348945, "grad_norm": 0.7296408337545518, "learning_rate": 1.011572137703779e-06, "loss": 0.11565127372741699, "step": 27970 }, { "epoch": 2.4440852699633058, "grad_norm": 0.5538332876754302, "learning_rate": 1.0100399941883698e-06, "loss": 0.10174760818481446, "step": 27975 }, { "epoch": 2.4445221037917175, "grad_norm": 0.512944334552195, "learning_rate": 1.008508881483608e-06, "loss": 0.08279865980148315, "step": 27980 }, { "epoch": 2.4449589376201293, "grad_norm": 0.5941742089276061, "learning_rate": 1.0069787999850622e-06, "loss": 0.10330547094345092, "step": 27985 }, { "epoch": 2.445395771448541, "grad_norm": 0.5154000292542149, "learning_rate": 1.0054497500880279e-06, "loss": 0.07441055774688721, "step": 27990 }, { "epoch": 2.445832605276953, "grad_norm": 0.6311152697867044, "learning_rate": 1.0039217321875393e-06, "loss": 0.10595866441726684, "step": 27995 }, { "epoch": 2.4462694391053645, "grad_norm": 0.742020593169241, "learning_rate": 1.0023947466783613e-06, "loss": 0.09351729154586792, "step": 28000 }, { "epoch": 2.446706272933776, "grad_norm": 0.5418781058497735, "learning_rate": 1.0008687939549915e-06, "loss": 0.0901415467262268, "step": 28005 }, { "epoch": 2.4471431067621876, "grad_norm": 0.7640326056669031, "learning_rate": 9.993438744116602e-07, "loss": 0.09594534635543824, "step": 28010 }, { "epoch": 2.4475799405905994, "grad_norm": 0.5814267923314836, "learning_rate": 9.97819988442335e-07, "loss": 0.09250825643539429, "step": 28015 }, { "epoch": 2.448016774419011, "grad_norm": 0.5768704950901766, "learning_rate": 9.96297136440712e-07, "loss": 0.09862352609634399, "step": 28020 }, { "epoch": 2.448453608247423, "grad_norm": 0.6234069986936221, "learning_rate": 9.947753188002202e-07, "loss": 0.08883090019226074, "step": 28025 }, { "epoch": 2.448890442075834, "grad_norm": 0.7619925512902954, "learning_rate": 9.932545359140256e-07, "loss": 0.10911276340484619, "step": 28030 }, { "epoch": 2.449327275904246, "grad_norm": 0.5196029867948987, "learning_rate": 9.917347881750222e-07, "loss": 0.09069467186927796, "step": 28035 }, { "epoch": 2.4497641097326577, "grad_norm": 0.5439200629128776, "learning_rate": 9.902160759758417e-07, "loss": 0.10884724855422974, "step": 28040 }, { "epoch": 2.4502009435610694, "grad_norm": 0.4918297088524469, "learning_rate": 9.886983997088405e-07, "loss": 0.12221784591674804, "step": 28045 }, { "epoch": 2.450637777389481, "grad_norm": 0.5956609694305549, "learning_rate": 9.871817597661153e-07, "loss": 0.11012871265411377, "step": 28050 }, { "epoch": 2.4510746112178925, "grad_norm": 0.6199736840056905, "learning_rate": 9.856661565394903e-07, "loss": 0.0825860321521759, "step": 28055 }, { "epoch": 2.4515114450463042, "grad_norm": 0.5716932164875659, "learning_rate": 9.841515904205228e-07, "loss": 0.07308807373046874, "step": 28060 }, { "epoch": 2.451948278874716, "grad_norm": 0.6652867233346111, "learning_rate": 9.826380618005048e-07, "loss": 0.0978392481803894, "step": 28065 }, { "epoch": 2.4523851127031278, "grad_norm": 0.5201347328482534, "learning_rate": 9.81125571070457e-07, "loss": 0.08766233921051025, "step": 28070 }, { "epoch": 2.4528219465315395, "grad_norm": 0.7361153913433982, "learning_rate": 9.796141186211327e-07, "loss": 0.08086459636688233, "step": 28075 }, { "epoch": 2.4532587803599513, "grad_norm": 1.0340763193484521, "learning_rate": 9.781037048430164e-07, "loss": 0.09038331508636474, "step": 28080 }, { "epoch": 2.4536956141883626, "grad_norm": 0.569420838242305, "learning_rate": 9.765943301263276e-07, "loss": 0.11224508285522461, "step": 28085 }, { "epoch": 2.4541324480167743, "grad_norm": 0.4411353384848851, "learning_rate": 9.750859948610126e-07, "loss": 0.07854675054550171, "step": 28090 }, { "epoch": 2.454569281845186, "grad_norm": 0.5734238852486508, "learning_rate": 9.735786994367535e-07, "loss": 0.1242908239364624, "step": 28095 }, { "epoch": 2.455006115673598, "grad_norm": 0.8459085051766594, "learning_rate": 9.720724442429614e-07, "loss": 0.11648468971252442, "step": 28100 }, { "epoch": 2.4554429495020096, "grad_norm": 0.54179346108332, "learning_rate": 9.70567229668778e-07, "loss": 0.07992650270462036, "step": 28105 }, { "epoch": 2.4558797833304213, "grad_norm": 0.6809096157421967, "learning_rate": 9.690630561030767e-07, "loss": 0.08663623929023742, "step": 28110 }, { "epoch": 2.4563166171588326, "grad_norm": 0.5746350573225818, "learning_rate": 9.675599239344646e-07, "loss": 0.07491111755371094, "step": 28115 }, { "epoch": 2.4567534509872444, "grad_norm": 0.5663638651836558, "learning_rate": 9.660578335512766e-07, "loss": 0.10335843563079834, "step": 28120 }, { "epoch": 2.457190284815656, "grad_norm": 0.517413565413244, "learning_rate": 9.645567853415778e-07, "loss": 0.07985371351242065, "step": 28125 }, { "epoch": 2.457627118644068, "grad_norm": 0.6376544070356143, "learning_rate": 9.630567796931683e-07, "loss": 0.0764091968536377, "step": 28130 }, { "epoch": 2.4580639524724797, "grad_norm": 0.5035178781976858, "learning_rate": 9.615578169935757e-07, "loss": 0.08778886198997497, "step": 28135 }, { "epoch": 2.458500786300891, "grad_norm": 0.57662750698472, "learning_rate": 9.600598976300574e-07, "loss": 0.09947375655174255, "step": 28140 }, { "epoch": 2.4589376201293027, "grad_norm": 0.6513998930338266, "learning_rate": 9.58563021989602e-07, "loss": 0.09246759414672852, "step": 28145 }, { "epoch": 2.4593744539577145, "grad_norm": 0.5856560817849376, "learning_rate": 9.570671904589319e-07, "loss": 0.09995664358139038, "step": 28150 }, { "epoch": 2.4598112877861262, "grad_norm": 0.4894821856410595, "learning_rate": 9.555724034244952e-07, "loss": 0.09782021641731262, "step": 28155 }, { "epoch": 2.460248121614538, "grad_norm": 0.49000165015952046, "learning_rate": 9.5407866127247e-07, "loss": 0.12919220924377442, "step": 28160 }, { "epoch": 2.4606849554429493, "grad_norm": 0.4670971697438699, "learning_rate": 9.525859643887697e-07, "loss": 0.11480799913406373, "step": 28165 }, { "epoch": 2.461121789271361, "grad_norm": 0.6647547835518735, "learning_rate": 9.510943131590322e-07, "loss": 0.10384567975997924, "step": 28170 }, { "epoch": 2.461558623099773, "grad_norm": 0.5699784363259716, "learning_rate": 9.496037079686282e-07, "loss": 0.11264947652816773, "step": 28175 }, { "epoch": 2.4619954569281846, "grad_norm": 0.4895580687128312, "learning_rate": 9.481141492026552e-07, "loss": 0.09543229341506958, "step": 28180 }, { "epoch": 2.4624322907565963, "grad_norm": 0.7314247917784321, "learning_rate": 9.466256372459448e-07, "loss": 0.0986121952533722, "step": 28185 }, { "epoch": 2.4628691245850076, "grad_norm": 0.7130335078723936, "learning_rate": 9.451381724830538e-07, "loss": 0.09293516874313354, "step": 28190 }, { "epoch": 2.4633059584134194, "grad_norm": 0.5614541232727188, "learning_rate": 9.436517552982727e-07, "loss": 0.10733922719955444, "step": 28195 }, { "epoch": 2.463742792241831, "grad_norm": 0.6591753512760371, "learning_rate": 9.42166386075618e-07, "loss": 0.09555697441101074, "step": 28200 }, { "epoch": 2.464179626070243, "grad_norm": 0.5190296474854518, "learning_rate": 9.406820651988358e-07, "loss": 0.09270339608192443, "step": 28205 }, { "epoch": 2.4646164598986546, "grad_norm": 0.5654519645484583, "learning_rate": 9.391987930514012e-07, "loss": 0.0969977080821991, "step": 28210 }, { "epoch": 2.4650532937270664, "grad_norm": 0.6628017610032451, "learning_rate": 9.377165700165219e-07, "loss": 0.08452988266944886, "step": 28215 }, { "epoch": 2.465490127555478, "grad_norm": 0.559597919352236, "learning_rate": 9.362353964771309e-07, "loss": 0.08921302556991577, "step": 28220 }, { "epoch": 2.4659269613838894, "grad_norm": 0.6829919718061193, "learning_rate": 9.347552728158887e-07, "loss": 0.09036886096000671, "step": 28225 }, { "epoch": 2.466363795212301, "grad_norm": 0.6273028668376529, "learning_rate": 9.332761994151923e-07, "loss": 0.10980165004730225, "step": 28230 }, { "epoch": 2.466800629040713, "grad_norm": 0.5701284556103886, "learning_rate": 9.317981766571554e-07, "loss": 0.10565049648284912, "step": 28235 }, { "epoch": 2.4672374628691247, "grad_norm": 0.5612555778648414, "learning_rate": 9.303212049236309e-07, "loss": 0.08375682830810546, "step": 28240 }, { "epoch": 2.4676742966975365, "grad_norm": 0.6524976265329208, "learning_rate": 9.288452845961948e-07, "loss": 0.09567596912384033, "step": 28245 }, { "epoch": 2.4681111305259478, "grad_norm": 0.5886915874508452, "learning_rate": 9.273704160561542e-07, "loss": 0.0748673677444458, "step": 28250 }, { "epoch": 2.4685479643543595, "grad_norm": 0.49495450996411205, "learning_rate": 9.258965996845415e-07, "loss": 0.10616127252578736, "step": 28255 }, { "epoch": 2.4689847981827713, "grad_norm": 0.6197479876417751, "learning_rate": 9.244238358621199e-07, "loss": 0.09248495101928711, "step": 28260 }, { "epoch": 2.469421632011183, "grad_norm": 0.6224489694292751, "learning_rate": 9.229521249693774e-07, "loss": 0.09891651868820191, "step": 28265 }, { "epoch": 2.469858465839595, "grad_norm": 0.6024876970207191, "learning_rate": 9.214814673865352e-07, "loss": 0.0999288260936737, "step": 28270 }, { "epoch": 2.470295299668006, "grad_norm": 0.5629805551028714, "learning_rate": 9.200118634935379e-07, "loss": 0.08310307264328003, "step": 28275 }, { "epoch": 2.470732133496418, "grad_norm": 0.6774925250883045, "learning_rate": 9.185433136700583e-07, "loss": 0.11218135356903076, "step": 28280 }, { "epoch": 2.4711689673248296, "grad_norm": 0.691680359503978, "learning_rate": 9.170758182954998e-07, "loss": 0.0960921287536621, "step": 28285 }, { "epoch": 2.4716058011532414, "grad_norm": 0.623530460675515, "learning_rate": 9.156093777489894e-07, "loss": 0.11344515085220337, "step": 28290 }, { "epoch": 2.472042634981653, "grad_norm": 0.5750699544218514, "learning_rate": 9.141439924093875e-07, "loss": 0.09047149419784546, "step": 28295 }, { "epoch": 2.4724794688100644, "grad_norm": 0.49512731135109495, "learning_rate": 9.12679662655273e-07, "loss": 0.09807690978050232, "step": 28300 }, { "epoch": 2.472916302638476, "grad_norm": 0.5523536831093896, "learning_rate": 9.112163888649611e-07, "loss": 0.08832350373268127, "step": 28305 }, { "epoch": 2.473353136466888, "grad_norm": 0.8082615303099006, "learning_rate": 9.09754171416487e-07, "loss": 0.09378248453140259, "step": 28310 }, { "epoch": 2.4737899702952997, "grad_norm": 0.6733498618905623, "learning_rate": 9.082930106876192e-07, "loss": 0.0897972822189331, "step": 28315 }, { "epoch": 2.4742268041237114, "grad_norm": 0.5538296497085191, "learning_rate": 9.06832907055849e-07, "loss": 0.07345717549324035, "step": 28320 }, { "epoch": 2.474663637952123, "grad_norm": 0.7100005340165494, "learning_rate": 9.053738608983958e-07, "loss": 0.08931217193603516, "step": 28325 }, { "epoch": 2.4751004717805345, "grad_norm": 0.6492742150117522, "learning_rate": 9.03915872592206e-07, "loss": 0.08720067739486695, "step": 28330 }, { "epoch": 2.4755373056089462, "grad_norm": 1.053270701248243, "learning_rate": 9.024589425139508e-07, "loss": 0.10824763774871826, "step": 28335 }, { "epoch": 2.475974139437358, "grad_norm": 0.5173039455023534, "learning_rate": 9.010030710400319e-07, "loss": 0.0855729341506958, "step": 28340 }, { "epoch": 2.4764109732657698, "grad_norm": 0.5456198854686698, "learning_rate": 8.995482585465742e-07, "loss": 0.07644249796867371, "step": 28345 }, { "epoch": 2.4768478070941815, "grad_norm": 0.6165463460781718, "learning_rate": 8.980945054094315e-07, "loss": 0.09732311964035034, "step": 28350 }, { "epoch": 2.4772846409225933, "grad_norm": 0.6368368801012237, "learning_rate": 8.966418120041815e-07, "loss": 0.07760719060897828, "step": 28355 }, { "epoch": 2.4777214747510046, "grad_norm": 0.6224129745165138, "learning_rate": 8.951901787061295e-07, "loss": 0.08155871033668519, "step": 28360 }, { "epoch": 2.4781583085794163, "grad_norm": 0.6139578364531971, "learning_rate": 8.937396058903047e-07, "loss": 0.07463186383247375, "step": 28365 }, { "epoch": 2.478595142407828, "grad_norm": 0.5848550012595322, "learning_rate": 8.922900939314677e-07, "loss": 0.10554171800613403, "step": 28370 }, { "epoch": 2.47903197623624, "grad_norm": 0.5275320705620434, "learning_rate": 8.908416432040995e-07, "loss": 0.09957111477851868, "step": 28375 }, { "epoch": 2.4794688100646516, "grad_norm": 0.5973276417185128, "learning_rate": 8.89394254082408e-07, "loss": 0.12059800624847412, "step": 28380 }, { "epoch": 2.479905643893063, "grad_norm": 0.5853515222001273, "learning_rate": 8.879479269403301e-07, "loss": 0.10796036720275878, "step": 28385 }, { "epoch": 2.4803424777214746, "grad_norm": 0.72821027696441, "learning_rate": 8.865026621515254e-07, "loss": 0.10717933177947998, "step": 28390 }, { "epoch": 2.4807793115498864, "grad_norm": 0.5333000198415956, "learning_rate": 8.850584600893791e-07, "loss": 0.0928141713142395, "step": 28395 }, { "epoch": 2.481216145378298, "grad_norm": 0.5919178832401518, "learning_rate": 8.83615321127001e-07, "loss": 0.09824051856994628, "step": 28400 }, { "epoch": 2.48165297920671, "grad_norm": 0.7127554120745869, "learning_rate": 8.821732456372307e-07, "loss": 0.10156233310699463, "step": 28405 }, { "epoch": 2.482089813035121, "grad_norm": 0.5569292663938902, "learning_rate": 8.807322339926272e-07, "loss": 0.08693810701370239, "step": 28410 }, { "epoch": 2.482526646863533, "grad_norm": 0.5256136928204986, "learning_rate": 8.792922865654807e-07, "loss": 0.0997683584690094, "step": 28415 }, { "epoch": 2.4829634806919447, "grad_norm": 0.5953306752559716, "learning_rate": 8.778534037278008e-07, "loss": 0.08764536380767822, "step": 28420 }, { "epoch": 2.4834003145203565, "grad_norm": 0.5633056691655237, "learning_rate": 8.764155858513252e-07, "loss": 0.09665488600730895, "step": 28425 }, { "epoch": 2.4838371483487682, "grad_norm": 0.5284823389744078, "learning_rate": 8.749788333075154e-07, "loss": 0.07967618107795715, "step": 28430 }, { "epoch": 2.48427398217718, "grad_norm": 0.5426484441004581, "learning_rate": 8.735431464675575e-07, "loss": 0.08273351788520814, "step": 28435 }, { "epoch": 2.4847108160055913, "grad_norm": 0.5079263610695418, "learning_rate": 8.721085257023648e-07, "loss": 0.08811686038970948, "step": 28440 }, { "epoch": 2.485147649834003, "grad_norm": 0.6802962769546377, "learning_rate": 8.706749713825707e-07, "loss": 0.10010716915130616, "step": 28445 }, { "epoch": 2.485584483662415, "grad_norm": 0.5628586928390223, "learning_rate": 8.692424838785379e-07, "loss": 0.0883719801902771, "step": 28450 }, { "epoch": 2.4860213174908266, "grad_norm": 0.9698527065820324, "learning_rate": 8.678110635603499e-07, "loss": 0.09220257997512818, "step": 28455 }, { "epoch": 2.4864581513192383, "grad_norm": 0.5170887163453102, "learning_rate": 8.663807107978162e-07, "loss": 0.0964186429977417, "step": 28460 }, { "epoch": 2.48689498514765, "grad_norm": 0.6942012053896018, "learning_rate": 8.649514259604685e-07, "loss": 0.11709930896759033, "step": 28465 }, { "epoch": 2.4873318189760614, "grad_norm": 0.5635028460661928, "learning_rate": 8.635232094175672e-07, "loss": 0.10202881097793579, "step": 28470 }, { "epoch": 2.487768652804473, "grad_norm": 0.6704932010818542, "learning_rate": 8.620960615380913e-07, "loss": 0.11412866115570068, "step": 28475 }, { "epoch": 2.488205486632885, "grad_norm": 0.6216162560864494, "learning_rate": 8.606699826907456e-07, "loss": 0.10279321670532227, "step": 28480 }, { "epoch": 2.4886423204612966, "grad_norm": 0.6224376610695427, "learning_rate": 8.592449732439623e-07, "loss": 0.09180154800415039, "step": 28485 }, { "epoch": 2.4890791542897084, "grad_norm": 0.5020362806538557, "learning_rate": 8.578210335658898e-07, "loss": 0.07328060865402222, "step": 28490 }, { "epoch": 2.4895159881181197, "grad_norm": 0.5085109190389132, "learning_rate": 8.563981640244085e-07, "loss": 0.11262421607971192, "step": 28495 }, { "epoch": 2.4899528219465314, "grad_norm": 0.6734051036095478, "learning_rate": 8.549763649871151e-07, "loss": 0.0910420536994934, "step": 28500 }, { "epoch": 2.490389655774943, "grad_norm": 0.6377060916515495, "learning_rate": 8.535556368213354e-07, "loss": 0.1000588059425354, "step": 28505 }, { "epoch": 2.490826489603355, "grad_norm": 0.5332635879116734, "learning_rate": 8.521359798941148e-07, "loss": 0.10311840772628784, "step": 28510 }, { "epoch": 2.4912633234317667, "grad_norm": 0.5921089057245928, "learning_rate": 8.507173945722258e-07, "loss": 0.08712505102157593, "step": 28515 }, { "epoch": 2.491700157260178, "grad_norm": 0.5534207253171556, "learning_rate": 8.49299881222157e-07, "loss": 0.07576773166656495, "step": 28520 }, { "epoch": 2.4921369910885898, "grad_norm": 0.7845935963253355, "learning_rate": 8.478834402101283e-07, "loss": 0.11213595867156982, "step": 28525 }, { "epoch": 2.4925738249170015, "grad_norm": 0.6264588250459706, "learning_rate": 8.464680719020774e-07, "loss": 0.07813420295715331, "step": 28530 }, { "epoch": 2.4930106587454133, "grad_norm": 0.8601585123279919, "learning_rate": 8.450537766636651e-07, "loss": 0.10553075075149536, "step": 28535 }, { "epoch": 2.493447492573825, "grad_norm": 0.6055457727127491, "learning_rate": 8.436405548602788e-07, "loss": 0.08918556571006775, "step": 28540 }, { "epoch": 2.4938843264022363, "grad_norm": 0.6430242312995843, "learning_rate": 8.422284068570236e-07, "loss": 0.08932015299797058, "step": 28545 }, { "epoch": 2.494321160230648, "grad_norm": 0.49694916434650466, "learning_rate": 8.40817333018733e-07, "loss": 0.09766232967376709, "step": 28550 }, { "epoch": 2.49475799405906, "grad_norm": 0.5263343100285419, "learning_rate": 8.394073337099545e-07, "loss": 0.074478018283844, "step": 28555 }, { "epoch": 2.4951948278874716, "grad_norm": 0.7905572400963374, "learning_rate": 8.379984092949666e-07, "loss": 0.08070403337478638, "step": 28560 }, { "epoch": 2.4956316617158834, "grad_norm": 0.6751789040001412, "learning_rate": 8.365905601377644e-07, "loss": 0.11220120191574097, "step": 28565 }, { "epoch": 2.496068495544295, "grad_norm": 0.5172765991426131, "learning_rate": 8.351837866020696e-07, "loss": 0.07413986325263977, "step": 28570 }, { "epoch": 2.496505329372707, "grad_norm": 0.6465478134961254, "learning_rate": 8.337780890513225e-07, "loss": 0.09965307712554931, "step": 28575 }, { "epoch": 2.496942163201118, "grad_norm": 0.6337476120202706, "learning_rate": 8.323734678486866e-07, "loss": 0.10011488199234009, "step": 28580 }, { "epoch": 2.49737899702953, "grad_norm": 0.4376475661297919, "learning_rate": 8.309699233570473e-07, "loss": 0.10724079608917236, "step": 28585 }, { "epoch": 2.4978158308579417, "grad_norm": 0.583800333134949, "learning_rate": 8.29567455939011e-07, "loss": 0.11180051565170288, "step": 28590 }, { "epoch": 2.4982526646863534, "grad_norm": 0.553634484994659, "learning_rate": 8.281660659569091e-07, "loss": 0.1128991961479187, "step": 28595 }, { "epoch": 2.498689498514765, "grad_norm": 0.5689261152855101, "learning_rate": 8.267657537727891e-07, "loss": 0.09345470070838928, "step": 28600 }, { "epoch": 2.4991263323431765, "grad_norm": 0.6119889677449303, "learning_rate": 8.253665197484267e-07, "loss": 0.09300047159194946, "step": 28605 }, { "epoch": 2.4995631661715882, "grad_norm": 0.6113938955235386, "learning_rate": 8.239683642453128e-07, "loss": 0.09014101028442383, "step": 28610 }, { "epoch": 2.5, "grad_norm": 0.6064997262164686, "learning_rate": 8.22571287624666e-07, "loss": 0.09903267025947571, "step": 28615 }, { "epoch": 2.5004368338284118, "grad_norm": 0.5992936933789972, "learning_rate": 8.211752902474174e-07, "loss": 0.08793317079544068, "step": 28620 }, { "epoch": 2.5008736676568235, "grad_norm": 0.6044374710072388, "learning_rate": 8.197803724742287e-07, "loss": 0.13293473720550536, "step": 28625 }, { "epoch": 2.501310501485235, "grad_norm": 0.5371017150725336, "learning_rate": 8.183865346654774e-07, "loss": 0.0858599066734314, "step": 28630 }, { "epoch": 2.5017473353136466, "grad_norm": 0.5584917874647923, "learning_rate": 8.169937771812614e-07, "loss": 0.09847948551177979, "step": 28635 }, { "epoch": 2.5021841691420583, "grad_norm": 0.5866834449993183, "learning_rate": 8.15602100381403e-07, "loss": 0.09223768711090088, "step": 28640 }, { "epoch": 2.50262100297047, "grad_norm": 0.5362598391889615, "learning_rate": 8.142115046254434e-07, "loss": 0.1083040714263916, "step": 28645 }, { "epoch": 2.503057836798882, "grad_norm": 0.47226854199894325, "learning_rate": 8.128219902726441e-07, "loss": 0.06548033356666565, "step": 28650 }, { "epoch": 2.503494670627293, "grad_norm": 0.691044006610049, "learning_rate": 8.114335576819865e-07, "loss": 0.0859731376171112, "step": 28655 }, { "epoch": 2.503931504455705, "grad_norm": 0.7206197448046919, "learning_rate": 8.100462072121762e-07, "loss": 0.10936560630798339, "step": 28660 }, { "epoch": 2.5043683382841166, "grad_norm": 0.8032048809296329, "learning_rate": 8.086599392216344e-07, "loss": 0.11239160299301147, "step": 28665 }, { "epoch": 2.5048051721125284, "grad_norm": 0.5739164385957377, "learning_rate": 8.072747540685072e-07, "loss": 0.08830887079238892, "step": 28670 }, { "epoch": 2.50524200594094, "grad_norm": 0.420752574846653, "learning_rate": 8.058906521106574e-07, "loss": 0.08958665132522584, "step": 28675 }, { "epoch": 2.5056788397693515, "grad_norm": 0.46352887513596625, "learning_rate": 8.045076337056695e-07, "loss": 0.07306194305419922, "step": 28680 }, { "epoch": 2.5061156735977637, "grad_norm": 0.5380256121132838, "learning_rate": 8.031256992108482e-07, "loss": 0.09251840114593506, "step": 28685 }, { "epoch": 2.506552507426175, "grad_norm": 0.5858287196230586, "learning_rate": 8.017448489832158e-07, "loss": 0.09733718633651733, "step": 28690 }, { "epoch": 2.5069893412545867, "grad_norm": 0.5489888327968456, "learning_rate": 8.003650833795202e-07, "loss": 0.08689619302749634, "step": 28695 }, { "epoch": 2.5074261750829985, "grad_norm": 0.5249531351924567, "learning_rate": 7.989864027562216e-07, "loss": 0.10573619604110718, "step": 28700 }, { "epoch": 2.5078630089114102, "grad_norm": 0.5944436865217454, "learning_rate": 7.976088074695065e-07, "loss": 0.13056652545928954, "step": 28705 }, { "epoch": 2.508299842739822, "grad_norm": 0.6048085407684084, "learning_rate": 7.962322978752778e-07, "loss": 0.08299393653869629, "step": 28710 }, { "epoch": 2.5087366765682333, "grad_norm": 0.659519349820892, "learning_rate": 7.948568743291574e-07, "loss": 0.08879043459892273, "step": 28715 }, { "epoch": 2.509173510396645, "grad_norm": 0.6942967400971108, "learning_rate": 7.934825371864862e-07, "loss": 0.10153393745422364, "step": 28720 }, { "epoch": 2.509610344225057, "grad_norm": 0.5090580283606887, "learning_rate": 7.921092868023289e-07, "loss": 0.1002849817276001, "step": 28725 }, { "epoch": 2.5100471780534686, "grad_norm": 0.4782611482447943, "learning_rate": 7.907371235314648e-07, "loss": 0.0925992488861084, "step": 28730 }, { "epoch": 2.5104840118818803, "grad_norm": 0.6461395963554081, "learning_rate": 7.893660477283926e-07, "loss": 0.11251204013824463, "step": 28735 }, { "epoch": 2.5109208457102916, "grad_norm": 0.6719854740788421, "learning_rate": 7.879960597473346e-07, "loss": 0.09775007963180542, "step": 28740 }, { "epoch": 2.5113576795387034, "grad_norm": 0.5467996404655711, "learning_rate": 7.866271599422243e-07, "loss": 0.11067674160003663, "step": 28745 }, { "epoch": 2.511794513367115, "grad_norm": 0.5185066583027456, "learning_rate": 7.852593486667215e-07, "loss": 0.10101840496063233, "step": 28750 }, { "epoch": 2.512231347195527, "grad_norm": 0.5474940775398165, "learning_rate": 7.838926262742003e-07, "loss": 0.10288349390029908, "step": 28755 }, { "epoch": 2.5126681810239386, "grad_norm": 0.6907485195241386, "learning_rate": 7.825269931177571e-07, "loss": 0.11388953924179077, "step": 28760 }, { "epoch": 2.51310501485235, "grad_norm": 0.6353079828318717, "learning_rate": 7.811624495502018e-07, "loss": 0.10513205528259277, "step": 28765 }, { "epoch": 2.5135418486807617, "grad_norm": 0.6386239310084829, "learning_rate": 7.797989959240698e-07, "loss": 0.08742566108703613, "step": 28770 }, { "epoch": 2.5139786825091734, "grad_norm": 0.5250752812229147, "learning_rate": 7.78436632591606e-07, "loss": 0.10071907043457032, "step": 28775 }, { "epoch": 2.514415516337585, "grad_norm": 0.5188720074730253, "learning_rate": 7.77075359904782e-07, "loss": 0.08142763376235962, "step": 28780 }, { "epoch": 2.514852350165997, "grad_norm": 0.6470669046171431, "learning_rate": 7.757151782152828e-07, "loss": 0.0953183114528656, "step": 28785 }, { "epoch": 2.5152891839944083, "grad_norm": 0.5674450260767727, "learning_rate": 7.743560878745127e-07, "loss": 0.10667539834976196, "step": 28790 }, { "epoch": 2.5157260178228205, "grad_norm": 0.62632158227334, "learning_rate": 7.729980892335947e-07, "loss": 0.08365890979766846, "step": 28795 }, { "epoch": 2.5161628516512318, "grad_norm": 0.5861222493123442, "learning_rate": 7.71641182643369e-07, "loss": 0.10604159832000733, "step": 28800 }, { "epoch": 2.5165996854796435, "grad_norm": 0.5356938796181923, "learning_rate": 7.702853684543959e-07, "loss": 0.09878150820732116, "step": 28805 }, { "epoch": 2.5170365193080553, "grad_norm": 0.6356476044946574, "learning_rate": 7.689306470169477e-07, "loss": 0.10972722768783569, "step": 28810 }, { "epoch": 2.517473353136467, "grad_norm": 0.5943509213594349, "learning_rate": 7.675770186810211e-07, "loss": 0.09642371535301208, "step": 28815 }, { "epoch": 2.517910186964879, "grad_norm": 0.6153196547516152, "learning_rate": 7.662244837963257e-07, "loss": 0.09609302878379822, "step": 28820 }, { "epoch": 2.51834702079329, "grad_norm": 0.7863013043384002, "learning_rate": 7.648730427122925e-07, "loss": 0.08901660442352295, "step": 28825 }, { "epoch": 2.518783854621702, "grad_norm": 0.5784370896635314, "learning_rate": 7.635226957780667e-07, "loss": 0.10526325702667236, "step": 28830 }, { "epoch": 2.5192206884501136, "grad_norm": 0.536232533305337, "learning_rate": 7.621734433425121e-07, "loss": 0.09052751660346985, "step": 28835 }, { "epoch": 2.5196575222785254, "grad_norm": 0.5467902207644317, "learning_rate": 7.608252857542093e-07, "loss": 0.09084784388542175, "step": 28840 }, { "epoch": 2.520094356106937, "grad_norm": 0.5639846957426594, "learning_rate": 7.594782233614551e-07, "loss": 0.10290727615356446, "step": 28845 }, { "epoch": 2.5205311899353484, "grad_norm": 0.5970041474191231, "learning_rate": 7.581322565122673e-07, "loss": 0.08293262720108033, "step": 28850 }, { "epoch": 2.52096802376376, "grad_norm": 0.5945702653357534, "learning_rate": 7.56787385554375e-07, "loss": 0.08672025799751282, "step": 28855 }, { "epoch": 2.521404857592172, "grad_norm": 0.4978031708530023, "learning_rate": 7.554436108352303e-07, "loss": 0.07228922247886657, "step": 28860 }, { "epoch": 2.5218416914205837, "grad_norm": 0.510353850277966, "learning_rate": 7.54100932701996e-07, "loss": 0.08354418873786926, "step": 28865 }, { "epoch": 2.5222785252489954, "grad_norm": 0.54934125451174, "learning_rate": 7.52759351501558e-07, "loss": 0.07314573526382447, "step": 28870 }, { "epoch": 2.5227153590774067, "grad_norm": 0.7045813290302376, "learning_rate": 7.514188675805106e-07, "loss": 0.0768357276916504, "step": 28875 }, { "epoch": 2.5231521929058185, "grad_norm": 0.48520101637002944, "learning_rate": 7.500794812851736e-07, "loss": 0.07168791294097901, "step": 28880 }, { "epoch": 2.5235890267342302, "grad_norm": 0.5128688825777087, "learning_rate": 7.487411929615767e-07, "loss": 0.08649438023567199, "step": 28885 }, { "epoch": 2.524025860562642, "grad_norm": 0.5869014592132265, "learning_rate": 7.47404002955468e-07, "loss": 0.09338241219520568, "step": 28890 }, { "epoch": 2.5244626943910538, "grad_norm": 0.5283705167236549, "learning_rate": 7.460679116123131e-07, "loss": 0.08425992727279663, "step": 28895 }, { "epoch": 2.524899528219465, "grad_norm": 0.6259919603451526, "learning_rate": 7.447329192772929e-07, "loss": 0.08881217241287231, "step": 28900 }, { "epoch": 2.5253363620478773, "grad_norm": 0.551531248001844, "learning_rate": 7.43399026295304e-07, "loss": 0.0905917763710022, "step": 28905 }, { "epoch": 2.5257731958762886, "grad_norm": 0.5688146733071309, "learning_rate": 7.420662330109569e-07, "loss": 0.0822511613368988, "step": 28910 }, { "epoch": 2.5262100297047003, "grad_norm": 0.7487443587709933, "learning_rate": 7.407345397685833e-07, "loss": 0.09047559499740601, "step": 28915 }, { "epoch": 2.526646863533112, "grad_norm": 0.6043837184678909, "learning_rate": 7.394039469122255e-07, "loss": 0.10017354488372802, "step": 28920 }, { "epoch": 2.527083697361524, "grad_norm": 0.6953754767591738, "learning_rate": 7.380744547856456e-07, "loss": 0.11244027614593506, "step": 28925 }, { "epoch": 2.5275205311899356, "grad_norm": 0.5514187203931264, "learning_rate": 7.367460637323187e-07, "loss": 0.10902652740478516, "step": 28930 }, { "epoch": 2.527957365018347, "grad_norm": 0.6232223545211418, "learning_rate": 7.354187740954355e-07, "loss": 0.0958707332611084, "step": 28935 }, { "epoch": 2.5283941988467586, "grad_norm": 0.6528914811549129, "learning_rate": 7.340925862179027e-07, "loss": 0.08493369817733765, "step": 28940 }, { "epoch": 2.5288310326751704, "grad_norm": 0.612087253448008, "learning_rate": 7.327675004423424e-07, "loss": 0.09291605353355407, "step": 28945 }, { "epoch": 2.529267866503582, "grad_norm": 0.5154596095032793, "learning_rate": 7.314435171110928e-07, "loss": 0.0967492401599884, "step": 28950 }, { "epoch": 2.529704700331994, "grad_norm": 0.5118136719339337, "learning_rate": 7.301206365662055e-07, "loss": 0.09076436161994934, "step": 28955 }, { "epoch": 2.530141534160405, "grad_norm": 0.48946014510198577, "learning_rate": 7.2879885914945e-07, "loss": 0.07376353740692139, "step": 28960 }, { "epoch": 2.530578367988817, "grad_norm": 0.5409939181844747, "learning_rate": 7.274781852023071e-07, "loss": 0.07773278951644898, "step": 28965 }, { "epoch": 2.5310152018172287, "grad_norm": 0.7122826344348118, "learning_rate": 7.261586150659755e-07, "loss": 0.08530164957046509, "step": 28970 }, { "epoch": 2.5314520356456405, "grad_norm": 0.45432158380749227, "learning_rate": 7.248401490813662e-07, "loss": 0.08935568928718567, "step": 28975 }, { "epoch": 2.5318888694740522, "grad_norm": 0.6298790744967084, "learning_rate": 7.235227875891076e-07, "loss": 0.09023871421813964, "step": 28980 }, { "epoch": 2.5323257033024635, "grad_norm": 0.5723144269805734, "learning_rate": 7.222065309295423e-07, "loss": 0.09555806517601013, "step": 28985 }, { "epoch": 2.5327625371308753, "grad_norm": 0.5399749136922395, "learning_rate": 7.208913794427236e-07, "loss": 0.0828302264213562, "step": 28990 }, { "epoch": 2.533199370959287, "grad_norm": 0.5846818701962209, "learning_rate": 7.195773334684275e-07, "loss": 0.086566162109375, "step": 28995 }, { "epoch": 2.533636204787699, "grad_norm": 0.5089863526579632, "learning_rate": 7.182643933461331e-07, "loss": 0.09714348316192627, "step": 29000 }, { "epoch": 2.5340730386161106, "grad_norm": 0.6052868405189953, "learning_rate": 7.169525594150445e-07, "loss": 0.09074023962020875, "step": 29005 }, { "epoch": 2.534509872444522, "grad_norm": 0.45628902285078826, "learning_rate": 7.156418320140729e-07, "loss": 0.07182368040084838, "step": 29010 }, { "epoch": 2.5349467062729336, "grad_norm": 0.46778406264404143, "learning_rate": 7.143322114818485e-07, "loss": 0.09013898968696595, "step": 29015 }, { "epoch": 2.5353835401013454, "grad_norm": 0.5755689795885125, "learning_rate": 7.130236981567107e-07, "loss": 0.08347684144973755, "step": 29020 }, { "epoch": 2.535820373929757, "grad_norm": 0.4935657686635429, "learning_rate": 7.11716292376719e-07, "loss": 0.06895396709442139, "step": 29025 }, { "epoch": 2.536257207758169, "grad_norm": 0.5994287449638057, "learning_rate": 7.104099944796383e-07, "loss": 0.10489953756332397, "step": 29030 }, { "epoch": 2.5366940415865806, "grad_norm": 0.62660598157077, "learning_rate": 7.091048048029564e-07, "loss": 0.0888663113117218, "step": 29035 }, { "epoch": 2.5371308754149924, "grad_norm": 0.5599002673235159, "learning_rate": 7.078007236838686e-07, "loss": 0.10044417381286622, "step": 29040 }, { "epoch": 2.5375677092434037, "grad_norm": 0.6365969253126507, "learning_rate": 7.064977514592842e-07, "loss": 0.10009579658508301, "step": 29045 }, { "epoch": 2.5380045430718154, "grad_norm": 0.5646676465478695, "learning_rate": 7.051958884658311e-07, "loss": 0.08338685035705566, "step": 29050 }, { "epoch": 2.538441376900227, "grad_norm": 0.5591795505127424, "learning_rate": 7.038951350398438e-07, "loss": 0.09479759335517883, "step": 29055 }, { "epoch": 2.538878210728639, "grad_norm": 0.5469596032079884, "learning_rate": 7.025954915173772e-07, "loss": 0.08400864005088807, "step": 29060 }, { "epoch": 2.5393150445570507, "grad_norm": 0.6147046832696395, "learning_rate": 7.012969582341911e-07, "loss": 0.09117245674133301, "step": 29065 }, { "epoch": 2.539751878385462, "grad_norm": 0.6182674421293322, "learning_rate": 6.99999535525766e-07, "loss": 0.08470288515090943, "step": 29070 }, { "epoch": 2.5401887122138738, "grad_norm": 0.7350003460383967, "learning_rate": 6.987032237272906e-07, "loss": 0.10305596590042114, "step": 29075 }, { "epoch": 2.5406255460422855, "grad_norm": 0.4571647944324745, "learning_rate": 6.974080231736713e-07, "loss": 0.0841102123260498, "step": 29080 }, { "epoch": 2.5410623798706973, "grad_norm": 0.5327413700433089, "learning_rate": 6.961139341995227e-07, "loss": 0.10782909393310547, "step": 29085 }, { "epoch": 2.541499213699109, "grad_norm": 0.9498512942110087, "learning_rate": 6.948209571391745e-07, "loss": 0.09106965065002441, "step": 29090 }, { "epoch": 2.5419360475275203, "grad_norm": 0.5939917384835512, "learning_rate": 6.935290923266685e-07, "loss": 0.08978201150894165, "step": 29095 }, { "epoch": 2.542372881355932, "grad_norm": 0.592064011107911, "learning_rate": 6.922383400957583e-07, "loss": 0.0859358549118042, "step": 29100 }, { "epoch": 2.542809715184344, "grad_norm": 0.4784066696190492, "learning_rate": 6.909487007799142e-07, "loss": 0.10957250595092774, "step": 29105 }, { "epoch": 2.5432465490127556, "grad_norm": 0.6431914610716558, "learning_rate": 6.896601747123128e-07, "loss": 0.09754565954208375, "step": 29110 }, { "epoch": 2.5436833828411674, "grad_norm": 0.6090998131614077, "learning_rate": 6.883727622258491e-07, "loss": 0.09578793048858643, "step": 29115 }, { "epoch": 2.5441202166695787, "grad_norm": 0.6447394968445144, "learning_rate": 6.870864636531244e-07, "loss": 0.08019348382949829, "step": 29120 }, { "epoch": 2.5445570504979904, "grad_norm": 0.623565223642272, "learning_rate": 6.858012793264601e-07, "loss": 0.09188226461410523, "step": 29125 }, { "epoch": 2.544993884326402, "grad_norm": 0.5061780647853806, "learning_rate": 6.845172095778796e-07, "loss": 0.08695329427719116, "step": 29130 }, { "epoch": 2.545430718154814, "grad_norm": 0.41771138770265154, "learning_rate": 6.832342547391274e-07, "loss": 0.07846062779426574, "step": 29135 }, { "epoch": 2.5458675519832257, "grad_norm": 0.5718409228476138, "learning_rate": 6.819524151416551e-07, "loss": 0.0863321840763092, "step": 29140 }, { "epoch": 2.546304385811637, "grad_norm": 0.6020506124010374, "learning_rate": 6.806716911166267e-07, "loss": 0.09308181405067444, "step": 29145 }, { "epoch": 2.546741219640049, "grad_norm": 0.4476089896050743, "learning_rate": 6.793920829949202e-07, "loss": 0.07953280210494995, "step": 29150 }, { "epoch": 2.5471780534684605, "grad_norm": 0.4986515116820853, "learning_rate": 6.78113591107123e-07, "loss": 0.0924309492111206, "step": 29155 }, { "epoch": 2.5476148872968722, "grad_norm": 0.5886833261713368, "learning_rate": 6.768362157835351e-07, "loss": 0.07909076809883117, "step": 29160 }, { "epoch": 2.548051721125284, "grad_norm": 0.6196197222770901, "learning_rate": 6.755599573541665e-07, "loss": 0.07049486637115479, "step": 29165 }, { "epoch": 2.5484885549536958, "grad_norm": 0.5165996416188271, "learning_rate": 6.742848161487425e-07, "loss": 0.08781424760818482, "step": 29170 }, { "epoch": 2.5489253887821075, "grad_norm": 0.7284114345209679, "learning_rate": 6.73010792496695e-07, "loss": 0.08974589109420776, "step": 29175 }, { "epoch": 2.549362222610519, "grad_norm": 0.6634576026466896, "learning_rate": 6.717378867271712e-07, "loss": 0.0922180712223053, "step": 29180 }, { "epoch": 2.5497990564389306, "grad_norm": 0.6640376545810922, "learning_rate": 6.70466099169027e-07, "loss": 0.08128868341445923, "step": 29185 }, { "epoch": 2.5502358902673423, "grad_norm": 0.5793722419355274, "learning_rate": 6.691954301508302e-07, "loss": 0.08504584431648254, "step": 29190 }, { "epoch": 2.550672724095754, "grad_norm": 0.5441129398239359, "learning_rate": 6.679258800008598e-07, "loss": 0.08792386054992676, "step": 29195 }, { "epoch": 2.551109557924166, "grad_norm": 0.6634084599295488, "learning_rate": 6.666574490471034e-07, "loss": 0.12178359031677247, "step": 29200 }, { "epoch": 2.551546391752577, "grad_norm": 0.4443064942695208, "learning_rate": 6.65390137617265e-07, "loss": 0.08181021213531495, "step": 29205 }, { "epoch": 2.551983225580989, "grad_norm": 0.6571573974753413, "learning_rate": 6.641239460387533e-07, "loss": 0.10948171615600585, "step": 29210 }, { "epoch": 2.5524200594094006, "grad_norm": 0.5349965593477354, "learning_rate": 6.628588746386916e-07, "loss": 0.06669933795928955, "step": 29215 }, { "epoch": 2.5528568932378124, "grad_norm": 0.5648267332425381, "learning_rate": 6.615949237439129e-07, "loss": 0.09260982275009155, "step": 29220 }, { "epoch": 2.553293727066224, "grad_norm": 0.4675434204315617, "learning_rate": 6.603320936809593e-07, "loss": 0.08739345073699951, "step": 29225 }, { "epoch": 2.5537305608946355, "grad_norm": 0.6936773893316195, "learning_rate": 6.590703847760838e-07, "loss": 0.09468142986297608, "step": 29230 }, { "epoch": 2.554167394723047, "grad_norm": 0.5390705396296285, "learning_rate": 6.578097973552522e-07, "loss": 0.08758933544158935, "step": 29235 }, { "epoch": 2.554604228551459, "grad_norm": 0.6040759359326389, "learning_rate": 6.56550331744138e-07, "loss": 0.11327109336853028, "step": 29240 }, { "epoch": 2.5550410623798707, "grad_norm": 0.5731576975806794, "learning_rate": 6.552919882681241e-07, "loss": 0.09233731627464295, "step": 29245 }, { "epoch": 2.5554778962082825, "grad_norm": 0.7994185285665273, "learning_rate": 6.540347672523078e-07, "loss": 0.08273038864135743, "step": 29250 }, { "epoch": 2.555914730036694, "grad_norm": 0.6378723302467925, "learning_rate": 6.527786690214899e-07, "loss": 0.11090312004089356, "step": 29255 }, { "epoch": 2.556351563865106, "grad_norm": 0.5725654299251469, "learning_rate": 6.515236939001884e-07, "loss": 0.067825186252594, "step": 29260 }, { "epoch": 2.5567883976935173, "grad_norm": 0.6613402367226856, "learning_rate": 6.502698422126241e-07, "loss": 0.09397326707839966, "step": 29265 }, { "epoch": 2.557225231521929, "grad_norm": 0.5965051242418795, "learning_rate": 6.49017114282734e-07, "loss": 0.0969008207321167, "step": 29270 }, { "epoch": 2.557662065350341, "grad_norm": 0.563989349934111, "learning_rate": 6.477655104341595e-07, "loss": 0.09213705658912659, "step": 29275 }, { "epoch": 2.5580988991787525, "grad_norm": 0.61475486347196, "learning_rate": 6.465150309902574e-07, "loss": 0.11032719612121582, "step": 29280 }, { "epoch": 2.5585357330071643, "grad_norm": 0.6395370729590855, "learning_rate": 6.452656762740856e-07, "loss": 0.0938199520111084, "step": 29285 }, { "epoch": 2.5589725668355756, "grad_norm": 0.5488161998740875, "learning_rate": 6.440174466084198e-07, "loss": 0.08277348279953003, "step": 29290 }, { "epoch": 2.5594094006639874, "grad_norm": 0.4900413605913683, "learning_rate": 6.4277034231574e-07, "loss": 0.09653167724609375, "step": 29295 }, { "epoch": 2.559846234492399, "grad_norm": 0.4815541719862443, "learning_rate": 6.415243637182367e-07, "loss": 0.08979241847991944, "step": 29300 }, { "epoch": 2.560283068320811, "grad_norm": 0.5850358568869319, "learning_rate": 6.402795111378119e-07, "loss": 0.11234047412872314, "step": 29305 }, { "epoch": 2.5607199021492226, "grad_norm": 0.5345818797499943, "learning_rate": 6.390357848960715e-07, "loss": 0.1008229374885559, "step": 29310 }, { "epoch": 2.561156735977634, "grad_norm": 0.6294981040067403, "learning_rate": 6.377931853143382e-07, "loss": 0.10174398422241211, "step": 29315 }, { "epoch": 2.5615935698060457, "grad_norm": 0.47081446367767804, "learning_rate": 6.365517127136339e-07, "loss": 0.07795754671096802, "step": 29320 }, { "epoch": 2.5620304036344574, "grad_norm": 0.5726756018299198, "learning_rate": 6.353113674146977e-07, "loss": 0.07960922718048095, "step": 29325 }, { "epoch": 2.562467237462869, "grad_norm": 0.5375263636239488, "learning_rate": 6.340721497379726e-07, "loss": 0.08812670707702637, "step": 29330 }, { "epoch": 2.562904071291281, "grad_norm": 0.6155416681270458, "learning_rate": 6.328340600036131e-07, "loss": 0.080900639295578, "step": 29335 }, { "epoch": 2.5633409051196923, "grad_norm": 0.5998767677561793, "learning_rate": 6.315970985314806e-07, "loss": 0.08835006952285766, "step": 29340 }, { "epoch": 2.563777738948104, "grad_norm": 0.4744644667974, "learning_rate": 6.303612656411456e-07, "loss": 0.06903203129768372, "step": 29345 }, { "epoch": 2.5642145727765158, "grad_norm": 0.5298753229386416, "learning_rate": 6.291265616518866e-07, "loss": 0.09804675579071045, "step": 29350 }, { "epoch": 2.5646514066049275, "grad_norm": 0.6490559714813402, "learning_rate": 6.2789298688269e-07, "loss": 0.08148431777954102, "step": 29355 }, { "epoch": 2.5650882404333393, "grad_norm": 0.5836812509996985, "learning_rate": 6.266605416522525e-07, "loss": 0.08921836614608765, "step": 29360 }, { "epoch": 2.5655250742617506, "grad_norm": 0.6054090718009791, "learning_rate": 6.254292262789769e-07, "loss": 0.11595697402954101, "step": 29365 }, { "epoch": 2.565961908090163, "grad_norm": 0.5851618749388047, "learning_rate": 6.241990410809761e-07, "loss": 0.10340842008590698, "step": 29370 }, { "epoch": 2.566398741918574, "grad_norm": 0.7188440843975676, "learning_rate": 6.22969986376068e-07, "loss": 0.10683155059814453, "step": 29375 }, { "epoch": 2.566835575746986, "grad_norm": 0.6227970292217216, "learning_rate": 6.217420624817833e-07, "loss": 0.12047002315521241, "step": 29380 }, { "epoch": 2.5672724095753976, "grad_norm": 0.6081323054771365, "learning_rate": 6.205152697153538e-07, "loss": 0.09018832445144653, "step": 29385 }, { "epoch": 2.5677092434038093, "grad_norm": 0.6040386693839529, "learning_rate": 6.192896083937255e-07, "loss": 0.09097908735275269, "step": 29390 }, { "epoch": 2.568146077232221, "grad_norm": 0.5652616982926943, "learning_rate": 6.18065078833548e-07, "loss": 0.07672321200370788, "step": 29395 }, { "epoch": 2.5685829110606324, "grad_norm": 0.5954155341190237, "learning_rate": 6.168416813511796e-07, "loss": 0.09958653450012207, "step": 29400 }, { "epoch": 2.569019744889044, "grad_norm": 0.7626862795900264, "learning_rate": 6.156194162626877e-07, "loss": 0.0837904930114746, "step": 29405 }, { "epoch": 2.569456578717456, "grad_norm": 0.6159838984186105, "learning_rate": 6.143982838838459e-07, "loss": 0.07295472621917724, "step": 29410 }, { "epoch": 2.5698934125458677, "grad_norm": 0.6512075274374207, "learning_rate": 6.131782845301337e-07, "loss": 0.102511727809906, "step": 29415 }, { "epoch": 2.5703302463742794, "grad_norm": 0.6713702809108826, "learning_rate": 6.119594185167398e-07, "loss": 0.10676939487457275, "step": 29420 }, { "epoch": 2.5707670802026907, "grad_norm": 0.5877614457360272, "learning_rate": 6.107416861585602e-07, "loss": 0.0841952919960022, "step": 29425 }, { "epoch": 2.5712039140311025, "grad_norm": 0.4851865371934471, "learning_rate": 6.095250877701969e-07, "loss": 0.06848546862602234, "step": 29430 }, { "epoch": 2.5716407478595142, "grad_norm": 0.5432411244120277, "learning_rate": 6.083096236659602e-07, "loss": 0.07903218269348145, "step": 29435 }, { "epoch": 2.572077581687926, "grad_norm": 0.5863922613761878, "learning_rate": 6.070952941598662e-07, "loss": 0.0874470055103302, "step": 29440 }, { "epoch": 2.5725144155163377, "grad_norm": 0.6455497167655236, "learning_rate": 6.058820995656378e-07, "loss": 0.09625656604766845, "step": 29445 }, { "epoch": 2.572951249344749, "grad_norm": 0.7277486949405868, "learning_rate": 6.046700401967059e-07, "loss": 0.10343656539916993, "step": 29450 }, { "epoch": 2.573388083173161, "grad_norm": 0.7047880951115341, "learning_rate": 6.034591163662063e-07, "loss": 0.07352660894393921, "step": 29455 }, { "epoch": 2.5738249170015726, "grad_norm": 0.6408858692037394, "learning_rate": 6.02249328386984e-07, "loss": 0.07793523669242859, "step": 29460 }, { "epoch": 2.5742617508299843, "grad_norm": 0.6678196612954642, "learning_rate": 6.01040676571587e-07, "loss": 0.09066535234451294, "step": 29465 }, { "epoch": 2.574698584658396, "grad_norm": 0.5609369330420135, "learning_rate": 5.998331612322749e-07, "loss": 0.07961535453796387, "step": 29470 }, { "epoch": 2.5751354184868074, "grad_norm": 0.6416584158843256, "learning_rate": 5.986267826810083e-07, "loss": 0.09576724767684937, "step": 29475 }, { "epoch": 2.575572252315219, "grad_norm": 0.6223424078856813, "learning_rate": 5.974215412294576e-07, "loss": 0.07907907366752624, "step": 29480 }, { "epoch": 2.576009086143631, "grad_norm": 0.4012682300035778, "learning_rate": 5.962174371889967e-07, "loss": 0.0859804630279541, "step": 29485 }, { "epoch": 2.5764459199720426, "grad_norm": 0.5926565189924914, "learning_rate": 5.950144708707095e-07, "loss": 0.10484927892684937, "step": 29490 }, { "epoch": 2.5768827538004544, "grad_norm": 0.6666607364782606, "learning_rate": 5.938126425853824e-07, "loss": 0.10457475185394287, "step": 29495 }, { "epoch": 2.5773195876288657, "grad_norm": 0.7800434006571684, "learning_rate": 5.926119526435081e-07, "loss": 0.08322135806083679, "step": 29500 }, { "epoch": 2.577756421457278, "grad_norm": 0.4940082311536946, "learning_rate": 5.914124013552886e-07, "loss": 0.08744970560073853, "step": 29505 }, { "epoch": 2.578193255285689, "grad_norm": 0.5917291114779571, "learning_rate": 5.90213989030628e-07, "loss": 0.08892265558242798, "step": 29510 }, { "epoch": 2.578630089114101, "grad_norm": 0.5943593257102959, "learning_rate": 5.890167159791371e-07, "loss": 0.11018707752227783, "step": 29515 }, { "epoch": 2.5790669229425127, "grad_norm": 0.602558324258175, "learning_rate": 5.878205825101329e-07, "loss": 0.0876535415649414, "step": 29520 }, { "epoch": 2.5795037567709245, "grad_norm": 0.7120058000808697, "learning_rate": 5.866255889326383e-07, "loss": 0.09466583728790283, "step": 29525 }, { "epoch": 2.5799405905993362, "grad_norm": 0.6239748548385764, "learning_rate": 5.854317355553807e-07, "loss": 0.10345810651779175, "step": 29530 }, { "epoch": 2.5803774244277475, "grad_norm": 0.6600733129569728, "learning_rate": 5.842390226867955e-07, "loss": 0.10426225662231445, "step": 29535 }, { "epoch": 2.5808142582561593, "grad_norm": 0.5086570356768453, "learning_rate": 5.830474506350176e-07, "loss": 0.07612954378128052, "step": 29540 }, { "epoch": 2.581251092084571, "grad_norm": 0.5626255633187827, "learning_rate": 5.818570197078943e-07, "loss": 0.10713226795196533, "step": 29545 }, { "epoch": 2.581687925912983, "grad_norm": 0.6333028363375085, "learning_rate": 5.806677302129731e-07, "loss": 0.09711164236068726, "step": 29550 }, { "epoch": 2.5821247597413945, "grad_norm": 0.6808559881256303, "learning_rate": 5.794795824575078e-07, "loss": 0.08939844369888306, "step": 29555 }, { "epoch": 2.582561593569806, "grad_norm": 0.7697683430789636, "learning_rate": 5.782925767484598e-07, "loss": 0.09117356538772584, "step": 29560 }, { "epoch": 2.5829984273982176, "grad_norm": 0.6303972369449496, "learning_rate": 5.771067133924913e-07, "loss": 0.09540005922317504, "step": 29565 }, { "epoch": 2.5834352612266294, "grad_norm": 0.5606246252767596, "learning_rate": 5.759219926959742e-07, "loss": 0.10381674766540527, "step": 29570 }, { "epoch": 2.583872095055041, "grad_norm": 0.5671312818482436, "learning_rate": 5.747384149649788e-07, "loss": 0.09348592162132263, "step": 29575 }, { "epoch": 2.584308928883453, "grad_norm": 0.5544839464542476, "learning_rate": 5.73555980505286e-07, "loss": 0.10051660537719727, "step": 29580 }, { "epoch": 2.584745762711864, "grad_norm": 0.6566066565475384, "learning_rate": 5.723746896223776e-07, "loss": 0.09736414551734925, "step": 29585 }, { "epoch": 2.585182596540276, "grad_norm": 0.5312369028592531, "learning_rate": 5.711945426214432e-07, "loss": 0.08522498607635498, "step": 29590 }, { "epoch": 2.5856194303686877, "grad_norm": 0.4925692993297524, "learning_rate": 5.700155398073747e-07, "loss": 0.07524027824401855, "step": 29595 }, { "epoch": 2.5860562641970994, "grad_norm": 0.6087530148760363, "learning_rate": 5.688376814847679e-07, "loss": 0.11580567359924317, "step": 29600 }, { "epoch": 2.586493098025511, "grad_norm": 0.6221763883032608, "learning_rate": 5.676609679579237e-07, "loss": 0.08741742968559266, "step": 29605 }, { "epoch": 2.5869299318539225, "grad_norm": 0.5500783763582191, "learning_rate": 5.664853995308489e-07, "loss": 0.11142945289611816, "step": 29610 }, { "epoch": 2.5873667656823347, "grad_norm": 0.6178910167544608, "learning_rate": 5.653109765072517e-07, "loss": 0.09039056301116943, "step": 29615 }, { "epoch": 2.587803599510746, "grad_norm": 0.7145450646137219, "learning_rate": 5.641376991905457e-07, "loss": 0.09203388690948486, "step": 29620 }, { "epoch": 2.5882404333391578, "grad_norm": 0.5054102355600515, "learning_rate": 5.629655678838486e-07, "loss": 0.0808899998664856, "step": 29625 }, { "epoch": 2.5886772671675695, "grad_norm": 0.5197316137114462, "learning_rate": 5.617945828899812e-07, "loss": 0.08639734983444214, "step": 29630 }, { "epoch": 2.5891141009959813, "grad_norm": 0.6228591819030991, "learning_rate": 5.606247445114715e-07, "loss": 0.08323009014129638, "step": 29635 }, { "epoch": 2.589550934824393, "grad_norm": 0.7793357861923143, "learning_rate": 5.594560530505444e-07, "loss": 0.08293457627296448, "step": 29640 }, { "epoch": 2.5899877686528043, "grad_norm": 0.5003905465431902, "learning_rate": 5.582885088091356e-07, "loss": 0.06463019251823425, "step": 29645 }, { "epoch": 2.590424602481216, "grad_norm": 0.4891997870838139, "learning_rate": 5.571221120888808e-07, "loss": 0.09789799451828003, "step": 29650 }, { "epoch": 2.590861436309628, "grad_norm": 0.5350449794878563, "learning_rate": 5.559568631911189e-07, "loss": 0.1169088363647461, "step": 29655 }, { "epoch": 2.5912982701380396, "grad_norm": 0.6650315238848296, "learning_rate": 5.547927624168948e-07, "loss": 0.106598961353302, "step": 29660 }, { "epoch": 2.5917351039664513, "grad_norm": 0.6668708886728151, "learning_rate": 5.536298100669546e-07, "loss": 0.07191199064254761, "step": 29665 }, { "epoch": 2.5921719377948627, "grad_norm": 0.4952023265923706, "learning_rate": 5.524680064417481e-07, "loss": 0.08298063278198242, "step": 29670 }, { "epoch": 2.5926087716232744, "grad_norm": 0.5662066328670352, "learning_rate": 5.51307351841428e-07, "loss": 0.09409853219985961, "step": 29675 }, { "epoch": 2.593045605451686, "grad_norm": 0.515534809949208, "learning_rate": 5.501478465658522e-07, "loss": 0.09513962268829346, "step": 29680 }, { "epoch": 2.593482439280098, "grad_norm": 0.5291073243097372, "learning_rate": 5.489894909145782e-07, "loss": 0.0953770101070404, "step": 29685 }, { "epoch": 2.5939192731085097, "grad_norm": 0.6820422490645649, "learning_rate": 5.478322851868711e-07, "loss": 0.0978120744228363, "step": 29690 }, { "epoch": 2.594356106936921, "grad_norm": 0.561606517140209, "learning_rate": 5.466762296816952e-07, "loss": 0.0968121588230133, "step": 29695 }, { "epoch": 2.5947929407653327, "grad_norm": 0.5991532971106139, "learning_rate": 5.455213246977182e-07, "loss": 0.07765421867370606, "step": 29700 }, { "epoch": 2.5952297745937445, "grad_norm": 0.6589692772009311, "learning_rate": 5.443675705333102e-07, "loss": 0.10649324655532837, "step": 29705 }, { "epoch": 2.5956666084221562, "grad_norm": 0.4462404624193104, "learning_rate": 5.432149674865467e-07, "loss": 0.11404718160629272, "step": 29710 }, { "epoch": 2.596103442250568, "grad_norm": 0.48644427633022586, "learning_rate": 5.42063515855204e-07, "loss": 0.09094181656837463, "step": 29715 }, { "epoch": 2.5965402760789793, "grad_norm": 0.4243020337096098, "learning_rate": 5.409132159367592e-07, "loss": 0.08285326957702636, "step": 29720 }, { "epoch": 2.5969771099073915, "grad_norm": 0.512294157270361, "learning_rate": 5.39764068028395e-07, "loss": 0.09967820644378662, "step": 29725 }, { "epoch": 2.597413943735803, "grad_norm": 0.6349696921423185, "learning_rate": 5.386160724269951e-07, "loss": 0.10592041015625, "step": 29730 }, { "epoch": 2.5978507775642146, "grad_norm": 0.6666407384400265, "learning_rate": 5.37469229429145e-07, "loss": 0.09345967769622802, "step": 29735 }, { "epoch": 2.5982876113926263, "grad_norm": 0.5900468355854478, "learning_rate": 5.363235393311323e-07, "loss": 0.1224747657775879, "step": 29740 }, { "epoch": 2.598724445221038, "grad_norm": 0.6039693775610367, "learning_rate": 5.351790024289489e-07, "loss": 0.1051074743270874, "step": 29745 }, { "epoch": 2.59916127904945, "grad_norm": 0.6518480170138754, "learning_rate": 5.340356190182855e-07, "loss": 0.10237267017364501, "step": 29750 }, { "epoch": 2.599598112877861, "grad_norm": 0.5355731073736858, "learning_rate": 5.32893389394537e-07, "loss": 0.0804103970527649, "step": 29755 }, { "epoch": 2.600034946706273, "grad_norm": 0.5774598042285026, "learning_rate": 5.317523138528008e-07, "loss": 0.08386074304580689, "step": 29760 }, { "epoch": 2.6004717805346846, "grad_norm": 0.6649458039061047, "learning_rate": 5.306123926878737e-07, "loss": 0.08002328872680664, "step": 29765 }, { "epoch": 2.6009086143630964, "grad_norm": 0.6106747029415359, "learning_rate": 5.294736261942562e-07, "loss": 0.08588454127311707, "step": 29770 }, { "epoch": 2.601345448191508, "grad_norm": 0.6746585785804327, "learning_rate": 5.283360146661492e-07, "loss": 0.09901124835014344, "step": 29775 }, { "epoch": 2.6017822820199195, "grad_norm": 0.7577130057541953, "learning_rate": 5.271995583974576e-07, "loss": 0.06825516223907471, "step": 29780 }, { "epoch": 2.602219115848331, "grad_norm": 0.5231012187616552, "learning_rate": 5.260642576817837e-07, "loss": 0.0876352310180664, "step": 29785 }, { "epoch": 2.602655949676743, "grad_norm": 0.43020050052738257, "learning_rate": 5.249301128124367e-07, "loss": 0.07963874340057372, "step": 29790 }, { "epoch": 2.6030927835051547, "grad_norm": 0.49621776093609593, "learning_rate": 5.237971240824213e-07, "loss": 0.0923809289932251, "step": 29795 }, { "epoch": 2.6035296173335665, "grad_norm": 0.6767993385995086, "learning_rate": 5.226652917844482e-07, "loss": 0.09228787422180176, "step": 29800 }, { "epoch": 2.603966451161978, "grad_norm": 0.6039083780075962, "learning_rate": 5.215346162109263e-07, "loss": 0.08704186677932739, "step": 29805 }, { "epoch": 2.6044032849903895, "grad_norm": 0.5960311798380531, "learning_rate": 5.204050976539687e-07, "loss": 0.10630496740341186, "step": 29810 }, { "epoch": 2.6048401188188013, "grad_norm": 0.6250745830509515, "learning_rate": 5.192767364053869e-07, "loss": 0.11353623867034912, "step": 29815 }, { "epoch": 2.605276952647213, "grad_norm": 0.5581413024287271, "learning_rate": 5.181495327566927e-07, "loss": 0.08196033239364624, "step": 29820 }, { "epoch": 2.605713786475625, "grad_norm": 0.7070635447513626, "learning_rate": 5.170234869991042e-07, "loss": 0.07631555199623108, "step": 29825 }, { "epoch": 2.606150620304036, "grad_norm": 0.5310212163146802, "learning_rate": 5.158985994235322e-07, "loss": 0.08901370763778686, "step": 29830 }, { "epoch": 2.606587454132448, "grad_norm": 0.5634003935650786, "learning_rate": 5.147748703205957e-07, "loss": 0.1016154170036316, "step": 29835 }, { "epoch": 2.6070242879608596, "grad_norm": 0.5451310655949058, "learning_rate": 5.136522999806093e-07, "loss": 0.09619032740592956, "step": 29840 }, { "epoch": 2.6074611217892714, "grad_norm": 0.5233030861929088, "learning_rate": 5.125308886935921e-07, "loss": 0.08933011293411255, "step": 29845 }, { "epoch": 2.607897955617683, "grad_norm": 0.6344712261549521, "learning_rate": 5.114106367492617e-07, "loss": 0.10200839042663574, "step": 29850 }, { "epoch": 2.608334789446095, "grad_norm": 0.5954121278918205, "learning_rate": 5.10291544437036e-07, "loss": 0.08689801692962647, "step": 29855 }, { "epoch": 2.6087716232745066, "grad_norm": 0.5760074135163044, "learning_rate": 5.091736120460328e-07, "loss": 0.09198073148727418, "step": 29860 }, { "epoch": 2.609208457102918, "grad_norm": 0.46203721543417436, "learning_rate": 5.080568398650726e-07, "loss": 0.09962900876998901, "step": 29865 }, { "epoch": 2.6096452909313297, "grad_norm": 0.6714116177521757, "learning_rate": 5.069412281826746e-07, "loss": 0.08711241483688355, "step": 29870 }, { "epoch": 2.6100821247597414, "grad_norm": 0.8776239354951164, "learning_rate": 5.058267772870573e-07, "loss": 0.0986629605293274, "step": 29875 }, { "epoch": 2.610518958588153, "grad_norm": 0.6226693700034668, "learning_rate": 5.047134874661419e-07, "loss": 0.08508074283599854, "step": 29880 }, { "epoch": 2.610955792416565, "grad_norm": 0.6358446620434252, "learning_rate": 5.036013590075467e-07, "loss": 0.08384823203086852, "step": 29885 }, { "epoch": 2.6113926262449763, "grad_norm": 0.4874791262713918, "learning_rate": 5.02490392198593e-07, "loss": 0.07955321073532104, "step": 29890 }, { "epoch": 2.611829460073388, "grad_norm": 0.5529259656961402, "learning_rate": 5.013805873262978e-07, "loss": 0.08693246841430664, "step": 29895 }, { "epoch": 2.6122662939017998, "grad_norm": 0.5638227239083871, "learning_rate": 5.002719446773829e-07, "loss": 0.09264551401138306, "step": 29900 }, { "epoch": 2.6127031277302115, "grad_norm": 1.2276288235729718, "learning_rate": 4.991644645382654e-07, "loss": 0.10351419448852539, "step": 29905 }, { "epoch": 2.6131399615586233, "grad_norm": 0.5746721858548473, "learning_rate": 4.980581471950657e-07, "loss": 0.0973582148551941, "step": 29910 }, { "epoch": 2.6135767953870346, "grad_norm": 0.5504879403107517, "learning_rate": 4.969529929336014e-07, "loss": 0.0866666555404663, "step": 29915 }, { "epoch": 2.6140136292154463, "grad_norm": 0.555741175002487, "learning_rate": 4.958490020393903e-07, "loss": 0.08992725610733032, "step": 29920 }, { "epoch": 2.614450463043858, "grad_norm": 0.5280163038661333, "learning_rate": 4.947461747976495e-07, "loss": 0.09794430732727051, "step": 29925 }, { "epoch": 2.61488729687227, "grad_norm": 0.6514252630846626, "learning_rate": 4.936445114932953e-07, "loss": 0.09221373796463013, "step": 29930 }, { "epoch": 2.6153241307006816, "grad_norm": 0.7089812596955485, "learning_rate": 4.925440124109448e-07, "loss": 0.08934947252273559, "step": 29935 }, { "epoch": 2.615760964529093, "grad_norm": 0.7727846555721626, "learning_rate": 4.914446778349125e-07, "loss": 0.07498162984848022, "step": 29940 }, { "epoch": 2.6161977983575047, "grad_norm": 0.48316305447623625, "learning_rate": 4.903465080492132e-07, "loss": 0.07282964587211609, "step": 29945 }, { "epoch": 2.6166346321859164, "grad_norm": 0.746046882372339, "learning_rate": 4.892495033375594e-07, "loss": 0.09500086903572083, "step": 29950 }, { "epoch": 2.617071466014328, "grad_norm": 0.50682268060155, "learning_rate": 4.88153663983365e-07, "loss": 0.09039734601974488, "step": 29955 }, { "epoch": 2.61750829984274, "grad_norm": 0.5523466533019742, "learning_rate": 4.870589902697387e-07, "loss": 0.09169911742210388, "step": 29960 }, { "epoch": 2.6179451336711512, "grad_norm": 0.6365654669384552, "learning_rate": 4.859654824794935e-07, "loss": 0.07795186042785644, "step": 29965 }, { "epoch": 2.6183819674995634, "grad_norm": 0.4569199895345474, "learning_rate": 4.848731408951374e-07, "loss": 0.08303771018981934, "step": 29970 }, { "epoch": 2.6188188013279747, "grad_norm": 0.653690101487867, "learning_rate": 4.837819657988768e-07, "loss": 0.08520632982254028, "step": 29975 }, { "epoch": 2.6192556351563865, "grad_norm": 0.5120393950357163, "learning_rate": 4.826919574726202e-07, "loss": 0.09555623531341553, "step": 29980 }, { "epoch": 2.6196924689847982, "grad_norm": 0.6473119116356483, "learning_rate": 4.816031161979711e-07, "loss": 0.10014266967773437, "step": 29985 }, { "epoch": 2.62012930281321, "grad_norm": 0.5537370977014006, "learning_rate": 4.805154422562331e-07, "loss": 0.10949974060058594, "step": 29990 }, { "epoch": 2.6205661366416217, "grad_norm": 0.7686380953504472, "learning_rate": 4.794289359284071e-07, "loss": 0.07912477254867553, "step": 29995 }, { "epoch": 2.621002970470033, "grad_norm": 0.5849025761901852, "learning_rate": 4.783435974951956e-07, "loss": 0.1262653112411499, "step": 30000 }, { "epoch": 2.621439804298445, "grad_norm": 0.6195685147647914, "learning_rate": 4.772594272369941e-07, "loss": 0.08624813556671143, "step": 30005 }, { "epoch": 2.6218766381268566, "grad_norm": 0.632026311955022, "learning_rate": 4.7617642543390186e-07, "loss": 0.11934021711349488, "step": 30010 }, { "epoch": 2.6223134719552683, "grad_norm": 0.6060437425645537, "learning_rate": 4.7509459236571186e-07, "loss": 0.10838896036148071, "step": 30015 }, { "epoch": 2.62275030578368, "grad_norm": 0.5101688876361878, "learning_rate": 4.740139283119183e-07, "loss": 0.09815551638603211, "step": 30020 }, { "epoch": 2.6231871396120914, "grad_norm": 0.475659743310552, "learning_rate": 4.7293443355171054e-07, "loss": 0.10191966295242309, "step": 30025 }, { "epoch": 2.623623973440503, "grad_norm": 0.5335849965049821, "learning_rate": 4.718561083639767e-07, "loss": 0.09516454935073852, "step": 30030 }, { "epoch": 2.624060807268915, "grad_norm": 0.5253195742152177, "learning_rate": 4.707789530273055e-07, "loss": 0.0758946418762207, "step": 30035 }, { "epoch": 2.6244976410973266, "grad_norm": 0.5230363541435531, "learning_rate": 4.697029678199794e-07, "loss": 0.07903776168823243, "step": 30040 }, { "epoch": 2.6249344749257384, "grad_norm": 0.575119702310121, "learning_rate": 4.686281530199832e-07, "loss": 0.09726915359497071, "step": 30045 }, { "epoch": 2.6253713087541497, "grad_norm": 0.5634935344950066, "learning_rate": 4.675545089049921e-07, "loss": 0.12047429084777832, "step": 30050 }, { "epoch": 2.6258081425825615, "grad_norm": 0.5091959539141985, "learning_rate": 4.664820357523869e-07, "loss": 0.08488472104072571, "step": 30055 }, { "epoch": 2.626244976410973, "grad_norm": 0.6899286571805042, "learning_rate": 4.6541073383924e-07, "loss": 0.0947264313697815, "step": 30060 }, { "epoch": 2.626681810239385, "grad_norm": 0.5223014817243806, "learning_rate": 4.643406034423253e-07, "loss": 0.08340727090835572, "step": 30065 }, { "epoch": 2.6271186440677967, "grad_norm": 0.5956356204811277, "learning_rate": 4.632716448381114e-07, "loss": 0.09355921745300293, "step": 30070 }, { "epoch": 2.627555477896208, "grad_norm": 0.5392157036361869, "learning_rate": 4.6220385830276326e-07, "loss": 0.104903244972229, "step": 30075 }, { "epoch": 2.6279923117246202, "grad_norm": 0.6811266285917639, "learning_rate": 4.611372441121492e-07, "loss": 0.09308810234069824, "step": 30080 }, { "epoch": 2.6284291455530315, "grad_norm": 0.5803783554786842, "learning_rate": 4.6007180254182494e-07, "loss": 0.11181552410125732, "step": 30085 }, { "epoch": 2.6288659793814433, "grad_norm": 0.5709865980220469, "learning_rate": 4.5900753386705186e-07, "loss": 0.09668439030647277, "step": 30090 }, { "epoch": 2.629302813209855, "grad_norm": 0.608612515988541, "learning_rate": 4.5794443836278293e-07, "loss": 0.08774285316467285, "step": 30095 }, { "epoch": 2.629739647038267, "grad_norm": 0.5616401181603332, "learning_rate": 4.568825163036722e-07, "loss": 0.07293189764022827, "step": 30100 }, { "epoch": 2.6301764808666785, "grad_norm": 0.5367867782593237, "learning_rate": 4.558217679640664e-07, "loss": 0.12179832458496094, "step": 30105 }, { "epoch": 2.63061331469509, "grad_norm": 0.6083726366407003, "learning_rate": 4.5476219361801355e-07, "loss": 0.10060386657714844, "step": 30110 }, { "epoch": 2.6310501485235016, "grad_norm": 0.5220667312790876, "learning_rate": 4.53703793539253e-07, "loss": 0.10577540397644043, "step": 30115 }, { "epoch": 2.6314869823519134, "grad_norm": 0.5626304719713913, "learning_rate": 4.526465680012254e-07, "loss": 0.08109558820724487, "step": 30120 }, { "epoch": 2.631923816180325, "grad_norm": 0.5148346517830021, "learning_rate": 4.515905172770663e-07, "loss": 0.08253644704818726, "step": 30125 }, { "epoch": 2.632360650008737, "grad_norm": 0.6169696200573919, "learning_rate": 4.505356416396056e-07, "loss": 0.0904824674129486, "step": 30130 }, { "epoch": 2.632797483837148, "grad_norm": 0.5527197850362089, "learning_rate": 4.4948194136137435e-07, "loss": 0.09602872729301452, "step": 30135 }, { "epoch": 2.63323431766556, "grad_norm": 0.5603292055537055, "learning_rate": 4.4842941671459416e-07, "loss": 0.09635907411575317, "step": 30140 }, { "epoch": 2.6336711514939717, "grad_norm": 0.5192724595885856, "learning_rate": 4.473780679711903e-07, "loss": 0.09440885782241822, "step": 30145 }, { "epoch": 2.6341079853223834, "grad_norm": 0.5965387610205397, "learning_rate": 4.46327895402775e-07, "loss": 0.08580070734024048, "step": 30150 }, { "epoch": 2.634544819150795, "grad_norm": 0.7782493329090515, "learning_rate": 4.452788992806645e-07, "loss": 0.064521324634552, "step": 30155 }, { "epoch": 2.6349816529792065, "grad_norm": 0.6277729466588285, "learning_rate": 4.442310798758664e-07, "loss": 0.09095596075057984, "step": 30160 }, { "epoch": 2.6354184868076183, "grad_norm": 0.500419165127476, "learning_rate": 4.4318443745908823e-07, "loss": 0.07770921587944031, "step": 30165 }, { "epoch": 2.63585532063603, "grad_norm": 0.5958860099876516, "learning_rate": 4.421389723007302e-07, "loss": 0.09380267858505249, "step": 30170 }, { "epoch": 2.6362921544644418, "grad_norm": 0.7043562092831557, "learning_rate": 4.410946846708891e-07, "loss": 0.09307695627212524, "step": 30175 }, { "epoch": 2.6367289882928535, "grad_norm": 0.48970323452505005, "learning_rate": 4.4005157483935747e-07, "loss": 0.0801745593547821, "step": 30180 }, { "epoch": 2.637165822121265, "grad_norm": 0.6218187795453795, "learning_rate": 4.390096430756241e-07, "loss": 0.10461238622665406, "step": 30185 }, { "epoch": 2.6376026559496766, "grad_norm": 0.5045812622528567, "learning_rate": 4.3796888964887376e-07, "loss": 0.091182541847229, "step": 30190 }, { "epoch": 2.6380394897780883, "grad_norm": 0.47328267208479025, "learning_rate": 4.3692931482798575e-07, "loss": 0.08209292888641358, "step": 30195 }, { "epoch": 2.6384763236065, "grad_norm": 0.5161829759813016, "learning_rate": 4.358909188815363e-07, "loss": 0.09956138134002686, "step": 30200 }, { "epoch": 2.638913157434912, "grad_norm": 0.6094359347067518, "learning_rate": 4.348537020777954e-07, "loss": 0.0856728196144104, "step": 30205 }, { "epoch": 2.6393499912633236, "grad_norm": 0.49813433112613265, "learning_rate": 4.338176646847292e-07, "loss": 0.06495946049690246, "step": 30210 }, { "epoch": 2.6397868250917353, "grad_norm": 0.5611898602778149, "learning_rate": 4.327828069699985e-07, "loss": 0.08622887134552001, "step": 30215 }, { "epoch": 2.6402236589201467, "grad_norm": 0.5571564635580226, "learning_rate": 4.317491292009618e-07, "loss": 0.09256120920181274, "step": 30220 }, { "epoch": 2.6406604927485584, "grad_norm": 0.46320100430336236, "learning_rate": 4.3071663164466925e-07, "loss": 0.08094390630722045, "step": 30225 }, { "epoch": 2.64109732657697, "grad_norm": 0.5344346531989923, "learning_rate": 4.2968531456786754e-07, "loss": 0.09067380428314209, "step": 30230 }, { "epoch": 2.641534160405382, "grad_norm": 0.7333638948102749, "learning_rate": 4.286551782370008e-07, "loss": 0.09672646522521973, "step": 30235 }, { "epoch": 2.6419709942337937, "grad_norm": 0.5619331489154404, "learning_rate": 4.2762622291820376e-07, "loss": 0.1064618706703186, "step": 30240 }, { "epoch": 2.642407828062205, "grad_norm": 0.5868685898640968, "learning_rate": 4.265984488773089e-07, "loss": 0.1131967544555664, "step": 30245 }, { "epoch": 2.6428446618906167, "grad_norm": 0.5595330011031371, "learning_rate": 4.255718563798422e-07, "loss": 0.09588817358016968, "step": 30250 }, { "epoch": 2.6432814957190285, "grad_norm": 0.8034891440374686, "learning_rate": 4.245464456910264e-07, "loss": 0.07523235082626342, "step": 30255 }, { "epoch": 2.6437183295474402, "grad_norm": 0.4780958751510987, "learning_rate": 4.235222170757758e-07, "loss": 0.09950857162475586, "step": 30260 }, { "epoch": 2.644155163375852, "grad_norm": 0.5293556224845511, "learning_rate": 4.224991707987025e-07, "loss": 0.0682432472705841, "step": 30265 }, { "epoch": 2.6445919972042633, "grad_norm": 0.5095591107302601, "learning_rate": 4.214773071241113e-07, "loss": 0.07659857273101807, "step": 30270 }, { "epoch": 2.645028831032675, "grad_norm": 0.693772268936758, "learning_rate": 4.204566263160015e-07, "loss": 0.07381681203842164, "step": 30275 }, { "epoch": 2.645465664861087, "grad_norm": 0.5640529371705868, "learning_rate": 4.194371286380672e-07, "loss": 0.11688048839569092, "step": 30280 }, { "epoch": 2.6459024986894986, "grad_norm": 0.5319363184818818, "learning_rate": 4.18418814353696e-07, "loss": 0.09015856981277466, "step": 30285 }, { "epoch": 2.6463393325179103, "grad_norm": 0.6396353504394438, "learning_rate": 4.17401683725972e-07, "loss": 0.08939541578292846, "step": 30290 }, { "epoch": 2.6467761663463216, "grad_norm": 0.5373978790661905, "learning_rate": 4.1638573701767046e-07, "loss": 0.07914016246795655, "step": 30295 }, { "epoch": 2.6472130001747334, "grad_norm": 0.5440845889720617, "learning_rate": 4.1537097449126485e-07, "loss": 0.0912954866886139, "step": 30300 }, { "epoch": 2.647649834003145, "grad_norm": 0.5675823633063082, "learning_rate": 4.1435739640891706e-07, "loss": 0.10268619060516357, "step": 30305 }, { "epoch": 2.648086667831557, "grad_norm": 0.5829459425551669, "learning_rate": 4.133450030324887e-07, "loss": 0.08175298571586609, "step": 30310 }, { "epoch": 2.6485235016599686, "grad_norm": 0.6104448667803747, "learning_rate": 4.1233379462353064e-07, "loss": 0.09801883101463318, "step": 30315 }, { "epoch": 2.64896033548838, "grad_norm": 0.49836237108652537, "learning_rate": 4.113237714432916e-07, "loss": 0.10122638940811157, "step": 30320 }, { "epoch": 2.649397169316792, "grad_norm": 0.6933603871327155, "learning_rate": 4.1031493375271125e-07, "loss": 0.07960072755813599, "step": 30325 }, { "epoch": 2.6498340031452035, "grad_norm": 0.6573324764538377, "learning_rate": 4.093072818124233e-07, "loss": 0.09355853199958801, "step": 30330 }, { "epoch": 2.650270836973615, "grad_norm": 0.5893884080530477, "learning_rate": 4.083008158827584e-07, "loss": 0.0721893310546875, "step": 30335 }, { "epoch": 2.650707670802027, "grad_norm": 0.4588793610641721, "learning_rate": 4.0729553622373407e-07, "loss": 0.0806196689605713, "step": 30340 }, { "epoch": 2.6511445046304387, "grad_norm": 0.6887825489570362, "learning_rate": 4.062914430950693e-07, "loss": 0.10082749128341675, "step": 30345 }, { "epoch": 2.6515813384588505, "grad_norm": 0.5436030209910296, "learning_rate": 4.052885367561693e-07, "loss": 0.08390822410583496, "step": 30350 }, { "epoch": 2.6520181722872618, "grad_norm": 0.5387937041541518, "learning_rate": 4.042868174661391e-07, "loss": 0.10773929357528686, "step": 30355 }, { "epoch": 2.6524550061156735, "grad_norm": 0.6456094815569466, "learning_rate": 4.0328628548377157e-07, "loss": 0.09628872871398926, "step": 30360 }, { "epoch": 2.6528918399440853, "grad_norm": 0.8413359812655319, "learning_rate": 4.022869410675584e-07, "loss": 0.07958228588104248, "step": 30365 }, { "epoch": 2.653328673772497, "grad_norm": 0.42979129078648276, "learning_rate": 4.0128878447567745e-07, "loss": 0.08668659925460816, "step": 30370 }, { "epoch": 2.653765507600909, "grad_norm": 0.6374917486950878, "learning_rate": 4.002918159660063e-07, "loss": 0.12217180728912354, "step": 30375 }, { "epoch": 2.65420234142932, "grad_norm": 0.6202526984563261, "learning_rate": 3.992960357961123e-07, "loss": 0.10858255624771118, "step": 30380 }, { "epoch": 2.654639175257732, "grad_norm": 0.5929900859730811, "learning_rate": 3.9830144422325524e-07, "loss": 0.08177794814109803, "step": 30385 }, { "epoch": 2.6550760090861436, "grad_norm": 0.6841988925547184, "learning_rate": 3.9730804150439064e-07, "loss": 0.0808020830154419, "step": 30390 }, { "epoch": 2.6555128429145554, "grad_norm": 0.48229127563484986, "learning_rate": 3.963158278961632e-07, "loss": 0.10930674076080323, "step": 30395 }, { "epoch": 2.655949676742967, "grad_norm": 0.49136256579470194, "learning_rate": 3.9532480365491563e-07, "loss": 0.10669001340866088, "step": 30400 }, { "epoch": 2.6563865105713784, "grad_norm": 0.5976194615715825, "learning_rate": 3.9433496903667656e-07, "loss": 0.08876268863677979, "step": 30405 }, { "epoch": 2.65682334439979, "grad_norm": 0.554385139235163, "learning_rate": 3.93346324297173e-07, "loss": 0.09387147426605225, "step": 30410 }, { "epoch": 2.657260178228202, "grad_norm": 0.6065141211491805, "learning_rate": 3.9235886969182123e-07, "loss": 0.07130571603775024, "step": 30415 }, { "epoch": 2.6576970120566137, "grad_norm": 0.6507413055153141, "learning_rate": 3.9137260547573274e-07, "loss": 0.09991025328636169, "step": 30420 }, { "epoch": 2.6581338458850254, "grad_norm": 0.5062144003229924, "learning_rate": 3.903875319037098e-07, "loss": 0.0901574969291687, "step": 30425 }, { "epoch": 2.6585706797134367, "grad_norm": 0.7102000833722352, "learning_rate": 3.894036492302461e-07, "loss": 0.08054698705673217, "step": 30430 }, { "epoch": 2.659007513541849, "grad_norm": 0.4916909520855698, "learning_rate": 3.884209577095294e-07, "loss": 0.10597939491271972, "step": 30435 }, { "epoch": 2.6594443473702603, "grad_norm": 0.6360658904053905, "learning_rate": 3.874394575954382e-07, "loss": 0.08355979323387146, "step": 30440 }, { "epoch": 2.659881181198672, "grad_norm": 0.6367393024885651, "learning_rate": 3.864591491415465e-07, "loss": 0.07762210965156555, "step": 30445 }, { "epoch": 2.6603180150270838, "grad_norm": 0.5737787237917337, "learning_rate": 3.8548003260111545e-07, "loss": 0.08280155062675476, "step": 30450 }, { "epoch": 2.6607548488554955, "grad_norm": 0.4862461202079956, "learning_rate": 3.845021082271028e-07, "loss": 0.10497759580612183, "step": 30455 }, { "epoch": 2.6611916826839073, "grad_norm": 0.5656209172200315, "learning_rate": 3.8352537627215635e-07, "loss": 0.10605032444000244, "step": 30460 }, { "epoch": 2.6616285165123186, "grad_norm": 0.5949304942492439, "learning_rate": 3.825498369886149e-07, "loss": 0.10665524005889893, "step": 30465 }, { "epoch": 2.6620653503407303, "grad_norm": 0.6040360641435357, "learning_rate": 3.815754906285102e-07, "loss": 0.09885191321372985, "step": 30470 }, { "epoch": 2.662502184169142, "grad_norm": 0.624030207582018, "learning_rate": 3.8060233744356634e-07, "loss": 0.07950292825698853, "step": 30475 }, { "epoch": 2.662939017997554, "grad_norm": 0.5595681313491148, "learning_rate": 3.796303776851984e-07, "loss": 0.08078542947769166, "step": 30480 }, { "epoch": 2.6633758518259656, "grad_norm": 0.6755281424775667, "learning_rate": 3.786596116045127e-07, "loss": 0.07637009620666504, "step": 30485 }, { "epoch": 2.663812685654377, "grad_norm": 0.5915587999029595, "learning_rate": 3.7769003945230863e-07, "loss": 0.10091859102249146, "step": 30490 }, { "epoch": 2.6642495194827887, "grad_norm": 0.5679757375067636, "learning_rate": 3.767216614790764e-07, "loss": 0.09770231246948242, "step": 30495 }, { "epoch": 2.6646863533112004, "grad_norm": 0.5878288447046972, "learning_rate": 3.757544779349964e-07, "loss": 0.10716650485992432, "step": 30500 }, { "epoch": 2.665123187139612, "grad_norm": 0.5149825287646125, "learning_rate": 3.747884890699416e-07, "loss": 0.09614900350570679, "step": 30505 }, { "epoch": 2.665560020968024, "grad_norm": 0.5546886105981559, "learning_rate": 3.7382369513347726e-07, "loss": 0.10089482069015503, "step": 30510 }, { "epoch": 2.6659968547964352, "grad_norm": 0.6245951930050831, "learning_rate": 3.72860096374858e-07, "loss": 0.09166674017906189, "step": 30515 }, { "epoch": 2.666433688624847, "grad_norm": 0.7168753084516807, "learning_rate": 3.718976930430318e-07, "loss": 0.06692981123924255, "step": 30520 }, { "epoch": 2.6668705224532587, "grad_norm": 0.5589247622576462, "learning_rate": 3.709364853866365e-07, "loss": 0.10722229480743409, "step": 30525 }, { "epoch": 2.6673073562816705, "grad_norm": 0.484551356975561, "learning_rate": 3.699764736540007e-07, "loss": 0.09303872585296631, "step": 30530 }, { "epoch": 2.6677441901100822, "grad_norm": 0.5271551657186003, "learning_rate": 3.690176580931443e-07, "loss": 0.09128753542900085, "step": 30535 }, { "epoch": 2.6681810239384935, "grad_norm": 0.5165196320421277, "learning_rate": 3.68060038951778e-07, "loss": 0.08451687097549439, "step": 30540 }, { "epoch": 2.6686178577669057, "grad_norm": 0.5265023965866373, "learning_rate": 3.6710361647730506e-07, "loss": 0.10171107053756714, "step": 30545 }, { "epoch": 2.669054691595317, "grad_norm": 0.571339449268467, "learning_rate": 3.661483909168168e-07, "loss": 0.08437812328338623, "step": 30550 }, { "epoch": 2.669491525423729, "grad_norm": 0.59899505705743, "learning_rate": 3.651943625170995e-07, "loss": 0.078677898645401, "step": 30555 }, { "epoch": 2.6699283592521406, "grad_norm": 0.5591774150985839, "learning_rate": 3.642415315246239e-07, "loss": 0.07362396121025086, "step": 30560 }, { "epoch": 2.6703651930805523, "grad_norm": 0.6069814117761487, "learning_rate": 3.632898981855576e-07, "loss": 0.09860095977783204, "step": 30565 }, { "epoch": 2.670802026908964, "grad_norm": 0.5615802246470939, "learning_rate": 3.623394627457538e-07, "loss": 0.08239195346832276, "step": 30570 }, { "epoch": 2.6712388607373754, "grad_norm": 0.5455552387099006, "learning_rate": 3.6139022545076177e-07, "loss": 0.07491078376770019, "step": 30575 }, { "epoch": 2.671675694565787, "grad_norm": 0.468434432362022, "learning_rate": 3.6044218654581575e-07, "loss": 0.06889870166778564, "step": 30580 }, { "epoch": 2.672112528394199, "grad_norm": 0.5967722806740804, "learning_rate": 3.5949534627584217e-07, "loss": 0.07969180345535279, "step": 30585 }, { "epoch": 2.6725493622226106, "grad_norm": 0.6442165756454673, "learning_rate": 3.585497048854608e-07, "loss": 0.08542297482490539, "step": 30590 }, { "epoch": 2.6729861960510224, "grad_norm": 0.4882046710125144, "learning_rate": 3.576052626189763e-07, "loss": 0.07827746272087097, "step": 30595 }, { "epoch": 2.6734230298794337, "grad_norm": 0.7962516046900199, "learning_rate": 3.566620197203885e-07, "loss": 0.08020737171173095, "step": 30600 }, { "epoch": 2.6738598637078455, "grad_norm": 0.6648903612534058, "learning_rate": 3.5571997643338407e-07, "loss": 0.0917893648147583, "step": 30605 }, { "epoch": 2.674296697536257, "grad_norm": 0.5896645609259449, "learning_rate": 3.547791330013417e-07, "loss": 0.08842645883560181, "step": 30610 }, { "epoch": 2.674733531364669, "grad_norm": 0.5534536931712769, "learning_rate": 3.538394896673281e-07, "loss": 0.10309650897979736, "step": 30615 }, { "epoch": 2.6751703651930807, "grad_norm": 0.5829375315191069, "learning_rate": 3.5290104667410394e-07, "loss": 0.09215772151947021, "step": 30620 }, { "epoch": 2.675607199021492, "grad_norm": 0.6652459785503587, "learning_rate": 3.5196380426411326e-07, "loss": 0.08906949758529663, "step": 30625 }, { "epoch": 2.6760440328499038, "grad_norm": 0.6541736056136286, "learning_rate": 3.510277626794967e-07, "loss": 0.08488640785217286, "step": 30630 }, { "epoch": 2.6764808666783155, "grad_norm": 0.6571488245117951, "learning_rate": 3.500929221620808e-07, "loss": 0.08279542326927185, "step": 30635 }, { "epoch": 2.6769177005067273, "grad_norm": 0.5059467070932323, "learning_rate": 3.491592829533813e-07, "loss": 0.09349939227104187, "step": 30640 }, { "epoch": 2.677354534335139, "grad_norm": 0.671469264264946, "learning_rate": 3.4822684529460635e-07, "loss": 0.11162691116333008, "step": 30645 }, { "epoch": 2.6777913681635503, "grad_norm": 0.48870905172399204, "learning_rate": 3.4729560942665166e-07, "loss": 0.07778352499008179, "step": 30650 }, { "epoch": 2.678228201991962, "grad_norm": 0.5730649362342368, "learning_rate": 3.463655755901052e-07, "loss": 0.07094618082046508, "step": 30655 }, { "epoch": 2.678665035820374, "grad_norm": 0.7245828675260487, "learning_rate": 3.4543674402523877e-07, "loss": 0.08254964947700501, "step": 30660 }, { "epoch": 2.6791018696487856, "grad_norm": 0.5548263452337833, "learning_rate": 3.4450911497201924e-07, "loss": 0.06554520130157471, "step": 30665 }, { "epoch": 2.6795387034771974, "grad_norm": 0.501924644174538, "learning_rate": 3.435826886700994e-07, "loss": 0.07879834175109864, "step": 30670 }, { "epoch": 2.679975537305609, "grad_norm": 0.5182247298272125, "learning_rate": 3.4265746535882384e-07, "loss": 0.11457507610321045, "step": 30675 }, { "epoch": 2.680412371134021, "grad_norm": 0.624720397040364, "learning_rate": 3.417334452772242e-07, "loss": 0.08704074621200561, "step": 30680 }, { "epoch": 2.680849204962432, "grad_norm": 0.5404004751522674, "learning_rate": 3.408106286640228e-07, "loss": 0.11014857292175292, "step": 30685 }, { "epoch": 2.681286038790844, "grad_norm": 0.49796428990996994, "learning_rate": 3.3988901575762955e-07, "loss": 0.0746668517589569, "step": 30690 }, { "epoch": 2.6817228726192557, "grad_norm": 0.5310189539003399, "learning_rate": 3.3896860679614395e-07, "loss": 0.09258973598480225, "step": 30695 }, { "epoch": 2.6821597064476674, "grad_norm": 0.5530097882590743, "learning_rate": 3.380494020173558e-07, "loss": 0.08247168064117431, "step": 30700 }, { "epoch": 2.682596540276079, "grad_norm": 0.5603201119881385, "learning_rate": 3.371314016587418e-07, "loss": 0.08660242557525635, "step": 30705 }, { "epoch": 2.6830333741044905, "grad_norm": 0.577114174759106, "learning_rate": 3.362146059574689e-07, "loss": 0.10469776391983032, "step": 30710 }, { "epoch": 2.6834702079329023, "grad_norm": 0.5753175519159498, "learning_rate": 3.352990151503926e-07, "loss": 0.09773094058036805, "step": 30715 }, { "epoch": 2.683907041761314, "grad_norm": 0.5288616471585148, "learning_rate": 3.3438462947405603e-07, "loss": 0.07167980670928956, "step": 30720 }, { "epoch": 2.6843438755897258, "grad_norm": 0.494213737998395, "learning_rate": 3.334714491646912e-07, "loss": 0.08513784408569336, "step": 30725 }, { "epoch": 2.6847807094181375, "grad_norm": 0.5007922694256224, "learning_rate": 3.325594744582206e-07, "loss": 0.10199837684631348, "step": 30730 }, { "epoch": 2.685217543246549, "grad_norm": 0.502128397482951, "learning_rate": 3.3164870559025395e-07, "loss": 0.08382900357246399, "step": 30735 }, { "epoch": 2.6856543770749606, "grad_norm": 0.5809029641837457, "learning_rate": 3.307391427960882e-07, "loss": 0.10227298736572266, "step": 30740 }, { "epoch": 2.6860912109033723, "grad_norm": 0.5582607517425432, "learning_rate": 3.298307863107109e-07, "loss": 0.09355493783950805, "step": 30745 }, { "epoch": 2.686528044731784, "grad_norm": 0.6358013893634199, "learning_rate": 3.28923636368797e-07, "loss": 0.07725682258605956, "step": 30750 }, { "epoch": 2.686964878560196, "grad_norm": 0.5906235730375856, "learning_rate": 3.280176932047091e-07, "loss": 0.09180769920349122, "step": 30755 }, { "epoch": 2.687401712388607, "grad_norm": 0.6831569680206725, "learning_rate": 3.2711295705249836e-07, "loss": 0.10631489753723145, "step": 30760 }, { "epoch": 2.687838546217019, "grad_norm": 0.640382971428402, "learning_rate": 3.262094281459055e-07, "loss": 0.07720654010772705, "step": 30765 }, { "epoch": 2.6882753800454307, "grad_norm": 0.407496468225933, "learning_rate": 3.253071067183572e-07, "loss": 0.08446028232574462, "step": 30770 }, { "epoch": 2.6887122138738424, "grad_norm": 0.5698507518123379, "learning_rate": 3.244059930029703e-07, "loss": 0.10323691368103027, "step": 30775 }, { "epoch": 2.689149047702254, "grad_norm": 0.4517563096016157, "learning_rate": 3.235060872325474e-07, "loss": 0.08410818576812744, "step": 30780 }, { "epoch": 2.6895858815306655, "grad_norm": 0.5749125193744998, "learning_rate": 3.226073896395815e-07, "loss": 0.10658464431762696, "step": 30785 }, { "epoch": 2.6900227153590777, "grad_norm": 0.615975263641042, "learning_rate": 3.2170990045625074e-07, "loss": 0.11452791690826417, "step": 30790 }, { "epoch": 2.690459549187489, "grad_norm": 0.5535758214831581, "learning_rate": 3.208136199144224e-07, "loss": 0.08057512640953064, "step": 30795 }, { "epoch": 2.6908963830159007, "grad_norm": 0.4926782370656787, "learning_rate": 3.199185482456535e-07, "loss": 0.08116253018379212, "step": 30800 }, { "epoch": 2.6913332168443125, "grad_norm": 0.632677776467444, "learning_rate": 3.1902468568118406e-07, "loss": 0.0971608281135559, "step": 30805 }, { "epoch": 2.6917700506727242, "grad_norm": 0.5472553920824637, "learning_rate": 3.181320324519477e-07, "loss": 0.08246182799339294, "step": 30810 }, { "epoch": 2.692206884501136, "grad_norm": 0.6167207112428396, "learning_rate": 3.172405887885588e-07, "loss": 0.07705953121185302, "step": 30815 }, { "epoch": 2.6926437183295473, "grad_norm": 0.6332829400400708, "learning_rate": 3.1635035492132595e-07, "loss": 0.08696420192718506, "step": 30820 }, { "epoch": 2.693080552157959, "grad_norm": 0.5860342492975569, "learning_rate": 3.154613310802396e-07, "loss": 0.1064462423324585, "step": 30825 }, { "epoch": 2.693517385986371, "grad_norm": 0.5586586356369713, "learning_rate": 3.145735174949821e-07, "loss": 0.09333983659744263, "step": 30830 }, { "epoch": 2.6939542198147826, "grad_norm": 0.5555114752574876, "learning_rate": 3.136869143949206e-07, "loss": 0.1132215142250061, "step": 30835 }, { "epoch": 2.6943910536431943, "grad_norm": 0.600437089038994, "learning_rate": 3.128015220091085e-07, "loss": 0.08898652791976928, "step": 30840 }, { "epoch": 2.6948278874716056, "grad_norm": 0.5627436298401527, "learning_rate": 3.1191734056629064e-07, "loss": 0.10955002307891845, "step": 30845 }, { "epoch": 2.6952647213000174, "grad_norm": 0.7779372825693324, "learning_rate": 3.110343702948926e-07, "loss": 0.09459041953086852, "step": 30850 }, { "epoch": 2.695701555128429, "grad_norm": 0.602650064101506, "learning_rate": 3.101526114230341e-07, "loss": 0.08308438062667847, "step": 30855 }, { "epoch": 2.696138388956841, "grad_norm": 0.6029506923271297, "learning_rate": 3.0927206417851684e-07, "loss": 0.07492023706436157, "step": 30860 }, { "epoch": 2.6965752227852526, "grad_norm": 0.6024570310203771, "learning_rate": 3.0839272878883164e-07, "loss": 0.10530967712402343, "step": 30865 }, { "epoch": 2.697012056613664, "grad_norm": 0.512363287158878, "learning_rate": 3.0751460548115564e-07, "loss": 0.08701661825180054, "step": 30870 }, { "epoch": 2.6974488904420757, "grad_norm": 0.5866817533019079, "learning_rate": 3.0663769448235403e-07, "loss": 0.07745715975761414, "step": 30875 }, { "epoch": 2.6978857242704875, "grad_norm": 0.5347536335785341, "learning_rate": 3.057619960189756e-07, "loss": 0.10860984325408936, "step": 30880 }, { "epoch": 2.698322558098899, "grad_norm": 0.8272411593504493, "learning_rate": 3.0488751031725994e-07, "loss": 0.06422856450080872, "step": 30885 }, { "epoch": 2.698759391927311, "grad_norm": 0.6478219775122985, "learning_rate": 3.0401423760313076e-07, "loss": 0.0791778802871704, "step": 30890 }, { "epoch": 2.6991962257557223, "grad_norm": 0.5261027706953704, "learning_rate": 3.031421781021987e-07, "loss": 0.09469285607337952, "step": 30895 }, { "epoch": 2.6996330595841345, "grad_norm": 0.5044073368951699, "learning_rate": 3.022713320397619e-07, "loss": 0.09328755140304565, "step": 30900 }, { "epoch": 2.7000698934125458, "grad_norm": 0.453829739877113, "learning_rate": 3.0140169964080314e-07, "loss": 0.0944032073020935, "step": 30905 }, { "epoch": 2.7005067272409575, "grad_norm": 0.6134958509454156, "learning_rate": 3.0053328112999605e-07, "loss": 0.08452010154724121, "step": 30910 }, { "epoch": 2.7009435610693693, "grad_norm": 0.5613430446716094, "learning_rate": 2.9966607673169336e-07, "loss": 0.10200083255767822, "step": 30915 }, { "epoch": 2.701380394897781, "grad_norm": 0.7221507311882759, "learning_rate": 2.9880008666994144e-07, "loss": 0.10724496841430664, "step": 30920 }, { "epoch": 2.701817228726193, "grad_norm": 0.6606576317557079, "learning_rate": 2.97935311168468e-07, "loss": 0.1093597412109375, "step": 30925 }, { "epoch": 2.702254062554604, "grad_norm": 0.5855865227900062, "learning_rate": 2.970717504506898e-07, "loss": 0.1012219786643982, "step": 30930 }, { "epoch": 2.702690896383016, "grad_norm": 0.6828281200989319, "learning_rate": 2.9620940473970904e-07, "loss": 0.10054174661636353, "step": 30935 }, { "epoch": 2.7031277302114276, "grad_norm": 0.5728045948584414, "learning_rate": 2.953482742583125e-07, "loss": 0.060771751403808597, "step": 30940 }, { "epoch": 2.7035645640398394, "grad_norm": 0.5260129327840274, "learning_rate": 2.944883592289749e-07, "loss": 0.08726648688316345, "step": 30945 }, { "epoch": 2.704001397868251, "grad_norm": 0.762733533534286, "learning_rate": 2.936296598738553e-07, "loss": 0.06669214963912964, "step": 30950 }, { "epoch": 2.7044382316966624, "grad_norm": 0.6351765717072996, "learning_rate": 2.927721764148017e-07, "loss": 0.1052415132522583, "step": 30955 }, { "epoch": 2.704875065525074, "grad_norm": 0.5350531304502646, "learning_rate": 2.919159090733431e-07, "loss": 0.09314120411872864, "step": 30960 }, { "epoch": 2.705311899353486, "grad_norm": 0.47199416638301034, "learning_rate": 2.9106085807070025e-07, "loss": 0.0956137716770172, "step": 30965 }, { "epoch": 2.7057487331818977, "grad_norm": 0.6050365956428024, "learning_rate": 2.902070236277743e-07, "loss": 0.1071478009223938, "step": 30970 }, { "epoch": 2.7061855670103094, "grad_norm": 0.6342107789354557, "learning_rate": 2.893544059651554e-07, "loss": 0.09442448616027832, "step": 30975 }, { "epoch": 2.7066224008387207, "grad_norm": 0.5533419874457411, "learning_rate": 2.885030053031174e-07, "loss": 0.08824087381362915, "step": 30980 }, { "epoch": 2.7070592346671325, "grad_norm": 0.672607465041136, "learning_rate": 2.876528218616215e-07, "loss": 0.07772111296653747, "step": 30985 }, { "epoch": 2.7074960684955443, "grad_norm": 0.7794336926541073, "learning_rate": 2.868038558603131e-07, "loss": 0.09488469362258911, "step": 30990 }, { "epoch": 2.707932902323956, "grad_norm": 0.529027646118878, "learning_rate": 2.85956107518523e-07, "loss": 0.10050399303436279, "step": 30995 }, { "epoch": 2.7083697361523678, "grad_norm": 0.5350782678766138, "learning_rate": 2.851095770552692e-07, "loss": 0.08845781683921813, "step": 31000 }, { "epoch": 2.708806569980779, "grad_norm": 0.5193242649028295, "learning_rate": 2.8426426468925293e-07, "loss": 0.0740245223045349, "step": 31005 }, { "epoch": 2.709243403809191, "grad_norm": 0.5596860133228596, "learning_rate": 2.834201706388623e-07, "loss": 0.0906014859676361, "step": 31010 }, { "epoch": 2.7096802376376026, "grad_norm": 0.5197673215991219, "learning_rate": 2.8257729512216837e-07, "loss": 0.0637420654296875, "step": 31015 }, { "epoch": 2.7101170714660143, "grad_norm": 0.6564847370324187, "learning_rate": 2.8173563835693083e-07, "loss": 0.0821247398853302, "step": 31020 }, { "epoch": 2.710553905294426, "grad_norm": 0.6895782497602496, "learning_rate": 2.808952005605914e-07, "loss": 0.07908579111099243, "step": 31025 }, { "epoch": 2.710990739122838, "grad_norm": 0.4851295339252302, "learning_rate": 2.800559819502791e-07, "loss": 0.0795545220375061, "step": 31030 }, { "epoch": 2.7114275729512496, "grad_norm": 0.6536796833118751, "learning_rate": 2.79217982742806e-07, "loss": 0.08221900463104248, "step": 31035 }, { "epoch": 2.711864406779661, "grad_norm": 0.6905710751327563, "learning_rate": 2.783812031546712e-07, "loss": 0.09528670310974122, "step": 31040 }, { "epoch": 2.7123012406080726, "grad_norm": 0.5478294643076144, "learning_rate": 2.775456434020574e-07, "loss": 0.09293038249015809, "step": 31045 }, { "epoch": 2.7127380744364844, "grad_norm": 0.5478165164335209, "learning_rate": 2.767113037008307e-07, "loss": 0.09451179504394532, "step": 31050 }, { "epoch": 2.713174908264896, "grad_norm": 0.6496303170375543, "learning_rate": 2.758781842665459e-07, "loss": 0.09161447882652282, "step": 31055 }, { "epoch": 2.713611742093308, "grad_norm": 0.6284047907446833, "learning_rate": 2.750462853144392e-07, "loss": 0.08185819387435914, "step": 31060 }, { "epoch": 2.714048575921719, "grad_norm": 0.43978175039543127, "learning_rate": 2.7421560705943374e-07, "loss": 0.11450655460357666, "step": 31065 }, { "epoch": 2.714485409750131, "grad_norm": 0.6896086735785745, "learning_rate": 2.7338614971613443e-07, "loss": 0.09334888458251953, "step": 31070 }, { "epoch": 2.7149222435785427, "grad_norm": 0.5215864216167189, "learning_rate": 2.725579134988343e-07, "loss": 0.0835354506969452, "step": 31075 }, { "epoch": 2.7153590774069545, "grad_norm": 0.6135313709395007, "learning_rate": 2.7173089862150724e-07, "loss": 0.09477173686027526, "step": 31080 }, { "epoch": 2.7157959112353662, "grad_norm": 0.6907049653333743, "learning_rate": 2.709051052978151e-07, "loss": 0.07837042808532715, "step": 31085 }, { "epoch": 2.7162327450637775, "grad_norm": 1.4462699281040532, "learning_rate": 2.7008053374110223e-07, "loss": 0.08594545722007751, "step": 31090 }, { "epoch": 2.7166695788921893, "grad_norm": 0.49755754731122886, "learning_rate": 2.692571841643965e-07, "loss": 0.07670165896415711, "step": 31095 }, { "epoch": 2.717106412720601, "grad_norm": 0.6053126575576727, "learning_rate": 2.684350567804128e-07, "loss": 0.08917635083198547, "step": 31100 }, { "epoch": 2.717543246549013, "grad_norm": 0.5703324875047793, "learning_rate": 2.6761415180154793e-07, "loss": 0.10775580406188964, "step": 31105 }, { "epoch": 2.7179800803774246, "grad_norm": 0.6556685399511574, "learning_rate": 2.667944694398833e-07, "loss": 0.08826665878295899, "step": 31110 }, { "epoch": 2.718416914205836, "grad_norm": 0.4884558196729103, "learning_rate": 2.6597600990718455e-07, "loss": 0.09048320055007934, "step": 31115 }, { "epoch": 2.7188537480342476, "grad_norm": 0.5774932701760738, "learning_rate": 2.65158773414903e-07, "loss": 0.11635885238647461, "step": 31120 }, { "epoch": 2.7192905818626594, "grad_norm": 0.6181114684207324, "learning_rate": 2.6434276017417046e-07, "loss": 0.09129197597503662, "step": 31125 }, { "epoch": 2.719727415691071, "grad_norm": 0.5651329131588718, "learning_rate": 2.6352797039580767e-07, "loss": 0.0940447449684143, "step": 31130 }, { "epoch": 2.720164249519483, "grad_norm": 0.589457289028189, "learning_rate": 2.627144042903135e-07, "loss": 0.10039975643157958, "step": 31135 }, { "epoch": 2.720601083347894, "grad_norm": 0.5511792850098988, "learning_rate": 2.619020620678764e-07, "loss": 0.0879121482372284, "step": 31140 }, { "epoch": 2.7210379171763064, "grad_norm": 0.49463433150890906, "learning_rate": 2.610909439383641e-07, "loss": 0.09238718152046203, "step": 31145 }, { "epoch": 2.7214747510047177, "grad_norm": 0.5805865524177325, "learning_rate": 2.6028105011132956e-07, "loss": 0.11218186616897582, "step": 31150 }, { "epoch": 2.7219115848331294, "grad_norm": 0.5637614787191145, "learning_rate": 2.594723807960114e-07, "loss": 0.07940802574157715, "step": 31155 }, { "epoch": 2.722348418661541, "grad_norm": 0.664358716988828, "learning_rate": 2.586649362013288e-07, "loss": 0.06967593431472778, "step": 31160 }, { "epoch": 2.722785252489953, "grad_norm": 0.6140288177778768, "learning_rate": 2.578587165358876e-07, "loss": 0.09317723512649537, "step": 31165 }, { "epoch": 2.7232220863183647, "grad_norm": 0.5097294233017194, "learning_rate": 2.5705372200797385e-07, "loss": 0.09192831516265869, "step": 31170 }, { "epoch": 2.723658920146776, "grad_norm": 0.5241039351750097, "learning_rate": 2.562499528255596e-07, "loss": 0.08132552504539489, "step": 31175 }, { "epoch": 2.7240957539751878, "grad_norm": 0.6059630051609066, "learning_rate": 2.554474091962988e-07, "loss": 0.10640243291854859, "step": 31180 }, { "epoch": 2.7245325878035995, "grad_norm": 0.5212478433698453, "learning_rate": 2.5464609132753115e-07, "loss": 0.07229781150817871, "step": 31185 }, { "epoch": 2.7249694216320113, "grad_norm": 0.5397082694507487, "learning_rate": 2.5384599942627654e-07, "loss": 0.08658244609832763, "step": 31190 }, { "epoch": 2.725406255460423, "grad_norm": 0.565164182887908, "learning_rate": 2.530471336992402e-07, "loss": 0.10523422956466674, "step": 31195 }, { "epoch": 2.7258430892888343, "grad_norm": 0.5715604056324917, "learning_rate": 2.522494943528092e-07, "loss": 0.09654911160469055, "step": 31200 }, { "epoch": 2.726279923117246, "grad_norm": 0.5527306371262658, "learning_rate": 2.5145308159305605e-07, "loss": 0.0675513505935669, "step": 31205 }, { "epoch": 2.726716756945658, "grad_norm": 0.5165684091373166, "learning_rate": 2.506578956257338e-07, "loss": 0.09201369285583497, "step": 31210 }, { "epoch": 2.7271535907740696, "grad_norm": 0.7779991803668737, "learning_rate": 2.498639366562794e-07, "loss": 0.0733443558216095, "step": 31215 }, { "epoch": 2.7275904246024814, "grad_norm": 0.5225322476082686, "learning_rate": 2.4907120488981417e-07, "loss": 0.08365340828895569, "step": 31220 }, { "epoch": 2.7280272584308927, "grad_norm": 0.611318703084799, "learning_rate": 2.482797005311405e-07, "loss": 0.09602108001708984, "step": 31225 }, { "epoch": 2.7284640922593044, "grad_norm": 0.5100159986475853, "learning_rate": 2.474894237847447e-07, "loss": 0.07453390955924988, "step": 31230 }, { "epoch": 2.728900926087716, "grad_norm": 0.5804941679016691, "learning_rate": 2.467003748547947e-07, "loss": 0.0738178849220276, "step": 31235 }, { "epoch": 2.729337759916128, "grad_norm": 0.6273115948023897, "learning_rate": 2.4591255394514404e-07, "loss": 0.09530776739120483, "step": 31240 }, { "epoch": 2.7297745937445397, "grad_norm": 0.6997211319156879, "learning_rate": 2.451259612593254e-07, "loss": 0.12030577659606934, "step": 31245 }, { "epoch": 2.730211427572951, "grad_norm": 0.5291137314187199, "learning_rate": 2.443405970005558e-07, "loss": 0.09781949520111084, "step": 31250 }, { "epoch": 2.730648261401363, "grad_norm": 0.6005951134509218, "learning_rate": 2.4355646137173673e-07, "loss": 0.09802014231681824, "step": 31255 }, { "epoch": 2.7310850952297745, "grad_norm": 0.566426192063272, "learning_rate": 2.427735545754495e-07, "loss": 0.07755520343780517, "step": 31260 }, { "epoch": 2.7315219290581862, "grad_norm": 0.4329875703696607, "learning_rate": 2.4199187681395895e-07, "loss": 0.08517319560050965, "step": 31265 }, { "epoch": 2.731958762886598, "grad_norm": 0.6065629868779223, "learning_rate": 2.4121142828921195e-07, "loss": 0.11048059463500977, "step": 31270 }, { "epoch": 2.7323955967150098, "grad_norm": 0.5884592449138687, "learning_rate": 2.4043220920283936e-07, "loss": 0.08953272700309753, "step": 31275 }, { "epoch": 2.7328324305434215, "grad_norm": 0.6301933580321046, "learning_rate": 2.396542197561519e-07, "loss": 0.08957962989807129, "step": 31280 }, { "epoch": 2.733269264371833, "grad_norm": 0.5504354914544048, "learning_rate": 2.3887746015014535e-07, "loss": 0.08403231501579285, "step": 31285 }, { "epoch": 2.7337060982002446, "grad_norm": 0.5577488607070595, "learning_rate": 2.3810193058549592e-07, "loss": 0.09426190853118896, "step": 31290 }, { "epoch": 2.7341429320286563, "grad_norm": 0.5354323273266905, "learning_rate": 2.373276312625622e-07, "loss": 0.1038370132446289, "step": 31295 }, { "epoch": 2.734579765857068, "grad_norm": 0.5716628565124445, "learning_rate": 2.365545623813853e-07, "loss": 0.07408911585807801, "step": 31300 }, { "epoch": 2.73501659968548, "grad_norm": 0.5968434197539707, "learning_rate": 2.3578272414168933e-07, "loss": 0.08476885557174682, "step": 31305 }, { "epoch": 2.735453433513891, "grad_norm": 0.5549390999908081, "learning_rate": 2.3501211674287872e-07, "loss": 0.09493061900138855, "step": 31310 }, { "epoch": 2.735890267342303, "grad_norm": 0.5916461785537904, "learning_rate": 2.3424274038404082e-07, "loss": 0.07795498371124268, "step": 31315 }, { "epoch": 2.7363271011707146, "grad_norm": 0.6005268709942193, "learning_rate": 2.334745952639461e-07, "loss": 0.09059919714927674, "step": 31320 }, { "epoch": 2.7367639349991264, "grad_norm": 0.584532147130276, "learning_rate": 2.3270768158104306e-07, "loss": 0.10342342853546142, "step": 31325 }, { "epoch": 2.737200768827538, "grad_norm": 0.5339596265488811, "learning_rate": 2.3194199953346706e-07, "loss": 0.08164491057395935, "step": 31330 }, { "epoch": 2.7376376026559495, "grad_norm": 0.6403192316697256, "learning_rate": 2.3117754931903158e-07, "loss": 0.07854450345039368, "step": 31335 }, { "epoch": 2.738074436484361, "grad_norm": 0.5256311604517144, "learning_rate": 2.304143311352347e-07, "loss": 0.09292222261428833, "step": 31340 }, { "epoch": 2.738511270312773, "grad_norm": 0.5047527319907319, "learning_rate": 2.2965234517925372e-07, "loss": 0.07697880864143372, "step": 31345 }, { "epoch": 2.7389481041411847, "grad_norm": 0.48886405693369545, "learning_rate": 2.2889159164794784e-07, "loss": 0.09980636239051818, "step": 31350 }, { "epoch": 2.7393849379695965, "grad_norm": 0.5981721056598137, "learning_rate": 2.281320707378598e-07, "loss": 0.10346422195434571, "step": 31355 }, { "epoch": 2.739821771798008, "grad_norm": 0.7034333641083848, "learning_rate": 2.273737826452127e-07, "loss": 0.09386699199676514, "step": 31360 }, { "epoch": 2.74025860562642, "grad_norm": 0.6030909218289829, "learning_rate": 2.2661672756591035e-07, "loss": 0.0920262098312378, "step": 31365 }, { "epoch": 2.7406954394548313, "grad_norm": 0.6341390799304126, "learning_rate": 2.2586090569553852e-07, "loss": 0.09732198715209961, "step": 31370 }, { "epoch": 2.741132273283243, "grad_norm": 0.660625314028559, "learning_rate": 2.2510631722936606e-07, "loss": 0.07729239463806152, "step": 31375 }, { "epoch": 2.741569107111655, "grad_norm": 0.5166627142264453, "learning_rate": 2.2435296236233972e-07, "loss": 0.08744374513626099, "step": 31380 }, { "epoch": 2.7420059409400666, "grad_norm": 0.6701178997588264, "learning_rate": 2.2360084128909277e-07, "loss": 0.0928817331790924, "step": 31385 }, { "epoch": 2.7424427747684783, "grad_norm": 0.6563361551872391, "learning_rate": 2.228499542039325e-07, "loss": 0.09944862723350525, "step": 31390 }, { "epoch": 2.7428796085968896, "grad_norm": 0.5285812964717251, "learning_rate": 2.2210030130085492e-07, "loss": 0.09697481989860535, "step": 31395 }, { "epoch": 2.7433164424253014, "grad_norm": 0.472480817999592, "learning_rate": 2.2135188277353114e-07, "loss": 0.08877437114715576, "step": 31400 }, { "epoch": 2.743753276253713, "grad_norm": 0.5020388448516466, "learning_rate": 2.2060469881531765e-07, "loss": 0.0840227723121643, "step": 31405 }, { "epoch": 2.744190110082125, "grad_norm": 0.6081480547893475, "learning_rate": 2.1985874961924946e-07, "loss": 0.08487589359283447, "step": 31410 }, { "epoch": 2.7446269439105366, "grad_norm": 0.5749937291642444, "learning_rate": 2.1911403537804354e-07, "loss": 0.07680763006210327, "step": 31415 }, { "epoch": 2.745063777738948, "grad_norm": 0.4714026307007002, "learning_rate": 2.1837055628409876e-07, "loss": 0.06723449230194092, "step": 31420 }, { "epoch": 2.7455006115673597, "grad_norm": 0.46303270874415114, "learning_rate": 2.176283125294909e-07, "loss": 0.08948523998260498, "step": 31425 }, { "epoch": 2.7459374453957714, "grad_norm": 0.6566258646084474, "learning_rate": 2.1688730430598215e-07, "loss": 0.08179897665977479, "step": 31430 }, { "epoch": 2.746374279224183, "grad_norm": 0.5907673609271467, "learning_rate": 2.1614753180501157e-07, "loss": 0.0970115065574646, "step": 31435 }, { "epoch": 2.746811113052595, "grad_norm": 0.556679218558921, "learning_rate": 2.154089952177013e-07, "loss": 0.09130079746246338, "step": 31440 }, { "epoch": 2.7472479468810063, "grad_norm": 0.5875146982950555, "learning_rate": 2.1467169473485204e-07, "loss": 0.06747710108757018, "step": 31445 }, { "epoch": 2.747684780709418, "grad_norm": 0.5730274890957315, "learning_rate": 2.1393563054694643e-07, "loss": 0.09235283136367797, "step": 31450 }, { "epoch": 2.7481216145378298, "grad_norm": 0.6438580274664147, "learning_rate": 2.1320080284414735e-07, "loss": 0.10695512294769287, "step": 31455 }, { "epoch": 2.7485584483662415, "grad_norm": 0.5822186862471185, "learning_rate": 2.1246721181629904e-07, "loss": 0.0985532283782959, "step": 31460 }, { "epoch": 2.7489952821946533, "grad_norm": 0.5202604925923813, "learning_rate": 2.1173485765292489e-07, "loss": 0.08494274020195007, "step": 31465 }, { "epoch": 2.7494321160230646, "grad_norm": 0.6316648076027565, "learning_rate": 2.1100374054322913e-07, "loss": 0.09714573621749878, "step": 31470 }, { "epoch": 2.7498689498514763, "grad_norm": 0.5623687902351289, "learning_rate": 2.1027386067609846e-07, "loss": 0.09162481427192688, "step": 31475 }, { "epoch": 2.750305783679888, "grad_norm": 0.5103778320695688, "learning_rate": 2.095452182400959e-07, "loss": 0.09625368118286133, "step": 31480 }, { "epoch": 2.7507426175083, "grad_norm": 0.5636112945356043, "learning_rate": 2.0881781342346864e-07, "loss": 0.10626344680786133, "step": 31485 }, { "epoch": 2.7511794513367116, "grad_norm": 0.5954371580236051, "learning_rate": 2.0809164641414138e-07, "loss": 0.10411324501037597, "step": 31490 }, { "epoch": 2.751616285165123, "grad_norm": 0.5525289133889546, "learning_rate": 2.0736671739972124e-07, "loss": 0.10104010105133057, "step": 31495 }, { "epoch": 2.752053118993535, "grad_norm": 0.5547278166131338, "learning_rate": 2.0664302656749346e-07, "loss": 0.07940205335617065, "step": 31500 }, { "epoch": 2.7524899528219464, "grad_norm": 0.5848761992253356, "learning_rate": 2.0592057410442513e-07, "loss": 0.09705857038497925, "step": 31505 }, { "epoch": 2.752926786650358, "grad_norm": 0.5622091562251454, "learning_rate": 2.0519936019716303e-07, "loss": 0.07421103715896607, "step": 31510 }, { "epoch": 2.75336362047877, "grad_norm": 0.4423730672491565, "learning_rate": 2.0447938503203257e-07, "loss": 0.10011640787124634, "step": 31515 }, { "epoch": 2.7538004543071817, "grad_norm": 0.672368544614174, "learning_rate": 2.0376064879504054e-07, "loss": 0.08145328760147094, "step": 31520 }, { "epoch": 2.7542372881355934, "grad_norm": 0.551383898428844, "learning_rate": 2.0304315167187227e-07, "loss": 0.10091865062713623, "step": 31525 }, { "epoch": 2.7546741219640047, "grad_norm": 0.5868253434916382, "learning_rate": 2.0232689384789617e-07, "loss": 0.10567018985748292, "step": 31530 }, { "epoch": 2.7551109557924165, "grad_norm": 0.6559428409579926, "learning_rate": 2.0161187550815586e-07, "loss": 0.10120912790298461, "step": 31535 }, { "epoch": 2.7555477896208282, "grad_norm": 0.677445626532283, "learning_rate": 2.0089809683737916e-07, "loss": 0.10267341136932373, "step": 31540 }, { "epoch": 2.75598462344924, "grad_norm": 0.5921165868688326, "learning_rate": 2.0018555801997076e-07, "loss": 0.10760549306869507, "step": 31545 }, { "epoch": 2.7564214572776518, "grad_norm": 0.7549510890660626, "learning_rate": 1.9947425924001562e-07, "loss": 0.11139097213745117, "step": 31550 }, { "epoch": 2.756858291106063, "grad_norm": 0.5748492063476384, "learning_rate": 1.9876420068127845e-07, "loss": 0.11981223821640015, "step": 31555 }, { "epoch": 2.757295124934475, "grad_norm": 0.6846506864229748, "learning_rate": 1.9805538252720412e-07, "loss": 0.11372114419937134, "step": 31560 }, { "epoch": 2.7577319587628866, "grad_norm": 0.5975510758561426, "learning_rate": 1.9734780496091677e-07, "loss": 0.08122390508651733, "step": 31565 }, { "epoch": 2.7581687925912983, "grad_norm": 0.6431393133089807, "learning_rate": 1.9664146816521901e-07, "loss": 0.10782473087310791, "step": 31570 }, { "epoch": 2.75860562641971, "grad_norm": 0.6919124899794652, "learning_rate": 1.9593637232259542e-07, "loss": 0.08183257579803467, "step": 31575 }, { "epoch": 2.7590424602481214, "grad_norm": 0.5697543887468264, "learning_rate": 1.9523251761520589e-07, "loss": 0.0929517388343811, "step": 31580 }, { "epoch": 2.759479294076533, "grad_norm": 0.5857809348520424, "learning_rate": 1.9452990422489437e-07, "loss": 0.07828364372253419, "step": 31585 }, { "epoch": 2.759916127904945, "grad_norm": 0.5966203251438863, "learning_rate": 1.9382853233317954e-07, "loss": 0.08334438800811768, "step": 31590 }, { "epoch": 2.7603529617333566, "grad_norm": 0.6071287533256747, "learning_rate": 1.9312840212126426e-07, "loss": 0.08028004765510559, "step": 31595 }, { "epoch": 2.7607897955617684, "grad_norm": 1.4520329131213907, "learning_rate": 1.924295137700255e-07, "loss": 0.10183117389678956, "step": 31600 }, { "epoch": 2.7612266293901797, "grad_norm": 0.5376606024590262, "learning_rate": 1.917318674600238e-07, "loss": 0.0952633261680603, "step": 31605 }, { "epoch": 2.761663463218592, "grad_norm": 0.5419125131638454, "learning_rate": 1.9103546337149558e-07, "loss": 0.10603814125061035, "step": 31610 }, { "epoch": 2.762100297047003, "grad_norm": 0.543203244734301, "learning_rate": 1.9034030168435802e-07, "loss": 0.09658670425415039, "step": 31615 }, { "epoch": 2.762537130875415, "grad_norm": 0.48601957063184037, "learning_rate": 1.8964638257820744e-07, "loss": 0.08354725241661072, "step": 31620 }, { "epoch": 2.7629739647038267, "grad_norm": 0.5334811639418057, "learning_rate": 1.8895370623231713e-07, "loss": 0.09958547353744507, "step": 31625 }, { "epoch": 2.7634107985322385, "grad_norm": 0.6144423842320401, "learning_rate": 1.882622728256428e-07, "loss": 0.10111339092254638, "step": 31630 }, { "epoch": 2.7638476323606502, "grad_norm": 0.49779558466555124, "learning_rate": 1.8757208253681493e-07, "loss": 0.08577991724014282, "step": 31635 }, { "epoch": 2.7642844661890615, "grad_norm": 0.5673714668022796, "learning_rate": 1.868831355441475e-07, "loss": 0.10594276189804078, "step": 31640 }, { "epoch": 2.7647213000174733, "grad_norm": 0.5763101218947098, "learning_rate": 1.8619543202562819e-07, "loss": 0.08432134389877319, "step": 31645 }, { "epoch": 2.765158133845885, "grad_norm": 0.5954825480760351, "learning_rate": 1.8550897215892816e-07, "loss": 0.08764255046844482, "step": 31650 }, { "epoch": 2.765594967674297, "grad_norm": 0.6193046393819451, "learning_rate": 1.848237561213928e-07, "loss": 0.10674822330474854, "step": 31655 }, { "epoch": 2.7660318015027086, "grad_norm": 0.7461009315869285, "learning_rate": 1.841397840900505e-07, "loss": 0.11802105903625489, "step": 31660 }, { "epoch": 2.76646863533112, "grad_norm": 0.5533088696426847, "learning_rate": 1.8345705624160492e-07, "loss": 0.07771182060241699, "step": 31665 }, { "epoch": 2.7669054691595316, "grad_norm": 0.5318621989919903, "learning_rate": 1.8277557275243996e-07, "loss": 0.09389098286628723, "step": 31670 }, { "epoch": 2.7673423029879434, "grad_norm": 0.6385713326158378, "learning_rate": 1.8209533379861867e-07, "loss": 0.09217660427093506, "step": 31675 }, { "epoch": 2.767779136816355, "grad_norm": 0.5119209973940418, "learning_rate": 1.8141633955587935e-07, "loss": 0.09434523582458496, "step": 31680 }, { "epoch": 2.768215970644767, "grad_norm": 0.5402931334605964, "learning_rate": 1.807385901996428e-07, "loss": 0.10615863800048828, "step": 31685 }, { "epoch": 2.768652804473178, "grad_norm": 0.5296779152098748, "learning_rate": 1.8006208590500508e-07, "loss": 0.09111413359642029, "step": 31690 }, { "epoch": 2.76908963830159, "grad_norm": 0.491421736012301, "learning_rate": 1.7938682684674302e-07, "loss": 0.08747231960296631, "step": 31695 }, { "epoch": 2.7695264721300017, "grad_norm": 0.47985590373473513, "learning_rate": 1.7871281319930934e-07, "loss": 0.0960753858089447, "step": 31700 }, { "epoch": 2.7699633059584134, "grad_norm": 0.8386069130548224, "learning_rate": 1.7804004513683804e-07, "loss": 0.07543301582336426, "step": 31705 }, { "epoch": 2.770400139786825, "grad_norm": 0.58886489550814, "learning_rate": 1.7736852283313734e-07, "loss": 0.09340857863426208, "step": 31710 }, { "epoch": 2.7708369736152365, "grad_norm": 0.524553954135943, "learning_rate": 1.766982464616973e-07, "loss": 0.08241602182388305, "step": 31715 }, { "epoch": 2.7712738074436487, "grad_norm": 0.5248668576795186, "learning_rate": 1.7602921619568392e-07, "loss": 0.09064944386482239, "step": 31720 }, { "epoch": 2.77171064127206, "grad_norm": 0.5774089367426687, "learning_rate": 1.7536143220794167e-07, "loss": 0.09186729192733764, "step": 31725 }, { "epoch": 2.7721474751004718, "grad_norm": 0.566293392525187, "learning_rate": 1.746948946709942e-07, "loss": 0.10608954429626465, "step": 31730 }, { "epoch": 2.7725843089288835, "grad_norm": 0.7202045739294866, "learning_rate": 1.740296037570416e-07, "loss": 0.11055240631103516, "step": 31735 }, { "epoch": 2.7730211427572953, "grad_norm": 0.44269943446677656, "learning_rate": 1.7336555963796243e-07, "loss": 0.08635646104812622, "step": 31740 }, { "epoch": 2.773457976585707, "grad_norm": 0.5958103838258467, "learning_rate": 1.727027624853128e-07, "loss": 0.10101401805877686, "step": 31745 }, { "epoch": 2.7738948104141183, "grad_norm": 0.6702815532173246, "learning_rate": 1.7204121247032857e-07, "loss": 0.07685276269912719, "step": 31750 }, { "epoch": 2.77433164424253, "grad_norm": 0.7645288506491682, "learning_rate": 1.7138090976392076e-07, "loss": 0.09095876216888428, "step": 31755 }, { "epoch": 2.774768478070942, "grad_norm": 0.6024220526118133, "learning_rate": 1.7072185453667956e-07, "loss": 0.08818811178207397, "step": 31760 }, { "epoch": 2.7752053118993536, "grad_norm": 0.5892090507957655, "learning_rate": 1.7006404695887323e-07, "loss": 0.08689465522766113, "step": 31765 }, { "epoch": 2.7756421457277654, "grad_norm": 0.5400292866507093, "learning_rate": 1.6940748720044643e-07, "loss": 0.09535267949104309, "step": 31770 }, { "epoch": 2.7760789795561767, "grad_norm": 0.5534472497963272, "learning_rate": 1.687521754310223e-07, "loss": 0.08788344264030457, "step": 31775 }, { "epoch": 2.7765158133845884, "grad_norm": 0.47236030734530876, "learning_rate": 1.6809811181990042e-07, "loss": 0.09239020347595214, "step": 31780 }, { "epoch": 2.776952647213, "grad_norm": 0.5854606994166642, "learning_rate": 1.6744529653606067e-07, "loss": 0.09645856022834778, "step": 31785 }, { "epoch": 2.777389481041412, "grad_norm": 0.6074792453725756, "learning_rate": 1.66793729748157e-07, "loss": 0.0963017463684082, "step": 31790 }, { "epoch": 2.7778263148698237, "grad_norm": 0.48376518158275084, "learning_rate": 1.6614341162452363e-07, "loss": 0.07178915739059448, "step": 31795 }, { "epoch": 2.778263148698235, "grad_norm": 0.5133709388851992, "learning_rate": 1.6549434233317063e-07, "loss": 0.10412904024124145, "step": 31800 }, { "epoch": 2.7786999825266467, "grad_norm": 0.5372872260339895, "learning_rate": 1.6484652204178552e-07, "loss": 0.10799696445465087, "step": 31805 }, { "epoch": 2.7791368163550585, "grad_norm": 0.6328897540503845, "learning_rate": 1.6419995091773334e-07, "loss": 0.07805588245391845, "step": 31810 }, { "epoch": 2.7795736501834702, "grad_norm": 0.6186780371145566, "learning_rate": 1.6355462912805652e-07, "loss": 0.1034548044204712, "step": 31815 }, { "epoch": 2.780010484011882, "grad_norm": 0.5136102302946772, "learning_rate": 1.629105568394751e-07, "loss": 0.09058102369308471, "step": 31820 }, { "epoch": 2.7804473178402933, "grad_norm": 0.6312244543731803, "learning_rate": 1.6226773421838537e-07, "loss": 0.08456940650939941, "step": 31825 }, { "epoch": 2.780884151668705, "grad_norm": 0.5906551940089476, "learning_rate": 1.6162616143086228e-07, "loss": 0.08138067722320556, "step": 31830 }, { "epoch": 2.781320985497117, "grad_norm": 0.5035325230284677, "learning_rate": 1.6098583864265494e-07, "loss": 0.08652283549308777, "step": 31835 }, { "epoch": 2.7817578193255286, "grad_norm": 0.5812369658828539, "learning_rate": 1.6034676601919263e-07, "loss": 0.10302634239196777, "step": 31840 }, { "epoch": 2.7821946531539403, "grad_norm": 0.5139979826836182, "learning_rate": 1.5970894372558054e-07, "loss": 0.08698607087135315, "step": 31845 }, { "epoch": 2.782631486982352, "grad_norm": 0.5886596769288923, "learning_rate": 1.590723719266013e-07, "loss": 0.09853741526603699, "step": 31850 }, { "epoch": 2.783068320810764, "grad_norm": 0.7747181819964971, "learning_rate": 1.5843705078671224e-07, "loss": 0.08410363793373107, "step": 31855 }, { "epoch": 2.783505154639175, "grad_norm": 0.5568676642837684, "learning_rate": 1.578029804700515e-07, "loss": 0.08708814382553101, "step": 31860 }, { "epoch": 2.783941988467587, "grad_norm": 0.6155247723635617, "learning_rate": 1.5717016114043028e-07, "loss": 0.07729974985122681, "step": 31865 }, { "epoch": 2.7843788222959986, "grad_norm": 0.8986948968559381, "learning_rate": 1.5653859296133888e-07, "loss": 0.1053966760635376, "step": 31870 }, { "epoch": 2.7848156561244104, "grad_norm": 0.5297134011258323, "learning_rate": 1.5590827609594294e-07, "loss": 0.08006788492202759, "step": 31875 }, { "epoch": 2.785252489952822, "grad_norm": 0.8902285474559218, "learning_rate": 1.5527921070708608e-07, "loss": 0.09073786735534668, "step": 31880 }, { "epoch": 2.7856893237812335, "grad_norm": 0.5180043148223854, "learning_rate": 1.5465139695728827e-07, "loss": 0.07600647211074829, "step": 31885 }, { "epoch": 2.786126157609645, "grad_norm": 0.5832970621114202, "learning_rate": 1.5402483500874476e-07, "loss": 0.07449376583099365, "step": 31890 }, { "epoch": 2.786562991438057, "grad_norm": 0.7542325532400215, "learning_rate": 1.5339952502333055e-07, "loss": 0.08495044112205505, "step": 31895 }, { "epoch": 2.7869998252664687, "grad_norm": 0.5646567429528002, "learning_rate": 1.5277546716259307e-07, "loss": 0.0886548638343811, "step": 31900 }, { "epoch": 2.7874366590948805, "grad_norm": 0.4984807278530043, "learning_rate": 1.5215266158776e-07, "loss": 0.07000712156295777, "step": 31905 }, { "epoch": 2.787873492923292, "grad_norm": 0.5945061138392244, "learning_rate": 1.5153110845973206e-07, "loss": 0.10094047784805298, "step": 31910 }, { "epoch": 2.7883103267517035, "grad_norm": 0.5211651985369445, "learning_rate": 1.509108079390903e-07, "loss": 0.07776036858558655, "step": 31915 }, { "epoch": 2.7887471605801153, "grad_norm": 0.48973689074141086, "learning_rate": 1.502917601860887e-07, "loss": 0.0770624339580536, "step": 31920 }, { "epoch": 2.789183994408527, "grad_norm": 0.4733477642444992, "learning_rate": 1.4967396536065882e-07, "loss": 0.08979084491729736, "step": 31925 }, { "epoch": 2.789620828236939, "grad_norm": 0.6007351958535943, "learning_rate": 1.4905742362241017e-07, "loss": 0.07972064018249511, "step": 31930 }, { "epoch": 2.79005766206535, "grad_norm": 0.5310522568794889, "learning_rate": 1.4844213513062478e-07, "loss": 0.08433367013931274, "step": 31935 }, { "epoch": 2.790494495893762, "grad_norm": 0.5592910789753405, "learning_rate": 1.4782810004426495e-07, "loss": 0.08374800086021424, "step": 31940 }, { "epoch": 2.7909313297221736, "grad_norm": 0.6472792840870057, "learning_rate": 1.4721531852196603e-07, "loss": 0.09026480913162231, "step": 31945 }, { "epoch": 2.7913681635505854, "grad_norm": 0.6875421496194281, "learning_rate": 1.4660379072204245e-07, "loss": 0.09098503589630128, "step": 31950 }, { "epoch": 2.791804997378997, "grad_norm": 0.5229540302876265, "learning_rate": 1.4599351680248065e-07, "loss": 0.0816450834274292, "step": 31955 }, { "epoch": 2.7922418312074084, "grad_norm": 0.6630363848587846, "learning_rate": 1.453844969209489e-07, "loss": 0.111454176902771, "step": 31960 }, { "epoch": 2.7926786650358206, "grad_norm": 0.6063831909623826, "learning_rate": 1.4477673123478475e-07, "loss": 0.08850612640380859, "step": 31965 }, { "epoch": 2.793115498864232, "grad_norm": 0.6068718782157573, "learning_rate": 1.4417021990100755e-07, "loss": 0.09079298377037048, "step": 31970 }, { "epoch": 2.7935523326926437, "grad_norm": 0.47786739525919675, "learning_rate": 1.4356496307630917e-07, "loss": 0.11415035724639892, "step": 31975 }, { "epoch": 2.7939891665210554, "grad_norm": 0.5628090590719822, "learning_rate": 1.4296096091705792e-07, "loss": 0.10966777801513672, "step": 31980 }, { "epoch": 2.794426000349467, "grad_norm": 0.5158216620765905, "learning_rate": 1.423582135793e-07, "loss": 0.08945430517196655, "step": 31985 }, { "epoch": 2.794862834177879, "grad_norm": 0.5011141313930396, "learning_rate": 1.417567212187543e-07, "loss": 0.07742555141448974, "step": 31990 }, { "epoch": 2.7952996680062903, "grad_norm": 0.5384766938780224, "learning_rate": 1.4115648399081806e-07, "loss": 0.0784766435623169, "step": 31995 }, { "epoch": 2.795736501834702, "grad_norm": 0.6562360592716808, "learning_rate": 1.4055750205056228e-07, "loss": 0.08240074515342713, "step": 32000 }, { "epoch": 2.7961733356631138, "grad_norm": 1.0471295098296354, "learning_rate": 1.3995977555273598e-07, "loss": 0.09463199377059936, "step": 32005 }, { "epoch": 2.7966101694915255, "grad_norm": 0.6203256114035399, "learning_rate": 1.3936330465176117e-07, "loss": 0.09425488710403443, "step": 32010 }, { "epoch": 2.7970470033199373, "grad_norm": 0.5999817158766589, "learning_rate": 1.3876808950173737e-07, "loss": 0.09787231683731079, "step": 32015 }, { "epoch": 2.7974838371483486, "grad_norm": 0.6407906861461442, "learning_rate": 1.3817413025643933e-07, "loss": 0.11597901582717896, "step": 32020 }, { "epoch": 2.7979206709767603, "grad_norm": 0.5334243810474214, "learning_rate": 1.3758142706931655e-07, "loss": 0.07834057807922364, "step": 32025 }, { "epoch": 2.798357504805172, "grad_norm": 0.5800446155050712, "learning_rate": 1.3698998009349485e-07, "loss": 0.09067466855049133, "step": 32030 }, { "epoch": 2.798794338633584, "grad_norm": 0.5756395397012263, "learning_rate": 1.363997894817748e-07, "loss": 0.10249186754226684, "step": 32035 }, { "epoch": 2.7992311724619956, "grad_norm": 0.531440412635713, "learning_rate": 1.3581085538663385e-07, "loss": 0.08263950347900391, "step": 32040 }, { "epoch": 2.799668006290407, "grad_norm": 0.6014153164673451, "learning_rate": 1.3522317796022199e-07, "loss": 0.07128495573997498, "step": 32045 }, { "epoch": 2.8001048401188187, "grad_norm": 0.5403740721924265, "learning_rate": 1.346367573543683e-07, "loss": 0.08593986034393311, "step": 32050 }, { "epoch": 2.8005416739472304, "grad_norm": 0.5077390843446896, "learning_rate": 1.3405159372057442e-07, "loss": 0.095391845703125, "step": 32055 }, { "epoch": 2.800978507775642, "grad_norm": 0.5421418311801564, "learning_rate": 1.3346768721001768e-07, "loss": 0.0885795772075653, "step": 32060 }, { "epoch": 2.801415341604054, "grad_norm": 0.5661094130769646, "learning_rate": 1.3288503797355134e-07, "loss": 0.0932666540145874, "step": 32065 }, { "epoch": 2.8018521754324652, "grad_norm": 0.572452542808011, "learning_rate": 1.3230364616170333e-07, "loss": 0.0965316891670227, "step": 32070 }, { "epoch": 2.8022890092608774, "grad_norm": 0.5386145573071359, "learning_rate": 1.3172351192467736e-07, "loss": 0.08355308175086976, "step": 32075 }, { "epoch": 2.8027258430892887, "grad_norm": 0.5940375049311998, "learning_rate": 1.3114463541235022e-07, "loss": 0.10651493072509766, "step": 32080 }, { "epoch": 2.8031626769177005, "grad_norm": 0.5103890035672015, "learning_rate": 1.3056701677427841e-07, "loss": 0.10219523906707764, "step": 32085 }, { "epoch": 2.8035995107461122, "grad_norm": 0.4914007835022228, "learning_rate": 1.2999065615968642e-07, "loss": 0.08332257270812989, "step": 32090 }, { "epoch": 2.804036344574524, "grad_norm": 0.4888719949391555, "learning_rate": 1.294155537174807e-07, "loss": 0.10226684808731079, "step": 32095 }, { "epoch": 2.8044731784029358, "grad_norm": 0.6025027596118391, "learning_rate": 1.28841709596238e-07, "loss": 0.09553598761558532, "step": 32100 }, { "epoch": 2.804910012231347, "grad_norm": 0.5965998533592995, "learning_rate": 1.2826912394421242e-07, "loss": 0.08527535796165467, "step": 32105 }, { "epoch": 2.805346846059759, "grad_norm": 0.5947446115022114, "learning_rate": 1.276977969093318e-07, "loss": 0.08703364133834839, "step": 32110 }, { "epoch": 2.8057836798881706, "grad_norm": 0.5405982725144822, "learning_rate": 1.271277286391992e-07, "loss": 0.08362714052200318, "step": 32115 }, { "epoch": 2.8062205137165823, "grad_norm": 0.5034992042741615, "learning_rate": 1.2655891928109232e-07, "loss": 0.10668308734893799, "step": 32120 }, { "epoch": 2.806657347544994, "grad_norm": 0.5116320168922824, "learning_rate": 1.2599136898196418e-07, "loss": 0.10080182552337646, "step": 32125 }, { "epoch": 2.8070941813734054, "grad_norm": 0.4990962725534161, "learning_rate": 1.2542507788844138e-07, "loss": 0.09022084474563599, "step": 32130 }, { "epoch": 2.807531015201817, "grad_norm": 0.5692355153780213, "learning_rate": 1.2486004614682522e-07, "loss": 0.0716110646724701, "step": 32135 }, { "epoch": 2.807967849030229, "grad_norm": 0.4583224324391971, "learning_rate": 1.2429627390309396e-07, "loss": 0.06512322425842285, "step": 32140 }, { "epoch": 2.8084046828586406, "grad_norm": 0.5536042222859553, "learning_rate": 1.2373376130289715e-07, "loss": 0.09236156344413757, "step": 32145 }, { "epoch": 2.8088415166870524, "grad_norm": 0.5878601789814878, "learning_rate": 1.2317250849156303e-07, "loss": 0.07594949007034302, "step": 32150 }, { "epoch": 2.8092783505154637, "grad_norm": 0.47217554547573665, "learning_rate": 1.2261251561408838e-07, "loss": 0.08884731531143189, "step": 32155 }, { "epoch": 2.8097151843438755, "grad_norm": 0.5533958586151642, "learning_rate": 1.2205378281515078e-07, "loss": 0.09193868041038514, "step": 32160 }, { "epoch": 2.810152018172287, "grad_norm": 0.5073934133151726, "learning_rate": 1.2149631023909814e-07, "loss": 0.07304399609565734, "step": 32165 }, { "epoch": 2.810588852000699, "grad_norm": 0.7304556715932882, "learning_rate": 1.2094009802995467e-07, "loss": 0.08613899946212769, "step": 32170 }, { "epoch": 2.8110256858291107, "grad_norm": 0.5070920105913708, "learning_rate": 1.2038514633141874e-07, "loss": 0.07922906875610351, "step": 32175 }, { "epoch": 2.811462519657522, "grad_norm": 0.7005794843611971, "learning_rate": 1.1983145528686125e-07, "loss": 0.07886457443237305, "step": 32180 }, { "epoch": 2.8118993534859342, "grad_norm": 0.6393899623480607, "learning_rate": 1.1927902503933108e-07, "loss": 0.08195770382881165, "step": 32185 }, { "epoch": 2.8123361873143455, "grad_norm": 0.4977089484353733, "learning_rate": 1.1872785573154744e-07, "loss": 0.09638761281967163, "step": 32190 }, { "epoch": 2.8127730211427573, "grad_norm": 0.6037237579847357, "learning_rate": 1.1817794750590638e-07, "loss": 0.08901464939117432, "step": 32195 }, { "epoch": 2.813209854971169, "grad_norm": 0.523352963401735, "learning_rate": 1.1762930050447652e-07, "loss": 0.09235049486160278, "step": 32200 }, { "epoch": 2.813646688799581, "grad_norm": 0.6121350803019906, "learning_rate": 1.1708191486900278e-07, "loss": 0.09176039695739746, "step": 32205 }, { "epoch": 2.8140835226279926, "grad_norm": 0.5086860746259942, "learning_rate": 1.1653579074090093e-07, "loss": 0.09446120858192444, "step": 32210 }, { "epoch": 2.814520356456404, "grad_norm": 0.6126152608836628, "learning_rate": 1.1599092826126534e-07, "loss": 0.08381328582763672, "step": 32215 }, { "epoch": 2.8149571902848156, "grad_norm": 0.5731470606742007, "learning_rate": 1.1544732757085952e-07, "loss": 0.10133713483810425, "step": 32220 }, { "epoch": 2.8153940241132274, "grad_norm": 0.5769810593457171, "learning_rate": 1.1490498881012446e-07, "loss": 0.08601188659667969, "step": 32225 }, { "epoch": 2.815830857941639, "grad_norm": 0.5622136737806464, "learning_rate": 1.1436391211917363e-07, "loss": 0.09379398226737976, "step": 32230 }, { "epoch": 2.816267691770051, "grad_norm": 0.5505902145524317, "learning_rate": 1.1382409763779411e-07, "loss": 0.09145548343658447, "step": 32235 }, { "epoch": 2.816704525598462, "grad_norm": 0.5129826518242164, "learning_rate": 1.1328554550544935e-07, "loss": 0.09347424507141114, "step": 32240 }, { "epoch": 2.817141359426874, "grad_norm": 0.5494132012769329, "learning_rate": 1.1274825586127358e-07, "loss": 0.09575974941253662, "step": 32245 }, { "epoch": 2.8175781932552857, "grad_norm": 0.5120917446034347, "learning_rate": 1.122122288440769e-07, "loss": 0.11246383190155029, "step": 32250 }, { "epoch": 2.8180150270836974, "grad_norm": 0.5520093785202289, "learning_rate": 1.116774645923413e-07, "loss": 0.08591431379318237, "step": 32255 }, { "epoch": 2.818451860912109, "grad_norm": 0.5881339769428325, "learning_rate": 1.1114396324422516e-07, "loss": 0.0973059356212616, "step": 32260 }, { "epoch": 2.8188886947405205, "grad_norm": 0.49580827218372236, "learning_rate": 1.1061172493755823e-07, "loss": 0.06646927595138549, "step": 32265 }, { "epoch": 2.8193255285689323, "grad_norm": 0.48084775422876086, "learning_rate": 1.1008074980984495e-07, "loss": 0.08238922357559204, "step": 32270 }, { "epoch": 2.819762362397344, "grad_norm": 0.5198844937749733, "learning_rate": 1.095510379982645e-07, "loss": 0.09093481302261353, "step": 32275 }, { "epoch": 2.8201991962257558, "grad_norm": 0.5059816602572692, "learning_rate": 1.0902258963966683e-07, "loss": 0.08226655721664429, "step": 32280 }, { "epoch": 2.8206360300541675, "grad_norm": 0.5191749765301769, "learning_rate": 1.0849540487057831e-07, "loss": 0.09640899300575256, "step": 32285 }, { "epoch": 2.821072863882579, "grad_norm": 1.6976467173030103, "learning_rate": 1.0796948382719662e-07, "loss": 0.07061757445335388, "step": 32290 }, { "epoch": 2.8215096977109906, "grad_norm": 0.6210693462236319, "learning_rate": 1.0744482664539479e-07, "loss": 0.10312626361846924, "step": 32295 }, { "epoch": 2.8219465315394023, "grad_norm": 0.5007793340336183, "learning_rate": 1.0692143346071881e-07, "loss": 0.07406089305877686, "step": 32300 }, { "epoch": 2.822383365367814, "grad_norm": 0.6811253209145783, "learning_rate": 1.0639930440838775e-07, "loss": 0.09224539995193481, "step": 32305 }, { "epoch": 2.822820199196226, "grad_norm": 0.5174852514856367, "learning_rate": 1.0587843962329424e-07, "loss": 0.09547255039215088, "step": 32310 }, { "epoch": 2.823257033024637, "grad_norm": 0.5904711796571654, "learning_rate": 1.05358839240004e-07, "loss": 0.10173449516296387, "step": 32315 }, { "epoch": 2.8236938668530494, "grad_norm": 0.5390078747968745, "learning_rate": 1.0484050339275632e-07, "loss": 0.0894962191581726, "step": 32320 }, { "epoch": 2.8241307006814607, "grad_norm": 0.6154680077147856, "learning_rate": 1.043234322154646e-07, "loss": 0.09566062688827515, "step": 32325 }, { "epoch": 2.8245675345098724, "grad_norm": 0.4486290648913449, "learning_rate": 1.0380762584171422e-07, "loss": 0.0797271728515625, "step": 32330 }, { "epoch": 2.825004368338284, "grad_norm": 0.4812945769707414, "learning_rate": 1.032930844047636e-07, "loss": 0.08874431848526002, "step": 32335 }, { "epoch": 2.825441202166696, "grad_norm": 0.5552557050044555, "learning_rate": 1.0277980803754695e-07, "loss": 0.08862518072128296, "step": 32340 }, { "epoch": 2.8258780359951077, "grad_norm": 0.5051709770383526, "learning_rate": 1.0226779687266763e-07, "loss": 0.07074368000030518, "step": 32345 }, { "epoch": 2.826314869823519, "grad_norm": 0.566186378088242, "learning_rate": 1.0175705104240597e-07, "loss": 0.10340898036956787, "step": 32350 }, { "epoch": 2.8267517036519307, "grad_norm": 0.5340644881766831, "learning_rate": 1.0124757067871249e-07, "loss": 0.10268206596374511, "step": 32355 }, { "epoch": 2.8271885374803425, "grad_norm": 0.5174504965433576, "learning_rate": 1.0073935591321304e-07, "loss": 0.09514871835708619, "step": 32360 }, { "epoch": 2.8276253713087542, "grad_norm": 0.7493260826618916, "learning_rate": 1.0023240687720481e-07, "loss": 0.06510069966316223, "step": 32365 }, { "epoch": 2.828062205137166, "grad_norm": 0.5107885120420481, "learning_rate": 9.972672370165915e-08, "loss": 0.0989671766757965, "step": 32370 }, { "epoch": 2.8284990389655773, "grad_norm": 0.700312707618751, "learning_rate": 9.922230651721986e-08, "loss": 0.08015953302383423, "step": 32375 }, { "epoch": 2.828935872793989, "grad_norm": 0.5124947430935685, "learning_rate": 9.871915545420385e-08, "loss": 0.08186920881271362, "step": 32380 }, { "epoch": 2.829372706622401, "grad_norm": 0.44272687544056405, "learning_rate": 9.821727064259989e-08, "loss": 0.07640880942344666, "step": 32385 }, { "epoch": 2.8298095404508126, "grad_norm": 0.5649955409700469, "learning_rate": 9.771665221207039e-08, "loss": 0.08882132768630982, "step": 32390 }, { "epoch": 2.8302463742792243, "grad_norm": 0.5761214247954887, "learning_rate": 9.721730029195187e-08, "loss": 0.08681416511535645, "step": 32395 }, { "epoch": 2.8306832081076356, "grad_norm": 0.5575719932803839, "learning_rate": 9.67192150112517e-08, "loss": 0.07429810166358948, "step": 32400 }, { "epoch": 2.8311200419360474, "grad_norm": 0.6494905039486112, "learning_rate": 9.62223964986514e-08, "loss": 0.09658252000808716, "step": 32405 }, { "epoch": 2.831556875764459, "grad_norm": 0.49257530413697015, "learning_rate": 9.572684488250384e-08, "loss": 0.09658688306808472, "step": 32410 }, { "epoch": 2.831993709592871, "grad_norm": 0.5380399497106934, "learning_rate": 9.52325602908355e-08, "loss": 0.09154229164123535, "step": 32415 }, { "epoch": 2.8324305434212826, "grad_norm": 0.5905929487837661, "learning_rate": 9.473954285134534e-08, "loss": 0.0997883141040802, "step": 32420 }, { "epoch": 2.832867377249694, "grad_norm": 0.681251649244075, "learning_rate": 9.424779269140538e-08, "loss": 0.08938372135162354, "step": 32425 }, { "epoch": 2.833304211078106, "grad_norm": 0.562667989578975, "learning_rate": 9.375730993805953e-08, "loss": 0.09411466121673584, "step": 32430 }, { "epoch": 2.8337410449065175, "grad_norm": 0.5836998254673501, "learning_rate": 9.326809471802423e-08, "loss": 0.09412841796875, "step": 32435 }, { "epoch": 2.834177878734929, "grad_norm": 0.5902986773195734, "learning_rate": 9.278014715769001e-08, "loss": 0.10060588121414185, "step": 32440 }, { "epoch": 2.834614712563341, "grad_norm": 0.7462419336906082, "learning_rate": 9.229346738311718e-08, "loss": 0.09494206905364991, "step": 32445 }, { "epoch": 2.8350515463917527, "grad_norm": 0.6852796304496349, "learning_rate": 9.180805552004068e-08, "loss": 0.08934265375137329, "step": 32450 }, { "epoch": 2.8354883802201645, "grad_norm": 0.4706959081034203, "learning_rate": 9.132391169386689e-08, "loss": 0.08610602021217346, "step": 32455 }, { "epoch": 2.835925214048576, "grad_norm": 0.7162203058674018, "learning_rate": 9.084103602967576e-08, "loss": 0.09080935716629028, "step": 32460 }, { "epoch": 2.8363620478769875, "grad_norm": 0.6097846271998641, "learning_rate": 9.035942865221747e-08, "loss": 0.09048733711242676, "step": 32465 }, { "epoch": 2.8367988817053993, "grad_norm": 0.5062956632086724, "learning_rate": 8.987908968591753e-08, "loss": 0.0655784249305725, "step": 32470 }, { "epoch": 2.837235715533811, "grad_norm": 0.5644766385298233, "learning_rate": 8.940001925486996e-08, "loss": 0.10625681877136231, "step": 32475 }, { "epoch": 2.837672549362223, "grad_norm": 0.42002167843161753, "learning_rate": 8.892221748284469e-08, "loss": 0.08053110837936402, "step": 32480 }, { "epoch": 2.838109383190634, "grad_norm": 0.567125484340414, "learning_rate": 8.84456844932824e-08, "loss": 0.0823538601398468, "step": 32485 }, { "epoch": 2.838546217019046, "grad_norm": 0.5840981704352747, "learning_rate": 8.797042040929459e-08, "loss": 0.0976595401763916, "step": 32490 }, { "epoch": 2.8389830508474576, "grad_norm": 0.5233525013978536, "learning_rate": 8.749642535366754e-08, "loss": 0.09361573457717895, "step": 32495 }, { "epoch": 2.8394198846758694, "grad_norm": 0.5954346919254353, "learning_rate": 8.702369944885825e-08, "loss": 0.10107243061065674, "step": 32500 }, { "epoch": 2.839856718504281, "grad_norm": 0.5180215791282743, "learning_rate": 8.655224281699514e-08, "loss": 0.10854055881500244, "step": 32505 }, { "epoch": 2.8402935523326924, "grad_norm": 0.6127758074595938, "learning_rate": 8.608205557988026e-08, "loss": 0.08397544622421264, "step": 32510 }, { "epoch": 2.840730386161104, "grad_norm": 0.6089595159777388, "learning_rate": 8.561313785898695e-08, "loss": 0.08817883729934692, "step": 32515 }, { "epoch": 2.841167219989516, "grad_norm": 0.5332240575344653, "learning_rate": 8.514548977546056e-08, "loss": 0.08300006985664368, "step": 32520 }, { "epoch": 2.8416040538179277, "grad_norm": 0.6798594598253189, "learning_rate": 8.467911145011832e-08, "loss": 0.10384941101074219, "step": 32525 }, { "epoch": 2.8420408876463394, "grad_norm": 0.6407538923347744, "learning_rate": 8.421400300345051e-08, "loss": 0.09739752411842346, "step": 32530 }, { "epoch": 2.8424777214747508, "grad_norm": 0.5832261771161462, "learning_rate": 8.375016455561713e-08, "loss": 0.08805488348007202, "step": 32535 }, { "epoch": 2.842914555303163, "grad_norm": 0.5862928176624979, "learning_rate": 8.328759622645177e-08, "loss": 0.08874103426933289, "step": 32540 }, { "epoch": 2.8433513891315743, "grad_norm": 0.6171894349707178, "learning_rate": 8.282629813545939e-08, "loss": 0.09164037704467773, "step": 32545 }, { "epoch": 2.843788222959986, "grad_norm": 0.5936787205077494, "learning_rate": 8.236627040181799e-08, "loss": 0.10033597946166992, "step": 32550 }, { "epoch": 2.8442250567883978, "grad_norm": 0.5839280947276126, "learning_rate": 8.190751314437417e-08, "loss": 0.0674590826034546, "step": 32555 }, { "epoch": 2.8446618906168095, "grad_norm": 0.6583972642033321, "learning_rate": 8.145002648165035e-08, "loss": 0.08195151090621948, "step": 32560 }, { "epoch": 2.8450987244452213, "grad_norm": 0.5660391854488201, "learning_rate": 8.099381053183808e-08, "loss": 0.10561704635620117, "step": 32565 }, { "epoch": 2.8455355582736326, "grad_norm": 0.47345940788130736, "learning_rate": 8.053886541280088e-08, "loss": 0.09472263455390931, "step": 32570 }, { "epoch": 2.8459723921020443, "grad_norm": 0.7748922401406898, "learning_rate": 8.008519124207415e-08, "loss": 0.10386762619018555, "step": 32575 }, { "epoch": 2.846409225930456, "grad_norm": 0.5289014042850475, "learning_rate": 7.963278813686637e-08, "loss": 0.10240755081176758, "step": 32580 }, { "epoch": 2.846846059758868, "grad_norm": 0.4949601877345447, "learning_rate": 7.918165621405516e-08, "loss": 0.0811454713344574, "step": 32585 }, { "epoch": 2.8472828935872796, "grad_norm": 0.7272723575969344, "learning_rate": 7.873179559019117e-08, "loss": 0.08054050207138061, "step": 32590 }, { "epoch": 2.847719727415691, "grad_norm": 0.5694694078287179, "learning_rate": 7.8283206381497e-08, "loss": 0.09007408618927001, "step": 32595 }, { "epoch": 2.8481565612441027, "grad_norm": 1.0103988421518002, "learning_rate": 7.783588870386549e-08, "loss": 0.09224483370780945, "step": 32600 }, { "epoch": 2.8485933950725144, "grad_norm": 0.5340327146852161, "learning_rate": 7.738984267286254e-08, "loss": 0.07635395526885987, "step": 32605 }, { "epoch": 2.849030228900926, "grad_norm": 0.6555677234409022, "learning_rate": 7.694506840372318e-08, "loss": 0.10019885301589966, "step": 32610 }, { "epoch": 2.849467062729338, "grad_norm": 0.5376094487330322, "learning_rate": 7.650156601135661e-08, "loss": 0.08169400691986084, "step": 32615 }, { "epoch": 2.8499038965577492, "grad_norm": 0.5653201766393734, "learning_rate": 7.605933561034173e-08, "loss": 0.05753566026687622, "step": 32620 }, { "epoch": 2.850340730386161, "grad_norm": 0.4668808343577416, "learning_rate": 7.561837731492938e-08, "loss": 0.07956063151359558, "step": 32625 }, { "epoch": 2.8507775642145727, "grad_norm": 0.7990810407996888, "learning_rate": 7.517869123904176e-08, "loss": 0.08508431315422058, "step": 32630 }, { "epoch": 2.8512143980429845, "grad_norm": 0.5681597444810847, "learning_rate": 7.474027749627188e-08, "loss": 0.09976517558097839, "step": 32635 }, { "epoch": 2.8516512318713962, "grad_norm": 0.5289656551439473, "learning_rate": 7.430313619988472e-08, "loss": 0.10610682964324951, "step": 32640 }, { "epoch": 2.8520880656998076, "grad_norm": 0.6506420882246758, "learning_rate": 7.386726746281548e-08, "loss": 0.12470170259475707, "step": 32645 }, { "epoch": 2.8525248995282193, "grad_norm": 0.6223576867676356, "learning_rate": 7.343267139767296e-08, "loss": 0.11012043952941894, "step": 32650 }, { "epoch": 2.852961733356631, "grad_norm": 0.551309789372275, "learning_rate": 7.299934811673348e-08, "loss": 0.08887937664985657, "step": 32655 }, { "epoch": 2.853398567185043, "grad_norm": 0.46527049250843777, "learning_rate": 7.256729773194859e-08, "loss": 0.08046552538871765, "step": 32660 }, { "epoch": 2.8538354010134546, "grad_norm": 0.5848578731406153, "learning_rate": 7.213652035493735e-08, "loss": 0.10601736307144165, "step": 32665 }, { "epoch": 2.8542722348418663, "grad_norm": 0.6299378592283408, "learning_rate": 7.170701609699294e-08, "loss": 0.1083679437637329, "step": 32670 }, { "epoch": 2.854709068670278, "grad_norm": 0.587625525580688, "learning_rate": 7.127878506907715e-08, "loss": 0.08873088955879212, "step": 32675 }, { "epoch": 2.8551459024986894, "grad_norm": 0.5555412879359767, "learning_rate": 7.085182738182484e-08, "loss": 0.10795350074768066, "step": 32680 }, { "epoch": 2.855582736327101, "grad_norm": 0.5513895230131447, "learning_rate": 7.042614314554052e-08, "loss": 0.08650273084640503, "step": 32685 }, { "epoch": 2.856019570155513, "grad_norm": 0.5526457789333956, "learning_rate": 7.000173247019958e-08, "loss": 0.08359928131103515, "step": 32690 }, { "epoch": 2.8564564039839246, "grad_norm": 0.669575482001909, "learning_rate": 6.957859546545043e-08, "loss": 0.07992042303085327, "step": 32695 }, { "epoch": 2.8568932378123364, "grad_norm": 0.5666413623898646, "learning_rate": 6.915673224061004e-08, "loss": 0.10458788871765137, "step": 32700 }, { "epoch": 2.8573300716407477, "grad_norm": 0.5421331724317264, "learning_rate": 6.873614290466735e-08, "loss": 0.08510267734527588, "step": 32705 }, { "epoch": 2.8577669054691595, "grad_norm": 0.5548620810367821, "learning_rate": 6.831682756628211e-08, "loss": 0.1115799903869629, "step": 32710 }, { "epoch": 2.858203739297571, "grad_norm": 0.6662228754340126, "learning_rate": 6.789878633378544e-08, "loss": 0.08066328763961791, "step": 32715 }, { "epoch": 2.858640573125983, "grad_norm": 0.5229115459267637, "learning_rate": 6.748201931517762e-08, "loss": 0.075946843624115, "step": 32720 }, { "epoch": 2.8590774069543947, "grad_norm": 0.505386502564563, "learning_rate": 6.706652661813196e-08, "loss": 0.08681856393814087, "step": 32725 }, { "epoch": 2.859514240782806, "grad_norm": 0.5444161221010169, "learning_rate": 6.665230834999093e-08, "loss": 0.09057319164276123, "step": 32730 }, { "epoch": 2.859951074611218, "grad_norm": 0.6242981086895577, "learning_rate": 6.623936461776892e-08, "loss": 0.10311999320983886, "step": 32735 }, { "epoch": 2.8603879084396295, "grad_norm": 0.5786232620997941, "learning_rate": 6.582769552814949e-08, "loss": 0.08747860193252563, "step": 32740 }, { "epoch": 2.8608247422680413, "grad_norm": 0.6509865857772084, "learning_rate": 6.541730118748813e-08, "loss": 0.07689886093139649, "step": 32745 }, { "epoch": 2.861261576096453, "grad_norm": 0.5571536965880738, "learning_rate": 6.500818170181055e-08, "loss": 0.0894840657711029, "step": 32750 }, { "epoch": 2.8616984099248644, "grad_norm": 0.7990646130011383, "learning_rate": 6.460033717681391e-08, "loss": 0.07149970531463623, "step": 32755 }, { "epoch": 2.862135243753276, "grad_norm": 0.45413635231906946, "learning_rate": 6.4193767717865e-08, "loss": 0.07725440859794616, "step": 32760 }, { "epoch": 2.862572077581688, "grad_norm": 0.537860455390429, "learning_rate": 6.378847343000094e-08, "loss": 0.07241670489311218, "step": 32765 }, { "epoch": 2.8630089114100996, "grad_norm": 0.5361259281958984, "learning_rate": 6.338445441793017e-08, "loss": 0.06900358200073242, "step": 32770 }, { "epoch": 2.8634457452385114, "grad_norm": 0.5494610229193901, "learning_rate": 6.298171078603198e-08, "loss": 0.09339355230331421, "step": 32775 }, { "epoch": 2.8638825790669227, "grad_norm": 0.5521651035772844, "learning_rate": 6.258024263835538e-08, "loss": 0.09050775170326233, "step": 32780 }, { "epoch": 2.864319412895335, "grad_norm": 0.37216049359657183, "learning_rate": 6.218005007862016e-08, "loss": 0.07847058773040771, "step": 32785 }, { "epoch": 2.864756246723746, "grad_norm": 0.4039073224603246, "learning_rate": 6.17811332102164e-08, "loss": 0.0877373218536377, "step": 32790 }, { "epoch": 2.865193080552158, "grad_norm": 0.5851097417812523, "learning_rate": 6.138349213620442e-08, "loss": 0.07934401631355285, "step": 32795 }, { "epoch": 2.8656299143805697, "grad_norm": 0.5373687701626293, "learning_rate": 6.098712695931542e-08, "loss": 0.10102908611297608, "step": 32800 }, { "epoch": 2.8660667482089814, "grad_norm": 0.5361970945609431, "learning_rate": 6.059203778195134e-08, "loss": 0.10355224609375, "step": 32805 }, { "epoch": 2.866503582037393, "grad_norm": 0.5668671787360516, "learning_rate": 6.01982247061833e-08, "loss": 0.11606926918029785, "step": 32810 }, { "epoch": 2.8669404158658045, "grad_norm": 0.5494658042952673, "learning_rate": 5.980568783375384e-08, "loss": 0.09519967436790466, "step": 32815 }, { "epoch": 2.8673772496942163, "grad_norm": 0.5984679287143918, "learning_rate": 5.94144272660746e-08, "loss": 0.08198235034942628, "step": 32820 }, { "epoch": 2.867814083522628, "grad_norm": 0.5805033298348619, "learning_rate": 5.9024443104228614e-08, "loss": 0.07612835764884948, "step": 32825 }, { "epoch": 2.8682509173510398, "grad_norm": 0.63525551833555, "learning_rate": 5.863573544896861e-08, "loss": 0.10442143678665161, "step": 32830 }, { "epoch": 2.8686877511794515, "grad_norm": 0.5361564730425467, "learning_rate": 5.8248304400718156e-08, "loss": 0.09590556621551513, "step": 32835 }, { "epoch": 2.869124585007863, "grad_norm": 0.4958739725088898, "learning_rate": 5.786215005956997e-08, "loss": 0.10138481855392456, "step": 32840 }, { "epoch": 2.8695614188362746, "grad_norm": 0.44573409802777997, "learning_rate": 5.747727252528756e-08, "loss": 0.0703741431236267, "step": 32845 }, { "epoch": 2.8699982526646863, "grad_norm": 0.62779257880917, "learning_rate": 5.709367189730419e-08, "loss": 0.07459657788276672, "step": 32850 }, { "epoch": 2.870435086493098, "grad_norm": 0.4680561597250071, "learning_rate": 5.6711348274724465e-08, "loss": 0.06935449838638305, "step": 32855 }, { "epoch": 2.87087192032151, "grad_norm": 0.552325564646276, "learning_rate": 5.6330301756321035e-08, "loss": 0.08895348310470581, "step": 32860 }, { "epoch": 2.871308754149921, "grad_norm": 0.610619837534021, "learning_rate": 5.5950532440537944e-08, "loss": 0.09071910977363587, "step": 32865 }, { "epoch": 2.871745587978333, "grad_norm": 0.6890507465426805, "learning_rate": 5.5572040425489474e-08, "loss": 0.10615966320037842, "step": 32870 }, { "epoch": 2.8721824218067447, "grad_norm": 0.6668224280429965, "learning_rate": 5.519482580895852e-08, "loss": 0.07883254289627076, "step": 32875 }, { "epoch": 2.8726192556351564, "grad_norm": 0.5990118498935206, "learning_rate": 5.4818888688400465e-08, "loss": 0.08815658092498779, "step": 32880 }, { "epoch": 2.873056089463568, "grad_norm": 0.586294098259721, "learning_rate": 5.4444229160937614e-08, "loss": 0.09542710781097412, "step": 32885 }, { "epoch": 2.8734929232919795, "grad_norm": 0.5516096241249008, "learning_rate": 5.407084732336476e-08, "loss": 0.10388927459716797, "step": 32890 }, { "epoch": 2.8739297571203917, "grad_norm": 0.5719006810020398, "learning_rate": 5.369874327214475e-08, "loss": 0.07320986986160279, "step": 32895 }, { "epoch": 2.874366590948803, "grad_norm": 0.5569563014236241, "learning_rate": 5.332791710341123e-08, "loss": 0.09636022448539734, "step": 32900 }, { "epoch": 2.8748034247772147, "grad_norm": 0.4893378845159142, "learning_rate": 5.295836891296813e-08, "loss": 0.09947457313537597, "step": 32905 }, { "epoch": 2.8752402586056265, "grad_norm": 0.5623229330729539, "learning_rate": 5.25900987962874e-08, "loss": 0.07269433736801148, "step": 32910 }, { "epoch": 2.8756770924340382, "grad_norm": 0.5990520157494144, "learning_rate": 5.2223106848513506e-08, "loss": 0.10531539916992187, "step": 32915 }, { "epoch": 2.87611392626245, "grad_norm": 0.5411631195855562, "learning_rate": 5.185739316445837e-08, "loss": 0.09636335372924805, "step": 32920 }, { "epoch": 2.8765507600908613, "grad_norm": 0.5221579124259793, "learning_rate": 5.149295783860475e-08, "loss": 0.10115416049957275, "step": 32925 }, { "epoch": 2.876987593919273, "grad_norm": 0.5550859859204662, "learning_rate": 5.112980096510456e-08, "loss": 0.11541657447814942, "step": 32930 }, { "epoch": 2.877424427747685, "grad_norm": 0.5740597299376734, "learning_rate": 5.076792263778052e-08, "loss": 0.07157744169235229, "step": 32935 }, { "epoch": 2.8778612615760966, "grad_norm": 0.643049315421737, "learning_rate": 5.0407322950123963e-08, "loss": 0.0933627188205719, "step": 32940 }, { "epoch": 2.8782980954045083, "grad_norm": 0.5513360503440881, "learning_rate": 5.004800199529536e-08, "loss": 0.09008727669715881, "step": 32945 }, { "epoch": 2.8787349292329196, "grad_norm": 0.5007895645670599, "learning_rate": 4.968995986612768e-08, "loss": 0.08676954507827758, "step": 32950 }, { "epoch": 2.8791717630613314, "grad_norm": 0.5952964615740757, "learning_rate": 4.9333196655119705e-08, "loss": 0.08599234819412231, "step": 32955 }, { "epoch": 2.879608596889743, "grad_norm": 0.625630244077895, "learning_rate": 4.897771245444216e-08, "loss": 0.11567531824111939, "step": 32960 }, { "epoch": 2.880045430718155, "grad_norm": 0.5058666848784065, "learning_rate": 4.862350735593435e-08, "loss": 0.07810391187667846, "step": 32965 }, { "epoch": 2.8804822645465666, "grad_norm": 0.5169659675939834, "learning_rate": 4.827058145110697e-08, "loss": 0.06814528703689575, "step": 32970 }, { "epoch": 2.880919098374978, "grad_norm": 0.5662438804281915, "learning_rate": 4.7918934831137076e-08, "loss": 0.07315312623977661, "step": 32975 }, { "epoch": 2.8813559322033897, "grad_norm": 0.4232378521654911, "learning_rate": 4.756856758687478e-08, "loss": 0.07778916358947754, "step": 32980 }, { "epoch": 2.8817927660318015, "grad_norm": 0.5440821111603016, "learning_rate": 4.721947980883601e-08, "loss": 0.08374998569488526, "step": 32985 }, { "epoch": 2.882229599860213, "grad_norm": 0.651508653940629, "learning_rate": 4.687167158720918e-08, "loss": 0.07820954322814941, "step": 32990 }, { "epoch": 2.882666433688625, "grad_norm": 0.5348975710625388, "learning_rate": 4.6525143011850736e-08, "loss": 0.07332053780555725, "step": 32995 }, { "epoch": 2.8831032675170363, "grad_norm": 0.513557935992268, "learning_rate": 4.6179894172286296e-08, "loss": 0.059356963634490965, "step": 33000 }, { "epoch": 2.883540101345448, "grad_norm": 0.5396975139575777, "learning_rate": 4.583592515771174e-08, "loss": 0.0968590259552002, "step": 33005 }, { "epoch": 2.88397693517386, "grad_norm": 0.5616136101481543, "learning_rate": 4.5493236056992074e-08, "loss": 0.0851606011390686, "step": 33010 }, { "epoch": 2.8844137690022715, "grad_norm": 0.5657834163607484, "learning_rate": 4.5151826958660386e-08, "loss": 0.08865947723388672, "step": 33015 }, { "epoch": 2.8848506028306833, "grad_norm": 0.5588470016118565, "learning_rate": 4.481169795092055e-08, "loss": 0.0939554214477539, "step": 33020 }, { "epoch": 2.885287436659095, "grad_norm": 0.5890951466948818, "learning_rate": 4.447284912164618e-08, "loss": 0.11056280136108398, "step": 33025 }, { "epoch": 2.885724270487507, "grad_norm": 0.5281691725749208, "learning_rate": 4.413528055837835e-08, "loss": 0.08533008098602295, "step": 33030 }, { "epoch": 2.886161104315918, "grad_norm": 0.5073039628285761, "learning_rate": 4.379899234832841e-08, "loss": 0.07865653038024903, "step": 33035 }, { "epoch": 2.88659793814433, "grad_norm": 0.5193950795944559, "learning_rate": 4.346398457837686e-08, "loss": 0.08816137909889221, "step": 33040 }, { "epoch": 2.8870347719727416, "grad_norm": 0.5408206913988627, "learning_rate": 4.3130257335073365e-08, "loss": 0.08563929796218872, "step": 33045 }, { "epoch": 2.8874716058011534, "grad_norm": 0.48446051194505696, "learning_rate": 4.2797810704636177e-08, "loss": 0.0920864462852478, "step": 33050 }, { "epoch": 2.887908439629565, "grad_norm": 0.5202501483806238, "learning_rate": 4.246664477295437e-08, "loss": 0.09169641733169556, "step": 33055 }, { "epoch": 2.8883452734579764, "grad_norm": 0.5465217423146472, "learning_rate": 4.21367596255845e-08, "loss": 0.07410926222801209, "step": 33060 }, { "epoch": 2.888782107286388, "grad_norm": 0.7741312487542853, "learning_rate": 4.180815534775229e-08, "loss": 0.09459802508354187, "step": 33065 }, { "epoch": 2.8892189411148, "grad_norm": 0.5932242507618807, "learning_rate": 4.148083202435371e-08, "loss": 0.09214994311332703, "step": 33070 }, { "epoch": 2.8896557749432117, "grad_norm": 0.5614044044736185, "learning_rate": 4.115478973995279e-08, "loss": 0.0949684739112854, "step": 33075 }, { "epoch": 2.8900926087716234, "grad_norm": 0.5082577750219857, "learning_rate": 4.083002857878271e-08, "loss": 0.0727528989315033, "step": 33080 }, { "epoch": 2.8905294426000347, "grad_norm": 0.5744769202834359, "learning_rate": 4.0506548624746344e-08, "loss": 0.09040191173553466, "step": 33085 }, { "epoch": 2.8909662764284465, "grad_norm": 0.5632411730031692, "learning_rate": 4.0184349961415184e-08, "loss": 0.10311574935913086, "step": 33090 }, { "epoch": 2.8914031102568583, "grad_norm": 0.5428483933253151, "learning_rate": 3.9863432672028744e-08, "loss": 0.08025403022766113, "step": 33095 }, { "epoch": 2.89183994408527, "grad_norm": 0.644299851374394, "learning_rate": 3.9543796839497386e-08, "loss": 0.07654500603675843, "step": 33100 }, { "epoch": 2.8922767779136818, "grad_norm": 0.639821356433463, "learning_rate": 3.922544254639893e-08, "loss": 0.09583216905593872, "step": 33105 }, { "epoch": 2.892713611742093, "grad_norm": 0.5297179536798026, "learning_rate": 3.8908369874980364e-08, "loss": 0.09407851696014405, "step": 33110 }, { "epoch": 2.893150445570505, "grad_norm": 0.592731922687724, "learning_rate": 3.8592578907158395e-08, "loss": 0.10178215503692627, "step": 33115 }, { "epoch": 2.8935872793989166, "grad_norm": 0.5242900039103701, "learning_rate": 3.8278069724516645e-08, "loss": 0.09300177693367004, "step": 33120 }, { "epoch": 2.8940241132273283, "grad_norm": 0.5330085564553082, "learning_rate": 3.796484240831066e-08, "loss": 0.08368073105812072, "step": 33125 }, { "epoch": 2.89446094705574, "grad_norm": 0.5807000658927773, "learning_rate": 3.7652897039461846e-08, "loss": 0.09074405431747437, "step": 33130 }, { "epoch": 2.8948977808841514, "grad_norm": 0.4810025784520239, "learning_rate": 3.734223369856238e-08, "loss": 0.10424034595489502, "step": 33135 }, { "epoch": 2.8953346147125636, "grad_norm": 0.7327768930455987, "learning_rate": 3.703285246587251e-08, "loss": 0.09114760160446167, "step": 33140 }, { "epoch": 2.895771448540975, "grad_norm": 0.5653466889052128, "learning_rate": 3.672475342131998e-08, "loss": 0.09391705989837647, "step": 33145 }, { "epoch": 2.8962082823693867, "grad_norm": 0.7030519867339158, "learning_rate": 3.6417936644503884e-08, "loss": 0.09664736986160279, "step": 33150 }, { "epoch": 2.8966451161977984, "grad_norm": 0.7075431793100402, "learning_rate": 3.6112402214690256e-08, "loss": 0.10099436044692993, "step": 33155 }, { "epoch": 2.89708195002621, "grad_norm": 0.5971720720071176, "learning_rate": 3.58081502108143e-08, "loss": 0.08801512718200684, "step": 33160 }, { "epoch": 2.897518783854622, "grad_norm": 0.48591799133152813, "learning_rate": 3.550518071147924e-08, "loss": 0.07459463477134705, "step": 33165 }, { "epoch": 2.8979556176830332, "grad_norm": 0.6752847809366843, "learning_rate": 3.5203493794958575e-08, "loss": 0.09691776037216186, "step": 33170 }, { "epoch": 2.898392451511445, "grad_norm": 0.6098910268983216, "learning_rate": 3.490308953919275e-08, "loss": 0.08963560461997985, "step": 33175 }, { "epoch": 2.8988292853398567, "grad_norm": 0.5998828743872788, "learning_rate": 3.460396802179189e-08, "loss": 0.07356274724006653, "step": 33180 }, { "epoch": 2.8992661191682685, "grad_norm": 0.5935550024537568, "learning_rate": 3.430612932003419e-08, "loss": 0.09289231896400452, "step": 33185 }, { "epoch": 2.8997029529966802, "grad_norm": 0.5270389528095335, "learning_rate": 3.4009573510866976e-08, "loss": 0.08782749176025391, "step": 33190 }, { "epoch": 2.9001397868250915, "grad_norm": 0.8930910586879565, "learning_rate": 3.371430067090509e-08, "loss": 0.10222785472869873, "step": 33195 }, { "epoch": 2.9005766206535033, "grad_norm": 0.61037833108257, "learning_rate": 3.3420310876433603e-08, "loss": 0.08667529821395874, "step": 33200 }, { "epoch": 2.901013454481915, "grad_norm": 0.6170410844168447, "learning_rate": 3.3127604203403994e-08, "loss": 0.10301125049591064, "step": 33205 }, { "epoch": 2.901450288310327, "grad_norm": 0.5305657659007582, "learning_rate": 3.283618072743855e-08, "loss": 0.08886001706123352, "step": 33210 }, { "epoch": 2.9018871221387386, "grad_norm": 0.5440970156267497, "learning_rate": 3.2546040523825376e-08, "loss": 0.07293541431427002, "step": 33215 }, { "epoch": 2.90232395596715, "grad_norm": 0.573870623275772, "learning_rate": 3.2257183667523397e-08, "loss": 0.08791441917419433, "step": 33220 }, { "epoch": 2.9027607897955616, "grad_norm": 0.46691541873601733, "learning_rate": 3.1969610233159585e-08, "loss": 0.07497116923332214, "step": 33225 }, { "epoch": 2.9031976236239734, "grad_norm": 0.556724664142169, "learning_rate": 3.168332029502785e-08, "loss": 0.08039075136184692, "step": 33230 }, { "epoch": 2.903634457452385, "grad_norm": 0.5407930301178956, "learning_rate": 3.139831392709236e-08, "loss": 0.10109736919403076, "step": 33235 }, { "epoch": 2.904071291280797, "grad_norm": 0.6207762003582556, "learning_rate": 3.111459120298421e-08, "loss": 0.1056748390197754, "step": 33240 }, { "epoch": 2.904508125109208, "grad_norm": 0.5067653374813, "learning_rate": 3.0832152196004215e-08, "loss": 0.11768009662628173, "step": 33245 }, { "epoch": 2.9049449589376204, "grad_norm": 0.5234510082467, "learning_rate": 3.055099697911956e-08, "loss": 0.10410230159759522, "step": 33250 }, { "epoch": 2.9053817927660317, "grad_norm": 0.4993481473397071, "learning_rate": 3.0271125624968255e-08, "loss": 0.08306784629821777, "step": 33255 }, { "epoch": 2.9058186265944435, "grad_norm": 0.5507588232118955, "learning_rate": 2.999253820585468e-08, "loss": 0.10814359188079833, "step": 33260 }, { "epoch": 2.906255460422855, "grad_norm": 0.5502777263425308, "learning_rate": 2.9715234793752378e-08, "loss": 0.08128657341003417, "step": 33265 }, { "epoch": 2.906692294251267, "grad_norm": 0.6066498810029772, "learning_rate": 2.9439215460303484e-08, "loss": 0.08696829080581665, "step": 33270 }, { "epoch": 2.9071291280796787, "grad_norm": 0.5093240051338096, "learning_rate": 2.9164480276817064e-08, "loss": 0.09650803804397583, "step": 33275 }, { "epoch": 2.90756596190809, "grad_norm": 0.5931307518589622, "learning_rate": 2.88910293142719e-08, "loss": 0.08243407011032104, "step": 33280 }, { "epoch": 2.9080027957365018, "grad_norm": 0.57142146225034, "learning_rate": 2.8618862643313704e-08, "loss": 0.1106597900390625, "step": 33285 }, { "epoch": 2.9084396295649135, "grad_norm": 0.5653300580023165, "learning_rate": 2.8347980334257896e-08, "loss": 0.08227173686027527, "step": 33290 }, { "epoch": 2.9088764633933253, "grad_norm": 0.5533663159667598, "learning_rate": 2.8078382457086273e-08, "loss": 0.08490268588066101, "step": 33295 }, { "epoch": 2.909313297221737, "grad_norm": 0.5393531052682417, "learning_rate": 2.7810069081450898e-08, "loss": 0.11809283494949341, "step": 33300 }, { "epoch": 2.9097501310501483, "grad_norm": 0.5696197610427026, "learning_rate": 2.7543040276669654e-08, "loss": 0.09124746322631835, "step": 33305 }, { "epoch": 2.91018696487856, "grad_norm": 0.5214390251025047, "learning_rate": 2.727729611173069e-08, "loss": 0.09504783749580384, "step": 33310 }, { "epoch": 2.910623798706972, "grad_norm": 0.6269396307392232, "learning_rate": 2.7012836655288533e-08, "loss": 0.09431545734405518, "step": 33315 }, { "epoch": 2.9110606325353836, "grad_norm": 0.6438023455610067, "learning_rate": 2.674966197566742e-08, "loss": 0.10024713277816773, "step": 33320 }, { "epoch": 2.9114974663637954, "grad_norm": 0.6376538856438201, "learning_rate": 2.6487772140857958e-08, "loss": 0.08313056230545043, "step": 33325 }, { "epoch": 2.9119343001922067, "grad_norm": 0.6827808680113816, "learning_rate": 2.6227167218519923e-08, "loss": 0.09557855129241943, "step": 33330 }, { "epoch": 2.9123711340206184, "grad_norm": 0.5321746622075577, "learning_rate": 2.5967847275981673e-08, "loss": 0.08736616969108582, "step": 33335 }, { "epoch": 2.91280796784903, "grad_norm": 0.5745872622762754, "learning_rate": 2.5709812380237398e-08, "loss": 0.10574096441268921, "step": 33340 }, { "epoch": 2.913244801677442, "grad_norm": 0.5453771668916513, "learning_rate": 2.5453062597952106e-08, "loss": 0.08237209916114807, "step": 33345 }, { "epoch": 2.9136816355058537, "grad_norm": 0.571087974791642, "learning_rate": 2.5197597995456068e-08, "loss": 0.0764024555683136, "step": 33350 }, { "epoch": 2.914118469334265, "grad_norm": 0.5929857666923735, "learning_rate": 2.4943418638750382e-08, "loss": 0.09509489536285401, "step": 33355 }, { "epoch": 2.914555303162677, "grad_norm": 0.5545313996775755, "learning_rate": 2.4690524593501408e-08, "loss": 0.09187825918197631, "step": 33360 }, { "epoch": 2.9149921369910885, "grad_norm": 0.553691857008151, "learning_rate": 2.443891592504466e-08, "loss": 0.09197707772254944, "step": 33365 }, { "epoch": 2.9154289708195003, "grad_norm": 0.5439353953735978, "learning_rate": 2.418859269838425e-08, "loss": 0.07494866847991943, "step": 33370 }, { "epoch": 2.915865804647912, "grad_norm": 0.623728535325398, "learning_rate": 2.393955497819067e-08, "loss": 0.11174198389053344, "step": 33375 }, { "epoch": 2.9163026384763238, "grad_norm": 0.662471029732626, "learning_rate": 2.369180282880357e-08, "loss": 0.08021253943443299, "step": 33380 }, { "epoch": 2.9167394723047355, "grad_norm": 0.6329564524315987, "learning_rate": 2.3445336314229517e-08, "loss": 0.08330392837524414, "step": 33385 }, { "epoch": 2.917176306133147, "grad_norm": 0.5805843507849728, "learning_rate": 2.3200155498143695e-08, "loss": 0.0827954888343811, "step": 33390 }, { "epoch": 2.9176131399615586, "grad_norm": 0.5410908477065524, "learning_rate": 2.295626044388932e-08, "loss": 0.09133677482604981, "step": 33395 }, { "epoch": 2.9180499737899703, "grad_norm": 0.6719519432839351, "learning_rate": 2.271365121447655e-08, "loss": 0.09867308735847473, "step": 33400 }, { "epoch": 2.918486807618382, "grad_norm": 0.6092867482829978, "learning_rate": 2.2472327872583576e-08, "loss": 0.09914017915725708, "step": 33405 }, { "epoch": 2.918923641446794, "grad_norm": 0.48306908220865785, "learning_rate": 2.223229048055664e-08, "loss": 0.08572806119918823, "step": 33410 }, { "epoch": 2.919360475275205, "grad_norm": 0.5249878850500337, "learning_rate": 2.1993539100410022e-08, "loss": 0.10214729309082031, "step": 33415 }, { "epoch": 2.919797309103617, "grad_norm": 0.5803915015932449, "learning_rate": 2.1756073793824938e-08, "loss": 0.07873372435569763, "step": 33420 }, { "epoch": 2.9202341429320287, "grad_norm": 0.5540964808272701, "learning_rate": 2.1519894622151205e-08, "loss": 0.07812350392341613, "step": 33425 }, { "epoch": 2.9206709767604404, "grad_norm": 0.5577711536016173, "learning_rate": 2.128500164640557e-08, "loss": 0.06733280420303345, "step": 33430 }, { "epoch": 2.921107810588852, "grad_norm": 0.6995413457465783, "learning_rate": 2.1051394927273372e-08, "loss": 0.10111602544784545, "step": 33435 }, { "epoch": 2.9215446444172635, "grad_norm": 0.62045866423633, "learning_rate": 2.0819074525106343e-08, "loss": 0.07388455867767334, "step": 33440 }, { "epoch": 2.9219814782456752, "grad_norm": 0.5291821475463752, "learning_rate": 2.058804049992591e-08, "loss": 0.09792965650558472, "step": 33445 }, { "epoch": 2.922418312074087, "grad_norm": 0.6085225452913905, "learning_rate": 2.0358292911418777e-08, "loss": 0.10861014127731324, "step": 33450 }, { "epoch": 2.9228551459024987, "grad_norm": 0.58947219986367, "learning_rate": 2.012983181894135e-08, "loss": 0.07929555773735046, "step": 33455 }, { "epoch": 2.9232919797309105, "grad_norm": 0.3855753099343715, "learning_rate": 1.9902657281516412e-08, "loss": 0.08783075213432312, "step": 33460 }, { "epoch": 2.923728813559322, "grad_norm": 0.6885794608540933, "learning_rate": 1.9676769357835356e-08, "loss": 0.09311546087265014, "step": 33465 }, { "epoch": 2.9241656473877335, "grad_norm": 0.5740546400541666, "learning_rate": 1.9452168106255388e-08, "loss": 0.08163554668426513, "step": 33470 }, { "epoch": 2.9246024812161453, "grad_norm": 0.5500924016830904, "learning_rate": 1.922885358480342e-08, "loss": 0.0782279372215271, "step": 33475 }, { "epoch": 2.925039315044557, "grad_norm": 0.6915324908414366, "learning_rate": 1.9006825851173304e-08, "loss": 0.10058339834213256, "step": 33480 }, { "epoch": 2.925476148872969, "grad_norm": 0.5663468345373104, "learning_rate": 1.8786084962724715e-08, "loss": 0.10806457996368408, "step": 33485 }, { "epoch": 2.9259129827013806, "grad_norm": 0.560603190364964, "learning_rate": 1.8566630976488144e-08, "loss": 0.09904481172561645, "step": 33490 }, { "epoch": 2.9263498165297923, "grad_norm": 0.597502042774397, "learning_rate": 1.8348463949158236e-08, "loss": 0.10946090221405029, "step": 33495 }, { "epoch": 2.9267866503582036, "grad_norm": 0.5433433216290782, "learning_rate": 1.8131583937099905e-08, "loss": 0.09816356301307679, "step": 33500 }, { "epoch": 2.9272234841866154, "grad_norm": 0.6245982643314663, "learning_rate": 1.7915990996343336e-08, "loss": 0.10294396877288818, "step": 33505 }, { "epoch": 2.927660318015027, "grad_norm": 0.5664638847786125, "learning_rate": 1.7701685182587303e-08, "loss": 0.09185128211975098, "step": 33510 }, { "epoch": 2.928097151843439, "grad_norm": 0.81925239885977, "learning_rate": 1.7488666551199184e-08, "loss": 0.06831275820732116, "step": 33515 }, { "epoch": 2.9285339856718506, "grad_norm": 0.5529061501667016, "learning_rate": 1.727693515721107e-08, "loss": 0.08263176679611206, "step": 33520 }, { "epoch": 2.928970819500262, "grad_norm": 0.8228438868878827, "learning_rate": 1.7066491055324764e-08, "loss": 0.11114670038223266, "step": 33525 }, { "epoch": 2.9294076533286737, "grad_norm": 0.508315362323971, "learning_rate": 1.6857334299908433e-08, "loss": 0.08619365692138672, "step": 33530 }, { "epoch": 2.9298444871570855, "grad_norm": 0.5715917568270369, "learning_rate": 1.6649464944998304e-08, "loss": 0.07975963354110718, "step": 33535 }, { "epoch": 2.930281320985497, "grad_norm": 0.5816411758866332, "learning_rate": 1.6442883044296977e-08, "loss": 0.1001787781715393, "step": 33540 }, { "epoch": 2.930718154813909, "grad_norm": 0.5939982795731443, "learning_rate": 1.6237588651176196e-08, "loss": 0.09911080002784729, "step": 33545 }, { "epoch": 2.9311549886423203, "grad_norm": 0.4792780002003412, "learning_rate": 1.6033581818672982e-08, "loss": 0.08062248229980469, "step": 33550 }, { "epoch": 2.931591822470732, "grad_norm": 0.6221101987606982, "learning_rate": 1.583086259949351e-08, "loss": 0.10256078243255615, "step": 33555 }, { "epoch": 2.9320286562991438, "grad_norm": 0.48694313323047156, "learning_rate": 1.562943104600978e-08, "loss": 0.06079220175743103, "step": 33560 }, { "epoch": 2.9324654901275555, "grad_norm": 0.5428844357271536, "learning_rate": 1.5429287210262933e-08, "loss": 0.09372198581695557, "step": 33565 }, { "epoch": 2.9329023239559673, "grad_norm": 0.6013815574856938, "learning_rate": 1.5230431143958834e-08, "loss": 0.1057689905166626, "step": 33570 }, { "epoch": 2.9333391577843786, "grad_norm": 0.5728203013386906, "learning_rate": 1.503286289847361e-08, "loss": 0.11888411045074462, "step": 33575 }, { "epoch": 2.9337759916127903, "grad_norm": 0.4551311015271265, "learning_rate": 1.4836582524848097e-08, "loss": 0.1066065788269043, "step": 33580 }, { "epoch": 2.934212825441202, "grad_norm": 0.7080974448487679, "learning_rate": 1.4641590073792843e-08, "loss": 0.08917251825332642, "step": 33585 }, { "epoch": 2.934649659269614, "grad_norm": 0.6450602896031835, "learning_rate": 1.444788559568311e-08, "loss": 0.08105811476707458, "step": 33590 }, { "epoch": 2.9350864930980256, "grad_norm": 0.7722053251928782, "learning_rate": 1.4255469140563304e-08, "loss": 0.09291757345199585, "step": 33595 }, { "epoch": 2.935523326926437, "grad_norm": 0.6270649744996771, "learning_rate": 1.4064340758144223e-08, "loss": 0.10780856609344483, "step": 33600 }, { "epoch": 2.935960160754849, "grad_norm": 0.5670534534462898, "learning_rate": 1.3874500497803589e-08, "loss": 0.09570807218551636, "step": 33605 }, { "epoch": 2.9363969945832604, "grad_norm": 0.5636659331898454, "learning_rate": 1.3685948408588278e-08, "loss": 0.08614356517791748, "step": 33610 }, { "epoch": 2.936833828411672, "grad_norm": 0.605425425444549, "learning_rate": 1.3498684539209882e-08, "loss": 0.11648025512695312, "step": 33615 }, { "epoch": 2.937270662240084, "grad_norm": 0.44983003440267155, "learning_rate": 1.3312708938048036e-08, "loss": 0.0997154951095581, "step": 33620 }, { "epoch": 2.9377074960684957, "grad_norm": 0.5959093939919182, "learning_rate": 1.3128021653150969e-08, "loss": 0.10654597282409668, "step": 33625 }, { "epoch": 2.9381443298969074, "grad_norm": 0.5762826612599644, "learning_rate": 1.2944622732231071e-08, "loss": 0.08831787705421448, "step": 33630 }, { "epoch": 2.9385811637253187, "grad_norm": 0.565917021943221, "learning_rate": 1.2762512222670997e-08, "loss": 0.09367620944976807, "step": 33635 }, { "epoch": 2.9390179975537305, "grad_norm": 0.6015979016977151, "learning_rate": 1.2581690171519222e-08, "loss": 0.09435713291168213, "step": 33640 }, { "epoch": 2.9394548313821423, "grad_norm": 0.5320563732388126, "learning_rate": 1.2402156625490602e-08, "loss": 0.08351738452911377, "step": 33645 }, { "epoch": 2.939891665210554, "grad_norm": 0.45825006897866644, "learning_rate": 1.2223911630968033e-08, "loss": 0.10682215690612792, "step": 33650 }, { "epoch": 2.9403284990389658, "grad_norm": 0.5445799644565601, "learning_rate": 1.2046955234001344e-08, "loss": 0.09004905223846435, "step": 33655 }, { "epoch": 2.940765332867377, "grad_norm": 0.520076352928809, "learning_rate": 1.1871287480307303e-08, "loss": 0.09913620948791504, "step": 33660 }, { "epoch": 2.941202166695789, "grad_norm": 0.6707800325974049, "learning_rate": 1.1696908415270713e-08, "loss": 0.1106076955795288, "step": 33665 }, { "epoch": 2.9416390005242006, "grad_norm": 0.49790124587735274, "learning_rate": 1.1523818083941651e-08, "loss": 0.09476685523986816, "step": 33670 }, { "epoch": 2.9420758343526123, "grad_norm": 0.5242875639521688, "learning_rate": 1.1352016531038235e-08, "loss": 0.08409391641616822, "step": 33675 }, { "epoch": 2.942512668181024, "grad_norm": 0.7140248402391548, "learning_rate": 1.1181503800946625e-08, "loss": 0.10206868648529052, "step": 33680 }, { "epoch": 2.9429495020094354, "grad_norm": 0.5542234747603547, "learning_rate": 1.1012279937717697e-08, "loss": 0.0973975419998169, "step": 33685 }, { "epoch": 2.943386335837847, "grad_norm": 0.5407695061608168, "learning_rate": 1.084434498507092e-08, "loss": 0.10125032663345337, "step": 33690 }, { "epoch": 2.943823169666259, "grad_norm": 0.4828975701310102, "learning_rate": 1.0677698986392703e-08, "loss": 0.10716288089752198, "step": 33695 }, { "epoch": 2.9442600034946707, "grad_norm": 0.6040543728340456, "learning_rate": 1.051234198473694e-08, "loss": 0.09083348512649536, "step": 33700 }, { "epoch": 2.9446968373230824, "grad_norm": 0.5858637040463188, "learning_rate": 1.0348274022822235e-08, "loss": 0.11340620517730712, "step": 33705 }, { "epoch": 2.9451336711514937, "grad_norm": 0.6296374414210414, "learning_rate": 1.0185495143036906e-08, "loss": 0.08281338214874268, "step": 33710 }, { "epoch": 2.945570504979906, "grad_norm": 0.5482071466735872, "learning_rate": 1.0024005387435088e-08, "loss": 0.08619087934494019, "step": 33715 }, { "epoch": 2.946007338808317, "grad_norm": 0.5985881747829702, "learning_rate": 9.863804797736742e-09, "loss": 0.11314303874969482, "step": 33720 }, { "epoch": 2.946444172636729, "grad_norm": 0.6029237274205242, "learning_rate": 9.70489341533154e-09, "loss": 0.08780918121337891, "step": 33725 }, { "epoch": 2.9468810064651407, "grad_norm": 0.49122012441158347, "learning_rate": 9.547271281272752e-09, "loss": 0.08521759510040283, "step": 33730 }, { "epoch": 2.9473178402935525, "grad_norm": 0.5946693787023775, "learning_rate": 9.390938436282803e-09, "loss": 0.076052725315094, "step": 33735 }, { "epoch": 2.9477546741219642, "grad_norm": 0.5338301228965358, "learning_rate": 9.235894920751054e-09, "loss": 0.08008674383163453, "step": 33740 }, { "epoch": 2.9481915079503755, "grad_norm": 0.6089607039750866, "learning_rate": 9.082140774732685e-09, "loss": 0.093683922290802, "step": 33745 }, { "epoch": 2.9486283417787873, "grad_norm": 0.690863972859767, "learning_rate": 8.929676037950364e-09, "loss": 0.08223647475242615, "step": 33750 }, { "epoch": 2.949065175607199, "grad_norm": 0.4147418093062789, "learning_rate": 8.778500749793695e-09, "loss": 0.07890592813491822, "step": 33755 }, { "epoch": 2.949502009435611, "grad_norm": 0.5682139587105719, "learning_rate": 8.628614949318103e-09, "loss": 0.09134029150009156, "step": 33760 }, { "epoch": 2.9499388432640226, "grad_norm": 0.6985785368332956, "learning_rate": 8.480018675247614e-09, "loss": 0.109474778175354, "step": 33765 }, { "epoch": 2.950375677092434, "grad_norm": 0.4975876154679559, "learning_rate": 8.332711965972629e-09, "loss": 0.06413218379020691, "step": 33770 }, { "epoch": 2.9508125109208456, "grad_norm": 0.5087100937176443, "learning_rate": 8.186694859548816e-09, "loss": 0.07573640942573548, "step": 33775 }, { "epoch": 2.9512493447492574, "grad_norm": 0.6612659413517509, "learning_rate": 8.041967393700445e-09, "loss": 0.10929732322692871, "step": 33780 }, { "epoch": 2.951686178577669, "grad_norm": 0.6332225304082059, "learning_rate": 7.898529605818161e-09, "loss": 0.09476912617683411, "step": 33785 }, { "epoch": 2.952123012406081, "grad_norm": 0.6068345341525165, "learning_rate": 7.756381532959545e-09, "loss": 0.08818488121032715, "step": 33790 }, { "epoch": 2.952559846234492, "grad_norm": 0.5754680433442763, "learning_rate": 7.615523211847442e-09, "loss": 0.10348587036132813, "step": 33795 }, { "epoch": 2.952996680062904, "grad_norm": 0.5568730512070217, "learning_rate": 7.475954678874408e-09, "loss": 0.09197317957878112, "step": 33800 }, { "epoch": 2.9534335138913157, "grad_norm": 0.5052945047100359, "learning_rate": 7.337675970097158e-09, "loss": 0.08213062286376953, "step": 33805 }, { "epoch": 2.9538703477197275, "grad_norm": 0.6082679866100106, "learning_rate": 7.200687121239891e-09, "loss": 0.07906550168991089, "step": 33810 }, { "epoch": 2.954307181548139, "grad_norm": 0.4767639449650418, "learning_rate": 7.064988167694853e-09, "loss": 0.06953428387641906, "step": 33815 }, { "epoch": 2.9547440153765505, "grad_norm": 0.42110959846572327, "learning_rate": 6.930579144519e-09, "loss": 0.08472735285758973, "step": 33820 }, { "epoch": 2.9551808492049623, "grad_norm": 0.5405387317031348, "learning_rate": 6.797460086437891e-09, "loss": 0.07461661696434022, "step": 33825 }, { "epoch": 2.955617683033374, "grad_norm": 0.6050035080990809, "learning_rate": 6.665631027842345e-09, "loss": 0.10696921348571778, "step": 33830 }, { "epoch": 2.9560545168617858, "grad_norm": 0.6018925329587872, "learning_rate": 6.535092002790677e-09, "loss": 0.08954052329063415, "step": 33835 }, { "epoch": 2.9564913506901975, "grad_norm": 0.4817293405510214, "learning_rate": 6.405843045008131e-09, "loss": 0.08710180521011353, "step": 33840 }, { "epoch": 2.9569281845186093, "grad_norm": 0.5276593863072542, "learning_rate": 6.27788418788633e-09, "loss": 0.09012961387634277, "step": 33845 }, { "epoch": 2.957365018347021, "grad_norm": 0.5690819809703258, "learning_rate": 6.151215464483273e-09, "loss": 0.0846190631389618, "step": 33850 }, { "epoch": 2.9578018521754323, "grad_norm": 0.5318435440104471, "learning_rate": 6.0258369075238965e-09, "loss": 0.09229607582092285, "step": 33855 }, { "epoch": 2.958238686003844, "grad_norm": 0.5956591928762729, "learning_rate": 5.9017485493995105e-09, "loss": 0.10336906909942627, "step": 33860 }, { "epoch": 2.958675519832256, "grad_norm": 0.5194696089289691, "learning_rate": 5.778950422170027e-09, "loss": 0.09594271183013917, "step": 33865 }, { "epoch": 2.9591123536606676, "grad_norm": 0.6400984443220724, "learning_rate": 5.657442557558956e-09, "loss": 0.08782337903976441, "step": 33870 }, { "epoch": 2.9595491874890794, "grad_norm": 0.7063007787137864, "learning_rate": 5.5372249869584115e-09, "loss": 0.07362443208694458, "step": 33875 }, { "epoch": 2.9599860213174907, "grad_norm": 0.5749054373164428, "learning_rate": 5.418297741426881e-09, "loss": 0.08150789737701417, "step": 33880 }, { "epoch": 2.9604228551459024, "grad_norm": 0.5743189741020497, "learning_rate": 5.300660851689232e-09, "loss": 0.07438963651657104, "step": 33885 }, { "epoch": 2.960859688974314, "grad_norm": 0.5646446801473999, "learning_rate": 5.184314348137265e-09, "loss": 0.08196298480033874, "step": 33890 }, { "epoch": 2.961296522802726, "grad_norm": 0.6024200228042331, "learning_rate": 5.069258260829158e-09, "loss": 0.08896365165710449, "step": 33895 }, { "epoch": 2.9617333566311377, "grad_norm": 0.5464183656348899, "learning_rate": 4.955492619490021e-09, "loss": 0.09644302725791931, "step": 33900 }, { "epoch": 2.962170190459549, "grad_norm": 0.5457458633599787, "learning_rate": 4.84301745351079e-09, "loss": 0.08699400424957275, "step": 33905 }, { "epoch": 2.9626070242879607, "grad_norm": 0.6907685317607489, "learning_rate": 4.731832791949886e-09, "loss": 0.09504532814025879, "step": 33910 }, { "epoch": 2.9630438581163725, "grad_norm": 0.5949628851392064, "learning_rate": 4.621938663531556e-09, "loss": 0.10525414943695069, "step": 33915 }, { "epoch": 2.9634806919447843, "grad_norm": 0.5277917691690025, "learning_rate": 4.513335096648086e-09, "loss": 0.08956842422485352, "step": 33920 }, { "epoch": 2.963917525773196, "grad_norm": 0.5901791386878565, "learning_rate": 4.406022119356479e-09, "loss": 0.10058825016021729, "step": 33925 }, { "epoch": 2.9643543596016073, "grad_norm": 0.5486275503166995, "learning_rate": 4.299999759381224e-09, "loss": 0.09447291493415833, "step": 33930 }, { "epoch": 2.964791193430019, "grad_norm": 0.4716511108270006, "learning_rate": 4.195268044113188e-09, "loss": 0.09662890434265137, "step": 33935 }, { "epoch": 2.965228027258431, "grad_norm": 0.551931954316631, "learning_rate": 4.091827000610726e-09, "loss": 0.06337050199508668, "step": 33940 }, { "epoch": 2.9656648610868426, "grad_norm": 0.5345584391164443, "learning_rate": 3.989676655597463e-09, "loss": 0.0936396837234497, "step": 33945 }, { "epoch": 2.9661016949152543, "grad_norm": 0.5889368675611726, "learning_rate": 3.888817035463399e-09, "loss": 0.10055760145187378, "step": 33950 }, { "epoch": 2.9665385287436656, "grad_norm": 0.6327949842541961, "learning_rate": 3.789248166266579e-09, "loss": 0.07253519296646119, "step": 33955 }, { "epoch": 2.966975362572078, "grad_norm": 0.5398615962984549, "learning_rate": 3.6909700737308706e-09, "loss": 0.07915496826171875, "step": 33960 }, { "epoch": 2.967412196400489, "grad_norm": 0.5354933422772152, "learning_rate": 3.593982783245964e-09, "loss": 0.08571004867553711, "step": 33965 }, { "epoch": 2.967849030228901, "grad_norm": 0.49313685001636876, "learning_rate": 3.4982863198684825e-09, "loss": 0.09270508289337158, "step": 33970 }, { "epoch": 2.9682858640573127, "grad_norm": 0.5428407186091716, "learning_rate": 3.4038807083225378e-09, "loss": 0.10842688083648681, "step": 33975 }, { "epoch": 2.9687226978857244, "grad_norm": 0.6584078924145262, "learning_rate": 3.3107659729975096e-09, "loss": 0.1299384593963623, "step": 33980 }, { "epoch": 2.969159531714136, "grad_norm": 0.5821332735716291, "learning_rate": 3.2189421379491546e-09, "loss": 0.08856396675109864, "step": 33985 }, { "epoch": 2.9695963655425475, "grad_norm": 0.5781896898265148, "learning_rate": 3.1284092269012743e-09, "loss": 0.10272805690765381, "step": 33990 }, { "epoch": 2.970033199370959, "grad_norm": 0.5850977359593723, "learning_rate": 3.039167263241827e-09, "loss": 0.10730624198913574, "step": 33995 }, { "epoch": 2.970470033199371, "grad_norm": 0.6204387458138934, "learning_rate": 2.9512162700284784e-09, "loss": 0.10514590740203858, "step": 34000 }, { "epoch": 2.9709068670277827, "grad_norm": 0.5534106928630903, "learning_rate": 2.864556269981389e-09, "loss": 0.09621605873107911, "step": 34005 }, { "epoch": 2.9713437008561945, "grad_norm": 0.5617065732094316, "learning_rate": 2.7791872854904255e-09, "loss": 0.08579744100570678, "step": 34010 }, { "epoch": 2.971780534684606, "grad_norm": 0.47967835278772636, "learning_rate": 2.6951093386107243e-09, "loss": 0.08906238079071045, "step": 34015 }, { "epoch": 2.9722173685130175, "grad_norm": 0.5854834496209557, "learning_rate": 2.612322451063798e-09, "loss": 0.10499017238616944, "step": 34020 }, { "epoch": 2.9726542023414293, "grad_norm": 0.5950959186968693, "learning_rate": 2.5308266442380937e-09, "loss": 0.11276160478591919, "step": 34025 }, { "epoch": 2.973091036169841, "grad_norm": 0.5739611288671395, "learning_rate": 2.4506219391873256e-09, "loss": 0.07617582678794861, "step": 34030 }, { "epoch": 2.973527869998253, "grad_norm": 0.6004022551732042, "learning_rate": 2.371708356632696e-09, "loss": 0.09159606099128723, "step": 34035 }, { "epoch": 2.973964703826664, "grad_norm": 0.6191002969816233, "learning_rate": 2.2940859169617858e-09, "loss": 0.08751972913742065, "step": 34040 }, { "epoch": 2.974401537655076, "grad_norm": 0.49904416326779316, "learning_rate": 2.2177546402291085e-09, "loss": 0.09919162988662719, "step": 34045 }, { "epoch": 2.9748383714834876, "grad_norm": 0.6999087489243434, "learning_rate": 2.14271454615389e-09, "loss": 0.1042179822921753, "step": 34050 }, { "epoch": 2.9752752053118994, "grad_norm": 0.4800004164643887, "learning_rate": 2.068965654122845e-09, "loss": 0.07945063114166259, "step": 34055 }, { "epoch": 2.975712039140311, "grad_norm": 0.5739586635148652, "learning_rate": 1.996507983190177e-09, "loss": 0.09375212192535401, "step": 34060 }, { "epoch": 2.9761488729687224, "grad_norm": 0.5833158992207688, "learning_rate": 1.9253415520742447e-09, "loss": 0.07921696901321411, "step": 34065 }, { "epoch": 2.9765857067971346, "grad_norm": 0.6295486305694599, "learning_rate": 1.8554663791614525e-09, "loss": 0.10492962598800659, "step": 34070 }, { "epoch": 2.977022540625546, "grad_norm": 1.000382544732829, "learning_rate": 1.7868824825040265e-09, "loss": 0.0899443805217743, "step": 34075 }, { "epoch": 2.9774593744539577, "grad_norm": 0.5633943354735663, "learning_rate": 1.7195898798211263e-09, "loss": 0.0904410421848297, "step": 34080 }, { "epoch": 2.9778962082823695, "grad_norm": 0.5985803552472355, "learning_rate": 1.653588588497179e-09, "loss": 0.08611985445022582, "step": 34085 }, { "epoch": 2.978333042110781, "grad_norm": 0.5103005422486495, "learning_rate": 1.5888786255841005e-09, "loss": 0.0876761257648468, "step": 34090 }, { "epoch": 2.978769875939193, "grad_norm": 0.5369881314599233, "learning_rate": 1.5254600077996285e-09, "loss": 0.08211992979049683, "step": 34095 }, { "epoch": 2.9792067097676043, "grad_norm": 0.5552048250155033, "learning_rate": 1.4633327515284346e-09, "loss": 0.07450251579284668, "step": 34100 }, { "epoch": 2.979643543596016, "grad_norm": 0.7239048546588649, "learning_rate": 1.402496872820458e-09, "loss": 0.0930323600769043, "step": 34105 }, { "epoch": 2.9800803774244278, "grad_norm": 0.5432348444590299, "learning_rate": 1.3429523873931261e-09, "loss": 0.09773153066635132, "step": 34110 }, { "epoch": 2.9805172112528395, "grad_norm": 0.692261010598566, "learning_rate": 1.2846993106302442e-09, "loss": 0.09886118173599243, "step": 34115 }, { "epoch": 2.9809540450812513, "grad_norm": 0.5053106608080725, "learning_rate": 1.2277376575808853e-09, "loss": 0.0951552927494049, "step": 34120 }, { "epoch": 2.9813908789096626, "grad_norm": 0.6520628820753597, "learning_rate": 1.1720674429610556e-09, "loss": 0.09973527193069458, "step": 34125 }, { "epoch": 2.9818277127380743, "grad_norm": 0.5728728436169177, "learning_rate": 1.1176886811536947e-09, "loss": 0.10648293495178222, "step": 34130 }, { "epoch": 2.982264546566486, "grad_norm": 0.5202288401377252, "learning_rate": 1.0646013862075645e-09, "loss": 0.07589495182037354, "step": 34135 }, { "epoch": 2.982701380394898, "grad_norm": 0.5360109116887176, "learning_rate": 1.012805571837805e-09, "loss": 0.09903440475463868, "step": 34140 }, { "epoch": 2.9831382142233096, "grad_norm": 0.6415468343422116, "learning_rate": 9.623012514259345e-10, "loss": 0.08717002868652343, "step": 34145 }, { "epoch": 2.983575048051721, "grad_norm": 0.6155621453162293, "learning_rate": 9.130884380192939e-10, "loss": 0.11152350902557373, "step": 34150 }, { "epoch": 2.9840118818801327, "grad_norm": 0.48939281198056706, "learning_rate": 8.651671443332676e-10, "loss": 0.09112855195999145, "step": 34155 }, { "epoch": 2.9844487157085444, "grad_norm": 0.6303016631024223, "learning_rate": 8.185373827468424e-10, "loss": 0.10579335689544678, "step": 34160 }, { "epoch": 2.984885549536956, "grad_norm": 0.4698023175144731, "learning_rate": 7.731991653081583e-10, "loss": 0.08129670619964599, "step": 34165 }, { "epoch": 2.985322383365368, "grad_norm": 0.5639501391243446, "learning_rate": 7.291525037295133e-10, "loss": 0.10409467220306397, "step": 34170 }, { "epoch": 2.9857592171937792, "grad_norm": 0.4856558046647639, "learning_rate": 6.863974093906934e-10, "loss": 0.09097541570663452, "step": 34175 }, { "epoch": 2.9861960510221914, "grad_norm": 0.5592932013201786, "learning_rate": 6.449338933378624e-10, "loss": 0.08789713382720947, "step": 34180 }, { "epoch": 2.9866328848506027, "grad_norm": 0.5066949736519285, "learning_rate": 6.047619662830073e-10, "loss": 0.07800759077072143, "step": 34185 }, { "epoch": 2.9870697186790145, "grad_norm": 0.6038527258269286, "learning_rate": 5.658816386039379e-10, "loss": 0.08928012251853942, "step": 34190 }, { "epoch": 2.9875065525074262, "grad_norm": 0.5903486477822821, "learning_rate": 5.28292920346507e-10, "loss": 0.08382228612899781, "step": 34195 }, { "epoch": 2.987943386335838, "grad_norm": 0.5753850250162943, "learning_rate": 4.919958212207254e-10, "loss": 0.07729458808898926, "step": 34200 }, { "epoch": 2.9883802201642498, "grad_norm": 0.6566342229231081, "learning_rate": 4.5699035060520204e-10, "loss": 0.08776944875717163, "step": 34205 }, { "epoch": 2.988817053992661, "grad_norm": 0.6204618822251604, "learning_rate": 4.2327651754270336e-10, "loss": 0.08993226885795594, "step": 34210 }, { "epoch": 2.989253887821073, "grad_norm": 0.5093692674374416, "learning_rate": 3.908543307434842e-10, "loss": 0.09135284423828124, "step": 34215 }, { "epoch": 2.9896907216494846, "grad_norm": 0.5664322972976826, "learning_rate": 3.5972379858362215e-10, "loss": 0.0869238793849945, "step": 34220 }, { "epoch": 2.9901275554778963, "grad_norm": 0.4483474580572616, "learning_rate": 3.298849291061279e-10, "loss": 0.08142510652542115, "step": 34225 }, { "epoch": 2.990564389306308, "grad_norm": 0.5648377912674816, "learning_rate": 3.01337730019835e-10, "loss": 0.09038633108139038, "step": 34230 }, { "epoch": 2.9910012231347194, "grad_norm": 0.4493932255738925, "learning_rate": 2.7408220869995505e-10, "loss": 0.0810268759727478, "step": 34235 }, { "epoch": 2.991438056963131, "grad_norm": 0.5938017212084222, "learning_rate": 2.4811837218752245e-10, "loss": 0.08877135515213012, "step": 34240 }, { "epoch": 2.991874890791543, "grad_norm": 0.577487118649023, "learning_rate": 2.234462271910598e-10, "loss": 0.07187597751617432, "step": 34245 }, { "epoch": 2.9923117246199546, "grad_norm": 0.5531755453441487, "learning_rate": 2.0006578008435751e-10, "loss": 0.07577044367790223, "step": 34250 }, { "epoch": 2.9927485584483664, "grad_norm": 0.5517932620326705, "learning_rate": 1.7797703690758395e-10, "loss": 0.10948874950408935, "step": 34255 }, { "epoch": 2.9931853922767777, "grad_norm": 0.5225400163613084, "learning_rate": 1.5718000336728546e-10, "loss": 0.09389148950576783, "step": 34260 }, { "epoch": 2.9936222261051895, "grad_norm": 0.5466173115418115, "learning_rate": 1.3767468483694146e-10, "loss": 0.08043140172958374, "step": 34265 }, { "epoch": 2.994059059933601, "grad_norm": 0.5995030025984254, "learning_rate": 1.1946108635474407e-10, "loss": 0.10157883167266846, "step": 34270 }, { "epoch": 2.994495893762013, "grad_norm": 0.5815307786537817, "learning_rate": 1.0253921262748378e-10, "loss": 0.09865264892578125, "step": 34275 }, { "epoch": 2.9949327275904247, "grad_norm": 0.562195805282765, "learning_rate": 8.690906802666377e-11, "loss": 0.10160671472549439, "step": 34280 }, { "epoch": 2.995369561418836, "grad_norm": 0.605774920740792, "learning_rate": 7.257065658961004e-11, "loss": 0.07216809988021851, "step": 34285 }, { "epoch": 2.995806395247248, "grad_norm": 0.5456062098263781, "learning_rate": 5.952398202113685e-11, "loss": 0.09450020790100097, "step": 34290 }, { "epoch": 2.9962432290756595, "grad_norm": 0.5559206623873691, "learning_rate": 4.776904769188129e-11, "loss": 0.07947906255722045, "step": 34295 }, { "epoch": 2.9966800629040713, "grad_norm": 0.5233852273135208, "learning_rate": 3.730585663885844e-11, "loss": 0.08428159952163697, "step": 34300 }, { "epoch": 2.997116896732483, "grad_norm": 0.5776463873500868, "learning_rate": 2.813441156490626e-11, "loss": 0.08771494030952454, "step": 34305 }, { "epoch": 2.9975537305608944, "grad_norm": 0.6468323570951684, "learning_rate": 2.0254714839795797e-11, "loss": 0.09190446734428406, "step": 34310 }, { "epoch": 2.9979905643893066, "grad_norm": 0.5886581379021629, "learning_rate": 1.3666768499120964e-11, "loss": 0.09134281873703003, "step": 34315 }, { "epoch": 2.998427398217718, "grad_norm": 0.6928674235428266, "learning_rate": 8.370574244298546e-12, "loss": 0.09687595963478088, "step": 34320 }, { "epoch": 2.9988642320461296, "grad_norm": 0.5364033634565747, "learning_rate": 4.366133444788645e-12, "loss": 0.11010109186172486, "step": 34325 }, { "epoch": 2.9993010658745414, "grad_norm": 0.5105002430315109, "learning_rate": 1.653447134764008e-12, "loss": 0.07477107048034667, "step": 34330 }, { "epoch": 2.999737899702953, "grad_norm": 0.5126872618432291, "learning_rate": 2.3251601422025206e-13, "loss": 0.11864917278289795, "step": 34335 }, { "epoch": 3.0, "eval_loss": 0.15029732882976532, "eval_runtime": 0.7928, "eval_samples_per_second": 11.352, "eval_steps_per_second": 2.523, "eval_token_acc": 0.9356783714180599, "step": 34338 }, { "epoch": 3.0, "eval_loss": 0.15029732882976532, "eval_runtime": 1.1528, "eval_samples_per_second": 7.807, "eval_steps_per_second": 1.735, "eval_token_acc": 0.9356783714180599, "step": 34338 } ], "logging_steps": 5, "max_steps": 34338, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2492618179104768.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }