| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 15000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002, |
| "grad_norm": 14.211126327514648, |
| "learning_rate": 6.000000000000001e-08, |
| "loss": 0.9806, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 8.715738296508789, |
| "learning_rate": 1.2666666666666666e-07, |
| "loss": 0.9899, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 10.858619689941406, |
| "learning_rate": 1.9333333333333337e-07, |
| "loss": 1.0494, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 11.522907257080078, |
| "learning_rate": 2.6e-07, |
| "loss": 0.9985, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 11.823868751525879, |
| "learning_rate": 3.266666666666667e-07, |
| "loss": 0.9687, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 10.01672077178955, |
| "learning_rate": 3.9333333333333336e-07, |
| "loss": 0.7783, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 8.285725593566895, |
| "learning_rate": 4.6000000000000004e-07, |
| "loss": 0.8029, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 3.7833917140960693, |
| "learning_rate": 5.266666666666667e-07, |
| "loss": 0.6355, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 3.166145086288452, |
| "learning_rate": 5.933333333333334e-07, |
| "loss": 0.5781, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 2.388826847076416, |
| "learning_rate": 6.6e-07, |
| "loss": 0.5602, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 2.683410167694092, |
| "learning_rate": 7.266666666666668e-07, |
| "loss": 0.5822, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 2.4587814807891846, |
| "learning_rate": 7.933333333333335e-07, |
| "loss": 0.4702, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 2.512497663497925, |
| "learning_rate": 8.6e-07, |
| "loss": 0.5497, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 1.4995685815811157, |
| "learning_rate": 9.266666666666667e-07, |
| "loss": 0.5195, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 2.7800076007843018, |
| "learning_rate": 9.933333333333333e-07, |
| "loss": 0.4665, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 1.491605281829834, |
| "learning_rate": 1.06e-06, |
| "loss": 0.5083, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 1.643656611442566, |
| "learning_rate": 1.1266666666666667e-06, |
| "loss": 0.4549, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 2.509424924850464, |
| "learning_rate": 1.1933333333333335e-06, |
| "loss": 0.4816, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 1.9511481523513794, |
| "learning_rate": 1.26e-06, |
| "loss": 0.5072, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.516787052154541, |
| "learning_rate": 1.3266666666666667e-06, |
| "loss": 0.497, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 2.568359851837158, |
| "learning_rate": 1.3933333333333335e-06, |
| "loss": 0.485, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 1.8086744546890259, |
| "learning_rate": 1.46e-06, |
| "loss": 0.5004, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 1.9711308479309082, |
| "learning_rate": 1.526666666666667e-06, |
| "loss": 0.502, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 1.787848949432373, |
| "learning_rate": 1.5933333333333335e-06, |
| "loss": 0.5269, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 1.7853062152862549, |
| "learning_rate": 1.6600000000000002e-06, |
| "loss": 0.4902, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 1.663499355316162, |
| "learning_rate": 1.7266666666666667e-06, |
| "loss": 0.5142, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 1.4939693212509155, |
| "learning_rate": 1.7933333333333337e-06, |
| "loss": 0.4769, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 1.9765783548355103, |
| "learning_rate": 1.8600000000000002e-06, |
| "loss": 0.4955, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 1.5771379470825195, |
| "learning_rate": 1.926666666666667e-06, |
| "loss": 0.4419, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 1.0956958532333374, |
| "learning_rate": 1.9933333333333334e-06, |
| "loss": 0.4221, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 2.1830596923828125, |
| "learning_rate": 2.06e-06, |
| "loss": 0.5702, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 2.9908502101898193, |
| "learning_rate": 2.126666666666667e-06, |
| "loss": 0.5233, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 1.752899169921875, |
| "learning_rate": 2.1933333333333332e-06, |
| "loss": 0.4822, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 1.7774080038070679, |
| "learning_rate": 2.2600000000000004e-06, |
| "loss": 0.4571, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.2225241661071777, |
| "learning_rate": 2.3266666666666667e-06, |
| "loss": 0.3769, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 1.7270737886428833, |
| "learning_rate": 2.3933333333333334e-06, |
| "loss": 0.4333, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 1.9639253616333008, |
| "learning_rate": 2.46e-06, |
| "loss": 0.494, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 1.4829277992248535, |
| "learning_rate": 2.526666666666667e-06, |
| "loss": 0.455, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 2.67972469329834, |
| "learning_rate": 2.5933333333333336e-06, |
| "loss": 0.4865, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.0642127990722656, |
| "learning_rate": 2.6600000000000004e-06, |
| "loss": 0.4665, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 1.5665631294250488, |
| "learning_rate": 2.726666666666667e-06, |
| "loss": 0.4586, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 1.1839872598648071, |
| "learning_rate": 2.7933333333333334e-06, |
| "loss": 0.4541, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 1.2557004690170288, |
| "learning_rate": 2.86e-06, |
| "loss": 0.4357, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 1.4764341115951538, |
| "learning_rate": 2.9266666666666673e-06, |
| "loss": 0.5011, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 1.9956254959106445, |
| "learning_rate": 2.9933333333333336e-06, |
| "loss": 0.4933, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 1.6207813024520874, |
| "learning_rate": 3.0600000000000003e-06, |
| "loss": 0.4402, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 1.7665313482284546, |
| "learning_rate": 3.1266666666666667e-06, |
| "loss": 0.4392, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.76856529712677, |
| "learning_rate": 3.193333333333334e-06, |
| "loss": 0.4521, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 1.7528122663497925, |
| "learning_rate": 3.2600000000000006e-06, |
| "loss": 0.4168, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.3086860179901123, |
| "learning_rate": 3.326666666666667e-06, |
| "loss": 0.4228, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 2.676274061203003, |
| "learning_rate": 3.3933333333333336e-06, |
| "loss": 0.4733, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 1.9702718257904053, |
| "learning_rate": 3.46e-06, |
| "loss": 0.4273, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 1.8276294469833374, |
| "learning_rate": 3.526666666666667e-06, |
| "loss": 0.491, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 3.189826726913452, |
| "learning_rate": 3.593333333333334e-06, |
| "loss": 0.5354, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 1.3039813041687012, |
| "learning_rate": 3.66e-06, |
| "loss": 0.4561, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 1.4156097173690796, |
| "learning_rate": 3.726666666666667e-06, |
| "loss": 0.3896, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 1.6464276313781738, |
| "learning_rate": 3.793333333333334e-06, |
| "loss": 0.4438, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 1.4432697296142578, |
| "learning_rate": 3.86e-06, |
| "loss": 0.5175, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 1.6621713638305664, |
| "learning_rate": 3.926666666666667e-06, |
| "loss": 0.4429, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.6414673328399658, |
| "learning_rate": 3.993333333333334e-06, |
| "loss": 0.4958, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 1.51468026638031, |
| "learning_rate": 4.060000000000001e-06, |
| "loss": 0.4636, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 1.9095144271850586, |
| "learning_rate": 4.126666666666667e-06, |
| "loss": 0.4545, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 1.6873503923416138, |
| "learning_rate": 4.1933333333333336e-06, |
| "loss": 0.4203, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 1.8741629123687744, |
| "learning_rate": 4.26e-06, |
| "loss": 0.4713, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 1.5286614894866943, |
| "learning_rate": 4.326666666666667e-06, |
| "loss": 0.3901, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 1.4989817142486572, |
| "learning_rate": 4.393333333333334e-06, |
| "loss": 0.4117, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 1.642307996749878, |
| "learning_rate": 4.4600000000000005e-06, |
| "loss": 0.5395, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 2.079261064529419, |
| "learning_rate": 4.526666666666667e-06, |
| "loss": 0.4314, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 1.440948486328125, |
| "learning_rate": 4.593333333333333e-06, |
| "loss": 0.451, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 1.213745355606079, |
| "learning_rate": 4.66e-06, |
| "loss": 0.4941, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 2.1743085384368896, |
| "learning_rate": 4.7266666666666674e-06, |
| "loss": 0.4749, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 1.6815190315246582, |
| "learning_rate": 4.793333333333334e-06, |
| "loss": 0.606, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 1.5993754863739014, |
| "learning_rate": 4.86e-06, |
| "loss": 0.4041, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 2.1879842281341553, |
| "learning_rate": 4.926666666666667e-06, |
| "loss": 0.482, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.3680864572525024, |
| "learning_rate": 4.9933333333333335e-06, |
| "loss": 0.4598, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 1.4133329391479492, |
| "learning_rate": 5.060000000000001e-06, |
| "loss": 0.4606, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 1.6556570529937744, |
| "learning_rate": 5.126666666666668e-06, |
| "loss": 0.3716, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 1.5880504846572876, |
| "learning_rate": 5.193333333333333e-06, |
| "loss": 0.5006, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 1.4626226425170898, |
| "learning_rate": 5.2600000000000005e-06, |
| "loss": 0.4802, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.9758862257003784, |
| "learning_rate": 5.326666666666667e-06, |
| "loss": 0.4725, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 1.7073544263839722, |
| "learning_rate": 5.393333333333334e-06, |
| "loss": 0.412, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 1.9127854108810425, |
| "learning_rate": 5.460000000000001e-06, |
| "loss": 0.4806, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 1.452903389930725, |
| "learning_rate": 5.5266666666666666e-06, |
| "loss": 0.4171, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 1.603830337524414, |
| "learning_rate": 5.593333333333334e-06, |
| "loss": 0.4348, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.7848249673843384, |
| "learning_rate": 5.66e-06, |
| "loss": 0.4739, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 1.8033798933029175, |
| "learning_rate": 5.726666666666667e-06, |
| "loss": 0.4251, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 1.6332205533981323, |
| "learning_rate": 5.793333333333334e-06, |
| "loss": 0.4422, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 1.8807281255722046, |
| "learning_rate": 5.86e-06, |
| "loss": 0.4224, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 1.7026886940002441, |
| "learning_rate": 5.926666666666667e-06, |
| "loss": 0.4299, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.69607412815094, |
| "learning_rate": 5.993333333333334e-06, |
| "loss": 0.5092, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 1.3589431047439575, |
| "learning_rate": 6.0600000000000004e-06, |
| "loss": 0.399, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 1.3969651460647583, |
| "learning_rate": 6.126666666666668e-06, |
| "loss": 0.5225, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 1.8621569871902466, |
| "learning_rate": 6.193333333333333e-06, |
| "loss": 0.4389, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 1.6221462488174438, |
| "learning_rate": 6.26e-06, |
| "loss": 0.4367, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.452337622642517, |
| "learning_rate": 6.326666666666667e-06, |
| "loss": 0.452, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 1.5459558963775635, |
| "learning_rate": 6.393333333333334e-06, |
| "loss": 0.5254, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 1.353761076927185, |
| "learning_rate": 6.460000000000001e-06, |
| "loss": 0.4755, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 1.3966904878616333, |
| "learning_rate": 6.526666666666666e-06, |
| "loss": 0.4842, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 1.3571563959121704, |
| "learning_rate": 6.5933333333333335e-06, |
| "loss": 0.4776, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.7380127906799316, |
| "learning_rate": 6.660000000000001e-06, |
| "loss": 0.5087, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 1.30695378780365, |
| "learning_rate": 6.726666666666667e-06, |
| "loss": 0.4261, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 1.4552329778671265, |
| "learning_rate": 6.793333333333334e-06, |
| "loss": 0.4485, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 1.4665312767028809, |
| "learning_rate": 6.860000000000001e-06, |
| "loss": 0.4326, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 1.511225938796997, |
| "learning_rate": 6.926666666666667e-06, |
| "loss": 0.4234, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.2507603168487549, |
| "learning_rate": 6.993333333333334e-06, |
| "loss": 0.3983, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 1.8667805194854736, |
| "learning_rate": 7.06e-06, |
| "loss": 0.4798, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 1.400815725326538, |
| "learning_rate": 7.126666666666667e-06, |
| "loss": 0.4947, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 1.2267810106277466, |
| "learning_rate": 7.1933333333333345e-06, |
| "loss": 0.4703, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 1.2562822103500366, |
| "learning_rate": 7.260000000000001e-06, |
| "loss": 0.4272, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.6255943775177002, |
| "learning_rate": 7.326666666666667e-06, |
| "loss": 0.5082, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 1.6352773904800415, |
| "learning_rate": 7.393333333333333e-06, |
| "loss": 0.4679, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 1.4105769395828247, |
| "learning_rate": 7.4600000000000006e-06, |
| "loss": 0.4716, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 1.766946792602539, |
| "learning_rate": 7.526666666666668e-06, |
| "loss": 0.5016, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 1.1533312797546387, |
| "learning_rate": 7.593333333333334e-06, |
| "loss": 0.4691, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.6040184497833252, |
| "learning_rate": 7.660000000000001e-06, |
| "loss": 0.4123, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 1.3271604776382446, |
| "learning_rate": 7.726666666666667e-06, |
| "loss": 0.4571, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 1.7813302278518677, |
| "learning_rate": 7.793333333333334e-06, |
| "loss": 0.4868, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 1.5846160650253296, |
| "learning_rate": 7.860000000000001e-06, |
| "loss": 0.4237, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 1.4138127565383911, |
| "learning_rate": 7.926666666666666e-06, |
| "loss": 0.4485, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 1.7159594297409058, |
| "learning_rate": 7.993333333333334e-06, |
| "loss": 0.5088, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 1.5566740036010742, |
| "learning_rate": 8.06e-06, |
| "loss": 0.4248, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 1.4056092500686646, |
| "learning_rate": 8.126666666666668e-06, |
| "loss": 0.4242, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 1.2954442501068115, |
| "learning_rate": 8.193333333333335e-06, |
| "loss": 0.479, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 1.576978325843811, |
| "learning_rate": 8.26e-06, |
| "loss": 0.4207, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.4118748903274536, |
| "learning_rate": 8.326666666666668e-06, |
| "loss": 0.4451, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.252, |
| "grad_norm": 1.3957098722457886, |
| "learning_rate": 8.393333333333335e-06, |
| "loss": 0.4955, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.254, |
| "grad_norm": 1.2895923852920532, |
| "learning_rate": 8.46e-06, |
| "loss": 0.4241, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 1.5065820217132568, |
| "learning_rate": 8.526666666666667e-06, |
| "loss": 0.4727, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.258, |
| "grad_norm": 1.086529016494751, |
| "learning_rate": 8.593333333333333e-06, |
| "loss": 0.4429, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.2884867191314697, |
| "learning_rate": 8.66e-06, |
| "loss": 0.4496, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.262, |
| "grad_norm": 1.344300627708435, |
| "learning_rate": 8.726666666666667e-06, |
| "loss": 0.3874, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 1.2245051860809326, |
| "learning_rate": 8.793333333333334e-06, |
| "loss": 0.5057, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.266, |
| "grad_norm": 1.3924754858016968, |
| "learning_rate": 8.860000000000002e-06, |
| "loss": 0.4008, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.268, |
| "grad_norm": 1.4217268228530884, |
| "learning_rate": 8.926666666666669e-06, |
| "loss": 0.4915, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.9328693747520447, |
| "learning_rate": 8.993333333333334e-06, |
| "loss": 0.4044, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 1.2598109245300293, |
| "learning_rate": 9.060000000000001e-06, |
| "loss": 0.4407, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.274, |
| "grad_norm": 1.253463625907898, |
| "learning_rate": 9.126666666666667e-06, |
| "loss": 0.4671, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.276, |
| "grad_norm": 1.41267728805542, |
| "learning_rate": 9.193333333333334e-06, |
| "loss": 0.5014, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.278, |
| "grad_norm": 1.285569190979004, |
| "learning_rate": 9.260000000000001e-06, |
| "loss": 0.5425, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.9558253288269043, |
| "learning_rate": 9.326666666666667e-06, |
| "loss": 0.4851, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.282, |
| "grad_norm": 1.1378921270370483, |
| "learning_rate": 9.393333333333334e-06, |
| "loss": 0.5159, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.284, |
| "grad_norm": 1.152918815612793, |
| "learning_rate": 9.460000000000001e-06, |
| "loss": 0.4235, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.286, |
| "grad_norm": 1.405306339263916, |
| "learning_rate": 9.526666666666668e-06, |
| "loss": 0.4881, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 1.4959778785705566, |
| "learning_rate": 9.593333333333335e-06, |
| "loss": 0.4729, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.3399648666381836, |
| "learning_rate": 9.66e-06, |
| "loss": 0.4554, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.292, |
| "grad_norm": 1.1871027946472168, |
| "learning_rate": 9.726666666666668e-06, |
| "loss": 0.4591, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.294, |
| "grad_norm": 1.196593165397644, |
| "learning_rate": 9.793333333333333e-06, |
| "loss": 0.5592, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 1.4280208349227905, |
| "learning_rate": 9.86e-06, |
| "loss": 0.4551, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.298, |
| "grad_norm": 1.1892995834350586, |
| "learning_rate": 9.926666666666668e-06, |
| "loss": 0.4864, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.3978432416915894, |
| "learning_rate": 9.993333333333333e-06, |
| "loss": 0.5178, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.302, |
| "grad_norm": 1.2265063524246216, |
| "learning_rate": 9.999989033776898e-06, |
| "loss": 0.446, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 1.7466825246810913, |
| "learning_rate": 9.999951125906936e-06, |
| "loss": 0.5599, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.306, |
| "grad_norm": 1.2893444299697876, |
| "learning_rate": 9.999886141209892e-06, |
| "loss": 0.5088, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.308, |
| "grad_norm": 1.459552526473999, |
| "learning_rate": 9.999794080037675e-06, |
| "loss": 0.4981, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.654597520828247, |
| "learning_rate": 9.99967494288884e-06, |
| "loss": 0.561, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 1.3994464874267578, |
| "learning_rate": 9.999528730408565e-06, |
| "loss": 0.4872, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.314, |
| "grad_norm": 1.6109237670898438, |
| "learning_rate": 9.999355443388649e-06, |
| "loss": 0.4748, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.316, |
| "grad_norm": 1.2756563425064087, |
| "learning_rate": 9.999155082767515e-06, |
| "loss": 0.4265, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.318, |
| "grad_norm": 1.2647457122802734, |
| "learning_rate": 9.998927649630202e-06, |
| "loss": 0.4898, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 1.2949209213256836, |
| "learning_rate": 9.998673145208351e-06, |
| "loss": 0.4366, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.322, |
| "grad_norm": 1.2482858896255493, |
| "learning_rate": 9.998391570880212e-06, |
| "loss": 0.5014, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.324, |
| "grad_norm": 1.0791841745376587, |
| "learning_rate": 9.99808292817063e-06, |
| "loss": 0.4954, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.326, |
| "grad_norm": 1.4692087173461914, |
| "learning_rate": 9.997747218751032e-06, |
| "loss": 0.5007, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 1.5948970317840576, |
| "learning_rate": 9.997384444439424e-06, |
| "loss": 0.4542, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.2097396850585938, |
| "learning_rate": 9.996994607200382e-06, |
| "loss": 0.4525, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.332, |
| "grad_norm": 1.343247413635254, |
| "learning_rate": 9.99657770914504e-06, |
| "loss": 0.4434, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.334, |
| "grad_norm": 1.5379422903060913, |
| "learning_rate": 9.996133752531071e-06, |
| "loss": 0.5859, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 1.158941388130188, |
| "learning_rate": 9.99566273976269e-06, |
| "loss": 0.495, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.338, |
| "grad_norm": 1.1630929708480835, |
| "learning_rate": 9.995164673390624e-06, |
| "loss": 0.5463, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.256372094154358, |
| "learning_rate": 9.994639556112113e-06, |
| "loss": 0.4853, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.342, |
| "grad_norm": 1.8223704099655151, |
| "learning_rate": 9.994087390770887e-06, |
| "loss": 0.4836, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 1.945887565612793, |
| "learning_rate": 9.993508180357154e-06, |
| "loss": 0.5115, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.346, |
| "grad_norm": 1.6152077913284302, |
| "learning_rate": 9.992901928007577e-06, |
| "loss": 0.4939, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.348, |
| "grad_norm": 1.2321640253067017, |
| "learning_rate": 9.992268637005268e-06, |
| "loss": 0.5277, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 1.2367174625396729, |
| "learning_rate": 9.991608310779762e-06, |
| "loss": 0.4691, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 1.744498372077942, |
| "learning_rate": 9.990920952907005e-06, |
| "loss": 0.497, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.354, |
| "grad_norm": 1.5234036445617676, |
| "learning_rate": 9.99020656710932e-06, |
| "loss": 0.4469, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.356, |
| "grad_norm": 1.0841994285583496, |
| "learning_rate": 9.989465157255413e-06, |
| "loss": 0.4317, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.358, |
| "grad_norm": 1.1552493572235107, |
| "learning_rate": 9.988696727360323e-06, |
| "loss": 0.461, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.6440882682800293, |
| "learning_rate": 9.987901281585423e-06, |
| "loss": 0.5038, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.362, |
| "grad_norm": 1.3549693822860718, |
| "learning_rate": 9.987078824238384e-06, |
| "loss": 0.468, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.364, |
| "grad_norm": 1.3959366083145142, |
| "learning_rate": 9.986229359773154e-06, |
| "loss": 0.4709, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.366, |
| "grad_norm": 1.612743854522705, |
| "learning_rate": 9.985352892789941e-06, |
| "loss": 0.4717, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 1.2929461002349854, |
| "learning_rate": 9.98444942803518e-06, |
| "loss": 0.4811, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.450260877609253, |
| "learning_rate": 9.983518970401508e-06, |
| "loss": 0.5203, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.372, |
| "grad_norm": 1.1035138368606567, |
| "learning_rate": 9.982561524927749e-06, |
| "loss": 0.4759, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.374, |
| "grad_norm": 1.2472127676010132, |
| "learning_rate": 9.981577096798864e-06, |
| "loss": 0.5506, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 1.1840726137161255, |
| "learning_rate": 9.980565691345945e-06, |
| "loss": 0.4878, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.378, |
| "grad_norm": 1.3959659337997437, |
| "learning_rate": 9.979527314046177e-06, |
| "loss": 0.4916, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.154722809791565, |
| "learning_rate": 9.978461970522807e-06, |
| "loss": 0.4592, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.382, |
| "grad_norm": 1.2845226526260376, |
| "learning_rate": 9.977369666545114e-06, |
| "loss": 0.5465, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 1.175721287727356, |
| "learning_rate": 9.976250408028383e-06, |
| "loss": 0.4742, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.386, |
| "grad_norm": 1.5375049114227295, |
| "learning_rate": 9.975104201033868e-06, |
| "loss": 0.4783, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.388, |
| "grad_norm": 1.305464506149292, |
| "learning_rate": 9.973931051768756e-06, |
| "loss": 0.4931, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 1.2971112728118896, |
| "learning_rate": 9.972730966586144e-06, |
| "loss": 0.4617, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 1.2412433624267578, |
| "learning_rate": 9.971503951984996e-06, |
| "loss": 0.5366, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.394, |
| "grad_norm": 1.3253568410873413, |
| "learning_rate": 9.970250014610105e-06, |
| "loss": 0.5186, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.396, |
| "grad_norm": 1.2338709831237793, |
| "learning_rate": 9.968969161252072e-06, |
| "loss": 0.5074, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.398, |
| "grad_norm": 1.0964592695236206, |
| "learning_rate": 9.96766139884725e-06, |
| "loss": 0.4096, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 1.8194420337677002, |
| "learning_rate": 9.966326734477721e-06, |
| "loss": 0.4439, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.402, |
| "grad_norm": 1.0826818943023682, |
| "learning_rate": 9.96496517537125e-06, |
| "loss": 0.4602, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.404, |
| "grad_norm": 0.9985308647155762, |
| "learning_rate": 9.96357672890125e-06, |
| "loss": 0.429, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.406, |
| "grad_norm": 1.4715704917907715, |
| "learning_rate": 9.96216140258674e-06, |
| "loss": 0.5181, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 1.606497049331665, |
| "learning_rate": 9.9607192040923e-06, |
| "loss": 0.4314, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.4677903652191162, |
| "learning_rate": 9.959250141228046e-06, |
| "loss": 0.4204, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.412, |
| "grad_norm": 1.4426929950714111, |
| "learning_rate": 9.95775422194956e-06, |
| "loss": 0.4803, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.414, |
| "grad_norm": 1.2194904088974, |
| "learning_rate": 9.956231454357876e-06, |
| "loss": 0.4695, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 1.1238853931427002, |
| "learning_rate": 9.954681846699414e-06, |
| "loss": 0.5755, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.418, |
| "grad_norm": 1.0031379461288452, |
| "learning_rate": 9.953105407365952e-06, |
| "loss": 0.4325, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.6501160860061646, |
| "learning_rate": 9.951502144894566e-06, |
| "loss": 0.5403, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.422, |
| "grad_norm": 1.440741777420044, |
| "learning_rate": 9.94987206796759e-06, |
| "loss": 0.4725, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 1.2382850646972656, |
| "learning_rate": 9.948215185412578e-06, |
| "loss": 0.4235, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.426, |
| "grad_norm": 1.3473109006881714, |
| "learning_rate": 9.94653150620224e-06, |
| "loss": 0.4388, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.428, |
| "grad_norm": 1.2151130437850952, |
| "learning_rate": 9.944821039454403e-06, |
| "loss": 0.5012, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 1.1826070547103882, |
| "learning_rate": 9.943083794431959e-06, |
| "loss": 0.486, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 1.0905817747116089, |
| "learning_rate": 9.941319780542817e-06, |
| "loss": 0.5423, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.434, |
| "grad_norm": 1.3496341705322266, |
| "learning_rate": 9.939529007339852e-06, |
| "loss": 0.5141, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.436, |
| "grad_norm": 1.33112633228302, |
| "learning_rate": 9.937711484520848e-06, |
| "loss": 0.4819, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.438, |
| "grad_norm": 1.1748610734939575, |
| "learning_rate": 9.935867221928454e-06, |
| "loss": 0.5097, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.2363240718841553, |
| "learning_rate": 9.93399622955012e-06, |
| "loss": 0.4261, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.442, |
| "grad_norm": 1.667703628540039, |
| "learning_rate": 9.932098517518056e-06, |
| "loss": 0.4662, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.444, |
| "grad_norm": 1.446291446685791, |
| "learning_rate": 9.930174096109163e-06, |
| "loss": 0.445, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.446, |
| "grad_norm": 1.476731300354004, |
| "learning_rate": 9.928222975744992e-06, |
| "loss": 0.4666, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 1.3054319620132446, |
| "learning_rate": 9.926245166991671e-06, |
| "loss": 0.4248, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.5817550420761108, |
| "learning_rate": 9.924240680559867e-06, |
| "loss": 0.4325, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.452, |
| "grad_norm": 1.0748894214630127, |
| "learning_rate": 9.922209527304709e-06, |
| "loss": 0.4862, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.454, |
| "grad_norm": 1.6298894882202148, |
| "learning_rate": 9.920151718225743e-06, |
| "loss": 0.542, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 1.4869043827056885, |
| "learning_rate": 9.918067264466867e-06, |
| "loss": 0.4627, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.458, |
| "grad_norm": 1.3258094787597656, |
| "learning_rate": 9.915956177316269e-06, |
| "loss": 0.4389, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.586165189743042, |
| "learning_rate": 9.913818468206368e-06, |
| "loss": 0.6127, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.462, |
| "grad_norm": 1.3697926998138428, |
| "learning_rate": 9.911654148713757e-06, |
| "loss": 0.574, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 1.212844729423523, |
| "learning_rate": 9.909463230559127e-06, |
| "loss": 0.5196, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.466, |
| "grad_norm": 0.9597765207290649, |
| "learning_rate": 9.907245725607217e-06, |
| "loss": 0.5269, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.468, |
| "grad_norm": 1.3319644927978516, |
| "learning_rate": 9.905001645866746e-06, |
| "loss": 0.4136, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.0987292528152466, |
| "learning_rate": 9.902731003490344e-06, |
| "loss": 0.4325, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 1.3473751544952393, |
| "learning_rate": 9.90043381077449e-06, |
| "loss": 0.464, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.474, |
| "grad_norm": 1.5998402833938599, |
| "learning_rate": 9.898110080159442e-06, |
| "loss": 0.6172, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.476, |
| "grad_norm": 0.9513059258460999, |
| "learning_rate": 9.895759824229176e-06, |
| "loss": 0.4431, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.478, |
| "grad_norm": 1.3348459005355835, |
| "learning_rate": 9.893383055711308e-06, |
| "loss": 0.4383, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.3876981735229492, |
| "learning_rate": 9.890979787477036e-06, |
| "loss": 0.5066, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.482, |
| "grad_norm": 1.3132597208023071, |
| "learning_rate": 9.88855003254106e-06, |
| "loss": 0.4995, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.484, |
| "grad_norm": 1.3520206212997437, |
| "learning_rate": 9.886093804061523e-06, |
| "loss": 0.4658, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.486, |
| "grad_norm": 1.4986363649368286, |
| "learning_rate": 9.883611115339929e-06, |
| "loss": 0.4718, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 1.3672336339950562, |
| "learning_rate": 9.881101979821075e-06, |
| "loss": 0.5289, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.610236406326294, |
| "learning_rate": 9.87856641109298e-06, |
| "loss": 0.5065, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.492, |
| "grad_norm": 1.241811752319336, |
| "learning_rate": 9.876004422886809e-06, |
| "loss": 0.3937, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.494, |
| "grad_norm": 1.385197639465332, |
| "learning_rate": 9.873416029076801e-06, |
| "loss": 0.5553, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 1.2933454513549805, |
| "learning_rate": 9.870801243680191e-06, |
| "loss": 0.4653, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.498, |
| "grad_norm": 1.2639875411987305, |
| "learning_rate": 9.868160080857134e-06, |
| "loss": 0.4908, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.142906665802002, |
| "learning_rate": 9.865492554910634e-06, |
| "loss": 0.4884, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.502, |
| "grad_norm": 1.0850920677185059, |
| "learning_rate": 9.862798680286459e-06, |
| "loss": 0.5224, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 1.3904608488082886, |
| "learning_rate": 9.860078471573066e-06, |
| "loss": 0.5273, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.506, |
| "grad_norm": 1.1105901002883911, |
| "learning_rate": 9.857331943501527e-06, |
| "loss": 0.4478, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.508, |
| "grad_norm": 1.278568983078003, |
| "learning_rate": 9.854559110945436e-06, |
| "loss": 0.5204, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.9616843461990356, |
| "learning_rate": 9.851759988920843e-06, |
| "loss": 0.452, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 1.7013401985168457, |
| "learning_rate": 9.848934592586165e-06, |
| "loss": 0.5092, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.514, |
| "grad_norm": 1.2884467840194702, |
| "learning_rate": 9.846082937242108e-06, |
| "loss": 0.4333, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.516, |
| "grad_norm": 1.0977705717086792, |
| "learning_rate": 9.843205038331574e-06, |
| "loss": 0.4188, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.518, |
| "grad_norm": 1.1589962244033813, |
| "learning_rate": 9.84030091143959e-06, |
| "loss": 0.5456, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.4452130794525146, |
| "learning_rate": 9.837370572293221e-06, |
| "loss": 0.538, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.522, |
| "grad_norm": 0.8810553550720215, |
| "learning_rate": 9.834414036761477e-06, |
| "loss": 0.4697, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.524, |
| "grad_norm": 1.2297122478485107, |
| "learning_rate": 9.831431320855235e-06, |
| "loss": 0.4525, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.526, |
| "grad_norm": 1.4245611429214478, |
| "learning_rate": 9.828422440727152e-06, |
| "loss": 0.5304, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 1.4869318008422852, |
| "learning_rate": 9.82538741267157e-06, |
| "loss": 0.5631, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.0916733741760254, |
| "learning_rate": 9.822326253124436e-06, |
| "loss": 0.4249, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.532, |
| "grad_norm": 1.3281223773956299, |
| "learning_rate": 9.819238978663212e-06, |
| "loss": 0.4827, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.534, |
| "grad_norm": 1.1685035228729248, |
| "learning_rate": 9.816125606006777e-06, |
| "loss": 0.547, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 1.309923768043518, |
| "learning_rate": 9.812986152015349e-06, |
| "loss": 0.4927, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.538, |
| "grad_norm": 1.3431212902069092, |
| "learning_rate": 9.809820633690383e-06, |
| "loss": 0.5215, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.6138921976089478, |
| "learning_rate": 9.806629068174486e-06, |
| "loss": 0.4668, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.542, |
| "grad_norm": 1.1583821773529053, |
| "learning_rate": 9.803411472751321e-06, |
| "loss": 0.4882, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 1.1183314323425293, |
| "learning_rate": 9.800167864845513e-06, |
| "loss": 0.5254, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.546, |
| "grad_norm": 1.4351534843444824, |
| "learning_rate": 9.796898262022555e-06, |
| "loss": 0.5631, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.548, |
| "grad_norm": 1.114702582359314, |
| "learning_rate": 9.793602681988714e-06, |
| "loss": 0.5087, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.6921616792678833, |
| "learning_rate": 9.790281142590937e-06, |
| "loss": 0.5482, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 1.3350164890289307, |
| "learning_rate": 9.786933661816747e-06, |
| "loss": 0.5148, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.554, |
| "grad_norm": 1.3733949661254883, |
| "learning_rate": 9.783560257794153e-06, |
| "loss": 0.4336, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.556, |
| "grad_norm": 1.232109546661377, |
| "learning_rate": 9.78016094879155e-06, |
| "loss": 0.4304, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.558, |
| "grad_norm": 1.4043456315994263, |
| "learning_rate": 9.776735753217618e-06, |
| "loss": 0.4891, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.1592031717300415, |
| "learning_rate": 9.773284689621223e-06, |
| "loss": 0.4892, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.562, |
| "grad_norm": 1.1605182886123657, |
| "learning_rate": 9.76980777669132e-06, |
| "loss": 0.4433, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.564, |
| "grad_norm": 1.1218903064727783, |
| "learning_rate": 9.766305033256847e-06, |
| "loss": 0.5072, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.566, |
| "grad_norm": 1.4614591598510742, |
| "learning_rate": 9.762776478286622e-06, |
| "loss": 0.4988, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 1.0388537645339966, |
| "learning_rate": 9.75922213088925e-06, |
| "loss": 0.4314, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.3844202756881714, |
| "learning_rate": 9.755642010313007e-06, |
| "loss": 0.4516, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.572, |
| "grad_norm": 1.5695295333862305, |
| "learning_rate": 9.752036135945743e-06, |
| "loss": 0.4761, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.574, |
| "grad_norm": 1.3042584657669067, |
| "learning_rate": 9.748404527314782e-06, |
| "loss": 0.4851, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 1.3485811948776245, |
| "learning_rate": 9.744747204086795e-06, |
| "loss": 0.4127, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.578, |
| "grad_norm": 1.074900507926941, |
| "learning_rate": 9.741064186067723e-06, |
| "loss": 0.4803, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.3059568405151367, |
| "learning_rate": 9.73735549320265e-06, |
| "loss": 0.4279, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.582, |
| "grad_norm": 1.1361761093139648, |
| "learning_rate": 9.733621145575697e-06, |
| "loss": 0.5364, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 1.4308760166168213, |
| "learning_rate": 9.72986116340992e-06, |
| "loss": 0.5978, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.586, |
| "grad_norm": 0.9736759066581726, |
| "learning_rate": 9.726075567067193e-06, |
| "loss": 0.4471, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.588, |
| "grad_norm": 1.555156946182251, |
| "learning_rate": 9.722264377048105e-06, |
| "loss": 0.526, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.1218699216842651, |
| "learning_rate": 9.718427613991848e-06, |
| "loss": 0.4025, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 1.1059517860412598, |
| "learning_rate": 9.714565298676093e-06, |
| "loss": 0.4503, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.594, |
| "grad_norm": 1.3590087890625, |
| "learning_rate": 9.710677452016898e-06, |
| "loss": 0.5018, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.596, |
| "grad_norm": 0.9605317115783691, |
| "learning_rate": 9.706764095068579e-06, |
| "loss": 0.474, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.598, |
| "grad_norm": 0.9634230136871338, |
| "learning_rate": 9.702825249023597e-06, |
| "loss": 0.4051, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 1.0012608766555786, |
| "learning_rate": 9.698860935212455e-06, |
| "loss": 0.5104, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.602, |
| "grad_norm": 1.254647970199585, |
| "learning_rate": 9.69487117510357e-06, |
| "loss": 0.4357, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.604, |
| "grad_norm": 1.2579063177108765, |
| "learning_rate": 9.69085599030316e-06, |
| "loss": 0.4413, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.606, |
| "grad_norm": 1.1054672002792358, |
| "learning_rate": 9.686815402555133e-06, |
| "loss": 0.6412, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 1.3997899293899536, |
| "learning_rate": 9.682749433740963e-06, |
| "loss": 0.4615, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.025417685508728, |
| "learning_rate": 9.678658105879568e-06, |
| "loss": 0.4214, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.612, |
| "grad_norm": 1.0472135543823242, |
| "learning_rate": 9.674541441127202e-06, |
| "loss": 0.5116, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.614, |
| "grad_norm": 1.078364372253418, |
| "learning_rate": 9.670399461777328e-06, |
| "loss": 0.4569, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 1.3852829933166504, |
| "learning_rate": 9.666232190260496e-06, |
| "loss": 0.53, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.618, |
| "grad_norm": 1.3731964826583862, |
| "learning_rate": 9.662039649144224e-06, |
| "loss": 0.4696, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.291717290878296, |
| "learning_rate": 9.65782186113288e-06, |
| "loss": 0.4263, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.622, |
| "grad_norm": 1.3405389785766602, |
| "learning_rate": 9.653578849067542e-06, |
| "loss": 0.4435, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 1.226496696472168, |
| "learning_rate": 9.649310635925904e-06, |
| "loss": 0.4492, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.626, |
| "grad_norm": 1.15779447555542, |
| "learning_rate": 9.645017244822124e-06, |
| "loss": 0.5387, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.628, |
| "grad_norm": 1.0545017719268799, |
| "learning_rate": 9.640698699006708e-06, |
| "loss": 0.4616, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.0854800939559937, |
| "learning_rate": 9.636355021866388e-06, |
| "loss": 0.518, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 1.292471170425415, |
| "learning_rate": 9.631986236923998e-06, |
| "loss": 0.4888, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.634, |
| "grad_norm": 1.2289882898330688, |
| "learning_rate": 9.62759236783833e-06, |
| "loss": 0.5193, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.636, |
| "grad_norm": 1.4421725273132324, |
| "learning_rate": 9.623173438404027e-06, |
| "loss": 0.5314, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.638, |
| "grad_norm": 1.4118566513061523, |
| "learning_rate": 9.61872947255144e-06, |
| "loss": 0.5525, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.1490426063537598, |
| "learning_rate": 9.614260494346505e-06, |
| "loss": 0.4832, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.642, |
| "grad_norm": 1.3505995273590088, |
| "learning_rate": 9.609766527990604e-06, |
| "loss": 0.4742, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.644, |
| "grad_norm": 1.4278666973114014, |
| "learning_rate": 9.605247597820448e-06, |
| "loss": 0.458, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.646, |
| "grad_norm": 1.5824147462844849, |
| "learning_rate": 9.600703728307935e-06, |
| "loss": 0.4327, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 1.1965423822402954, |
| "learning_rate": 9.596134944060018e-06, |
| "loss": 0.4241, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.3452823162078857, |
| "learning_rate": 9.591541269818574e-06, |
| "loss": 0.4312, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.652, |
| "grad_norm": 1.6801984310150146, |
| "learning_rate": 9.586922730460273e-06, |
| "loss": 0.4858, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.654, |
| "grad_norm": 1.521422266960144, |
| "learning_rate": 9.582279350996437e-06, |
| "loss": 0.4886, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 1.0848934650421143, |
| "learning_rate": 9.577611156572908e-06, |
| "loss": 0.5233, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.658, |
| "grad_norm": 1.131194829940796, |
| "learning_rate": 9.572918172469912e-06, |
| "loss": 0.5093, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 0.7532334923744202, |
| "learning_rate": 9.568200424101918e-06, |
| "loss": 0.4439, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.662, |
| "grad_norm": 0.992387056350708, |
| "learning_rate": 9.563457937017514e-06, |
| "loss": 0.4405, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 1.5561617612838745, |
| "learning_rate": 9.558690736899248e-06, |
| "loss": 0.5558, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.666, |
| "grad_norm": 0.9538534283638, |
| "learning_rate": 9.553898849563502e-06, |
| "loss": 0.4767, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.668, |
| "grad_norm": 1.5033385753631592, |
| "learning_rate": 9.549082300960351e-06, |
| "loss": 0.4721, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.4979592561721802, |
| "learning_rate": 9.544241117173422e-06, |
| "loss": 0.4778, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 1.2602815628051758, |
| "learning_rate": 9.539375324419748e-06, |
| "loss": 0.4715, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.674, |
| "grad_norm": 1.1525267362594604, |
| "learning_rate": 9.534484949049636e-06, |
| "loss": 0.5271, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.676, |
| "grad_norm": 1.18638014793396, |
| "learning_rate": 9.529570017546512e-06, |
| "loss": 0.5016, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.678, |
| "grad_norm": 1.3862594366073608, |
| "learning_rate": 9.524630556526788e-06, |
| "loss": 0.4872, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 1.5191760063171387, |
| "learning_rate": 9.51966659273971e-06, |
| "loss": 0.4688, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.682, |
| "grad_norm": 1.176364541053772, |
| "learning_rate": 9.514678153067218e-06, |
| "loss": 0.4674, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.684, |
| "grad_norm": 1.2833201885223389, |
| "learning_rate": 9.509665264523803e-06, |
| "loss": 0.4414, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.686, |
| "grad_norm": 0.9362300634384155, |
| "learning_rate": 9.504627954256352e-06, |
| "loss": 0.368, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 1.2998939752578735, |
| "learning_rate": 9.499566249544006e-06, |
| "loss": 0.4505, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 0.7536768913269043, |
| "learning_rate": 9.494480177798013e-06, |
| "loss": 0.5055, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.692, |
| "grad_norm": 1.1865419149398804, |
| "learning_rate": 9.489369766561584e-06, |
| "loss": 0.4671, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.694, |
| "grad_norm": 1.128171443939209, |
| "learning_rate": 9.48423504350973e-06, |
| "loss": 0.4721, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 1.1129778623580933, |
| "learning_rate": 9.479076036449125e-06, |
| "loss": 0.5855, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.698, |
| "grad_norm": 1.0635051727294922, |
| "learning_rate": 9.473892773317952e-06, |
| "loss": 0.4509, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.9211987257003784, |
| "learning_rate": 9.468685282185745e-06, |
| "loss": 0.4543, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.702, |
| "grad_norm": 1.2037076950073242, |
| "learning_rate": 9.463453591253253e-06, |
| "loss": 0.4839, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.9373881220817566, |
| "learning_rate": 9.458197728852268e-06, |
| "loss": 0.4215, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.706, |
| "grad_norm": 0.9381740093231201, |
| "learning_rate": 9.452917723445484e-06, |
| "loss": 0.4598, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.708, |
| "grad_norm": 1.6144822835922241, |
| "learning_rate": 9.447613603626337e-06, |
| "loss": 0.5762, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.3809951543807983, |
| "learning_rate": 9.44228539811886e-06, |
| "loss": 0.4934, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 1.1491771936416626, |
| "learning_rate": 9.436933135777511e-06, |
| "loss": 0.4639, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.714, |
| "grad_norm": 1.0632610321044922, |
| "learning_rate": 9.431556845587029e-06, |
| "loss": 0.4966, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.716, |
| "grad_norm": 1.2312216758728027, |
| "learning_rate": 9.426156556662276e-06, |
| "loss": 0.4287, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.718, |
| "grad_norm": 1.2142528295516968, |
| "learning_rate": 9.420732298248077e-06, |
| "loss": 0.4216, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.7389311194419861, |
| "learning_rate": 9.41528409971906e-06, |
| "loss": 0.4317, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.722, |
| "grad_norm": 1.421856164932251, |
| "learning_rate": 9.409811990579498e-06, |
| "loss": 0.4798, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.724, |
| "grad_norm": 1.5812768936157227, |
| "learning_rate": 9.404316000463152e-06, |
| "loss": 0.5848, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.726, |
| "grad_norm": 0.9333869218826294, |
| "learning_rate": 9.398796159133108e-06, |
| "loss": 0.4232, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 1.2076997756958008, |
| "learning_rate": 9.393252496481615e-06, |
| "loss": 0.5026, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.4881305694580078, |
| "learning_rate": 9.387685042529926e-06, |
| "loss": 0.525, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.732, |
| "grad_norm": 1.2889773845672607, |
| "learning_rate": 9.382093827428135e-06, |
| "loss": 0.4729, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.734, |
| "grad_norm": 1.4050449132919312, |
| "learning_rate": 9.376478881455008e-06, |
| "loss": 0.4944, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.8713739514350891, |
| "learning_rate": 9.370840235017829e-06, |
| "loss": 0.4217, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.738, |
| "grad_norm": 1.031435251235962, |
| "learning_rate": 9.365177918652226e-06, |
| "loss": 0.4611, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.8381909132003784, |
| "learning_rate": 9.35949196302201e-06, |
| "loss": 0.4212, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.742, |
| "grad_norm": 1.1888693571090698, |
| "learning_rate": 9.353782398919012e-06, |
| "loss": 0.4543, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 1.4899085760116577, |
| "learning_rate": 9.348049257262908e-06, |
| "loss": 0.5147, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.746, |
| "grad_norm": 1.2330344915390015, |
| "learning_rate": 9.342292569101061e-06, |
| "loss": 0.6145, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.748, |
| "grad_norm": 1.0226585865020752, |
| "learning_rate": 9.336512365608343e-06, |
| "loss": 0.4879, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.1054939031600952, |
| "learning_rate": 9.330708678086975e-06, |
| "loss": 0.4206, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 1.4774991273880005, |
| "learning_rate": 9.324881537966355e-06, |
| "loss": 0.4093, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.754, |
| "grad_norm": 1.1856415271759033, |
| "learning_rate": 9.319030976802881e-06, |
| "loss": 0.4614, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.756, |
| "grad_norm": 1.3986643552780151, |
| "learning_rate": 9.313157026279792e-06, |
| "loss": 0.5174, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.758, |
| "grad_norm": 1.2036128044128418, |
| "learning_rate": 9.307259718206984e-06, |
| "loss": 0.464, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.8177152276039124, |
| "learning_rate": 9.301339084520853e-06, |
| "loss": 0.4331, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.762, |
| "grad_norm": 1.1826705932617188, |
| "learning_rate": 9.295395157284103e-06, |
| "loss": 0.4363, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.764, |
| "grad_norm": 1.0908697843551636, |
| "learning_rate": 9.289427968685588e-06, |
| "loss": 0.4087, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.766, |
| "grad_norm": 0.9723864793777466, |
| "learning_rate": 9.28343755104013e-06, |
| "loss": 0.3952, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 1.1800845861434937, |
| "learning_rate": 9.277423936788348e-06, |
| "loss": 0.4966, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.5162053108215332, |
| "learning_rate": 9.271387158496477e-06, |
| "loss": 0.5525, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.772, |
| "grad_norm": 1.1737457513809204, |
| "learning_rate": 9.265327248856198e-06, |
| "loss": 0.4374, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.774, |
| "grad_norm": 1.1317731142044067, |
| "learning_rate": 9.259244240684457e-06, |
| "loss": 0.4321, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 1.3863508701324463, |
| "learning_rate": 9.25313816692329e-06, |
| "loss": 0.5441, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.778, |
| "grad_norm": 1.0942466259002686, |
| "learning_rate": 9.247009060639637e-06, |
| "loss": 0.5124, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.3743023872375488, |
| "learning_rate": 9.240856955025175e-06, |
| "loss": 0.4357, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.782, |
| "grad_norm": 1.1339491605758667, |
| "learning_rate": 9.234681883396129e-06, |
| "loss": 0.5066, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 1.9541819095611572, |
| "learning_rate": 9.228483879193096e-06, |
| "loss": 0.4864, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.786, |
| "grad_norm": 1.257652759552002, |
| "learning_rate": 9.22226297598086e-06, |
| "loss": 0.4361, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.788, |
| "grad_norm": 1.4581412076950073, |
| "learning_rate": 9.216019207448216e-06, |
| "loss": 0.5005, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.6676081418991089, |
| "learning_rate": 9.209752607407784e-06, |
| "loss": 0.4307, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.898801863193512, |
| "learning_rate": 9.203463209795822e-06, |
| "loss": 0.487, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.794, |
| "grad_norm": 1.6811095476150513, |
| "learning_rate": 9.197151048672051e-06, |
| "loss": 0.5448, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.796, |
| "grad_norm": 1.222164511680603, |
| "learning_rate": 9.190816158219462e-06, |
| "loss": 0.4603, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.798, |
| "grad_norm": 1.161139726638794, |
| "learning_rate": 9.18445857274414e-06, |
| "loss": 0.5216, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.3466241359710693, |
| "learning_rate": 9.178078326675069e-06, |
| "loss": 0.5273, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.802, |
| "grad_norm": 1.1372344493865967, |
| "learning_rate": 9.171675454563949e-06, |
| "loss": 0.4842, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.804, |
| "grad_norm": 1.10002601146698, |
| "learning_rate": 9.165249991085012e-06, |
| "loss": 0.5429, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.806, |
| "grad_norm": 0.8916090726852417, |
| "learning_rate": 9.158801971034832e-06, |
| "loss": 0.5703, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 1.131230115890503, |
| "learning_rate": 9.152331429332136e-06, |
| "loss": 0.4555, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.9842562079429626, |
| "learning_rate": 9.14583840101761e-06, |
| "loss": 0.4475, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.812, |
| "grad_norm": 1.0680824518203735, |
| "learning_rate": 9.139322921253724e-06, |
| "loss": 0.4471, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.814, |
| "grad_norm": 1.2112277746200562, |
| "learning_rate": 9.132785025324524e-06, |
| "loss": 0.5342, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 1.359027624130249, |
| "learning_rate": 9.12622474863545e-06, |
| "loss": 0.5038, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.818, |
| "grad_norm": 1.2182432413101196, |
| "learning_rate": 9.119642126713147e-06, |
| "loss": 0.4761, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 0.905654788017273, |
| "learning_rate": 9.113037195205267e-06, |
| "loss": 0.4434, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.822, |
| "grad_norm": 1.273230791091919, |
| "learning_rate": 9.106409989880274e-06, |
| "loss": 0.4119, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 1.1936393976211548, |
| "learning_rate": 9.099760546627262e-06, |
| "loss": 0.5159, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.826, |
| "grad_norm": 1.196582555770874, |
| "learning_rate": 9.093088901455746e-06, |
| "loss": 0.4788, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.828, |
| "grad_norm": 1.420160174369812, |
| "learning_rate": 9.086395090495475e-06, |
| "loss": 0.4298, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.2535393238067627, |
| "learning_rate": 9.079679149996235e-06, |
| "loss": 0.5149, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.9435884356498718, |
| "learning_rate": 9.072941116327654e-06, |
| "loss": 0.4495, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.834, |
| "grad_norm": 1.1288723945617676, |
| "learning_rate": 9.066181025979006e-06, |
| "loss": 0.4988, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.836, |
| "grad_norm": 1.53179132938385, |
| "learning_rate": 9.059398915559005e-06, |
| "loss": 0.4834, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.838, |
| "grad_norm": 1.48374342918396, |
| "learning_rate": 9.052594821795616e-06, |
| "loss": 0.5016, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 1.3857916593551636, |
| "learning_rate": 9.045768781535857e-06, |
| "loss": 0.5277, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.842, |
| "grad_norm": 1.1805408000946045, |
| "learning_rate": 9.038920831745587e-06, |
| "loss": 0.432, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.844, |
| "grad_norm": 1.1315293312072754, |
| "learning_rate": 9.032051009509324e-06, |
| "loss": 0.4377, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.846, |
| "grad_norm": 0.9386146068572998, |
| "learning_rate": 9.025159352030024e-06, |
| "loss": 0.4993, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 1.187091588973999, |
| "learning_rate": 9.0182458966289e-06, |
| "loss": 0.5185, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.0254029035568237, |
| "learning_rate": 9.011310680745203e-06, |
| "loss": 0.471, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.852, |
| "grad_norm": 1.1841859817504883, |
| "learning_rate": 9.004353741936028e-06, |
| "loss": 0.4047, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.854, |
| "grad_norm": 1.25779390335083, |
| "learning_rate": 8.99737511787611e-06, |
| "loss": 0.4827, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 1.2962913513183594, |
| "learning_rate": 8.990374846357616e-06, |
| "loss": 0.4792, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.858, |
| "grad_norm": 1.1236214637756348, |
| "learning_rate": 8.98335296528995e-06, |
| "loss": 0.5092, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.0570876598358154, |
| "learning_rate": 8.97630951269953e-06, |
| "loss": 0.4761, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.862, |
| "grad_norm": 1.0793334245681763, |
| "learning_rate": 8.969244526729599e-06, |
| "loss": 0.425, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 1.0466865301132202, |
| "learning_rate": 8.962158045640014e-06, |
| "loss": 0.4867, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.866, |
| "grad_norm": 1.8099735975265503, |
| "learning_rate": 8.955050107807035e-06, |
| "loss": 0.453, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.868, |
| "grad_norm": 1.0396076440811157, |
| "learning_rate": 8.947920751723119e-06, |
| "loss": 0.4536, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.1914063692092896, |
| "learning_rate": 8.940770015996707e-06, |
| "loss": 0.448, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 1.0686973333358765, |
| "learning_rate": 8.933597939352031e-06, |
| "loss": 0.4932, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.874, |
| "grad_norm": 0.9669390320777893, |
| "learning_rate": 8.926404560628882e-06, |
| "loss": 0.4812, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.876, |
| "grad_norm": 1.4926239252090454, |
| "learning_rate": 8.919189918782419e-06, |
| "loss": 0.4337, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.878, |
| "grad_norm": 1.3908580541610718, |
| "learning_rate": 8.911954052882941e-06, |
| "loss": 0.4595, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.088005781173706, |
| "learning_rate": 8.904697002115693e-06, |
| "loss": 0.4685, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.882, |
| "grad_norm": 1.2951703071594238, |
| "learning_rate": 8.89741880578064e-06, |
| "loss": 0.5138, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.884, |
| "grad_norm": 1.3422932624816895, |
| "learning_rate": 8.890119503292258e-06, |
| "loss": 0.5339, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.886, |
| "grad_norm": 1.6533243656158447, |
| "learning_rate": 8.882799134179326e-06, |
| "loss": 0.4854, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 1.5386734008789062, |
| "learning_rate": 8.875457738084706e-06, |
| "loss": 0.4791, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.3411436080932617, |
| "learning_rate": 8.868095354765125e-06, |
| "loss": 0.4922, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.892, |
| "grad_norm": 0.9799019694328308, |
| "learning_rate": 8.860712024090971e-06, |
| "loss": 0.4265, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.894, |
| "grad_norm": 1.7590001821517944, |
| "learning_rate": 8.853307786046073e-06, |
| "loss": 0.5033, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 1.1440484523773193, |
| "learning_rate": 8.84588268072747e-06, |
| "loss": 0.5036, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.898, |
| "grad_norm": 1.0841706991195679, |
| "learning_rate": 8.838436748345217e-06, |
| "loss": 0.5344, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.1884775161743164, |
| "learning_rate": 8.830970029222152e-06, |
| "loss": 0.3504, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.902, |
| "grad_norm": 0.9909444451332092, |
| "learning_rate": 8.823482563793687e-06, |
| "loss": 0.4838, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 1.079696774482727, |
| "learning_rate": 8.815974392607573e-06, |
| "loss": 0.4601, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.906, |
| "grad_norm": 1.1453964710235596, |
| "learning_rate": 8.808445556323703e-06, |
| "loss": 0.4623, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.908, |
| "grad_norm": 1.1338945627212524, |
| "learning_rate": 8.80089609571387e-06, |
| "loss": 0.5288, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.3115707635879517, |
| "learning_rate": 8.79332605166157e-06, |
| "loss": 0.5267, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 1.3860828876495361, |
| "learning_rate": 8.785735465161752e-06, |
| "loss": 0.4985, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.914, |
| "grad_norm": 1.2067689895629883, |
| "learning_rate": 8.778124377320619e-06, |
| "loss": 0.4699, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.916, |
| "grad_norm": 1.4744197130203247, |
| "learning_rate": 8.770492829355395e-06, |
| "loss": 0.4483, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.918, |
| "grad_norm": 1.1251822710037231, |
| "learning_rate": 8.762840862594106e-06, |
| "loss": 0.4664, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.8830546736717224, |
| "learning_rate": 8.755168518475351e-06, |
| "loss": 0.533, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.922, |
| "grad_norm": 1.2034662961959839, |
| "learning_rate": 8.747475838548088e-06, |
| "loss": 0.4262, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.924, |
| "grad_norm": 1.3047678470611572, |
| "learning_rate": 8.739762864471392e-06, |
| "loss": 0.5621, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.926, |
| "grad_norm": 1.0258435010910034, |
| "learning_rate": 8.732029638014249e-06, |
| "loss": 0.5337, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 1.1698180437088013, |
| "learning_rate": 8.724276201055311e-06, |
| "loss": 0.4402, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.0054242610931396, |
| "learning_rate": 8.716502595582685e-06, |
| "loss": 0.5391, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.932, |
| "grad_norm": 0.8926470875740051, |
| "learning_rate": 8.708708863693696e-06, |
| "loss": 0.4663, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.934, |
| "grad_norm": 1.3373093605041504, |
| "learning_rate": 8.700895047594664e-06, |
| "loss": 0.5086, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 1.17959725856781, |
| "learning_rate": 8.693061189600671e-06, |
| "loss": 0.4284, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.938, |
| "grad_norm": 1.0325318574905396, |
| "learning_rate": 8.685207332135337e-06, |
| "loss": 0.5204, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.8622048497200012, |
| "learning_rate": 8.677333517730582e-06, |
| "loss": 0.4318, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.942, |
| "grad_norm": 1.214130163192749, |
| "learning_rate": 8.669439789026409e-06, |
| "loss": 0.4886, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 1.3919086456298828, |
| "learning_rate": 8.66152618877066e-06, |
| "loss": 0.4725, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.946, |
| "grad_norm": 1.263291835784912, |
| "learning_rate": 8.65359275981879e-06, |
| "loss": 0.4854, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.948, |
| "grad_norm": 1.5392141342163086, |
| "learning_rate": 8.645639545133638e-06, |
| "loss": 0.5634, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.246302843093872, |
| "learning_rate": 8.637666587785185e-06, |
| "loss": 0.4597, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 1.0350593328475952, |
| "learning_rate": 8.629673930950335e-06, |
| "loss": 0.4152, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.954, |
| "grad_norm": 1.295961618423462, |
| "learning_rate": 8.621661617912665e-06, |
| "loss": 0.5035, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.956, |
| "grad_norm": 1.3225280046463013, |
| "learning_rate": 8.613629692062204e-06, |
| "loss": 0.5491, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.958, |
| "grad_norm": 1.495888352394104, |
| "learning_rate": 8.60557819689519e-06, |
| "loss": 0.5184, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.549146056175232, |
| "learning_rate": 8.597507176013839e-06, |
| "loss": 0.4976, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.962, |
| "grad_norm": 1.1670464277267456, |
| "learning_rate": 8.589416673126104e-06, |
| "loss": 0.4262, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.964, |
| "grad_norm": 1.1644515991210938, |
| "learning_rate": 8.581306732045443e-06, |
| "loss": 0.4646, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.966, |
| "grad_norm": 0.8502632975578308, |
| "learning_rate": 8.57317739669058e-06, |
| "loss": 0.4702, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 1.3403635025024414, |
| "learning_rate": 8.565028711085266e-06, |
| "loss": 0.4864, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.1791622638702393, |
| "learning_rate": 8.556860719358045e-06, |
| "loss": 0.41, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.972, |
| "grad_norm": 1.5249660015106201, |
| "learning_rate": 8.548673465742006e-06, |
| "loss": 0.4471, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.974, |
| "grad_norm": 1.3172943592071533, |
| "learning_rate": 8.540466994574556e-06, |
| "loss": 0.4734, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 1.012715458869934, |
| "learning_rate": 8.532241350297167e-06, |
| "loss": 0.4765, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.978, |
| "grad_norm": 0.9505335092544556, |
| "learning_rate": 8.523996577455144e-06, |
| "loss": 0.4513, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 0.9823141098022461, |
| "learning_rate": 8.515732720697383e-06, |
| "loss": 0.5406, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.982, |
| "grad_norm": 1.1936461925506592, |
| "learning_rate": 8.507449824776125e-06, |
| "loss": 0.4852, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 1.1408801078796387, |
| "learning_rate": 8.499147934546716e-06, |
| "loss": 0.454, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.986, |
| "grad_norm": 1.1299476623535156, |
| "learning_rate": 8.490827094967364e-06, |
| "loss": 0.4875, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.988, |
| "grad_norm": 0.7930355668067932, |
| "learning_rate": 8.482487351098899e-06, |
| "loss": 0.4449, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.4795210361480713, |
| "learning_rate": 8.47412874810452e-06, |
| "loss": 0.5077, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 1.3691011667251587, |
| "learning_rate": 8.465751331249558e-06, |
| "loss": 0.4822, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.994, |
| "grad_norm": 1.6543487310409546, |
| "learning_rate": 8.457355145901235e-06, |
| "loss": 0.4765, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.996, |
| "grad_norm": 1.147316813468933, |
| "learning_rate": 8.448940237528404e-06, |
| "loss": 0.4662, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.998, |
| "grad_norm": 1.3053276538848877, |
| "learning_rate": 8.440506651701315e-06, |
| "loss": 0.4694, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.914472758769989, |
| "learning_rate": 8.43205443409136e-06, |
| "loss": 0.3919, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.002, |
| "grad_norm": 1.1292088031768799, |
| "learning_rate": 8.42358363047084e-06, |
| "loss": 0.2872, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.004, |
| "grad_norm": 1.1585907936096191, |
| "learning_rate": 8.415094286712694e-06, |
| "loss": 0.3777, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.006, |
| "grad_norm": 1.4778828620910645, |
| "learning_rate": 8.406586448790277e-06, |
| "loss": 0.3282, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 1.3356106281280518, |
| "learning_rate": 8.398060162777084e-06, |
| "loss": 0.3194, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 1.343839168548584, |
| "learning_rate": 8.389515474846522e-06, |
| "loss": 0.3194, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.012, |
| "grad_norm": 1.3251718282699585, |
| "learning_rate": 8.380952431271653e-06, |
| "loss": 0.3459, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.014, |
| "grad_norm": 1.7042008638381958, |
| "learning_rate": 8.372371078424941e-06, |
| "loss": 0.3321, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.016, |
| "grad_norm": 1.3194146156311035, |
| "learning_rate": 8.363771462778e-06, |
| "loss": 0.3652, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.018, |
| "grad_norm": 1.481685996055603, |
| "learning_rate": 8.355153630901344e-06, |
| "loss": 0.3853, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 1.6290403604507446, |
| "learning_rate": 8.346517629464141e-06, |
| "loss": 0.2857, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.022, |
| "grad_norm": 0.9795701503753662, |
| "learning_rate": 8.337863505233954e-06, |
| "loss": 0.3388, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 1.4000308513641357, |
| "learning_rate": 8.32919130507648e-06, |
| "loss": 0.3922, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.026, |
| "grad_norm": 1.081754446029663, |
| "learning_rate": 8.320501075955317e-06, |
| "loss": 0.2852, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.028, |
| "grad_norm": 1.0564137697219849, |
| "learning_rate": 8.311792864931686e-06, |
| "loss": 0.3219, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 1.1438813209533691, |
| "learning_rate": 8.303066719164195e-06, |
| "loss": 0.2861, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.032, |
| "grad_norm": 1.4927905797958374, |
| "learning_rate": 8.294322685908576e-06, |
| "loss": 0.3144, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.034, |
| "grad_norm": 1.9864319562911987, |
| "learning_rate": 8.285560812517423e-06, |
| "loss": 0.3573, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.036, |
| "grad_norm": 1.1521250009536743, |
| "learning_rate": 8.27678114643995e-06, |
| "loss": 0.3696, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.038, |
| "grad_norm": 1.2286995649337769, |
| "learning_rate": 8.26798373522172e-06, |
| "loss": 0.3882, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 1.3970285654067993, |
| "learning_rate": 8.259168626504395e-06, |
| "loss": 0.3408, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.042, |
| "grad_norm": 1.3147468566894531, |
| "learning_rate": 8.250335868025477e-06, |
| "loss": 0.3673, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.044, |
| "grad_norm": 1.1939131021499634, |
| "learning_rate": 8.241485507618046e-06, |
| "loss": 0.3329, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.046, |
| "grad_norm": 1.4440771341323853, |
| "learning_rate": 8.232617593210512e-06, |
| "loss": 0.3474, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.048, |
| "grad_norm": 1.0242334604263306, |
| "learning_rate": 8.223732172826336e-06, |
| "loss": 0.361, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 1.4275943040847778, |
| "learning_rate": 8.214829294583786e-06, |
| "loss": 0.3043, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.052, |
| "grad_norm": 1.5001543760299683, |
| "learning_rate": 8.205909006695679e-06, |
| "loss": 0.3676, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.054, |
| "grad_norm": 1.3494583368301392, |
| "learning_rate": 8.196971357469098e-06, |
| "loss": 0.3486, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 1.2720623016357422, |
| "learning_rate": 8.188016395305156e-06, |
| "loss": 0.3145, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.058, |
| "grad_norm": 1.5187513828277588, |
| "learning_rate": 8.179044168698722e-06, |
| "loss": 0.3581, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 2.041038990020752, |
| "learning_rate": 8.170054726238152e-06, |
| "loss": 0.3858, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.062, |
| "grad_norm": 1.375152587890625, |
| "learning_rate": 8.161048116605039e-06, |
| "loss": 0.3401, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 0.8547447323799133, |
| "learning_rate": 8.152024388573945e-06, |
| "loss": 0.3006, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.066, |
| "grad_norm": 2.131930351257324, |
| "learning_rate": 8.142983591012128e-06, |
| "loss": 0.3521, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.068, |
| "grad_norm": 1.6276785135269165, |
| "learning_rate": 8.133925772879292e-06, |
| "loss": 0.3877, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 1.4579194784164429, |
| "learning_rate": 8.124850983227313e-06, |
| "loss": 0.3793, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 1.182732105255127, |
| "learning_rate": 8.115759271199967e-06, |
| "loss": 0.3425, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.074, |
| "grad_norm": 1.5831196308135986, |
| "learning_rate": 8.106650686032687e-06, |
| "loss": 0.3424, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.076, |
| "grad_norm": 0.9246402978897095, |
| "learning_rate": 8.097525277052265e-06, |
| "loss": 0.3245, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.078, |
| "grad_norm": 1.0254276990890503, |
| "learning_rate": 8.08838309367661e-06, |
| "loss": 0.3218, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.9792593717575073, |
| "learning_rate": 8.079224185414471e-06, |
| "loss": 0.3681, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.082, |
| "grad_norm": 1.3363689184188843, |
| "learning_rate": 8.07004860186517e-06, |
| "loss": 0.3799, |
| "step": 5410 |
| }, |
| { |
| "epoch": 1.084, |
| "grad_norm": 1.571494698524475, |
| "learning_rate": 8.060856392718326e-06, |
| "loss": 0.2975, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.086, |
| "grad_norm": 1.1808347702026367, |
| "learning_rate": 8.051647607753598e-06, |
| "loss": 0.3532, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 2.0380117893218994, |
| "learning_rate": 8.04242229684041e-06, |
| "loss": 0.3543, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 1.3502197265625, |
| "learning_rate": 8.033180509937683e-06, |
| "loss": 0.3566, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.092, |
| "grad_norm": 1.074385404586792, |
| "learning_rate": 8.023922297093557e-06, |
| "loss": 0.3152, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.094, |
| "grad_norm": 1.133074402809143, |
| "learning_rate": 8.014647708445124e-06, |
| "loss": 0.3784, |
| "step": 5470 |
| }, |
| { |
| "epoch": 1.096, |
| "grad_norm": 1.1202969551086426, |
| "learning_rate": 8.005356794218167e-06, |
| "loss": 0.4327, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.098, |
| "grad_norm": 1.4087947607040405, |
| "learning_rate": 7.99604960472687e-06, |
| "loss": 0.3206, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 1.140956163406372, |
| "learning_rate": 7.986726190373562e-06, |
| "loss": 0.2804, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.102, |
| "grad_norm": 0.9047802686691284, |
| "learning_rate": 7.977386601648427e-06, |
| "loss": 0.3162, |
| "step": 5510 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 0.9571928381919861, |
| "learning_rate": 7.968030889129247e-06, |
| "loss": 0.2635, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.106, |
| "grad_norm": 1.3515325784683228, |
| "learning_rate": 7.95865910348112e-06, |
| "loss": 0.3598, |
| "step": 5530 |
| }, |
| { |
| "epoch": 1.108, |
| "grad_norm": 1.0597070455551147, |
| "learning_rate": 7.949271295456187e-06, |
| "loss": 0.2918, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 1.3461328744888306, |
| "learning_rate": 7.939867515893353e-06, |
| "loss": 0.2928, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.112, |
| "grad_norm": 1.0447543859481812, |
| "learning_rate": 7.930447815718022e-06, |
| "loss": 0.3584, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.114, |
| "grad_norm": 1.611246109008789, |
| "learning_rate": 7.921012245941809e-06, |
| "loss": 0.3816, |
| "step": 5570 |
| }, |
| { |
| "epoch": 1.116, |
| "grad_norm": 0.886353611946106, |
| "learning_rate": 7.91156085766227e-06, |
| "loss": 0.322, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.1179999999999999, |
| "grad_norm": 1.2039692401885986, |
| "learning_rate": 7.90209370206263e-06, |
| "loss": 0.2804, |
| "step": 5590 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 1.0296542644500732, |
| "learning_rate": 7.892610830411496e-06, |
| "loss": 0.3007, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.1219999999999999, |
| "grad_norm": 1.3383187055587769, |
| "learning_rate": 7.883112294062585e-06, |
| "loss": 0.38, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.124, |
| "grad_norm": 1.2576732635498047, |
| "learning_rate": 7.873598144454444e-06, |
| "loss": 0.3637, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.126, |
| "grad_norm": 0.6940869688987732, |
| "learning_rate": 7.864068433110176e-06, |
| "loss": 0.2982, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.1280000000000001, |
| "grad_norm": 1.15080726146698, |
| "learning_rate": 7.854523211637152e-06, |
| "loss": 0.31, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 1.8969014883041382, |
| "learning_rate": 7.844962531726742e-06, |
| "loss": 0.3375, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.1320000000000001, |
| "grad_norm": 1.5290457010269165, |
| "learning_rate": 7.835386445154023e-06, |
| "loss": 0.3467, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.134, |
| "grad_norm": 1.6540489196777344, |
| "learning_rate": 7.825795003777515e-06, |
| "loss": 0.3403, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 1.4303158521652222, |
| "learning_rate": 7.816188259538885e-06, |
| "loss": 0.3727, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.138, |
| "grad_norm": 1.2462937831878662, |
| "learning_rate": 7.806566264462668e-06, |
| "loss": 0.3325, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.1400000000000001, |
| "grad_norm": 1.2328237295150757, |
| "learning_rate": 7.796929070655994e-06, |
| "loss": 0.3572, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.142, |
| "grad_norm": 1.305349349975586, |
| "learning_rate": 7.787276730308304e-06, |
| "loss": 0.3046, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.144, |
| "grad_norm": 1.3444890975952148, |
| "learning_rate": 7.777609295691055e-06, |
| "loss": 0.3464, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.146, |
| "grad_norm": 0.8992980718612671, |
| "learning_rate": 7.767926819157452e-06, |
| "loss": 0.3742, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.148, |
| "grad_norm": 1.571289300918579, |
| "learning_rate": 7.758229353142153e-06, |
| "loss": 0.3863, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 1.2571865320205688, |
| "learning_rate": 7.748516950160993e-06, |
| "loss": 0.3611, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 0.9523794651031494, |
| "learning_rate": 7.738789662810702e-06, |
| "loss": 0.3352, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.154, |
| "grad_norm": 1.1043280363082886, |
| "learning_rate": 7.729047543768608e-06, |
| "loss": 0.4024, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.156, |
| "grad_norm": 1.4577151536941528, |
| "learning_rate": 7.719290645792361e-06, |
| "loss": 0.3124, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.158, |
| "grad_norm": 1.7957823276519775, |
| "learning_rate": 7.709519021719644e-06, |
| "loss": 0.4165, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 1.2084007263183594, |
| "learning_rate": 7.699732724467894e-06, |
| "loss": 0.3357, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.162, |
| "grad_norm": 1.2520586252212524, |
| "learning_rate": 7.689931807033999e-06, |
| "loss": 0.3114, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.164, |
| "grad_norm": 1.048161268234253, |
| "learning_rate": 7.68011632249403e-06, |
| "loss": 0.3216, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.166, |
| "grad_norm": 1.231397032737732, |
| "learning_rate": 7.670286324002943e-06, |
| "loss": 0.3458, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 1.6105576753616333, |
| "learning_rate": 7.66044186479429e-06, |
| "loss": 0.3423, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 1.3911272287368774, |
| "learning_rate": 7.650582998179939e-06, |
| "loss": 0.3088, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.172, |
| "grad_norm": 1.618174433708191, |
| "learning_rate": 7.640709777549773e-06, |
| "loss": 0.3516, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.174, |
| "grad_norm": 2.1823925971984863, |
| "learning_rate": 7.630822256371415e-06, |
| "loss": 0.3054, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.176, |
| "grad_norm": 1.236488938331604, |
| "learning_rate": 7.620920488189929e-06, |
| "loss": 0.3375, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.178, |
| "grad_norm": 1.020044207572937, |
| "learning_rate": 7.6110045266275305e-06, |
| "loss": 0.2757, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 0.9718794822692871, |
| "learning_rate": 7.601074425383302e-06, |
| "loss": 0.3025, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.182, |
| "grad_norm": 1.4841420650482178, |
| "learning_rate": 7.591130238232892e-06, |
| "loss": 0.3117, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 1.511138916015625, |
| "learning_rate": 7.581172019028238e-06, |
| "loss": 0.3936, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.186, |
| "grad_norm": 1.3772696256637573, |
| "learning_rate": 7.571199821697263e-06, |
| "loss": 0.3486, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.188, |
| "grad_norm": 1.3091078996658325, |
| "learning_rate": 7.561213700243584e-06, |
| "loss": 0.2936, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 1.3505816459655762, |
| "learning_rate": 7.55121370874623e-06, |
| "loss": 0.3203, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.192, |
| "grad_norm": 1.3498326539993286, |
| "learning_rate": 7.541199901359335e-06, |
| "loss": 0.3734, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.194, |
| "grad_norm": 1.5176153182983398, |
| "learning_rate": 7.531172332311861e-06, |
| "loss": 0.2997, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.196, |
| "grad_norm": 0.9407598972320557, |
| "learning_rate": 7.521131055907283e-06, |
| "loss": 0.3204, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.198, |
| "grad_norm": 1.3535823822021484, |
| "learning_rate": 7.5110761265233156e-06, |
| "loss": 0.2981, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 1.7967560291290283, |
| "learning_rate": 7.501007598611609e-06, |
| "loss": 0.3508, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.202, |
| "grad_norm": 1.682611107826233, |
| "learning_rate": 7.490925526697455e-06, |
| "loss": 0.3258, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.204, |
| "grad_norm": 1.2590186595916748, |
| "learning_rate": 7.480829965379489e-06, |
| "loss": 0.3376, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.206, |
| "grad_norm": 1.1302205324172974, |
| "learning_rate": 7.470720969329399e-06, |
| "loss": 0.3297, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.208, |
| "grad_norm": 1.1471021175384521, |
| "learning_rate": 7.460598593291628e-06, |
| "loss": 0.3566, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.21, |
| "grad_norm": 2.0590806007385254, |
| "learning_rate": 7.450462892083079e-06, |
| "loss": 0.3354, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.212, |
| "grad_norm": 1.6424793004989624, |
| "learning_rate": 7.44031392059281e-06, |
| "loss": 0.348, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.214, |
| "grad_norm": 1.1267763376235962, |
| "learning_rate": 7.430151733781752e-06, |
| "loss": 0.324, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 1.049166202545166, |
| "learning_rate": 7.419976386682395e-06, |
| "loss": 0.3282, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.218, |
| "grad_norm": 1.6423687934875488, |
| "learning_rate": 7.409787934398502e-06, |
| "loss": 0.2872, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.22, |
| "grad_norm": 1.4791516065597534, |
| "learning_rate": 7.3995864321048036e-06, |
| "loss": 0.416, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.222, |
| "grad_norm": 1.1335197687149048, |
| "learning_rate": 7.389371935046703e-06, |
| "loss": 0.3191, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.224, |
| "grad_norm": 1.4202783107757568, |
| "learning_rate": 7.3791444985399755e-06, |
| "loss": 0.3429, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.226, |
| "grad_norm": 1.132926106452942, |
| "learning_rate": 7.368904177970466e-06, |
| "loss": 0.3469, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.228, |
| "grad_norm": 1.7757046222686768, |
| "learning_rate": 7.358651028793797e-06, |
| "loss": 0.333, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 1.3629705905914307, |
| "learning_rate": 7.3483851065350595e-06, |
| "loss": 0.3802, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.232, |
| "grad_norm": 1.0564378499984741, |
| "learning_rate": 7.33810646678852e-06, |
| "loss": 0.3904, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.234, |
| "grad_norm": 1.6500481367111206, |
| "learning_rate": 7.327815165217309e-06, |
| "loss": 0.3675, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.236, |
| "grad_norm": 1.5274205207824707, |
| "learning_rate": 7.317511257553131e-06, |
| "loss": 0.3182, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.238, |
| "grad_norm": 1.1405715942382812, |
| "learning_rate": 7.307194799595958e-06, |
| "loss": 0.3103, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 1.4158636331558228, |
| "learning_rate": 7.296865847213724e-06, |
| "loss": 0.3453, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.242, |
| "grad_norm": 1.440067172050476, |
| "learning_rate": 7.2865244563420304e-06, |
| "loss": 0.4289, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.244, |
| "grad_norm": 1.2499229907989502, |
| "learning_rate": 7.27617068298383e-06, |
| "loss": 0.3657, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.246, |
| "grad_norm": 1.599689245223999, |
| "learning_rate": 7.265804583209142e-06, |
| "loss": 0.339, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.248, |
| "grad_norm": 1.6074563264846802, |
| "learning_rate": 7.25542621315473e-06, |
| "loss": 0.3562, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 1.6778024435043335, |
| "learning_rate": 7.245035629023812e-06, |
| "loss": 0.313, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.252, |
| "grad_norm": 1.2234922647476196, |
| "learning_rate": 7.2346328870857465e-06, |
| "loss": 0.3822, |
| "step": 6260 |
| }, |
| { |
| "epoch": 1.254, |
| "grad_norm": 1.2148653268814087, |
| "learning_rate": 7.224218043675735e-06, |
| "loss": 0.3567, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.256, |
| "grad_norm": 0.9696487188339233, |
| "learning_rate": 7.21379115519451e-06, |
| "loss": 0.3688, |
| "step": 6280 |
| }, |
| { |
| "epoch": 1.258, |
| "grad_norm": 1.371671438217163, |
| "learning_rate": 7.2033522781080325e-06, |
| "loss": 0.3409, |
| "step": 6290 |
| }, |
| { |
| "epoch": 1.26, |
| "grad_norm": 1.5369791984558105, |
| "learning_rate": 7.192901468947193e-06, |
| "loss": 0.3777, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.262, |
| "grad_norm": 1.8359071016311646, |
| "learning_rate": 7.182438784307495e-06, |
| "loss": 0.3179, |
| "step": 6310 |
| }, |
| { |
| "epoch": 1.264, |
| "grad_norm": 0.9479062557220459, |
| "learning_rate": 7.171964280848749e-06, |
| "loss": 0.2901, |
| "step": 6320 |
| }, |
| { |
| "epoch": 1.266, |
| "grad_norm": 1.220046877861023, |
| "learning_rate": 7.161478015294778e-06, |
| "loss": 0.3478, |
| "step": 6330 |
| }, |
| { |
| "epoch": 1.268, |
| "grad_norm": 1.2605116367340088, |
| "learning_rate": 7.150980044433094e-06, |
| "loss": 0.2351, |
| "step": 6340 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 1.6339104175567627, |
| "learning_rate": 7.140470425114603e-06, |
| "loss": 0.3123, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.272, |
| "grad_norm": 1.1417044401168823, |
| "learning_rate": 7.1299492142532876e-06, |
| "loss": 0.3129, |
| "step": 6360 |
| }, |
| { |
| "epoch": 1.274, |
| "grad_norm": 0.994657039642334, |
| "learning_rate": 7.119416468825908e-06, |
| "loss": 0.3247, |
| "step": 6370 |
| }, |
| { |
| "epoch": 1.276, |
| "grad_norm": 1.182531714439392, |
| "learning_rate": 7.108872245871687e-06, |
| "loss": 0.2929, |
| "step": 6380 |
| }, |
| { |
| "epoch": 1.278, |
| "grad_norm": 1.1829276084899902, |
| "learning_rate": 7.098316602492004e-06, |
| "loss": 0.3174, |
| "step": 6390 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 1.1945512294769287, |
| "learning_rate": 7.087749595850084e-06, |
| "loss": 0.3341, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.282, |
| "grad_norm": 1.33237886428833, |
| "learning_rate": 7.0771712831706855e-06, |
| "loss": 0.3453, |
| "step": 6410 |
| }, |
| { |
| "epoch": 1.284, |
| "grad_norm": 1.4239740371704102, |
| "learning_rate": 7.066581721739801e-06, |
| "loss": 0.3416, |
| "step": 6420 |
| }, |
| { |
| "epoch": 1.286, |
| "grad_norm": 1.329602837562561, |
| "learning_rate": 7.0559809689043325e-06, |
| "loss": 0.3434, |
| "step": 6430 |
| }, |
| { |
| "epoch": 1.288, |
| "grad_norm": 1.4223501682281494, |
| "learning_rate": 7.045369082071793e-06, |
| "loss": 0.3664, |
| "step": 6440 |
| }, |
| { |
| "epoch": 1.29, |
| "grad_norm": 1.5064311027526855, |
| "learning_rate": 7.034746118709989e-06, |
| "loss": 0.3446, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.292, |
| "grad_norm": 1.0884929895401, |
| "learning_rate": 7.024112136346713e-06, |
| "loss": 0.2676, |
| "step": 6460 |
| }, |
| { |
| "epoch": 1.294, |
| "grad_norm": 1.623273491859436, |
| "learning_rate": 7.013467192569427e-06, |
| "loss": 0.4001, |
| "step": 6470 |
| }, |
| { |
| "epoch": 1.296, |
| "grad_norm": 1.3760849237442017, |
| "learning_rate": 7.002811345024951e-06, |
| "loss": 0.3415, |
| "step": 6480 |
| }, |
| { |
| "epoch": 1.298, |
| "grad_norm": 1.071457862854004, |
| "learning_rate": 6.992144651419163e-06, |
| "loss": 0.3584, |
| "step": 6490 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 1.245360255241394, |
| "learning_rate": 6.981467169516671e-06, |
| "loss": 0.341, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.302, |
| "grad_norm": 1.653593897819519, |
| "learning_rate": 6.9707789571405025e-06, |
| "loss": 0.3626, |
| "step": 6510 |
| }, |
| { |
| "epoch": 1.304, |
| "grad_norm": 0.9302300214767456, |
| "learning_rate": 6.960080072171802e-06, |
| "loss": 0.2995, |
| "step": 6520 |
| }, |
| { |
| "epoch": 1.306, |
| "grad_norm": 1.358490228652954, |
| "learning_rate": 6.949370572549506e-06, |
| "loss": 0.3312, |
| "step": 6530 |
| }, |
| { |
| "epoch": 1.308, |
| "grad_norm": 1.5318645238876343, |
| "learning_rate": 6.938650516270038e-06, |
| "loss": 0.3552, |
| "step": 6540 |
| }, |
| { |
| "epoch": 1.31, |
| "grad_norm": 1.6989809274673462, |
| "learning_rate": 6.927919961386984e-06, |
| "loss": 0.3347, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.312, |
| "grad_norm": 1.2937415838241577, |
| "learning_rate": 6.9171789660107876e-06, |
| "loss": 0.3167, |
| "step": 6560 |
| }, |
| { |
| "epoch": 1.314, |
| "grad_norm": 1.2662570476531982, |
| "learning_rate": 6.906427588308436e-06, |
| "loss": 0.374, |
| "step": 6570 |
| }, |
| { |
| "epoch": 1.316, |
| "grad_norm": 1.152688980102539, |
| "learning_rate": 6.895665886503136e-06, |
| "loss": 0.2952, |
| "step": 6580 |
| }, |
| { |
| "epoch": 1.318, |
| "grad_norm": 1.0898213386535645, |
| "learning_rate": 6.8848939188740034e-06, |
| "loss": 0.3529, |
| "step": 6590 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 1.2499762773513794, |
| "learning_rate": 6.874111743755751e-06, |
| "loss": 0.335, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.322, |
| "grad_norm": 1.5297431945800781, |
| "learning_rate": 6.863319419538366e-06, |
| "loss": 0.3896, |
| "step": 6610 |
| }, |
| { |
| "epoch": 1.324, |
| "grad_norm": 1.6625542640686035, |
| "learning_rate": 6.852517004666801e-06, |
| "loss": 0.3455, |
| "step": 6620 |
| }, |
| { |
| "epoch": 1.326, |
| "grad_norm": 1.4104151725769043, |
| "learning_rate": 6.84170455764065e-06, |
| "loss": 0.3535, |
| "step": 6630 |
| }, |
| { |
| "epoch": 1.328, |
| "grad_norm": 1.2529921531677246, |
| "learning_rate": 6.830882137013839e-06, |
| "loss": 0.3219, |
| "step": 6640 |
| }, |
| { |
| "epoch": 1.33, |
| "grad_norm": 1.6299757957458496, |
| "learning_rate": 6.820049801394303e-06, |
| "loss": 0.4037, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.332, |
| "grad_norm": 1.215126872062683, |
| "learning_rate": 6.80920760944367e-06, |
| "loss": 0.3307, |
| "step": 6660 |
| }, |
| { |
| "epoch": 1.334, |
| "grad_norm": 1.0372707843780518, |
| "learning_rate": 6.798355619876944e-06, |
| "loss": 0.3326, |
| "step": 6670 |
| }, |
| { |
| "epoch": 1.336, |
| "grad_norm": 1.1610356569290161, |
| "learning_rate": 6.787493891462191e-06, |
| "loss": 0.3467, |
| "step": 6680 |
| }, |
| { |
| "epoch": 1.338, |
| "grad_norm": 1.7156941890716553, |
| "learning_rate": 6.776622483020214e-06, |
| "loss": 0.4261, |
| "step": 6690 |
| }, |
| { |
| "epoch": 1.34, |
| "grad_norm": 0.9658931493759155, |
| "learning_rate": 6.765741453424237e-06, |
| "loss": 0.307, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.342, |
| "grad_norm": 0.8651520013809204, |
| "learning_rate": 6.754850861599589e-06, |
| "loss": 0.3802, |
| "step": 6710 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 1.15030837059021, |
| "learning_rate": 6.743950766523377e-06, |
| "loss": 0.3026, |
| "step": 6720 |
| }, |
| { |
| "epoch": 1.346, |
| "grad_norm": 1.2045297622680664, |
| "learning_rate": 6.733041227224182e-06, |
| "loss": 0.3673, |
| "step": 6730 |
| }, |
| { |
| "epoch": 1.3479999999999999, |
| "grad_norm": 1.098310112953186, |
| "learning_rate": 6.722122302781716e-06, |
| "loss": 0.3305, |
| "step": 6740 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 1.5491714477539062, |
| "learning_rate": 6.711194052326528e-06, |
| "loss": 0.3063, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.3519999999999999, |
| "grad_norm": 1.581486463546753, |
| "learning_rate": 6.700256535039665e-06, |
| "loss": 0.3534, |
| "step": 6760 |
| }, |
| { |
| "epoch": 1.354, |
| "grad_norm": 1.3409374952316284, |
| "learning_rate": 6.689309810152359e-06, |
| "loss": 0.3866, |
| "step": 6770 |
| }, |
| { |
| "epoch": 1.3559999999999999, |
| "grad_norm": 1.0301975011825562, |
| "learning_rate": 6.678353936945704e-06, |
| "loss": 0.3391, |
| "step": 6780 |
| }, |
| { |
| "epoch": 1.358, |
| "grad_norm": 1.1352406740188599, |
| "learning_rate": 6.6673889747503364e-06, |
| "loss": 0.2808, |
| "step": 6790 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 1.2140257358551025, |
| "learning_rate": 6.656414982946115e-06, |
| "loss": 0.3619, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.362, |
| "grad_norm": 1.853940725326538, |
| "learning_rate": 6.645432020961796e-06, |
| "loss": 0.3014, |
| "step": 6810 |
| }, |
| { |
| "epoch": 1.3639999999999999, |
| "grad_norm": 0.9603062272071838, |
| "learning_rate": 6.634440148274712e-06, |
| "loss": 0.3011, |
| "step": 6820 |
| }, |
| { |
| "epoch": 1.366, |
| "grad_norm": 0.9446805715560913, |
| "learning_rate": 6.623439424410456e-06, |
| "loss": 0.3335, |
| "step": 6830 |
| }, |
| { |
| "epoch": 1.3679999999999999, |
| "grad_norm": 1.3330634832382202, |
| "learning_rate": 6.612429908942546e-06, |
| "loss": 0.3776, |
| "step": 6840 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 1.3779356479644775, |
| "learning_rate": 6.601411661492114e-06, |
| "loss": 0.3175, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.3719999999999999, |
| "grad_norm": 1.492652416229248, |
| "learning_rate": 6.590384741727583e-06, |
| "loss": 0.3377, |
| "step": 6860 |
| }, |
| { |
| "epoch": 1.374, |
| "grad_norm": 1.719750165939331, |
| "learning_rate": 6.579349209364332e-06, |
| "loss": 0.3711, |
| "step": 6870 |
| }, |
| { |
| "epoch": 1.376, |
| "grad_norm": 1.331905722618103, |
| "learning_rate": 6.5683051241643894e-06, |
| "loss": 0.3052, |
| "step": 6880 |
| }, |
| { |
| "epoch": 1.3780000000000001, |
| "grad_norm": 1.1688127517700195, |
| "learning_rate": 6.557252545936095e-06, |
| "loss": 0.301, |
| "step": 6890 |
| }, |
| { |
| "epoch": 1.38, |
| "grad_norm": 0.8940510153770447, |
| "learning_rate": 6.546191534533783e-06, |
| "loss": 0.3122, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.3820000000000001, |
| "grad_norm": 1.363988995552063, |
| "learning_rate": 6.53512214985746e-06, |
| "loss": 0.3079, |
| "step": 6910 |
| }, |
| { |
| "epoch": 1.384, |
| "grad_norm": 0.9122200608253479, |
| "learning_rate": 6.5240444518524736e-06, |
| "loss": 0.3486, |
| "step": 6920 |
| }, |
| { |
| "epoch": 1.3860000000000001, |
| "grad_norm": 1.0334652662277222, |
| "learning_rate": 6.512958500509193e-06, |
| "loss": 0.2696, |
| "step": 6930 |
| }, |
| { |
| "epoch": 1.388, |
| "grad_norm": 1.257965326309204, |
| "learning_rate": 6.501864355862682e-06, |
| "loss": 0.3648, |
| "step": 6940 |
| }, |
| { |
| "epoch": 1.3900000000000001, |
| "grad_norm": 1.1571115255355835, |
| "learning_rate": 6.490762077992376e-06, |
| "loss": 0.3474, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.392, |
| "grad_norm": 1.2089656591415405, |
| "learning_rate": 6.479651727021754e-06, |
| "loss": 0.3313, |
| "step": 6960 |
| }, |
| { |
| "epoch": 1.3940000000000001, |
| "grad_norm": 1.4407920837402344, |
| "learning_rate": 6.4685333631180145e-06, |
| "loss": 0.3695, |
| "step": 6970 |
| }, |
| { |
| "epoch": 1.396, |
| "grad_norm": 2.0778284072875977, |
| "learning_rate": 6.457407046491748e-06, |
| "loss": 0.2625, |
| "step": 6980 |
| }, |
| { |
| "epoch": 1.3980000000000001, |
| "grad_norm": 1.36530339717865, |
| "learning_rate": 6.4462728373966165e-06, |
| "loss": 0.3015, |
| "step": 6990 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 1.8104184865951538, |
| "learning_rate": 6.435130796129019e-06, |
| "loss": 0.4064, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.4020000000000001, |
| "grad_norm": 1.3507591485977173, |
| "learning_rate": 6.423980983027769e-06, |
| "loss": 0.3225, |
| "step": 7010 |
| }, |
| { |
| "epoch": 1.404, |
| "grad_norm": 0.7463653087615967, |
| "learning_rate": 6.412823458473772e-06, |
| "loss": 0.2916, |
| "step": 7020 |
| }, |
| { |
| "epoch": 1.4060000000000001, |
| "grad_norm": 1.5296553373336792, |
| "learning_rate": 6.401658282889689e-06, |
| "loss": 0.3413, |
| "step": 7030 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 1.1751571893692017, |
| "learning_rate": 6.390485516739616e-06, |
| "loss": 0.3598, |
| "step": 7040 |
| }, |
| { |
| "epoch": 1.41, |
| "grad_norm": 1.090745449066162, |
| "learning_rate": 6.379305220528758e-06, |
| "loss": 0.3398, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.412, |
| "grad_norm": 1.3860448598861694, |
| "learning_rate": 6.368117454803093e-06, |
| "loss": 0.2953, |
| "step": 7060 |
| }, |
| { |
| "epoch": 1.414, |
| "grad_norm": 1.4662338495254517, |
| "learning_rate": 6.356922280149058e-06, |
| "loss": 0.3662, |
| "step": 7070 |
| }, |
| { |
| "epoch": 1.416, |
| "grad_norm": 1.0763154029846191, |
| "learning_rate": 6.345719757193203e-06, |
| "loss": 0.3182, |
| "step": 7080 |
| }, |
| { |
| "epoch": 1.418, |
| "grad_norm": 1.2442153692245483, |
| "learning_rate": 6.334509946601879e-06, |
| "loss": 0.3177, |
| "step": 7090 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 1.304867148399353, |
| "learning_rate": 6.323292909080897e-06, |
| "loss": 0.3322, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.422, |
| "grad_norm": 1.5023126602172852, |
| "learning_rate": 6.3120687053752114e-06, |
| "loss": 0.3425, |
| "step": 7110 |
| }, |
| { |
| "epoch": 1.424, |
| "grad_norm": 1.539007306098938, |
| "learning_rate": 6.3008373962685785e-06, |
| "loss": 0.3376, |
| "step": 7120 |
| }, |
| { |
| "epoch": 1.426, |
| "grad_norm": 1.6825087070465088, |
| "learning_rate": 6.289599042583237e-06, |
| "loss": 0.364, |
| "step": 7130 |
| }, |
| { |
| "epoch": 1.428, |
| "grad_norm": 1.3176345825195312, |
| "learning_rate": 6.278353705179572e-06, |
| "loss": 0.3753, |
| "step": 7140 |
| }, |
| { |
| "epoch": 1.43, |
| "grad_norm": 1.2803112268447876, |
| "learning_rate": 6.267101444955792e-06, |
| "loss": 0.3295, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.432, |
| "grad_norm": 1.4513520002365112, |
| "learning_rate": 6.255842322847594e-06, |
| "loss": 0.3179, |
| "step": 7160 |
| }, |
| { |
| "epoch": 1.434, |
| "grad_norm": 1.314857006072998, |
| "learning_rate": 6.244576399827831e-06, |
| "loss": 0.3537, |
| "step": 7170 |
| }, |
| { |
| "epoch": 1.436, |
| "grad_norm": 1.1115436553955078, |
| "learning_rate": 6.233303736906193e-06, |
| "loss": 0.313, |
| "step": 7180 |
| }, |
| { |
| "epoch": 1.438, |
| "grad_norm": 1.1615487337112427, |
| "learning_rate": 6.222024395128864e-06, |
| "loss": 0.3293, |
| "step": 7190 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 1.4486738443374634, |
| "learning_rate": 6.210738435578198e-06, |
| "loss": 0.3793, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.442, |
| "grad_norm": 1.143863558769226, |
| "learning_rate": 6.199445919372388e-06, |
| "loss": 0.4045, |
| "step": 7210 |
| }, |
| { |
| "epoch": 1.444, |
| "grad_norm": 1.4991508722305298, |
| "learning_rate": 6.1881469076651336e-06, |
| "loss": 0.3216, |
| "step": 7220 |
| }, |
| { |
| "epoch": 1.446, |
| "grad_norm": 1.1230875253677368, |
| "learning_rate": 6.176841461645311e-06, |
| "loss": 0.3679, |
| "step": 7230 |
| }, |
| { |
| "epoch": 1.448, |
| "grad_norm": 1.141588568687439, |
| "learning_rate": 6.16552964253664e-06, |
| "loss": 0.3378, |
| "step": 7240 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 1.8605092763900757, |
| "learning_rate": 6.15421151159735e-06, |
| "loss": 0.3412, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.452, |
| "grad_norm": 1.14920175075531, |
| "learning_rate": 6.14288713011986e-06, |
| "loss": 0.2663, |
| "step": 7260 |
| }, |
| { |
| "epoch": 1.454, |
| "grad_norm": 1.7807815074920654, |
| "learning_rate": 6.13155655943043e-06, |
| "loss": 0.3288, |
| "step": 7270 |
| }, |
| { |
| "epoch": 1.456, |
| "grad_norm": 1.6375200748443604, |
| "learning_rate": 6.1202198608888416e-06, |
| "loss": 0.4102, |
| "step": 7280 |
| }, |
| { |
| "epoch": 1.458, |
| "grad_norm": 1.5959913730621338, |
| "learning_rate": 6.1088770958880595e-06, |
| "loss": 0.3615, |
| "step": 7290 |
| }, |
| { |
| "epoch": 1.46, |
| "grad_norm": 1.452133297920227, |
| "learning_rate": 6.097528325853903e-06, |
| "loss": 0.2841, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.462, |
| "grad_norm": 1.8519127368927002, |
| "learning_rate": 6.086173612244708e-06, |
| "loss": 0.3331, |
| "step": 7310 |
| }, |
| { |
| "epoch": 1.464, |
| "grad_norm": 1.0498560667037964, |
| "learning_rate": 6.074813016550998e-06, |
| "loss": 0.2939, |
| "step": 7320 |
| }, |
| { |
| "epoch": 1.466, |
| "grad_norm": 1.369956612586975, |
| "learning_rate": 6.0634466002951545e-06, |
| "loss": 0.3859, |
| "step": 7330 |
| }, |
| { |
| "epoch": 1.468, |
| "grad_norm": 1.7278661727905273, |
| "learning_rate": 6.052074425031075e-06, |
| "loss": 0.3599, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 1.1767780780792236, |
| "learning_rate": 6.040696552343845e-06, |
| "loss": 0.3392, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.472, |
| "grad_norm": 1.4453753232955933, |
| "learning_rate": 6.029313043849407e-06, |
| "loss": 0.4165, |
| "step": 7360 |
| }, |
| { |
| "epoch": 1.474, |
| "grad_norm": 1.0843770503997803, |
| "learning_rate": 6.017923961194221e-06, |
| "loss": 0.334, |
| "step": 7370 |
| }, |
| { |
| "epoch": 1.476, |
| "grad_norm": 1.1147040128707886, |
| "learning_rate": 6.006529366054935e-06, |
| "loss": 0.3453, |
| "step": 7380 |
| }, |
| { |
| "epoch": 1.478, |
| "grad_norm": 1.7290093898773193, |
| "learning_rate": 5.995129320138047e-06, |
| "loss": 0.3127, |
| "step": 7390 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 1.4090754985809326, |
| "learning_rate": 5.983723885179576e-06, |
| "loss": 0.3197, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.482, |
| "grad_norm": 1.1394073963165283, |
| "learning_rate": 5.972313122944724e-06, |
| "loss": 0.3234, |
| "step": 7410 |
| }, |
| { |
| "epoch": 1.484, |
| "grad_norm": 1.281721591949463, |
| "learning_rate": 5.960897095227541e-06, |
| "loss": 0.3311, |
| "step": 7420 |
| }, |
| { |
| "epoch": 1.486, |
| "grad_norm": 0.9098526239395142, |
| "learning_rate": 5.949475863850595e-06, |
| "loss": 0.3342, |
| "step": 7430 |
| }, |
| { |
| "epoch": 1.488, |
| "grad_norm": 0.939958930015564, |
| "learning_rate": 5.938049490664629e-06, |
| "loss": 0.3139, |
| "step": 7440 |
| }, |
| { |
| "epoch": 1.49, |
| "grad_norm": 1.2372125387191772, |
| "learning_rate": 5.926618037548237e-06, |
| "loss": 0.3121, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.492, |
| "grad_norm": 1.0505400896072388, |
| "learning_rate": 5.915181566407519e-06, |
| "loss": 0.3165, |
| "step": 7460 |
| }, |
| { |
| "epoch": 1.494, |
| "grad_norm": 1.4748669862747192, |
| "learning_rate": 5.903740139175752e-06, |
| "loss": 0.3584, |
| "step": 7470 |
| }, |
| { |
| "epoch": 1.496, |
| "grad_norm": 1.09158456325531, |
| "learning_rate": 5.892293817813048e-06, |
| "loss": 0.3162, |
| "step": 7480 |
| }, |
| { |
| "epoch": 1.498, |
| "grad_norm": 1.4084733724594116, |
| "learning_rate": 5.8808426643060265e-06, |
| "loss": 0.3474, |
| "step": 7490 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 1.2051564455032349, |
| "learning_rate": 5.869386740667478e-06, |
| "loss": 0.3169, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.502, |
| "grad_norm": 1.2120217084884644, |
| "learning_rate": 5.857926108936015e-06, |
| "loss": 0.3545, |
| "step": 7510 |
| }, |
| { |
| "epoch": 1.504, |
| "grad_norm": 1.084529161453247, |
| "learning_rate": 5.84646083117576e-06, |
| "loss": 0.3407, |
| "step": 7520 |
| }, |
| { |
| "epoch": 1.506, |
| "grad_norm": 2.2102389335632324, |
| "learning_rate": 5.834990969475984e-06, |
| "loss": 0.3275, |
| "step": 7530 |
| }, |
| { |
| "epoch": 1.508, |
| "grad_norm": 1.6285516023635864, |
| "learning_rate": 5.823516585950787e-06, |
| "loss": 0.2969, |
| "step": 7540 |
| }, |
| { |
| "epoch": 1.51, |
| "grad_norm": 1.2168736457824707, |
| "learning_rate": 5.812037742738759e-06, |
| "loss": 0.3845, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.512, |
| "grad_norm": 1.2020785808563232, |
| "learning_rate": 5.800554502002635e-06, |
| "loss": 0.2706, |
| "step": 7560 |
| }, |
| { |
| "epoch": 1.514, |
| "grad_norm": 1.3338371515274048, |
| "learning_rate": 5.78906692592897e-06, |
| "loss": 0.3217, |
| "step": 7570 |
| }, |
| { |
| "epoch": 1.516, |
| "grad_norm": 1.2562854290008545, |
| "learning_rate": 5.77757507672779e-06, |
| "loss": 0.338, |
| "step": 7580 |
| }, |
| { |
| "epoch": 1.518, |
| "grad_norm": 1.2902873754501343, |
| "learning_rate": 5.766079016632272e-06, |
| "loss": 0.3115, |
| "step": 7590 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 1.3528056144714355, |
| "learning_rate": 5.7545788078983875e-06, |
| "loss": 0.3526, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.522, |
| "grad_norm": 0.9707136750221252, |
| "learning_rate": 5.743074512804579e-06, |
| "loss": 0.3693, |
| "step": 7610 |
| }, |
| { |
| "epoch": 1.524, |
| "grad_norm": 1.6493264436721802, |
| "learning_rate": 5.731566193651416e-06, |
| "loss": 0.3304, |
| "step": 7620 |
| }, |
| { |
| "epoch": 1.526, |
| "grad_norm": 1.095731258392334, |
| "learning_rate": 5.720053912761261e-06, |
| "loss": 0.3102, |
| "step": 7630 |
| }, |
| { |
| "epoch": 1.528, |
| "grad_norm": 1.691691279411316, |
| "learning_rate": 5.708537732477934e-06, |
| "loss": 0.3019, |
| "step": 7640 |
| }, |
| { |
| "epoch": 1.53, |
| "grad_norm": 1.4168593883514404, |
| "learning_rate": 5.697017715166366e-06, |
| "loss": 0.3338, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.532, |
| "grad_norm": 1.5588029623031616, |
| "learning_rate": 5.685493923212273e-06, |
| "loss": 0.2633, |
| "step": 7660 |
| }, |
| { |
| "epoch": 1.534, |
| "grad_norm": 1.6496397256851196, |
| "learning_rate": 5.673966419021806e-06, |
| "loss": 0.4173, |
| "step": 7670 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 1.0448615550994873, |
| "learning_rate": 5.662435265021225e-06, |
| "loss": 0.2849, |
| "step": 7680 |
| }, |
| { |
| "epoch": 1.538, |
| "grad_norm": 1.2294347286224365, |
| "learning_rate": 5.650900523656553e-06, |
| "loss": 0.2609, |
| "step": 7690 |
| }, |
| { |
| "epoch": 1.54, |
| "grad_norm": 1.5806496143341064, |
| "learning_rate": 5.63936225739324e-06, |
| "loss": 0.3771, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.542, |
| "grad_norm": 1.1037795543670654, |
| "learning_rate": 5.627820528715824e-06, |
| "loss": 0.3149, |
| "step": 7710 |
| }, |
| { |
| "epoch": 1.544, |
| "grad_norm": 0.8963010907173157, |
| "learning_rate": 5.616275400127594e-06, |
| "loss": 0.3073, |
| "step": 7720 |
| }, |
| { |
| "epoch": 1.546, |
| "grad_norm": 1.5788958072662354, |
| "learning_rate": 5.604726934150253e-06, |
| "loss": 0.3628, |
| "step": 7730 |
| }, |
| { |
| "epoch": 1.548, |
| "grad_norm": 1.0473259687423706, |
| "learning_rate": 5.593175193323574e-06, |
| "loss": 0.3114, |
| "step": 7740 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 1.295811414718628, |
| "learning_rate": 5.581620240205068e-06, |
| "loss": 0.4363, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.552, |
| "grad_norm": 1.9268922805786133, |
| "learning_rate": 5.57006213736964e-06, |
| "loss": 0.2928, |
| "step": 7760 |
| }, |
| { |
| "epoch": 1.554, |
| "grad_norm": 1.4225900173187256, |
| "learning_rate": 5.558500947409249e-06, |
| "loss": 0.3989, |
| "step": 7770 |
| }, |
| { |
| "epoch": 1.556, |
| "grad_norm": 1.2886687517166138, |
| "learning_rate": 5.546936732932578e-06, |
| "loss": 0.3041, |
| "step": 7780 |
| }, |
| { |
| "epoch": 1.558, |
| "grad_norm": 1.265717625617981, |
| "learning_rate": 5.535369556564687e-06, |
| "loss": 0.3384, |
| "step": 7790 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 1.2660807371139526, |
| "learning_rate": 5.523799480946673e-06, |
| "loss": 0.294, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.562, |
| "grad_norm": 1.319225788116455, |
| "learning_rate": 5.512226568735338e-06, |
| "loss": 0.3159, |
| "step": 7810 |
| }, |
| { |
| "epoch": 1.564, |
| "grad_norm": 1.193955659866333, |
| "learning_rate": 5.500650882602842e-06, |
| "loss": 0.3044, |
| "step": 7820 |
| }, |
| { |
| "epoch": 1.5659999999999998, |
| "grad_norm": 1.2253046035766602, |
| "learning_rate": 5.489072485236368e-06, |
| "loss": 0.3376, |
| "step": 7830 |
| }, |
| { |
| "epoch": 1.568, |
| "grad_norm": 1.0734641551971436, |
| "learning_rate": 5.477491439337782e-06, |
| "loss": 0.3555, |
| "step": 7840 |
| }, |
| { |
| "epoch": 1.5699999999999998, |
| "grad_norm": 1.7126284837722778, |
| "learning_rate": 5.4659078076232906e-06, |
| "loss": 0.3871, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.572, |
| "grad_norm": 1.323228359222412, |
| "learning_rate": 5.45432165282311e-06, |
| "loss": 0.3337, |
| "step": 7860 |
| }, |
| { |
| "epoch": 1.5739999999999998, |
| "grad_norm": 1.7261728048324585, |
| "learning_rate": 5.442733037681112e-06, |
| "loss": 0.2912, |
| "step": 7870 |
| }, |
| { |
| "epoch": 1.576, |
| "grad_norm": 1.790432095527649, |
| "learning_rate": 5.431142024954496e-06, |
| "loss": 0.3271, |
| "step": 7880 |
| }, |
| { |
| "epoch": 1.5779999999999998, |
| "grad_norm": 1.6568429470062256, |
| "learning_rate": 5.419548677413445e-06, |
| "loss": 0.3535, |
| "step": 7890 |
| }, |
| { |
| "epoch": 1.58, |
| "grad_norm": 1.545526146888733, |
| "learning_rate": 5.4079530578407895e-06, |
| "loss": 0.348, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.5819999999999999, |
| "grad_norm": 1.8154616355895996, |
| "learning_rate": 5.396355229031656e-06, |
| "loss": 0.2892, |
| "step": 7910 |
| }, |
| { |
| "epoch": 1.584, |
| "grad_norm": 0.9787377715110779, |
| "learning_rate": 5.3847552537931395e-06, |
| "loss": 0.2931, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.5859999999999999, |
| "grad_norm": 1.3805267810821533, |
| "learning_rate": 5.373153194943962e-06, |
| "loss": 0.3615, |
| "step": 7930 |
| }, |
| { |
| "epoch": 1.588, |
| "grad_norm": 2.3790717124938965, |
| "learning_rate": 5.361549115314123e-06, |
| "loss": 0.3537, |
| "step": 7940 |
| }, |
| { |
| "epoch": 1.5899999999999999, |
| "grad_norm": 1.5367940664291382, |
| "learning_rate": 5.349943077744573e-06, |
| "loss": 0.3413, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.592, |
| "grad_norm": 1.2089755535125732, |
| "learning_rate": 5.338335145086855e-06, |
| "loss": 0.3632, |
| "step": 7960 |
| }, |
| { |
| "epoch": 1.5939999999999999, |
| "grad_norm": 1.4796146154403687, |
| "learning_rate": 5.3267253802027826e-06, |
| "loss": 0.4095, |
| "step": 7970 |
| }, |
| { |
| "epoch": 1.596, |
| "grad_norm": 1.4473693370819092, |
| "learning_rate": 5.315113845964091e-06, |
| "loss": 0.3714, |
| "step": 7980 |
| }, |
| { |
| "epoch": 1.5979999999999999, |
| "grad_norm": 1.5862455368041992, |
| "learning_rate": 5.303500605252095e-06, |
| "loss": 0.3283, |
| "step": 7990 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.4115406274795532, |
| "learning_rate": 5.291885720957351e-06, |
| "loss": 0.3127, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.6019999999999999, |
| "grad_norm": 1.6145530939102173, |
| "learning_rate": 5.2802692559793175e-06, |
| "loss": 0.4085, |
| "step": 8010 |
| }, |
| { |
| "epoch": 1.604, |
| "grad_norm": 1.8569374084472656, |
| "learning_rate": 5.268651273226011e-06, |
| "loss": 0.2898, |
| "step": 8020 |
| }, |
| { |
| "epoch": 1.6059999999999999, |
| "grad_norm": 1.736876368522644, |
| "learning_rate": 5.2570318356136705e-06, |
| "loss": 0.3503, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.608, |
| "grad_norm": 1.5102406740188599, |
| "learning_rate": 5.2454110060664075e-06, |
| "loss": 0.3111, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.6099999999999999, |
| "grad_norm": 1.7225403785705566, |
| "learning_rate": 5.233788847515882e-06, |
| "loss": 0.3586, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.612, |
| "grad_norm": 1.0642247200012207, |
| "learning_rate": 5.222165422900939e-06, |
| "loss": 0.3721, |
| "step": 8060 |
| }, |
| { |
| "epoch": 1.6139999999999999, |
| "grad_norm": 1.3807940483093262, |
| "learning_rate": 5.210540795167287e-06, |
| "loss": 0.398, |
| "step": 8070 |
| }, |
| { |
| "epoch": 1.616, |
| "grad_norm": 1.396348237991333, |
| "learning_rate": 5.19891502726715e-06, |
| "loss": 0.3657, |
| "step": 8080 |
| }, |
| { |
| "epoch": 1.6179999999999999, |
| "grad_norm": 0.8553020358085632, |
| "learning_rate": 5.187288182158924e-06, |
| "loss": 0.3018, |
| "step": 8090 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 1.608389973640442, |
| "learning_rate": 5.175660322806838e-06, |
| "loss": 0.3563, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.6219999999999999, |
| "grad_norm": 1.3906745910644531, |
| "learning_rate": 5.164031512180616e-06, |
| "loss": 0.3186, |
| "step": 8110 |
| }, |
| { |
| "epoch": 1.624, |
| "grad_norm": 1.6109449863433838, |
| "learning_rate": 5.152401813255134e-06, |
| "loss": 0.3028, |
| "step": 8120 |
| }, |
| { |
| "epoch": 1.626, |
| "grad_norm": 1.3031402826309204, |
| "learning_rate": 5.140771289010073e-06, |
| "loss": 0.3089, |
| "step": 8130 |
| }, |
| { |
| "epoch": 1.6280000000000001, |
| "grad_norm": 0.8983224034309387, |
| "learning_rate": 5.1291400024295946e-06, |
| "loss": 0.3388, |
| "step": 8140 |
| }, |
| { |
| "epoch": 1.63, |
| "grad_norm": 1.6017338037490845, |
| "learning_rate": 5.117508016501973e-06, |
| "loss": 0.3465, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.6320000000000001, |
| "grad_norm": 1.4374866485595703, |
| "learning_rate": 5.105875394219283e-06, |
| "loss": 0.3152, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.634, |
| "grad_norm": 1.3967483043670654, |
| "learning_rate": 5.0942421985770415e-06, |
| "loss": 0.3158, |
| "step": 8170 |
| }, |
| { |
| "epoch": 1.6360000000000001, |
| "grad_norm": 1.0798684358596802, |
| "learning_rate": 5.0826084925738675e-06, |
| "loss": 0.3273, |
| "step": 8180 |
| }, |
| { |
| "epoch": 1.638, |
| "grad_norm": 1.1431955099105835, |
| "learning_rate": 5.070974339211148e-06, |
| "loss": 0.4185, |
| "step": 8190 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 1.4295889139175415, |
| "learning_rate": 5.059339801492687e-06, |
| "loss": 0.3667, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.642, |
| "grad_norm": 1.2767932415008545, |
| "learning_rate": 5.047704942424377e-06, |
| "loss": 0.3993, |
| "step": 8210 |
| }, |
| { |
| "epoch": 1.6440000000000001, |
| "grad_norm": 1.4808293581008911, |
| "learning_rate": 5.0360698250138465e-06, |
| "loss": 0.3618, |
| "step": 8220 |
| }, |
| { |
| "epoch": 1.646, |
| "grad_norm": 1.2203787565231323, |
| "learning_rate": 5.024434512270123e-06, |
| "loss": 0.3237, |
| "step": 8230 |
| }, |
| { |
| "epoch": 1.6480000000000001, |
| "grad_norm": 1.2480844259262085, |
| "learning_rate": 5.0127990672032945e-06, |
| "loss": 0.3361, |
| "step": 8240 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 1.269690990447998, |
| "learning_rate": 5.001163552824162e-06, |
| "loss": 0.2754, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.6520000000000001, |
| "grad_norm": 1.5688775777816772, |
| "learning_rate": 4.9895280321439036e-06, |
| "loss": 0.3312, |
| "step": 8260 |
| }, |
| { |
| "epoch": 1.654, |
| "grad_norm": 1.5578945875167847, |
| "learning_rate": 4.977892568173733e-06, |
| "loss": 0.3141, |
| "step": 8270 |
| }, |
| { |
| "epoch": 1.6560000000000001, |
| "grad_norm": 1.0505149364471436, |
| "learning_rate": 4.966257223924554e-06, |
| "loss": 0.2919, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.658, |
| "grad_norm": 1.2484036684036255, |
| "learning_rate": 4.954622062406623e-06, |
| "loss": 0.3513, |
| "step": 8290 |
| }, |
| { |
| "epoch": 1.6600000000000001, |
| "grad_norm": 1.8447239398956299, |
| "learning_rate": 4.94298714662921e-06, |
| "loss": 0.3111, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.662, |
| "grad_norm": 1.375136137008667, |
| "learning_rate": 4.931352539600248e-06, |
| "loss": 0.2827, |
| "step": 8310 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 1.4478310346603394, |
| "learning_rate": 4.9197183043260035e-06, |
| "loss": 0.3177, |
| "step": 8320 |
| }, |
| { |
| "epoch": 1.666, |
| "grad_norm": 1.2656853199005127, |
| "learning_rate": 4.9080845038107264e-06, |
| "loss": 0.2888, |
| "step": 8330 |
| }, |
| { |
| "epoch": 1.6680000000000001, |
| "grad_norm": 1.177017092704773, |
| "learning_rate": 4.896451201056315e-06, |
| "loss": 0.365, |
| "step": 8340 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 1.2763571739196777, |
| "learning_rate": 4.88481845906197e-06, |
| "loss": 0.2697, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.6720000000000002, |
| "grad_norm": 1.6193398237228394, |
| "learning_rate": 4.873186340823854e-06, |
| "loss": 0.2913, |
| "step": 8360 |
| }, |
| { |
| "epoch": 1.674, |
| "grad_norm": 1.353211760520935, |
| "learning_rate": 4.861554909334757e-06, |
| "loss": 0.3652, |
| "step": 8370 |
| }, |
| { |
| "epoch": 1.6760000000000002, |
| "grad_norm": 1.4687409400939941, |
| "learning_rate": 4.8499242275837444e-06, |
| "loss": 0.3417, |
| "step": 8380 |
| }, |
| { |
| "epoch": 1.678, |
| "grad_norm": 1.644424557685852, |
| "learning_rate": 4.838294358555824e-06, |
| "loss": 0.3228, |
| "step": 8390 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 1.8202049732208252, |
| "learning_rate": 4.826665365231601e-06, |
| "loss": 0.2984, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.682, |
| "grad_norm": 0.9253039360046387, |
| "learning_rate": 4.815037310586941e-06, |
| "loss": 0.3807, |
| "step": 8410 |
| }, |
| { |
| "epoch": 1.6840000000000002, |
| "grad_norm": 1.1849908828735352, |
| "learning_rate": 4.803410257592625e-06, |
| "loss": 0.2896, |
| "step": 8420 |
| }, |
| { |
| "epoch": 1.686, |
| "grad_norm": 1.0804787874221802, |
| "learning_rate": 4.791784269214004e-06, |
| "loss": 0.3129, |
| "step": 8430 |
| }, |
| { |
| "epoch": 1.688, |
| "grad_norm": 1.3081520795822144, |
| "learning_rate": 4.780159408410677e-06, |
| "loss": 0.3055, |
| "step": 8440 |
| }, |
| { |
| "epoch": 1.69, |
| "grad_norm": 1.2547225952148438, |
| "learning_rate": 4.7685357381361224e-06, |
| "loss": 0.3321, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.692, |
| "grad_norm": 1.228356957435608, |
| "learning_rate": 4.75691332133738e-06, |
| "loss": 0.3556, |
| "step": 8460 |
| }, |
| { |
| "epoch": 1.694, |
| "grad_norm": 1.2856372594833374, |
| "learning_rate": 4.745292220954696e-06, |
| "loss": 0.3334, |
| "step": 8470 |
| }, |
| { |
| "epoch": 1.696, |
| "grad_norm": 1.2076910734176636, |
| "learning_rate": 4.733672499921195e-06, |
| "loss": 0.2957, |
| "step": 8480 |
| }, |
| { |
| "epoch": 1.698, |
| "grad_norm": 1.3459007740020752, |
| "learning_rate": 4.722054221162528e-06, |
| "loss": 0.3326, |
| "step": 8490 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 1.899490475654602, |
| "learning_rate": 4.710437447596528e-06, |
| "loss": 0.3721, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.702, |
| "grad_norm": 1.0619953870773315, |
| "learning_rate": 4.698822242132891e-06, |
| "loss": 0.3474, |
| "step": 8510 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 1.1741877794265747, |
| "learning_rate": 4.687208667672812e-06, |
| "loss": 0.3206, |
| "step": 8520 |
| }, |
| { |
| "epoch": 1.706, |
| "grad_norm": 1.295904517173767, |
| "learning_rate": 4.675596787108652e-06, |
| "loss": 0.3445, |
| "step": 8530 |
| }, |
| { |
| "epoch": 1.708, |
| "grad_norm": 1.2269822359085083, |
| "learning_rate": 4.6639866633236066e-06, |
| "loss": 0.32, |
| "step": 8540 |
| }, |
| { |
| "epoch": 1.71, |
| "grad_norm": 1.142608404159546, |
| "learning_rate": 4.652378359191352e-06, |
| "loss": 0.3178, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.712, |
| "grad_norm": 1.4112498760223389, |
| "learning_rate": 4.6407719375757095e-06, |
| "loss": 0.3598, |
| "step": 8560 |
| }, |
| { |
| "epoch": 1.714, |
| "grad_norm": 1.3373513221740723, |
| "learning_rate": 4.629167461330308e-06, |
| "loss": 0.3895, |
| "step": 8570 |
| }, |
| { |
| "epoch": 1.716, |
| "grad_norm": 1.5300648212432861, |
| "learning_rate": 4.617564993298244e-06, |
| "loss": 0.3675, |
| "step": 8580 |
| }, |
| { |
| "epoch": 1.718, |
| "grad_norm": 1.3708034753799438, |
| "learning_rate": 4.605964596311733e-06, |
| "loss": 0.3543, |
| "step": 8590 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 1.1198241710662842, |
| "learning_rate": 4.594366333191778e-06, |
| "loss": 0.4109, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.722, |
| "grad_norm": 1.1231240034103394, |
| "learning_rate": 4.5827702667478275e-06, |
| "loss": 0.3643, |
| "step": 8610 |
| }, |
| { |
| "epoch": 1.724, |
| "grad_norm": 1.6655538082122803, |
| "learning_rate": 4.571176459777431e-06, |
| "loss": 0.3271, |
| "step": 8620 |
| }, |
| { |
| "epoch": 1.726, |
| "grad_norm": 1.260842204093933, |
| "learning_rate": 4.559584975065905e-06, |
| "loss": 0.3342, |
| "step": 8630 |
| }, |
| { |
| "epoch": 1.728, |
| "grad_norm": 1.2399426698684692, |
| "learning_rate": 4.547995875385986e-06, |
| "loss": 0.2852, |
| "step": 8640 |
| }, |
| { |
| "epoch": 1.73, |
| "grad_norm": 1.537028193473816, |
| "learning_rate": 4.5364092234975e-06, |
| "loss": 0.4146, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.732, |
| "grad_norm": 1.4281028509140015, |
| "learning_rate": 4.524825082147013e-06, |
| "loss": 0.2961, |
| "step": 8660 |
| }, |
| { |
| "epoch": 1.734, |
| "grad_norm": 1.294998049736023, |
| "learning_rate": 4.513243514067495e-06, |
| "loss": 0.2937, |
| "step": 8670 |
| }, |
| { |
| "epoch": 1.736, |
| "grad_norm": 1.0920134782791138, |
| "learning_rate": 4.5016645819779865e-06, |
| "loss": 0.3434, |
| "step": 8680 |
| }, |
| { |
| "epoch": 1.738, |
| "grad_norm": 1.160057544708252, |
| "learning_rate": 4.490088348583246e-06, |
| "loss": 0.2608, |
| "step": 8690 |
| }, |
| { |
| "epoch": 1.74, |
| "grad_norm": 1.2526758909225464, |
| "learning_rate": 4.47851487657342e-06, |
| "loss": 0.3652, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.742, |
| "grad_norm": 1.275640845298767, |
| "learning_rate": 4.466944228623701e-06, |
| "loss": 0.3862, |
| "step": 8710 |
| }, |
| { |
| "epoch": 1.744, |
| "grad_norm": 1.3032732009887695, |
| "learning_rate": 4.455376467393991e-06, |
| "loss": 0.2721, |
| "step": 8720 |
| }, |
| { |
| "epoch": 1.746, |
| "grad_norm": 1.2573343515396118, |
| "learning_rate": 4.443811655528553e-06, |
| "loss": 0.3276, |
| "step": 8730 |
| }, |
| { |
| "epoch": 1.748, |
| "grad_norm": 1.478424310684204, |
| "learning_rate": 4.432249855655681e-06, |
| "loss": 0.3695, |
| "step": 8740 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 1.3663966655731201, |
| "learning_rate": 4.420691130387365e-06, |
| "loss": 0.3233, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.752, |
| "grad_norm": 1.5956822633743286, |
| "learning_rate": 4.409135542318931e-06, |
| "loss": 0.3789, |
| "step": 8760 |
| }, |
| { |
| "epoch": 1.754, |
| "grad_norm": 1.566262125968933, |
| "learning_rate": 4.397583154028725e-06, |
| "loss": 0.3546, |
| "step": 8770 |
| }, |
| { |
| "epoch": 1.756, |
| "grad_norm": 1.0735946893692017, |
| "learning_rate": 4.38603402807776e-06, |
| "loss": 0.3638, |
| "step": 8780 |
| }, |
| { |
| "epoch": 1.758, |
| "grad_norm": 1.182511329650879, |
| "learning_rate": 4.374488227009391e-06, |
| "loss": 0.3271, |
| "step": 8790 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 1.5073894262313843, |
| "learning_rate": 4.362945813348956e-06, |
| "loss": 0.2934, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.762, |
| "grad_norm": 1.476199746131897, |
| "learning_rate": 4.351406849603452e-06, |
| "loss": 0.3143, |
| "step": 8810 |
| }, |
| { |
| "epoch": 1.764, |
| "grad_norm": 1.2955067157745361, |
| "learning_rate": 4.3398713982612e-06, |
| "loss": 0.3358, |
| "step": 8820 |
| }, |
| { |
| "epoch": 1.766, |
| "grad_norm": 1.4470880031585693, |
| "learning_rate": 4.328339521791493e-06, |
| "loss": 0.3117, |
| "step": 8830 |
| }, |
| { |
| "epoch": 1.768, |
| "grad_norm": 1.264491319656372, |
| "learning_rate": 4.316811282644265e-06, |
| "loss": 0.3316, |
| "step": 8840 |
| }, |
| { |
| "epoch": 1.77, |
| "grad_norm": 0.9871441125869751, |
| "learning_rate": 4.305286743249756e-06, |
| "loss": 0.3224, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.772, |
| "grad_norm": 1.7321498394012451, |
| "learning_rate": 4.293765966018167e-06, |
| "loss": 0.3177, |
| "step": 8860 |
| }, |
| { |
| "epoch": 1.774, |
| "grad_norm": 1.4253642559051514, |
| "learning_rate": 4.282249013339328e-06, |
| "loss": 0.396, |
| "step": 8870 |
| }, |
| { |
| "epoch": 1.776, |
| "grad_norm": 1.1400525569915771, |
| "learning_rate": 4.270735947582352e-06, |
| "loss": 0.3207, |
| "step": 8880 |
| }, |
| { |
| "epoch": 1.778, |
| "grad_norm": 1.3001306056976318, |
| "learning_rate": 4.259226831095311e-06, |
| "loss": 0.3579, |
| "step": 8890 |
| }, |
| { |
| "epoch": 1.78, |
| "grad_norm": 0.9259517788887024, |
| "learning_rate": 4.247721726204883e-06, |
| "loss": 0.3522, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.782, |
| "grad_norm": 1.352211594581604, |
| "learning_rate": 4.236220695216024e-06, |
| "loss": 0.3184, |
| "step": 8910 |
| }, |
| { |
| "epoch": 1.784, |
| "grad_norm": 1.4260315895080566, |
| "learning_rate": 4.224723800411631e-06, |
| "loss": 0.3632, |
| "step": 8920 |
| }, |
| { |
| "epoch": 1.786, |
| "grad_norm": 1.3724855184555054, |
| "learning_rate": 4.2132311040521975e-06, |
| "loss": 0.314, |
| "step": 8930 |
| }, |
| { |
| "epoch": 1.788, |
| "grad_norm": 1.4513239860534668, |
| "learning_rate": 4.201742668375481e-06, |
| "loss": 0.3224, |
| "step": 8940 |
| }, |
| { |
| "epoch": 1.79, |
| "grad_norm": 1.2112324237823486, |
| "learning_rate": 4.190258555596168e-06, |
| "loss": 0.3068, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 1.5572727918624878, |
| "learning_rate": 4.178778827905535e-06, |
| "loss": 0.3771, |
| "step": 8960 |
| }, |
| { |
| "epoch": 1.794, |
| "grad_norm": 0.9895251989364624, |
| "learning_rate": 4.167303547471108e-06, |
| "loss": 0.3205, |
| "step": 8970 |
| }, |
| { |
| "epoch": 1.796, |
| "grad_norm": 1.2107610702514648, |
| "learning_rate": 4.155832776436331e-06, |
| "loss": 0.353, |
| "step": 8980 |
| }, |
| { |
| "epoch": 1.798, |
| "grad_norm": 1.2437814474105835, |
| "learning_rate": 4.14436657692023e-06, |
| "loss": 0.3566, |
| "step": 8990 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 1.1405911445617676, |
| "learning_rate": 4.132905011017071e-06, |
| "loss": 0.3001, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.802, |
| "grad_norm": 1.2086176872253418, |
| "learning_rate": 4.121448140796029e-06, |
| "loss": 0.3399, |
| "step": 9010 |
| }, |
| { |
| "epoch": 1.804, |
| "grad_norm": 1.3747698068618774, |
| "learning_rate": 4.109996028300847e-06, |
| "loss": 0.3545, |
| "step": 9020 |
| }, |
| { |
| "epoch": 1.806, |
| "grad_norm": 1.1939738988876343, |
| "learning_rate": 4.098548735549508e-06, |
| "loss": 0.3397, |
| "step": 9030 |
| }, |
| { |
| "epoch": 1.808, |
| "grad_norm": 1.5216190814971924, |
| "learning_rate": 4.087106324533891e-06, |
| "loss": 0.3071, |
| "step": 9040 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 1.557185411453247, |
| "learning_rate": 4.075668857219436e-06, |
| "loss": 0.2898, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.812, |
| "grad_norm": 1.694498896598816, |
| "learning_rate": 4.0642363955448175e-06, |
| "loss": 0.3705, |
| "step": 9060 |
| }, |
| { |
| "epoch": 1.814, |
| "grad_norm": 1.7340755462646484, |
| "learning_rate": 4.052809001421595e-06, |
| "loss": 0.3218, |
| "step": 9070 |
| }, |
| { |
| "epoch": 1.8159999999999998, |
| "grad_norm": 1.123704195022583, |
| "learning_rate": 4.041386736733889e-06, |
| "loss": 0.3743, |
| "step": 9080 |
| }, |
| { |
| "epoch": 1.818, |
| "grad_norm": 1.6737864017486572, |
| "learning_rate": 4.029969663338042e-06, |
| "loss": 0.3169, |
| "step": 9090 |
| }, |
| { |
| "epoch": 1.8199999999999998, |
| "grad_norm": 1.1411502361297607, |
| "learning_rate": 4.018557843062282e-06, |
| "loss": 0.2783, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.822, |
| "grad_norm": 1.4459763765335083, |
| "learning_rate": 4.007151337706391e-06, |
| "loss": 0.3288, |
| "step": 9110 |
| }, |
| { |
| "epoch": 1.8239999999999998, |
| "grad_norm": 1.1251758337020874, |
| "learning_rate": 3.995750209041365e-06, |
| "loss": 0.2966, |
| "step": 9120 |
| }, |
| { |
| "epoch": 1.826, |
| "grad_norm": 1.534656286239624, |
| "learning_rate": 3.98435451880909e-06, |
| "loss": 0.3306, |
| "step": 9130 |
| }, |
| { |
| "epoch": 1.8279999999999998, |
| "grad_norm": 1.2617437839508057, |
| "learning_rate": 3.972964328721992e-06, |
| "loss": 0.2749, |
| "step": 9140 |
| }, |
| { |
| "epoch": 1.83, |
| "grad_norm": 1.1563127040863037, |
| "learning_rate": 3.961579700462715e-06, |
| "loss": 0.348, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.8319999999999999, |
| "grad_norm": 1.5472575426101685, |
| "learning_rate": 3.950200695683788e-06, |
| "loss": 0.297, |
| "step": 9160 |
| }, |
| { |
| "epoch": 1.834, |
| "grad_norm": 1.342626690864563, |
| "learning_rate": 3.938827376007281e-06, |
| "loss": 0.3328, |
| "step": 9170 |
| }, |
| { |
| "epoch": 1.8359999999999999, |
| "grad_norm": 1.0215874910354614, |
| "learning_rate": 3.927459803024475e-06, |
| "loss": 0.3094, |
| "step": 9180 |
| }, |
| { |
| "epoch": 1.838, |
| "grad_norm": 1.6598362922668457, |
| "learning_rate": 3.9160980382955336e-06, |
| "loss": 0.3091, |
| "step": 9190 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 1.050584077835083, |
| "learning_rate": 3.904742143349169e-06, |
| "loss": 0.3078, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.842, |
| "grad_norm": 1.560577630996704, |
| "learning_rate": 3.893392179682304e-06, |
| "loss": 0.323, |
| "step": 9210 |
| }, |
| { |
| "epoch": 1.8439999999999999, |
| "grad_norm": 1.0825576782226562, |
| "learning_rate": 3.882048208759735e-06, |
| "loss": 0.3313, |
| "step": 9220 |
| }, |
| { |
| "epoch": 1.846, |
| "grad_norm": 1.0511835813522339, |
| "learning_rate": 3.870710292013815e-06, |
| "loss": 0.3295, |
| "step": 9230 |
| }, |
| { |
| "epoch": 1.8479999999999999, |
| "grad_norm": 1.3847706317901611, |
| "learning_rate": 3.859378490844104e-06, |
| "loss": 0.3417, |
| "step": 9240 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 1.1455384492874146, |
| "learning_rate": 3.8480528666170495e-06, |
| "loss": 0.3394, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.8519999999999999, |
| "grad_norm": 1.5466185808181763, |
| "learning_rate": 3.836733480665637e-06, |
| "loss": 0.3222, |
| "step": 9260 |
| }, |
| { |
| "epoch": 1.854, |
| "grad_norm": 1.2878413200378418, |
| "learning_rate": 3.825420394289085e-06, |
| "loss": 0.3517, |
| "step": 9270 |
| }, |
| { |
| "epoch": 1.8559999999999999, |
| "grad_norm": 0.8397305011749268, |
| "learning_rate": 3.814113668752486e-06, |
| "loss": 0.3493, |
| "step": 9280 |
| }, |
| { |
| "epoch": 1.858, |
| "grad_norm": 1.3638249635696411, |
| "learning_rate": 3.8028133652864872e-06, |
| "loss": 0.4023, |
| "step": 9290 |
| }, |
| { |
| "epoch": 1.8599999999999999, |
| "grad_norm": 1.1861424446105957, |
| "learning_rate": 3.791519545086963e-06, |
| "loss": 0.3283, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.862, |
| "grad_norm": 1.501705527305603, |
| "learning_rate": 3.7802322693146726e-06, |
| "loss": 0.3947, |
| "step": 9310 |
| }, |
| { |
| "epoch": 1.8639999999999999, |
| "grad_norm": 1.1663841009140015, |
| "learning_rate": 3.7689515990949364e-06, |
| "loss": 0.3183, |
| "step": 9320 |
| }, |
| { |
| "epoch": 1.866, |
| "grad_norm": 1.418528437614441, |
| "learning_rate": 3.757677595517302e-06, |
| "loss": 0.2897, |
| "step": 9330 |
| }, |
| { |
| "epoch": 1.8679999999999999, |
| "grad_norm": 1.609471321105957, |
| "learning_rate": 3.7464103196352176e-06, |
| "loss": 0.3356, |
| "step": 9340 |
| }, |
| { |
| "epoch": 1.87, |
| "grad_norm": 1.3416111469268799, |
| "learning_rate": 3.7351498324656944e-06, |
| "loss": 0.3206, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.8719999999999999, |
| "grad_norm": 1.485053539276123, |
| "learning_rate": 3.7238961949889796e-06, |
| "loss": 0.3133, |
| "step": 9360 |
| }, |
| { |
| "epoch": 1.874, |
| "grad_norm": 1.3999130725860596, |
| "learning_rate": 3.7126494681482317e-06, |
| "loss": 0.367, |
| "step": 9370 |
| }, |
| { |
| "epoch": 1.876, |
| "grad_norm": 1.3190253973007202, |
| "learning_rate": 3.70140971284918e-06, |
| "loss": 0.3881, |
| "step": 9380 |
| }, |
| { |
| "epoch": 1.8780000000000001, |
| "grad_norm": 1.468934416770935, |
| "learning_rate": 3.690176989959801e-06, |
| "loss": 0.3039, |
| "step": 9390 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 1.4125316143035889, |
| "learning_rate": 3.678951360309988e-06, |
| "loss": 0.3603, |
| "step": 9400 |
| }, |
| { |
| "epoch": 1.8820000000000001, |
| "grad_norm": 1.2523292303085327, |
| "learning_rate": 3.6677328846912237e-06, |
| "loss": 0.3139, |
| "step": 9410 |
| }, |
| { |
| "epoch": 1.884, |
| "grad_norm": 1.6310887336730957, |
| "learning_rate": 3.6565216238562464e-06, |
| "loss": 0.342, |
| "step": 9420 |
| }, |
| { |
| "epoch": 1.8860000000000001, |
| "grad_norm": 1.028517723083496, |
| "learning_rate": 3.645317638518721e-06, |
| "loss": 0.3088, |
| "step": 9430 |
| }, |
| { |
| "epoch": 1.888, |
| "grad_norm": 1.4186135530471802, |
| "learning_rate": 3.6341209893529195e-06, |
| "loss": 0.3238, |
| "step": 9440 |
| }, |
| { |
| "epoch": 1.8900000000000001, |
| "grad_norm": 1.2561564445495605, |
| "learning_rate": 3.6229317369933786e-06, |
| "loss": 0.3813, |
| "step": 9450 |
| }, |
| { |
| "epoch": 1.892, |
| "grad_norm": 1.3972420692443848, |
| "learning_rate": 3.61174994203458e-06, |
| "loss": 0.3561, |
| "step": 9460 |
| }, |
| { |
| "epoch": 1.8940000000000001, |
| "grad_norm": 1.1440085172653198, |
| "learning_rate": 3.6005756650306258e-06, |
| "loss": 0.2912, |
| "step": 9470 |
| }, |
| { |
| "epoch": 1.896, |
| "grad_norm": 1.243796944618225, |
| "learning_rate": 3.589408966494897e-06, |
| "loss": 0.2798, |
| "step": 9480 |
| }, |
| { |
| "epoch": 1.8980000000000001, |
| "grad_norm": 1.2843304872512817, |
| "learning_rate": 3.5782499068997386e-06, |
| "loss": 0.3649, |
| "step": 9490 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 1.2538890838623047, |
| "learning_rate": 3.5670985466761243e-06, |
| "loss": 0.3573, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.9020000000000001, |
| "grad_norm": 1.5263408422470093, |
| "learning_rate": 3.5559549462133407e-06, |
| "loss": 0.3468, |
| "step": 9510 |
| }, |
| { |
| "epoch": 1.904, |
| "grad_norm": 1.2119961977005005, |
| "learning_rate": 3.5448191658586423e-06, |
| "loss": 0.2936, |
| "step": 9520 |
| }, |
| { |
| "epoch": 1.9060000000000001, |
| "grad_norm": 1.630476474761963, |
| "learning_rate": 3.5336912659169366e-06, |
| "loss": 0.3447, |
| "step": 9530 |
| }, |
| { |
| "epoch": 1.908, |
| "grad_norm": 1.7474474906921387, |
| "learning_rate": 3.522571306650462e-06, |
| "loss": 0.3666, |
| "step": 9540 |
| }, |
| { |
| "epoch": 1.9100000000000001, |
| "grad_norm": 2.313354730606079, |
| "learning_rate": 3.511459348278448e-06, |
| "loss": 0.3717, |
| "step": 9550 |
| }, |
| { |
| "epoch": 1.912, |
| "grad_norm": 1.660021185874939, |
| "learning_rate": 3.5003554509767966e-06, |
| "loss": 0.3335, |
| "step": 9560 |
| }, |
| { |
| "epoch": 1.9140000000000001, |
| "grad_norm": 1.404435396194458, |
| "learning_rate": 3.4892596748777563e-06, |
| "loss": 0.3466, |
| "step": 9570 |
| }, |
| { |
| "epoch": 1.916, |
| "grad_norm": 1.2691371440887451, |
| "learning_rate": 3.4781720800696006e-06, |
| "loss": 0.3342, |
| "step": 9580 |
| }, |
| { |
| "epoch": 1.9180000000000001, |
| "grad_norm": 1.8161771297454834, |
| "learning_rate": 3.4670927265962908e-06, |
| "loss": 0.3479, |
| "step": 9590 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 1.5305116176605225, |
| "learning_rate": 3.4560216744571607e-06, |
| "loss": 0.3067, |
| "step": 9600 |
| }, |
| { |
| "epoch": 1.9220000000000002, |
| "grad_norm": 1.5921387672424316, |
| "learning_rate": 3.444958983606592e-06, |
| "loss": 0.3088, |
| "step": 9610 |
| }, |
| { |
| "epoch": 1.924, |
| "grad_norm": 1.490713119506836, |
| "learning_rate": 3.433904713953682e-06, |
| "loss": 0.3352, |
| "step": 9620 |
| }, |
| { |
| "epoch": 1.9260000000000002, |
| "grad_norm": 1.590400218963623, |
| "learning_rate": 3.4228589253619247e-06, |
| "loss": 0.3203, |
| "step": 9630 |
| }, |
| { |
| "epoch": 1.928, |
| "grad_norm": 1.140105128288269, |
| "learning_rate": 3.411821677648887e-06, |
| "loss": 0.3612, |
| "step": 9640 |
| }, |
| { |
| "epoch": 1.9300000000000002, |
| "grad_norm": 1.3013497591018677, |
| "learning_rate": 3.400793030585884e-06, |
| "loss": 0.277, |
| "step": 9650 |
| }, |
| { |
| "epoch": 1.932, |
| "grad_norm": 1.3757359981536865, |
| "learning_rate": 3.389773043897652e-06, |
| "loss": 0.3651, |
| "step": 9660 |
| }, |
| { |
| "epoch": 1.9340000000000002, |
| "grad_norm": 1.2634108066558838, |
| "learning_rate": 3.378761777262028e-06, |
| "loss": 0.3305, |
| "step": 9670 |
| }, |
| { |
| "epoch": 1.936, |
| "grad_norm": 1.5106260776519775, |
| "learning_rate": 3.36775929030963e-06, |
| "loss": 0.3067, |
| "step": 9680 |
| }, |
| { |
| "epoch": 1.938, |
| "grad_norm": 1.1745408773422241, |
| "learning_rate": 3.3567656426235275e-06, |
| "loss": 0.364, |
| "step": 9690 |
| }, |
| { |
| "epoch": 1.94, |
| "grad_norm": 1.7287566661834717, |
| "learning_rate": 3.34578089373892e-06, |
| "loss": 0.3663, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.942, |
| "grad_norm": 1.4090641736984253, |
| "learning_rate": 3.3348051031428184e-06, |
| "loss": 0.2921, |
| "step": 9710 |
| }, |
| { |
| "epoch": 1.944, |
| "grad_norm": 1.6581404209136963, |
| "learning_rate": 3.323838330273723e-06, |
| "loss": 0.4043, |
| "step": 9720 |
| }, |
| { |
| "epoch": 1.946, |
| "grad_norm": 1.7018346786499023, |
| "learning_rate": 3.312880634521295e-06, |
| "loss": 0.3818, |
| "step": 9730 |
| }, |
| { |
| "epoch": 1.948, |
| "grad_norm": 1.4111535549163818, |
| "learning_rate": 3.301932075226041e-06, |
| "loss": 0.33, |
| "step": 9740 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 1.4237406253814697, |
| "learning_rate": 3.2909927116789908e-06, |
| "loss": 0.3326, |
| "step": 9750 |
| }, |
| { |
| "epoch": 1.952, |
| "grad_norm": 1.3221118450164795, |
| "learning_rate": 3.280062603121373e-06, |
| "loss": 0.2659, |
| "step": 9760 |
| }, |
| { |
| "epoch": 1.954, |
| "grad_norm": 1.3768141269683838, |
| "learning_rate": 3.2691418087442995e-06, |
| "loss": 0.3813, |
| "step": 9770 |
| }, |
| { |
| "epoch": 1.956, |
| "grad_norm": 1.5262187719345093, |
| "learning_rate": 3.2582303876884406e-06, |
| "loss": 0.3554, |
| "step": 9780 |
| }, |
| { |
| "epoch": 1.958, |
| "grad_norm": 1.184778094291687, |
| "learning_rate": 3.247328399043706e-06, |
| "loss": 0.2866, |
| "step": 9790 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 1.3611551523208618, |
| "learning_rate": 3.2364359018489245e-06, |
| "loss": 0.3865, |
| "step": 9800 |
| }, |
| { |
| "epoch": 1.962, |
| "grad_norm": 1.296720027923584, |
| "learning_rate": 3.2255529550915242e-06, |
| "loss": 0.3272, |
| "step": 9810 |
| }, |
| { |
| "epoch": 1.964, |
| "grad_norm": 1.1612666845321655, |
| "learning_rate": 3.2146796177072183e-06, |
| "loss": 0.2417, |
| "step": 9820 |
| }, |
| { |
| "epoch": 1.966, |
| "grad_norm": 1.3343042135238647, |
| "learning_rate": 3.203815948579674e-06, |
| "loss": 0.2749, |
| "step": 9830 |
| }, |
| { |
| "epoch": 1.968, |
| "grad_norm": 1.5263704061508179, |
| "learning_rate": 3.192962006540205e-06, |
| "loss": 0.2788, |
| "step": 9840 |
| }, |
| { |
| "epoch": 1.97, |
| "grad_norm": 1.9184904098510742, |
| "learning_rate": 3.1821178503674515e-06, |
| "loss": 0.2875, |
| "step": 9850 |
| }, |
| { |
| "epoch": 1.972, |
| "grad_norm": 1.6000325679779053, |
| "learning_rate": 3.1712835387870527e-06, |
| "loss": 0.3293, |
| "step": 9860 |
| }, |
| { |
| "epoch": 1.974, |
| "grad_norm": 1.1482776403427124, |
| "learning_rate": 3.1604591304713394e-06, |
| "loss": 0.3441, |
| "step": 9870 |
| }, |
| { |
| "epoch": 1.976, |
| "grad_norm": 1.1263827085494995, |
| "learning_rate": 3.149644684039008e-06, |
| "loss": 0.3172, |
| "step": 9880 |
| }, |
| { |
| "epoch": 1.978, |
| "grad_norm": 1.3861206769943237, |
| "learning_rate": 3.1388402580548154e-06, |
| "loss": 0.3633, |
| "step": 9890 |
| }, |
| { |
| "epoch": 1.98, |
| "grad_norm": 1.7515361309051514, |
| "learning_rate": 3.1280459110292474e-06, |
| "loss": 0.3882, |
| "step": 9900 |
| }, |
| { |
| "epoch": 1.982, |
| "grad_norm": 1.409308671951294, |
| "learning_rate": 3.117261701418204e-06, |
| "loss": 0.3439, |
| "step": 9910 |
| }, |
| { |
| "epoch": 1.984, |
| "grad_norm": 1.6333212852478027, |
| "learning_rate": 3.106487687622697e-06, |
| "loss": 0.3062, |
| "step": 9920 |
| }, |
| { |
| "epoch": 1.986, |
| "grad_norm": 1.5096412897109985, |
| "learning_rate": 3.095723927988517e-06, |
| "loss": 0.3322, |
| "step": 9930 |
| }, |
| { |
| "epoch": 1.988, |
| "grad_norm": 0.9250149726867676, |
| "learning_rate": 3.0849704808059266e-06, |
| "loss": 0.3363, |
| "step": 9940 |
| }, |
| { |
| "epoch": 1.99, |
| "grad_norm": 1.4919676780700684, |
| "learning_rate": 3.074227404309336e-06, |
| "loss": 0.3459, |
| "step": 9950 |
| }, |
| { |
| "epoch": 1.992, |
| "grad_norm": 1.1677219867706299, |
| "learning_rate": 3.063494756677005e-06, |
| "loss": 0.3087, |
| "step": 9960 |
| }, |
| { |
| "epoch": 1.994, |
| "grad_norm": 1.3163172006607056, |
| "learning_rate": 3.0527725960307083e-06, |
| "loss": 0.3544, |
| "step": 9970 |
| }, |
| { |
| "epoch": 1.996, |
| "grad_norm": 1.1766602993011475, |
| "learning_rate": 3.0420609804354295e-06, |
| "loss": 0.2968, |
| "step": 9980 |
| }, |
| { |
| "epoch": 1.998, |
| "grad_norm": 1.754724144935608, |
| "learning_rate": 3.0313599678990514e-06, |
| "loss": 0.3324, |
| "step": 9990 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.334686279296875, |
| "learning_rate": 3.0206696163720317e-06, |
| "loss": 0.3427, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.002, |
| "grad_norm": 1.149556279182434, |
| "learning_rate": 3.0099899837470976e-06, |
| "loss": 0.2296, |
| "step": 10010 |
| }, |
| { |
| "epoch": 2.004, |
| "grad_norm": 2.647596597671509, |
| "learning_rate": 2.999321127858925e-06, |
| "loss": 0.1997, |
| "step": 10020 |
| }, |
| { |
| "epoch": 2.006, |
| "grad_norm": 1.3816395998001099, |
| "learning_rate": 2.9886631064838355e-06, |
| "loss": 0.1688, |
| "step": 10030 |
| }, |
| { |
| "epoch": 2.008, |
| "grad_norm": 1.5958433151245117, |
| "learning_rate": 2.9780159773394713e-06, |
| "loss": 0.1862, |
| "step": 10040 |
| }, |
| { |
| "epoch": 2.01, |
| "grad_norm": 1.7773202657699585, |
| "learning_rate": 2.96737979808449e-06, |
| "loss": 0.1681, |
| "step": 10050 |
| }, |
| { |
| "epoch": 2.012, |
| "grad_norm": 1.0190283060073853, |
| "learning_rate": 2.9567546263182554e-06, |
| "loss": 0.1732, |
| "step": 10060 |
| }, |
| { |
| "epoch": 2.014, |
| "grad_norm": 1.5294930934906006, |
| "learning_rate": 2.9461405195805146e-06, |
| "loss": 0.1918, |
| "step": 10070 |
| }, |
| { |
| "epoch": 2.016, |
| "grad_norm": 1.375820517539978, |
| "learning_rate": 2.9355375353510973e-06, |
| "loss": 0.1856, |
| "step": 10080 |
| }, |
| { |
| "epoch": 2.018, |
| "grad_norm": 1.9150773286819458, |
| "learning_rate": 2.9249457310495994e-06, |
| "loss": 0.2228, |
| "step": 10090 |
| }, |
| { |
| "epoch": 2.02, |
| "grad_norm": 1.4141868352890015, |
| "learning_rate": 2.91436516403507e-06, |
| "loss": 0.2042, |
| "step": 10100 |
| }, |
| { |
| "epoch": 2.022, |
| "grad_norm": 1.665558099746704, |
| "learning_rate": 2.9037958916057104e-06, |
| "loss": 0.1844, |
| "step": 10110 |
| }, |
| { |
| "epoch": 2.024, |
| "grad_norm": 1.4332926273345947, |
| "learning_rate": 2.893237970998547e-06, |
| "loss": 0.2012, |
| "step": 10120 |
| }, |
| { |
| "epoch": 2.026, |
| "grad_norm": 1.575321078300476, |
| "learning_rate": 2.8826914593891396e-06, |
| "loss": 0.2173, |
| "step": 10130 |
| }, |
| { |
| "epoch": 2.028, |
| "grad_norm": 1.420453667640686, |
| "learning_rate": 2.872156413891263e-06, |
| "loss": 0.1859, |
| "step": 10140 |
| }, |
| { |
| "epoch": 2.03, |
| "grad_norm": 1.3123276233673096, |
| "learning_rate": 2.8616328915565907e-06, |
| "loss": 0.1539, |
| "step": 10150 |
| }, |
| { |
| "epoch": 2.032, |
| "grad_norm": 2.069629430770874, |
| "learning_rate": 2.8511209493744015e-06, |
| "loss": 0.1664, |
| "step": 10160 |
| }, |
| { |
| "epoch": 2.034, |
| "grad_norm": 1.5814684629440308, |
| "learning_rate": 2.8406206442712618e-06, |
| "loss": 0.2212, |
| "step": 10170 |
| }, |
| { |
| "epoch": 2.036, |
| "grad_norm": 1.5895538330078125, |
| "learning_rate": 2.830132033110713e-06, |
| "loss": 0.1419, |
| "step": 10180 |
| }, |
| { |
| "epoch": 2.038, |
| "grad_norm": 1.795087456703186, |
| "learning_rate": 2.8196551726929745e-06, |
| "loss": 0.1644, |
| "step": 10190 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 1.6379607915878296, |
| "learning_rate": 2.80919011975463e-06, |
| "loss": 0.1866, |
| "step": 10200 |
| }, |
| { |
| "epoch": 2.042, |
| "grad_norm": 1.2147035598754883, |
| "learning_rate": 2.798736930968315e-06, |
| "loss": 0.1596, |
| "step": 10210 |
| }, |
| { |
| "epoch": 2.044, |
| "grad_norm": 1.3540210723876953, |
| "learning_rate": 2.788295662942423e-06, |
| "loss": 0.1673, |
| "step": 10220 |
| }, |
| { |
| "epoch": 2.046, |
| "grad_norm": 1.2441480159759521, |
| "learning_rate": 2.777866372220789e-06, |
| "loss": 0.1877, |
| "step": 10230 |
| }, |
| { |
| "epoch": 2.048, |
| "grad_norm": 2.1177780628204346, |
| "learning_rate": 2.7674491152823825e-06, |
| "loss": 0.2515, |
| "step": 10240 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 1.9897186756134033, |
| "learning_rate": 2.7570439485410116e-06, |
| "loss": 0.1739, |
| "step": 10250 |
| }, |
| { |
| "epoch": 2.052, |
| "grad_norm": 2.053022861480713, |
| "learning_rate": 2.7466509283450026e-06, |
| "loss": 0.1839, |
| "step": 10260 |
| }, |
| { |
| "epoch": 2.054, |
| "grad_norm": 1.2909846305847168, |
| "learning_rate": 2.736270110976912e-06, |
| "loss": 0.1838, |
| "step": 10270 |
| }, |
| { |
| "epoch": 2.056, |
| "grad_norm": 1.4775480031967163, |
| "learning_rate": 2.7259015526532074e-06, |
| "loss": 0.1464, |
| "step": 10280 |
| }, |
| { |
| "epoch": 2.058, |
| "grad_norm": 2.2092342376708984, |
| "learning_rate": 2.7155453095239682e-06, |
| "loss": 0.1622, |
| "step": 10290 |
| }, |
| { |
| "epoch": 2.06, |
| "grad_norm": 1.2695140838623047, |
| "learning_rate": 2.705201437672585e-06, |
| "loss": 0.1665, |
| "step": 10300 |
| }, |
| { |
| "epoch": 2.062, |
| "grad_norm": 1.4544492959976196, |
| "learning_rate": 2.6948699931154533e-06, |
| "loss": 0.158, |
| "step": 10310 |
| }, |
| { |
| "epoch": 2.064, |
| "grad_norm": 1.6260836124420166, |
| "learning_rate": 2.684551031801662e-06, |
| "loss": 0.1697, |
| "step": 10320 |
| }, |
| { |
| "epoch": 2.066, |
| "grad_norm": 1.1283328533172607, |
| "learning_rate": 2.6742446096127086e-06, |
| "loss": 0.1621, |
| "step": 10330 |
| }, |
| { |
| "epoch": 2.068, |
| "grad_norm": 1.4716851711273193, |
| "learning_rate": 2.66395078236218e-06, |
| "loss": 0.1704, |
| "step": 10340 |
| }, |
| { |
| "epoch": 2.07, |
| "grad_norm": 1.6541157960891724, |
| "learning_rate": 2.6536696057954553e-06, |
| "loss": 0.2194, |
| "step": 10350 |
| }, |
| { |
| "epoch": 2.072, |
| "grad_norm": 1.5134286880493164, |
| "learning_rate": 2.6434011355894074e-06, |
| "loss": 0.185, |
| "step": 10360 |
| }, |
| { |
| "epoch": 2.074, |
| "grad_norm": 1.936950922012329, |
| "learning_rate": 2.633145427352102e-06, |
| "loss": 0.1546, |
| "step": 10370 |
| }, |
| { |
| "epoch": 2.076, |
| "grad_norm": 2.237762212753296, |
| "learning_rate": 2.6229025366224835e-06, |
| "loss": 0.1923, |
| "step": 10380 |
| }, |
| { |
| "epoch": 2.078, |
| "grad_norm": 2.4793312549591064, |
| "learning_rate": 2.612672518870093e-06, |
| "loss": 0.1842, |
| "step": 10390 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 1.5138216018676758, |
| "learning_rate": 2.602455429494758e-06, |
| "loss": 0.1437, |
| "step": 10400 |
| }, |
| { |
| "epoch": 2.082, |
| "grad_norm": 1.5739482641220093, |
| "learning_rate": 2.5922513238262915e-06, |
| "loss": 0.1765, |
| "step": 10410 |
| }, |
| { |
| "epoch": 2.084, |
| "grad_norm": 1.1942251920700073, |
| "learning_rate": 2.582060257124195e-06, |
| "loss": 0.1793, |
| "step": 10420 |
| }, |
| { |
| "epoch": 2.086, |
| "grad_norm": 1.485691785812378, |
| "learning_rate": 2.5718822845773516e-06, |
| "loss": 0.1927, |
| "step": 10430 |
| }, |
| { |
| "epoch": 2.088, |
| "grad_norm": 1.794269323348999, |
| "learning_rate": 2.5617174613037503e-06, |
| "loss": 0.1779, |
| "step": 10440 |
| }, |
| { |
| "epoch": 2.09, |
| "grad_norm": 1.6613082885742188, |
| "learning_rate": 2.5515658423501573e-06, |
| "loss": 0.1955, |
| "step": 10450 |
| }, |
| { |
| "epoch": 2.092, |
| "grad_norm": 1.5060651302337646, |
| "learning_rate": 2.541427482691832e-06, |
| "loss": 0.1463, |
| "step": 10460 |
| }, |
| { |
| "epoch": 2.094, |
| "grad_norm": 1.6522201299667358, |
| "learning_rate": 2.5313024372322413e-06, |
| "loss": 0.2071, |
| "step": 10470 |
| }, |
| { |
| "epoch": 2.096, |
| "grad_norm": 2.138770818710327, |
| "learning_rate": 2.5211907608027366e-06, |
| "loss": 0.182, |
| "step": 10480 |
| }, |
| { |
| "epoch": 2.098, |
| "grad_norm": 2.109570264816284, |
| "learning_rate": 2.5110925081622796e-06, |
| "loss": 0.1743, |
| "step": 10490 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 1.1957253217697144, |
| "learning_rate": 2.5010077339971283e-06, |
| "loss": 0.1715, |
| "step": 10500 |
| }, |
| { |
| "epoch": 2.102, |
| "grad_norm": 1.4688587188720703, |
| "learning_rate": 2.4909364929205575e-06, |
| "loss": 0.1838, |
| "step": 10510 |
| }, |
| { |
| "epoch": 2.104, |
| "grad_norm": 1.1424540281295776, |
| "learning_rate": 2.480878839472552e-06, |
| "loss": 0.1815, |
| "step": 10520 |
| }, |
| { |
| "epoch": 2.106, |
| "grad_norm": 1.2021138668060303, |
| "learning_rate": 2.470834828119509e-06, |
| "loss": 0.1657, |
| "step": 10530 |
| }, |
| { |
| "epoch": 2.108, |
| "grad_norm": 1.8013497591018677, |
| "learning_rate": 2.4608045132539536e-06, |
| "loss": 0.2006, |
| "step": 10540 |
| }, |
| { |
| "epoch": 2.11, |
| "grad_norm": 2.014941930770874, |
| "learning_rate": 2.4507879491942388e-06, |
| "loss": 0.1859, |
| "step": 10550 |
| }, |
| { |
| "epoch": 2.112, |
| "grad_norm": 1.798327922821045, |
| "learning_rate": 2.4407851901842465e-06, |
| "loss": 0.1798, |
| "step": 10560 |
| }, |
| { |
| "epoch": 2.114, |
| "grad_norm": 1.7324855327606201, |
| "learning_rate": 2.4307962903931025e-06, |
| "loss": 0.1823, |
| "step": 10570 |
| }, |
| { |
| "epoch": 2.116, |
| "grad_norm": 1.536121129989624, |
| "learning_rate": 2.4208213039148803e-06, |
| "loss": 0.1776, |
| "step": 10580 |
| }, |
| { |
| "epoch": 2.118, |
| "grad_norm": 2.203296422958374, |
| "learning_rate": 2.4108602847683012e-06, |
| "loss": 0.2172, |
| "step": 10590 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 1.6374783515930176, |
| "learning_rate": 2.4009132868964525e-06, |
| "loss": 0.1755, |
| "step": 10600 |
| }, |
| { |
| "epoch": 2.122, |
| "grad_norm": 2.1419484615325928, |
| "learning_rate": 2.3909803641664907e-06, |
| "loss": 0.1998, |
| "step": 10610 |
| }, |
| { |
| "epoch": 2.124, |
| "grad_norm": 1.4426078796386719, |
| "learning_rate": 2.3810615703693446e-06, |
| "loss": 0.207, |
| "step": 10620 |
| }, |
| { |
| "epoch": 2.126, |
| "grad_norm": 1.7481943368911743, |
| "learning_rate": 2.3711569592194363e-06, |
| "loss": 0.1504, |
| "step": 10630 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 1.914016604423523, |
| "learning_rate": 2.3612665843543737e-06, |
| "loss": 0.1743, |
| "step": 10640 |
| }, |
| { |
| "epoch": 2.13, |
| "grad_norm": 1.829076886177063, |
| "learning_rate": 2.3513904993346775e-06, |
| "loss": 0.2314, |
| "step": 10650 |
| }, |
| { |
| "epoch": 2.132, |
| "grad_norm": 1.4599560499191284, |
| "learning_rate": 2.3415287576434807e-06, |
| "loss": 0.1967, |
| "step": 10660 |
| }, |
| { |
| "epoch": 2.134, |
| "grad_norm": 1.5102792978286743, |
| "learning_rate": 2.3316814126862377e-06, |
| "loss": 0.1772, |
| "step": 10670 |
| }, |
| { |
| "epoch": 2.136, |
| "grad_norm": 1.5047998428344727, |
| "learning_rate": 2.321848517790442e-06, |
| "loss": 0.2231, |
| "step": 10680 |
| }, |
| { |
| "epoch": 2.138, |
| "grad_norm": 1.4379587173461914, |
| "learning_rate": 2.312030126205335e-06, |
| "loss": 0.1765, |
| "step": 10690 |
| }, |
| { |
| "epoch": 2.14, |
| "grad_norm": 2.076664447784424, |
| "learning_rate": 2.302226291101609e-06, |
| "loss": 0.1563, |
| "step": 10700 |
| }, |
| { |
| "epoch": 2.142, |
| "grad_norm": 1.7419641017913818, |
| "learning_rate": 2.2924370655711407e-06, |
| "loss": 0.1456, |
| "step": 10710 |
| }, |
| { |
| "epoch": 2.144, |
| "grad_norm": 1.5634517669677734, |
| "learning_rate": 2.282662502626678e-06, |
| "loss": 0.1508, |
| "step": 10720 |
| }, |
| { |
| "epoch": 2.146, |
| "grad_norm": 2.313948631286621, |
| "learning_rate": 2.2729026552015653e-06, |
| "loss": 0.1975, |
| "step": 10730 |
| }, |
| { |
| "epoch": 2.148, |
| "grad_norm": 1.3335062265396118, |
| "learning_rate": 2.263157576149463e-06, |
| "loss": 0.1611, |
| "step": 10740 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 1.4061542749404907, |
| "learning_rate": 2.2534273182440515e-06, |
| "loss": 0.1353, |
| "step": 10750 |
| }, |
| { |
| "epoch": 2.152, |
| "grad_norm": 2.1555397510528564, |
| "learning_rate": 2.2437119341787444e-06, |
| "loss": 0.1941, |
| "step": 10760 |
| }, |
| { |
| "epoch": 2.154, |
| "grad_norm": 1.6966259479522705, |
| "learning_rate": 2.2340114765664137e-06, |
| "loss": 0.1828, |
| "step": 10770 |
| }, |
| { |
| "epoch": 2.156, |
| "grad_norm": 1.6515167951583862, |
| "learning_rate": 2.224325997939095e-06, |
| "loss": 0.1654, |
| "step": 10780 |
| }, |
| { |
| "epoch": 2.158, |
| "grad_norm": 1.68687105178833, |
| "learning_rate": 2.214655550747709e-06, |
| "loss": 0.1473, |
| "step": 10790 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 1.4554318189620972, |
| "learning_rate": 2.2050001873617716e-06, |
| "loss": 0.1534, |
| "step": 10800 |
| }, |
| { |
| "epoch": 2.162, |
| "grad_norm": 1.4342589378356934, |
| "learning_rate": 2.19535996006911e-06, |
| "loss": 0.1541, |
| "step": 10810 |
| }, |
| { |
| "epoch": 2.164, |
| "grad_norm": 1.8129370212554932, |
| "learning_rate": 2.1857349210755956e-06, |
| "loss": 0.1739, |
| "step": 10820 |
| }, |
| { |
| "epoch": 2.166, |
| "grad_norm": 1.8690457344055176, |
| "learning_rate": 2.1761251225048385e-06, |
| "loss": 0.1665, |
| "step": 10830 |
| }, |
| { |
| "epoch": 2.168, |
| "grad_norm": 1.6199536323547363, |
| "learning_rate": 2.1665306163979132e-06, |
| "loss": 0.141, |
| "step": 10840 |
| }, |
| { |
| "epoch": 2.17, |
| "grad_norm": 1.7210586071014404, |
| "learning_rate": 2.156951454713093e-06, |
| "loss": 0.1729, |
| "step": 10850 |
| }, |
| { |
| "epoch": 2.172, |
| "grad_norm": 2.0658435821533203, |
| "learning_rate": 2.147387689325539e-06, |
| "loss": 0.2072, |
| "step": 10860 |
| }, |
| { |
| "epoch": 2.174, |
| "grad_norm": 2.763272762298584, |
| "learning_rate": 2.137839372027047e-06, |
| "loss": 0.2201, |
| "step": 10870 |
| }, |
| { |
| "epoch": 2.176, |
| "grad_norm": 1.9413820505142212, |
| "learning_rate": 2.1283065545257443e-06, |
| "loss": 0.1413, |
| "step": 10880 |
| }, |
| { |
| "epoch": 2.178, |
| "grad_norm": 1.5634864568710327, |
| "learning_rate": 2.118789288445829e-06, |
| "loss": 0.1497, |
| "step": 10890 |
| }, |
| { |
| "epoch": 2.18, |
| "grad_norm": 2.5157878398895264, |
| "learning_rate": 2.1092876253272793e-06, |
| "loss": 0.1919, |
| "step": 10900 |
| }, |
| { |
| "epoch": 2.182, |
| "grad_norm": 1.4105507135391235, |
| "learning_rate": 2.099801616625573e-06, |
| "loss": 0.1154, |
| "step": 10910 |
| }, |
| { |
| "epoch": 2.184, |
| "grad_norm": 1.6929118633270264, |
| "learning_rate": 2.090331313711417e-06, |
| "loss": 0.1904, |
| "step": 10920 |
| }, |
| { |
| "epoch": 2.186, |
| "grad_norm": 2.1146175861358643, |
| "learning_rate": 2.080876767870466e-06, |
| "loss": 0.191, |
| "step": 10930 |
| }, |
| { |
| "epoch": 2.188, |
| "grad_norm": 1.6462335586547852, |
| "learning_rate": 2.0714380303030373e-06, |
| "loss": 0.2233, |
| "step": 10940 |
| }, |
| { |
| "epoch": 2.19, |
| "grad_norm": 2.0763607025146484, |
| "learning_rate": 2.0620151521238453e-06, |
| "loss": 0.1847, |
| "step": 10950 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 1.8710356950759888, |
| "learning_rate": 2.0526081843617183e-06, |
| "loss": 0.1867, |
| "step": 10960 |
| }, |
| { |
| "epoch": 2.194, |
| "grad_norm": 1.8661998510360718, |
| "learning_rate": 2.04321717795932e-06, |
| "loss": 0.1691, |
| "step": 10970 |
| }, |
| { |
| "epoch": 2.196, |
| "grad_norm": 1.706262230873108, |
| "learning_rate": 2.03384218377288e-06, |
| "loss": 0.1848, |
| "step": 10980 |
| }, |
| { |
| "epoch": 2.198, |
| "grad_norm": 1.395787239074707, |
| "learning_rate": 2.0244832525719155e-06, |
| "loss": 0.1661, |
| "step": 10990 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 1.469756841659546, |
| "learning_rate": 2.015140435038951e-06, |
| "loss": 0.1701, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.202, |
| "grad_norm": 2.2943742275238037, |
| "learning_rate": 2.005813781769253e-06, |
| "loss": 0.159, |
| "step": 11010 |
| }, |
| { |
| "epoch": 2.204, |
| "grad_norm": 2.527089834213257, |
| "learning_rate": 1.996503343270554e-06, |
| "loss": 0.2051, |
| "step": 11020 |
| }, |
| { |
| "epoch": 2.206, |
| "grad_norm": 2.241828680038452, |
| "learning_rate": 1.987209169962769e-06, |
| "loss": 0.1856, |
| "step": 11030 |
| }, |
| { |
| "epoch": 2.208, |
| "grad_norm": 2.044684410095215, |
| "learning_rate": 1.9779313121777382e-06, |
| "loss": 0.1704, |
| "step": 11040 |
| }, |
| { |
| "epoch": 2.21, |
| "grad_norm": 1.545622706413269, |
| "learning_rate": 1.9686698201589395e-06, |
| "loss": 0.1666, |
| "step": 11050 |
| }, |
| { |
| "epoch": 2.212, |
| "grad_norm": 2.001255750656128, |
| "learning_rate": 1.9594247440612293e-06, |
| "loss": 0.1813, |
| "step": 11060 |
| }, |
| { |
| "epoch": 2.214, |
| "grad_norm": 2.0734426975250244, |
| "learning_rate": 1.9501961339505626e-06, |
| "loss": 0.2223, |
| "step": 11070 |
| }, |
| { |
| "epoch": 2.216, |
| "grad_norm": 1.9134544134140015, |
| "learning_rate": 1.94098403980372e-06, |
| "loss": 0.1919, |
| "step": 11080 |
| }, |
| { |
| "epoch": 2.218, |
| "grad_norm": 1.5950722694396973, |
| "learning_rate": 1.9317885115080514e-06, |
| "loss": 0.234, |
| "step": 11090 |
| }, |
| { |
| "epoch": 2.22, |
| "grad_norm": 1.8253746032714844, |
| "learning_rate": 1.922609598861187e-06, |
| "loss": 0.1471, |
| "step": 11100 |
| }, |
| { |
| "epoch": 2.222, |
| "grad_norm": 1.5433813333511353, |
| "learning_rate": 1.913447351570776e-06, |
| "loss": 0.1804, |
| "step": 11110 |
| }, |
| { |
| "epoch": 2.224, |
| "grad_norm": 2.1074371337890625, |
| "learning_rate": 1.9043018192542228e-06, |
| "loss": 0.2, |
| "step": 11120 |
| }, |
| { |
| "epoch": 2.226, |
| "grad_norm": 1.8115590810775757, |
| "learning_rate": 1.8951730514384103e-06, |
| "loss": 0.2082, |
| "step": 11130 |
| }, |
| { |
| "epoch": 2.228, |
| "grad_norm": 0.8639247417449951, |
| "learning_rate": 1.8860610975594384e-06, |
| "loss": 0.1944, |
| "step": 11140 |
| }, |
| { |
| "epoch": 2.23, |
| "grad_norm": 1.839328408241272, |
| "learning_rate": 1.8769660069623448e-06, |
| "loss": 0.212, |
| "step": 11150 |
| }, |
| { |
| "epoch": 2.232, |
| "grad_norm": 1.7485737800598145, |
| "learning_rate": 1.8678878289008511e-06, |
| "loss": 0.2095, |
| "step": 11160 |
| }, |
| { |
| "epoch": 2.234, |
| "grad_norm": 1.7731024026870728, |
| "learning_rate": 1.8588266125370929e-06, |
| "loss": 0.1836, |
| "step": 11170 |
| }, |
| { |
| "epoch": 2.2359999999999998, |
| "grad_norm": 1.8955568075180054, |
| "learning_rate": 1.8497824069413445e-06, |
| "loss": 0.2007, |
| "step": 11180 |
| }, |
| { |
| "epoch": 2.238, |
| "grad_norm": 1.6335498094558716, |
| "learning_rate": 1.84075526109176e-06, |
| "loss": 0.1826, |
| "step": 11190 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 1.6790274381637573, |
| "learning_rate": 1.831745223874118e-06, |
| "loss": 0.1806, |
| "step": 11200 |
| }, |
| { |
| "epoch": 2.242, |
| "grad_norm": 1.6211411952972412, |
| "learning_rate": 1.8227523440815331e-06, |
| "loss": 0.1884, |
| "step": 11210 |
| }, |
| { |
| "epoch": 2.2439999999999998, |
| "grad_norm": 1.6547083854675293, |
| "learning_rate": 1.8137766704142141e-06, |
| "loss": 0.1747, |
| "step": 11220 |
| }, |
| { |
| "epoch": 2.246, |
| "grad_norm": 1.8069316148757935, |
| "learning_rate": 1.8048182514791901e-06, |
| "loss": 0.1676, |
| "step": 11230 |
| }, |
| { |
| "epoch": 2.248, |
| "grad_norm": 2.3242225646972656, |
| "learning_rate": 1.7958771357900446e-06, |
| "loss": 0.1868, |
| "step": 11240 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 2.2176382541656494, |
| "learning_rate": 1.7869533717666626e-06, |
| "loss": 0.1645, |
| "step": 11250 |
| }, |
| { |
| "epoch": 2.252, |
| "grad_norm": 1.437696933746338, |
| "learning_rate": 1.7780470077349566e-06, |
| "loss": 0.1623, |
| "step": 11260 |
| }, |
| { |
| "epoch": 2.254, |
| "grad_norm": 2.0108892917633057, |
| "learning_rate": 1.769158091926615e-06, |
| "loss": 0.2113, |
| "step": 11270 |
| }, |
| { |
| "epoch": 2.2560000000000002, |
| "grad_norm": 1.2373582124710083, |
| "learning_rate": 1.760286672478837e-06, |
| "loss": 0.1587, |
| "step": 11280 |
| }, |
| { |
| "epoch": 2.258, |
| "grad_norm": 1.3458646535873413, |
| "learning_rate": 1.751432797434068e-06, |
| "loss": 0.2014, |
| "step": 11290 |
| }, |
| { |
| "epoch": 2.26, |
| "grad_norm": 1.4614745378494263, |
| "learning_rate": 1.7425965147397462e-06, |
| "loss": 0.1785, |
| "step": 11300 |
| }, |
| { |
| "epoch": 2.262, |
| "grad_norm": 2.939737558364868, |
| "learning_rate": 1.7337778722480413e-06, |
| "loss": 0.2342, |
| "step": 11310 |
| }, |
| { |
| "epoch": 2.2640000000000002, |
| "grad_norm": 1.6198670864105225, |
| "learning_rate": 1.7249769177155879e-06, |
| "loss": 0.1486, |
| "step": 11320 |
| }, |
| { |
| "epoch": 2.266, |
| "grad_norm": 2.2954392433166504, |
| "learning_rate": 1.7161936988032386e-06, |
| "loss": 0.196, |
| "step": 11330 |
| }, |
| { |
| "epoch": 2.268, |
| "grad_norm": 1.6478867530822754, |
| "learning_rate": 1.7074282630757998e-06, |
| "loss": 0.1795, |
| "step": 11340 |
| }, |
| { |
| "epoch": 2.27, |
| "grad_norm": 1.3108015060424805, |
| "learning_rate": 1.6986806580017695e-06, |
| "loss": 0.1213, |
| "step": 11350 |
| }, |
| { |
| "epoch": 2.2720000000000002, |
| "grad_norm": 1.4279754161834717, |
| "learning_rate": 1.689950930953091e-06, |
| "loss": 0.1663, |
| "step": 11360 |
| }, |
| { |
| "epoch": 2.274, |
| "grad_norm": 1.3529280424118042, |
| "learning_rate": 1.6812391292048897e-06, |
| "loss": 0.1478, |
| "step": 11370 |
| }, |
| { |
| "epoch": 2.276, |
| "grad_norm": 2.1252269744873047, |
| "learning_rate": 1.6725452999352137e-06, |
| "loss": 0.1675, |
| "step": 11380 |
| }, |
| { |
| "epoch": 2.278, |
| "grad_norm": 2.0060176849365234, |
| "learning_rate": 1.6638694902247866e-06, |
| "loss": 0.2113, |
| "step": 11390 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 2.1761856079101562, |
| "learning_rate": 1.655211747056749e-06, |
| "loss": 0.1726, |
| "step": 11400 |
| }, |
| { |
| "epoch": 2.282, |
| "grad_norm": 1.4057432413101196, |
| "learning_rate": 1.6465721173164e-06, |
| "loss": 0.1713, |
| "step": 11410 |
| }, |
| { |
| "epoch": 2.284, |
| "grad_norm": 1.271407961845398, |
| "learning_rate": 1.6379506477909518e-06, |
| "loss": 0.173, |
| "step": 11420 |
| }, |
| { |
| "epoch": 2.286, |
| "grad_norm": 1.5162497758865356, |
| "learning_rate": 1.629347385169263e-06, |
| "loss": 0.1821, |
| "step": 11430 |
| }, |
| { |
| "epoch": 2.288, |
| "grad_norm": 1.081526517868042, |
| "learning_rate": 1.6207623760416074e-06, |
| "loss": 0.1428, |
| "step": 11440 |
| }, |
| { |
| "epoch": 2.29, |
| "grad_norm": 1.603796362876892, |
| "learning_rate": 1.6121956668993977e-06, |
| "loss": 0.1669, |
| "step": 11450 |
| }, |
| { |
| "epoch": 2.292, |
| "grad_norm": 1.392375111579895, |
| "learning_rate": 1.6036473041349438e-06, |
| "loss": 0.1698, |
| "step": 11460 |
| }, |
| { |
| "epoch": 2.294, |
| "grad_norm": 1.9549520015716553, |
| "learning_rate": 1.5951173340412134e-06, |
| "loss": 0.2162, |
| "step": 11470 |
| }, |
| { |
| "epoch": 2.296, |
| "grad_norm": 1.8508301973342896, |
| "learning_rate": 1.58660580281156e-06, |
| "loss": 0.1534, |
| "step": 11480 |
| }, |
| { |
| "epoch": 2.298, |
| "grad_norm": 1.5918387174606323, |
| "learning_rate": 1.5781127565394838e-06, |
| "loss": 0.161, |
| "step": 11490 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 2.0230064392089844, |
| "learning_rate": 1.5696382412183853e-06, |
| "loss": 0.1709, |
| "step": 11500 |
| }, |
| { |
| "epoch": 2.302, |
| "grad_norm": 2.3765511512756348, |
| "learning_rate": 1.5611823027413109e-06, |
| "loss": 0.1861, |
| "step": 11510 |
| }, |
| { |
| "epoch": 2.304, |
| "grad_norm": 1.9833528995513916, |
| "learning_rate": 1.5527449869007055e-06, |
| "loss": 0.2067, |
| "step": 11520 |
| }, |
| { |
| "epoch": 2.306, |
| "grad_norm": 1.4727070331573486, |
| "learning_rate": 1.5443263393881619e-06, |
| "loss": 0.1758, |
| "step": 11530 |
| }, |
| { |
| "epoch": 2.308, |
| "grad_norm": 1.8662854433059692, |
| "learning_rate": 1.535926405794179e-06, |
| "loss": 0.1892, |
| "step": 11540 |
| }, |
| { |
| "epoch": 2.31, |
| "grad_norm": 1.810899257659912, |
| "learning_rate": 1.5275452316079143e-06, |
| "loss": 0.1321, |
| "step": 11550 |
| }, |
| { |
| "epoch": 2.312, |
| "grad_norm": 2.3489248752593994, |
| "learning_rate": 1.519182862216929e-06, |
| "loss": 0.1976, |
| "step": 11560 |
| }, |
| { |
| "epoch": 2.314, |
| "grad_norm": 1.5641025304794312, |
| "learning_rate": 1.5108393429069501e-06, |
| "loss": 0.1718, |
| "step": 11570 |
| }, |
| { |
| "epoch": 2.316, |
| "grad_norm": 2.0649514198303223, |
| "learning_rate": 1.5025147188616308e-06, |
| "loss": 0.1905, |
| "step": 11580 |
| }, |
| { |
| "epoch": 2.318, |
| "grad_norm": 1.309147596359253, |
| "learning_rate": 1.4942090351622884e-06, |
| "loss": 0.185, |
| "step": 11590 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 1.3974589109420776, |
| "learning_rate": 1.4859223367876762e-06, |
| "loss": 0.1564, |
| "step": 11600 |
| }, |
| { |
| "epoch": 2.322, |
| "grad_norm": 1.8548953533172607, |
| "learning_rate": 1.477654668613735e-06, |
| "loss": 0.1659, |
| "step": 11610 |
| }, |
| { |
| "epoch": 2.324, |
| "grad_norm": 1.7054270505905151, |
| "learning_rate": 1.469406075413342e-06, |
| "loss": 0.2003, |
| "step": 11620 |
| }, |
| { |
| "epoch": 2.326, |
| "grad_norm": 1.4067456722259521, |
| "learning_rate": 1.4611766018560835e-06, |
| "loss": 0.1569, |
| "step": 11630 |
| }, |
| { |
| "epoch": 2.328, |
| "grad_norm": 2.1007652282714844, |
| "learning_rate": 1.4529662925080023e-06, |
| "loss": 0.1882, |
| "step": 11640 |
| }, |
| { |
| "epoch": 2.33, |
| "grad_norm": 1.6421812772750854, |
| "learning_rate": 1.4447751918313552e-06, |
| "loss": 0.2184, |
| "step": 11650 |
| }, |
| { |
| "epoch": 2.332, |
| "grad_norm": 2.9512548446655273, |
| "learning_rate": 1.4366033441843823e-06, |
| "loss": 0.1994, |
| "step": 11660 |
| }, |
| { |
| "epoch": 2.334, |
| "grad_norm": 1.2109729051589966, |
| "learning_rate": 1.4284507938210545e-06, |
| "loss": 0.1702, |
| "step": 11670 |
| }, |
| { |
| "epoch": 2.336, |
| "grad_norm": 1.767602801322937, |
| "learning_rate": 1.420317584890844e-06, |
| "loss": 0.1508, |
| "step": 11680 |
| }, |
| { |
| "epoch": 2.338, |
| "grad_norm": 1.924599289894104, |
| "learning_rate": 1.4122037614384814e-06, |
| "loss": 0.2122, |
| "step": 11690 |
| }, |
| { |
| "epoch": 2.34, |
| "grad_norm": 1.8495938777923584, |
| "learning_rate": 1.404109367403712e-06, |
| "loss": 0.1816, |
| "step": 11700 |
| }, |
| { |
| "epoch": 2.342, |
| "grad_norm": 1.7605769634246826, |
| "learning_rate": 1.3960344466210669e-06, |
| "loss": 0.2041, |
| "step": 11710 |
| }, |
| { |
| "epoch": 2.344, |
| "grad_norm": 2.3613359928131104, |
| "learning_rate": 1.3879790428196226e-06, |
| "loss": 0.1848, |
| "step": 11720 |
| }, |
| { |
| "epoch": 2.346, |
| "grad_norm": 1.8871616125106812, |
| "learning_rate": 1.3799431996227569e-06, |
| "loss": 0.1771, |
| "step": 11730 |
| }, |
| { |
| "epoch": 2.348, |
| "grad_norm": 1.523779034614563, |
| "learning_rate": 1.3719269605479241e-06, |
| "loss": 0.1653, |
| "step": 11740 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 1.7912616729736328, |
| "learning_rate": 1.363930369006415e-06, |
| "loss": 0.1834, |
| "step": 11750 |
| }, |
| { |
| "epoch": 2.352, |
| "grad_norm": 2.330554485321045, |
| "learning_rate": 1.3559534683031133e-06, |
| "loss": 0.1694, |
| "step": 11760 |
| }, |
| { |
| "epoch": 2.354, |
| "grad_norm": 1.3133292198181152, |
| "learning_rate": 1.3479963016362768e-06, |
| "loss": 0.1925, |
| "step": 11770 |
| }, |
| { |
| "epoch": 2.356, |
| "grad_norm": 1.9684919118881226, |
| "learning_rate": 1.3400589120972922e-06, |
| "loss": 0.1448, |
| "step": 11780 |
| }, |
| { |
| "epoch": 2.358, |
| "grad_norm": 1.2997586727142334, |
| "learning_rate": 1.3321413426704426e-06, |
| "loss": 0.2113, |
| "step": 11790 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 2.1797378063201904, |
| "learning_rate": 1.3242436362326804e-06, |
| "loss": 0.1585, |
| "step": 11800 |
| }, |
| { |
| "epoch": 2.362, |
| "grad_norm": 1.4730124473571777, |
| "learning_rate": 1.3163658355533866e-06, |
| "loss": 0.1928, |
| "step": 11810 |
| }, |
| { |
| "epoch": 2.364, |
| "grad_norm": 1.4231460094451904, |
| "learning_rate": 1.3085079832941528e-06, |
| "loss": 0.2012, |
| "step": 11820 |
| }, |
| { |
| "epoch": 2.366, |
| "grad_norm": 1.8164182901382446, |
| "learning_rate": 1.3006701220085338e-06, |
| "loss": 0.1639, |
| "step": 11830 |
| }, |
| { |
| "epoch": 2.368, |
| "grad_norm": 2.293713092803955, |
| "learning_rate": 1.2928522941418241e-06, |
| "loss": 0.168, |
| "step": 11840 |
| }, |
| { |
| "epoch": 2.37, |
| "grad_norm": 1.6054863929748535, |
| "learning_rate": 1.2850545420308386e-06, |
| "loss": 0.1507, |
| "step": 11850 |
| }, |
| { |
| "epoch": 2.372, |
| "grad_norm": 2.180938959121704, |
| "learning_rate": 1.2772769079036639e-06, |
| "loss": 0.1563, |
| "step": 11860 |
| }, |
| { |
| "epoch": 2.374, |
| "grad_norm": 1.553425908088684, |
| "learning_rate": 1.2695194338794414e-06, |
| "loss": 0.1574, |
| "step": 11870 |
| }, |
| { |
| "epoch": 2.376, |
| "grad_norm": 1.3529037237167358, |
| "learning_rate": 1.2617821619681397e-06, |
| "loss": 0.1941, |
| "step": 11880 |
| }, |
| { |
| "epoch": 2.378, |
| "grad_norm": 1.9350522756576538, |
| "learning_rate": 1.2540651340703231e-06, |
| "loss": 0.1551, |
| "step": 11890 |
| }, |
| { |
| "epoch": 2.38, |
| "grad_norm": 2.1500189304351807, |
| "learning_rate": 1.246368391976927e-06, |
| "loss": 0.166, |
| "step": 11900 |
| }, |
| { |
| "epoch": 2.382, |
| "grad_norm": 1.851035237312317, |
| "learning_rate": 1.2386919773690276e-06, |
| "loss": 0.154, |
| "step": 11910 |
| }, |
| { |
| "epoch": 2.384, |
| "grad_norm": 1.200472116470337, |
| "learning_rate": 1.2310359318176229e-06, |
| "loss": 0.1441, |
| "step": 11920 |
| }, |
| { |
| "epoch": 2.386, |
| "grad_norm": 1.9997992515563965, |
| "learning_rate": 1.2234002967834036e-06, |
| "loss": 0.1886, |
| "step": 11930 |
| }, |
| { |
| "epoch": 2.388, |
| "grad_norm": 2.4099040031433105, |
| "learning_rate": 1.2157851136165243e-06, |
| "loss": 0.1631, |
| "step": 11940 |
| }, |
| { |
| "epoch": 2.39, |
| "grad_norm": 1.5515005588531494, |
| "learning_rate": 1.2081904235563908e-06, |
| "loss": 0.1468, |
| "step": 11950 |
| }, |
| { |
| "epoch": 2.392, |
| "grad_norm": 1.075179100036621, |
| "learning_rate": 1.2006162677314265e-06, |
| "loss": 0.1464, |
| "step": 11960 |
| }, |
| { |
| "epoch": 2.394, |
| "grad_norm": 1.8599796295166016, |
| "learning_rate": 1.1930626871588525e-06, |
| "loss": 0.1865, |
| "step": 11970 |
| }, |
| { |
| "epoch": 2.396, |
| "grad_norm": 1.7627993822097778, |
| "learning_rate": 1.185529722744469e-06, |
| "loss": 0.165, |
| "step": 11980 |
| }, |
| { |
| "epoch": 2.398, |
| "grad_norm": 1.7737939357757568, |
| "learning_rate": 1.1780174152824297e-06, |
| "loss": 0.1545, |
| "step": 11990 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.9698889851570129, |
| "learning_rate": 1.1705258054550212e-06, |
| "loss": 0.1575, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.402, |
| "grad_norm": 1.0956158638000488, |
| "learning_rate": 1.1630549338324454e-06, |
| "loss": 0.1703, |
| "step": 12010 |
| }, |
| { |
| "epoch": 2.404, |
| "grad_norm": 1.4204548597335815, |
| "learning_rate": 1.155604840872599e-06, |
| "loss": 0.166, |
| "step": 12020 |
| }, |
| { |
| "epoch": 2.406, |
| "grad_norm": 1.6942260265350342, |
| "learning_rate": 1.1481755669208495e-06, |
| "loss": 0.1596, |
| "step": 12030 |
| }, |
| { |
| "epoch": 2.408, |
| "grad_norm": 1.5839786529541016, |
| "learning_rate": 1.1407671522098262e-06, |
| "loss": 0.1931, |
| "step": 12040 |
| }, |
| { |
| "epoch": 2.41, |
| "grad_norm": 2.013857841491699, |
| "learning_rate": 1.1333796368591915e-06, |
| "loss": 0.1791, |
| "step": 12050 |
| }, |
| { |
| "epoch": 2.412, |
| "grad_norm": 1.7067184448242188, |
| "learning_rate": 1.126013060875432e-06, |
| "loss": 0.1849, |
| "step": 12060 |
| }, |
| { |
| "epoch": 2.414, |
| "grad_norm": 1.8866993188858032, |
| "learning_rate": 1.1186674641516415e-06, |
| "loss": 0.1922, |
| "step": 12070 |
| }, |
| { |
| "epoch": 2.416, |
| "grad_norm": 2.4251067638397217, |
| "learning_rate": 1.1113428864672954e-06, |
| "loss": 0.1807, |
| "step": 12080 |
| }, |
| { |
| "epoch": 2.418, |
| "grad_norm": 1.515995740890503, |
| "learning_rate": 1.1040393674880478e-06, |
| "loss": 0.1896, |
| "step": 12090 |
| }, |
| { |
| "epoch": 2.42, |
| "grad_norm": 1.7295435667037964, |
| "learning_rate": 1.0967569467655104e-06, |
| "loss": 0.1473, |
| "step": 12100 |
| }, |
| { |
| "epoch": 2.422, |
| "grad_norm": 1.7556982040405273, |
| "learning_rate": 1.0894956637370363e-06, |
| "loss": 0.1475, |
| "step": 12110 |
| }, |
| { |
| "epoch": 2.424, |
| "grad_norm": 1.9276478290557861, |
| "learning_rate": 1.082255557725511e-06, |
| "loss": 0.1539, |
| "step": 12120 |
| }, |
| { |
| "epoch": 2.426, |
| "grad_norm": 1.5865824222564697, |
| "learning_rate": 1.0750366679391393e-06, |
| "loss": 0.1449, |
| "step": 12130 |
| }, |
| { |
| "epoch": 2.428, |
| "grad_norm": 1.8489198684692383, |
| "learning_rate": 1.0678390334712275e-06, |
| "loss": 0.1547, |
| "step": 12140 |
| }, |
| { |
| "epoch": 2.43, |
| "grad_norm": 1.6484555006027222, |
| "learning_rate": 1.0606626932999775e-06, |
| "loss": 0.1648, |
| "step": 12150 |
| }, |
| { |
| "epoch": 2.432, |
| "grad_norm": 1.7841017246246338, |
| "learning_rate": 1.053507686288276e-06, |
| "loss": 0.1557, |
| "step": 12160 |
| }, |
| { |
| "epoch": 2.434, |
| "grad_norm": 1.6266376972198486, |
| "learning_rate": 1.0463740511834759e-06, |
| "loss": 0.1449, |
| "step": 12170 |
| }, |
| { |
| "epoch": 2.436, |
| "grad_norm": 1.884961724281311, |
| "learning_rate": 1.0392618266171983e-06, |
| "loss": 0.1606, |
| "step": 12180 |
| }, |
| { |
| "epoch": 2.438, |
| "grad_norm": 2.732299566268921, |
| "learning_rate": 1.0321710511051108e-06, |
| "loss": 0.1637, |
| "step": 12190 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 1.4075685739517212, |
| "learning_rate": 1.0251017630467347e-06, |
| "loss": 0.1798, |
| "step": 12200 |
| }, |
| { |
| "epoch": 2.442, |
| "grad_norm": 1.3892203569412231, |
| "learning_rate": 1.01805400072522e-06, |
| "loss": 0.1867, |
| "step": 12210 |
| }, |
| { |
| "epoch": 2.444, |
| "grad_norm": 1.6456845998764038, |
| "learning_rate": 1.0110278023071445e-06, |
| "loss": 0.1632, |
| "step": 12220 |
| }, |
| { |
| "epoch": 2.446, |
| "grad_norm": 1.5476148128509521, |
| "learning_rate": 1.0040232058423182e-06, |
| "loss": 0.1513, |
| "step": 12230 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 1.5470598936080933, |
| "learning_rate": 9.970402492635583e-07, |
| "loss": 0.1741, |
| "step": 12240 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 1.6543699502944946, |
| "learning_rate": 9.900789703864933e-07, |
| "loss": 0.1374, |
| "step": 12250 |
| }, |
| { |
| "epoch": 2.452, |
| "grad_norm": 1.6343821287155151, |
| "learning_rate": 9.831394069093663e-07, |
| "loss": 0.1526, |
| "step": 12260 |
| }, |
| { |
| "epoch": 2.454, |
| "grad_norm": 1.9217232465744019, |
| "learning_rate": 9.762215964128124e-07, |
| "loss": 0.1716, |
| "step": 12270 |
| }, |
| { |
| "epoch": 2.456, |
| "grad_norm": 2.077751874923706, |
| "learning_rate": 9.69325576359672e-07, |
| "loss": 0.1599, |
| "step": 12280 |
| }, |
| { |
| "epoch": 2.458, |
| "grad_norm": 1.3649940490722656, |
| "learning_rate": 9.624513840947764e-07, |
| "loss": 0.1499, |
| "step": 12290 |
| }, |
| { |
| "epoch": 2.46, |
| "grad_norm": 1.173978328704834, |
| "learning_rate": 9.555990568447538e-07, |
| "loss": 0.1685, |
| "step": 12300 |
| }, |
| { |
| "epoch": 2.462, |
| "grad_norm": 1.5035467147827148, |
| "learning_rate": 9.487686317178241e-07, |
| "loss": 0.1905, |
| "step": 12310 |
| }, |
| { |
| "epoch": 2.464, |
| "grad_norm": 1.3181893825531006, |
| "learning_rate": 9.419601457035943e-07, |
| "loss": 0.1994, |
| "step": 12320 |
| }, |
| { |
| "epoch": 2.466, |
| "grad_norm": 1.2825194597244263, |
| "learning_rate": 9.351736356728657e-07, |
| "loss": 0.161, |
| "step": 12330 |
| }, |
| { |
| "epoch": 2.468, |
| "grad_norm": 2.407343626022339, |
| "learning_rate": 9.284091383774313e-07, |
| "loss": 0.1929, |
| "step": 12340 |
| }, |
| { |
| "epoch": 2.4699999999999998, |
| "grad_norm": 1.4245587587356567, |
| "learning_rate": 9.2166669044987e-07, |
| "loss": 0.154, |
| "step": 12350 |
| }, |
| { |
| "epoch": 2.472, |
| "grad_norm": 2.4359230995178223, |
| "learning_rate": 9.149463284033605e-07, |
| "loss": 0.1854, |
| "step": 12360 |
| }, |
| { |
| "epoch": 2.474, |
| "grad_norm": 1.9102237224578857, |
| "learning_rate": 9.08248088631476e-07, |
| "loss": 0.2134, |
| "step": 12370 |
| }, |
| { |
| "epoch": 2.476, |
| "grad_norm": 1.268130898475647, |
| "learning_rate": 9.015720074079837e-07, |
| "loss": 0.2093, |
| "step": 12380 |
| }, |
| { |
| "epoch": 2.4779999999999998, |
| "grad_norm": 1.4418933391571045, |
| "learning_rate": 8.949181208866581e-07, |
| "loss": 0.1573, |
| "step": 12390 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 1.8565024137496948, |
| "learning_rate": 8.882864651010798e-07, |
| "loss": 0.1778, |
| "step": 12400 |
| }, |
| { |
| "epoch": 2.482, |
| "grad_norm": 1.8131656646728516, |
| "learning_rate": 8.816770759644361e-07, |
| "loss": 0.1609, |
| "step": 12410 |
| }, |
| { |
| "epoch": 2.484, |
| "grad_norm": 1.573459506034851, |
| "learning_rate": 8.750899892693376e-07, |
| "loss": 0.155, |
| "step": 12420 |
| }, |
| { |
| "epoch": 2.4859999999999998, |
| "grad_norm": 1.8090794086456299, |
| "learning_rate": 8.685252406876116e-07, |
| "loss": 0.2023, |
| "step": 12430 |
| }, |
| { |
| "epoch": 2.488, |
| "grad_norm": 1.2365047931671143, |
| "learning_rate": 8.61982865770119e-07, |
| "loss": 0.1746, |
| "step": 12440 |
| }, |
| { |
| "epoch": 2.49, |
| "grad_norm": 1.4933933019638062, |
| "learning_rate": 8.554628999465592e-07, |
| "loss": 0.1726, |
| "step": 12450 |
| }, |
| { |
| "epoch": 2.492, |
| "grad_norm": 1.6978332996368408, |
| "learning_rate": 8.489653785252711e-07, |
| "loss": 0.1853, |
| "step": 12460 |
| }, |
| { |
| "epoch": 2.4939999999999998, |
| "grad_norm": 1.4669007062911987, |
| "learning_rate": 8.424903366930531e-07, |
| "loss": 0.22, |
| "step": 12470 |
| }, |
| { |
| "epoch": 2.496, |
| "grad_norm": 1.660845398902893, |
| "learning_rate": 8.360378095149674e-07, |
| "loss": 0.1646, |
| "step": 12480 |
| }, |
| { |
| "epoch": 2.498, |
| "grad_norm": 1.6666799783706665, |
| "learning_rate": 8.296078319341444e-07, |
| "loss": 0.1526, |
| "step": 12490 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 1.4847021102905273, |
| "learning_rate": 8.232004387716053e-07, |
| "loss": 0.1516, |
| "step": 12500 |
| }, |
| { |
| "epoch": 2.502, |
| "grad_norm": 1.512980580329895, |
| "learning_rate": 8.16815664726065e-07, |
| "loss": 0.1886, |
| "step": 12510 |
| }, |
| { |
| "epoch": 2.504, |
| "grad_norm": 1.0014886856079102, |
| "learning_rate": 8.104535443737438e-07, |
| "loss": 0.151, |
| "step": 12520 |
| }, |
| { |
| "epoch": 2.5060000000000002, |
| "grad_norm": 1.831697940826416, |
| "learning_rate": 8.041141121681867e-07, |
| "loss": 0.1517, |
| "step": 12530 |
| }, |
| { |
| "epoch": 2.508, |
| "grad_norm": 1.6552419662475586, |
| "learning_rate": 7.977974024400703e-07, |
| "loss": 0.1864, |
| "step": 12540 |
| }, |
| { |
| "epoch": 2.51, |
| "grad_norm": 2.3965301513671875, |
| "learning_rate": 7.91503449397022e-07, |
| "loss": 0.1646, |
| "step": 12550 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 2.3798716068267822, |
| "learning_rate": 7.852322871234286e-07, |
| "loss": 0.1702, |
| "step": 12560 |
| }, |
| { |
| "epoch": 2.5140000000000002, |
| "grad_norm": 1.1681225299835205, |
| "learning_rate": 7.789839495802581e-07, |
| "loss": 0.1726, |
| "step": 12570 |
| }, |
| { |
| "epoch": 2.516, |
| "grad_norm": 1.7020608186721802, |
| "learning_rate": 7.727584706048735e-07, |
| "loss": 0.1862, |
| "step": 12580 |
| }, |
| { |
| "epoch": 2.518, |
| "grad_norm": 1.7006206512451172, |
| "learning_rate": 7.665558839108467e-07, |
| "loss": 0.1638, |
| "step": 12590 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 1.530461072921753, |
| "learning_rate": 7.603762230877776e-07, |
| "loss": 0.1511, |
| "step": 12600 |
| }, |
| { |
| "epoch": 2.5220000000000002, |
| "grad_norm": 1.7536654472351074, |
| "learning_rate": 7.542195216011188e-07, |
| "loss": 0.1693, |
| "step": 12610 |
| }, |
| { |
| "epoch": 2.524, |
| "grad_norm": 2.3601443767547607, |
| "learning_rate": 7.480858127919821e-07, |
| "loss": 0.1946, |
| "step": 12620 |
| }, |
| { |
| "epoch": 2.526, |
| "grad_norm": 1.876556158065796, |
| "learning_rate": 7.419751298769667e-07, |
| "loss": 0.1628, |
| "step": 12630 |
| }, |
| { |
| "epoch": 2.528, |
| "grad_norm": 1.4596962928771973, |
| "learning_rate": 7.358875059479792e-07, |
| "loss": 0.1703, |
| "step": 12640 |
| }, |
| { |
| "epoch": 2.5300000000000002, |
| "grad_norm": 1.8861957788467407, |
| "learning_rate": 7.29822973972047e-07, |
| "loss": 0.1865, |
| "step": 12650 |
| }, |
| { |
| "epoch": 2.532, |
| "grad_norm": 1.5087050199508667, |
| "learning_rate": 7.237815667911502e-07, |
| "loss": 0.1665, |
| "step": 12660 |
| }, |
| { |
| "epoch": 2.534, |
| "grad_norm": 1.47129487991333, |
| "learning_rate": 7.177633171220339e-07, |
| "loss": 0.1728, |
| "step": 12670 |
| }, |
| { |
| "epoch": 2.536, |
| "grad_norm": 2.511249542236328, |
| "learning_rate": 7.117682575560386e-07, |
| "loss": 0.2098, |
| "step": 12680 |
| }, |
| { |
| "epoch": 2.5380000000000003, |
| "grad_norm": 1.4766125679016113, |
| "learning_rate": 7.057964205589218e-07, |
| "loss": 0.1697, |
| "step": 12690 |
| }, |
| { |
| "epoch": 2.54, |
| "grad_norm": 2.0010459423065186, |
| "learning_rate": 6.99847838470677e-07, |
| "loss": 0.2231, |
| "step": 12700 |
| }, |
| { |
| "epoch": 2.542, |
| "grad_norm": 2.9428231716156006, |
| "learning_rate": 6.939225435053648e-07, |
| "loss": 0.2108, |
| "step": 12710 |
| }, |
| { |
| "epoch": 2.544, |
| "grad_norm": 2.4707274436950684, |
| "learning_rate": 6.880205677509383e-07, |
| "loss": 0.207, |
| "step": 12720 |
| }, |
| { |
| "epoch": 2.5460000000000003, |
| "grad_norm": 1.7522751092910767, |
| "learning_rate": 6.821419431690629e-07, |
| "loss": 0.1804, |
| "step": 12730 |
| }, |
| { |
| "epoch": 2.548, |
| "grad_norm": 2.5205798149108887, |
| "learning_rate": 6.762867015949514e-07, |
| "loss": 0.2039, |
| "step": 12740 |
| }, |
| { |
| "epoch": 2.55, |
| "grad_norm": 2.847193479537964, |
| "learning_rate": 6.704548747371869e-07, |
| "loss": 0.1945, |
| "step": 12750 |
| }, |
| { |
| "epoch": 2.552, |
| "grad_norm": 1.2266204357147217, |
| "learning_rate": 6.646464941775499e-07, |
| "loss": 0.178, |
| "step": 12760 |
| }, |
| { |
| "epoch": 2.5540000000000003, |
| "grad_norm": 1.5307732820510864, |
| "learning_rate": 6.588615913708524e-07, |
| "loss": 0.1616, |
| "step": 12770 |
| }, |
| { |
| "epoch": 2.556, |
| "grad_norm": 1.3956282138824463, |
| "learning_rate": 6.531001976447637e-07, |
| "loss": 0.1128, |
| "step": 12780 |
| }, |
| { |
| "epoch": 2.558, |
| "grad_norm": 1.526992678642273, |
| "learning_rate": 6.47362344199639e-07, |
| "loss": 0.2029, |
| "step": 12790 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 1.8423537015914917, |
| "learning_rate": 6.416480621083582e-07, |
| "loss": 0.1969, |
| "step": 12800 |
| }, |
| { |
| "epoch": 2.5620000000000003, |
| "grad_norm": 1.3227084875106812, |
| "learning_rate": 6.359573823161457e-07, |
| "loss": 0.2046, |
| "step": 12810 |
| }, |
| { |
| "epoch": 2.564, |
| "grad_norm": 2.2461440563201904, |
| "learning_rate": 6.302903356404161e-07, |
| "loss": 0.2142, |
| "step": 12820 |
| }, |
| { |
| "epoch": 2.566, |
| "grad_norm": 0.9911752939224243, |
| "learning_rate": 6.246469527705978e-07, |
| "loss": 0.1574, |
| "step": 12830 |
| }, |
| { |
| "epoch": 2.568, |
| "grad_norm": 1.4631303548812866, |
| "learning_rate": 6.190272642679674e-07, |
| "loss": 0.1516, |
| "step": 12840 |
| }, |
| { |
| "epoch": 2.57, |
| "grad_norm": 1.7750544548034668, |
| "learning_rate": 6.134313005654929e-07, |
| "loss": 0.1454, |
| "step": 12850 |
| }, |
| { |
| "epoch": 2.572, |
| "grad_norm": 2.154233455657959, |
| "learning_rate": 6.078590919676575e-07, |
| "loss": 0.1687, |
| "step": 12860 |
| }, |
| { |
| "epoch": 2.574, |
| "grad_norm": 1.879696011543274, |
| "learning_rate": 6.023106686502988e-07, |
| "loss": 0.1999, |
| "step": 12870 |
| }, |
| { |
| "epoch": 2.576, |
| "grad_norm": 1.5651618242263794, |
| "learning_rate": 5.967860606604553e-07, |
| "loss": 0.1647, |
| "step": 12880 |
| }, |
| { |
| "epoch": 2.578, |
| "grad_norm": 1.1055610179901123, |
| "learning_rate": 5.912852979161876e-07, |
| "loss": 0.1461, |
| "step": 12890 |
| }, |
| { |
| "epoch": 2.58, |
| "grad_norm": 1.6748799085617065, |
| "learning_rate": 5.858084102064271e-07, |
| "loss": 0.1708, |
| "step": 12900 |
| }, |
| { |
| "epoch": 2.582, |
| "grad_norm": 1.5193976163864136, |
| "learning_rate": 5.803554271908124e-07, |
| "loss": 0.172, |
| "step": 12910 |
| }, |
| { |
| "epoch": 2.584, |
| "grad_norm": 1.606832504272461, |
| "learning_rate": 5.749263783995279e-07, |
| "loss": 0.1459, |
| "step": 12920 |
| }, |
| { |
| "epoch": 2.586, |
| "grad_norm": 2.012147903442383, |
| "learning_rate": 5.69521293233145e-07, |
| "loss": 0.1558, |
| "step": 12930 |
| }, |
| { |
| "epoch": 2.588, |
| "grad_norm": 1.5900416374206543, |
| "learning_rate": 5.641402009624591e-07, |
| "loss": 0.2086, |
| "step": 12940 |
| }, |
| { |
| "epoch": 2.59, |
| "grad_norm": 2.680753469467163, |
| "learning_rate": 5.587831307283375e-07, |
| "loss": 0.1668, |
| "step": 12950 |
| }, |
| { |
| "epoch": 2.592, |
| "grad_norm": 1.6326476335525513, |
| "learning_rate": 5.534501115415575e-07, |
| "loss": 0.2084, |
| "step": 12960 |
| }, |
| { |
| "epoch": 2.594, |
| "grad_norm": 1.771647572517395, |
| "learning_rate": 5.48141172282648e-07, |
| "loss": 0.1903, |
| "step": 12970 |
| }, |
| { |
| "epoch": 2.596, |
| "grad_norm": 1.6876575946807861, |
| "learning_rate": 5.428563417017335e-07, |
| "loss": 0.1602, |
| "step": 12980 |
| }, |
| { |
| "epoch": 2.598, |
| "grad_norm": 1.6764187812805176, |
| "learning_rate": 5.375956484183875e-07, |
| "loss": 0.2182, |
| "step": 12990 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 2.2477219104766846, |
| "learning_rate": 5.323591209214612e-07, |
| "loss": 0.1858, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.602, |
| "grad_norm": 1.2577763795852661, |
| "learning_rate": 5.271467875689429e-07, |
| "loss": 0.1794, |
| "step": 13010 |
| }, |
| { |
| "epoch": 2.604, |
| "grad_norm": 2.3537323474884033, |
| "learning_rate": 5.219586765877998e-07, |
| "loss": 0.1936, |
| "step": 13020 |
| }, |
| { |
| "epoch": 2.606, |
| "grad_norm": 2.4193389415740967, |
| "learning_rate": 5.167948160738206e-07, |
| "loss": 0.193, |
| "step": 13030 |
| }, |
| { |
| "epoch": 2.608, |
| "grad_norm": 1.678281545639038, |
| "learning_rate": 5.116552339914726e-07, |
| "loss": 0.1599, |
| "step": 13040 |
| }, |
| { |
| "epoch": 2.61, |
| "grad_norm": 2.4545416831970215, |
| "learning_rate": 5.065399581737412e-07, |
| "loss": 0.2522, |
| "step": 13050 |
| }, |
| { |
| "epoch": 2.612, |
| "grad_norm": 1.6324633359909058, |
| "learning_rate": 5.014490163219854e-07, |
| "loss": 0.1548, |
| "step": 13060 |
| }, |
| { |
| "epoch": 2.614, |
| "grad_norm": 1.7150189876556396, |
| "learning_rate": 4.963824360057868e-07, |
| "loss": 0.1563, |
| "step": 13070 |
| }, |
| { |
| "epoch": 2.616, |
| "grad_norm": 1.821446418762207, |
| "learning_rate": 4.913402446627946e-07, |
| "loss": 0.1644, |
| "step": 13080 |
| }, |
| { |
| "epoch": 2.618, |
| "grad_norm": 1.215470552444458, |
| "learning_rate": 4.863224695985858e-07, |
| "loss": 0.1826, |
| "step": 13090 |
| }, |
| { |
| "epoch": 2.62, |
| "grad_norm": 1.6227540969848633, |
| "learning_rate": 4.813291379865126e-07, |
| "loss": 0.2281, |
| "step": 13100 |
| }, |
| { |
| "epoch": 2.622, |
| "grad_norm": 2.389894723892212, |
| "learning_rate": 4.763602768675529e-07, |
| "loss": 0.1783, |
| "step": 13110 |
| }, |
| { |
| "epoch": 2.624, |
| "grad_norm": 2.125441312789917, |
| "learning_rate": 4.714159131501689e-07, |
| "loss": 0.2042, |
| "step": 13120 |
| }, |
| { |
| "epoch": 2.626, |
| "grad_norm": 1.7118359804153442, |
| "learning_rate": 4.664960736101598e-07, |
| "loss": 0.1868, |
| "step": 13130 |
| }, |
| { |
| "epoch": 2.628, |
| "grad_norm": 2.2027170658111572, |
| "learning_rate": 4.61600784890513e-07, |
| "loss": 0.1808, |
| "step": 13140 |
| }, |
| { |
| "epoch": 2.63, |
| "grad_norm": 1.8995624780654907, |
| "learning_rate": 4.567300735012653e-07, |
| "loss": 0.1864, |
| "step": 13150 |
| }, |
| { |
| "epoch": 2.632, |
| "grad_norm": 1.7229032516479492, |
| "learning_rate": 4.5188396581935856e-07, |
| "loss": 0.1915, |
| "step": 13160 |
| }, |
| { |
| "epoch": 2.634, |
| "grad_norm": 2.252855062484741, |
| "learning_rate": 4.470624880884905e-07, |
| "loss": 0.2062, |
| "step": 13170 |
| }, |
| { |
| "epoch": 2.636, |
| "grad_norm": 1.9822123050689697, |
| "learning_rate": 4.4226566641898173e-07, |
| "loss": 0.1733, |
| "step": 13180 |
| }, |
| { |
| "epoch": 2.638, |
| "grad_norm": 1.441658854484558, |
| "learning_rate": 4.37493526787629e-07, |
| "loss": 0.1911, |
| "step": 13190 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 1.633154273033142, |
| "learning_rate": 4.327460950375623e-07, |
| "loss": 0.1701, |
| "step": 13200 |
| }, |
| { |
| "epoch": 2.642, |
| "grad_norm": 1.7371437549591064, |
| "learning_rate": 4.280233968781139e-07, |
| "loss": 0.1534, |
| "step": 13210 |
| }, |
| { |
| "epoch": 2.644, |
| "grad_norm": 2.687547445297241, |
| "learning_rate": 4.233254578846657e-07, |
| "loss": 0.1954, |
| "step": 13220 |
| }, |
| { |
| "epoch": 2.646, |
| "grad_norm": 1.7198002338409424, |
| "learning_rate": 4.186523034985279e-07, |
| "loss": 0.172, |
| "step": 13230 |
| }, |
| { |
| "epoch": 2.648, |
| "grad_norm": 1.4830220937728882, |
| "learning_rate": 4.140039590267836e-07, |
| "loss": 0.2197, |
| "step": 13240 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 2.4155113697052, |
| "learning_rate": 4.0938044964216164e-07, |
| "loss": 0.1562, |
| "step": 13250 |
| }, |
| { |
| "epoch": 2.652, |
| "grad_norm": 2.1205966472625732, |
| "learning_rate": 4.0478180038290296e-07, |
| "loss": 0.1917, |
| "step": 13260 |
| }, |
| { |
| "epoch": 2.654, |
| "grad_norm": 1.5603994131088257, |
| "learning_rate": 4.002080361526156e-07, |
| "loss": 0.1492, |
| "step": 13270 |
| }, |
| { |
| "epoch": 2.656, |
| "grad_norm": 2.272676467895508, |
| "learning_rate": 3.9565918172014495e-07, |
| "loss": 0.1571, |
| "step": 13280 |
| }, |
| { |
| "epoch": 2.658, |
| "grad_norm": 2.0802717208862305, |
| "learning_rate": 3.9113526171944195e-07, |
| "loss": 0.1503, |
| "step": 13290 |
| }, |
| { |
| "epoch": 2.66, |
| "grad_norm": 1.6078263521194458, |
| "learning_rate": 3.866363006494256e-07, |
| "loss": 0.1724, |
| "step": 13300 |
| }, |
| { |
| "epoch": 2.662, |
| "grad_norm": 2.419299364089966, |
| "learning_rate": 3.821623228738536e-07, |
| "loss": 0.188, |
| "step": 13310 |
| }, |
| { |
| "epoch": 2.664, |
| "grad_norm": 2.157850742340088, |
| "learning_rate": 3.777133526211857e-07, |
| "loss": 0.1876, |
| "step": 13320 |
| }, |
| { |
| "epoch": 2.666, |
| "grad_norm": 2.2567625045776367, |
| "learning_rate": 3.732894139844578e-07, |
| "loss": 0.1865, |
| "step": 13330 |
| }, |
| { |
| "epoch": 2.668, |
| "grad_norm": 1.955741047859192, |
| "learning_rate": 3.688905309211488e-07, |
| "loss": 0.1781, |
| "step": 13340 |
| }, |
| { |
| "epoch": 2.67, |
| "grad_norm": 1.4101120233535767, |
| "learning_rate": 3.6451672725304974e-07, |
| "loss": 0.1871, |
| "step": 13350 |
| }, |
| { |
| "epoch": 2.672, |
| "grad_norm": 2.0826516151428223, |
| "learning_rate": 3.601680266661367e-07, |
| "loss": 0.1913, |
| "step": 13360 |
| }, |
| { |
| "epoch": 2.674, |
| "grad_norm": 2.287477493286133, |
| "learning_rate": 3.5584445271044544e-07, |
| "loss": 0.2232, |
| "step": 13370 |
| }, |
| { |
| "epoch": 2.676, |
| "grad_norm": 2.551602602005005, |
| "learning_rate": 3.515460287999345e-07, |
| "loss": 0.1898, |
| "step": 13380 |
| }, |
| { |
| "epoch": 2.678, |
| "grad_norm": 1.6702704429626465, |
| "learning_rate": 3.472727782123697e-07, |
| "loss": 0.1873, |
| "step": 13390 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 1.998603105545044, |
| "learning_rate": 3.430247240891904e-07, |
| "loss": 0.2089, |
| "step": 13400 |
| }, |
| { |
| "epoch": 2.682, |
| "grad_norm": 2.5154683589935303, |
| "learning_rate": 3.3880188943538617e-07, |
| "loss": 0.1813, |
| "step": 13410 |
| }, |
| { |
| "epoch": 2.684, |
| "grad_norm": 1.4025160074234009, |
| "learning_rate": 3.3460429711937417e-07, |
| "loss": 0.1896, |
| "step": 13420 |
| }, |
| { |
| "epoch": 2.686, |
| "grad_norm": 1.7359564304351807, |
| "learning_rate": 3.304319698728714e-07, |
| "loss": 0.1729, |
| "step": 13430 |
| }, |
| { |
| "epoch": 2.6879999999999997, |
| "grad_norm": 1.8465240001678467, |
| "learning_rate": 3.262849302907767e-07, |
| "loss": 0.1519, |
| "step": 13440 |
| }, |
| { |
| "epoch": 2.69, |
| "grad_norm": 1.9284135103225708, |
| "learning_rate": 3.2216320083104434e-07, |
| "loss": 0.2169, |
| "step": 13450 |
| }, |
| { |
| "epoch": 2.692, |
| "grad_norm": 1.5323783159255981, |
| "learning_rate": 3.180668038145629e-07, |
| "loss": 0.1498, |
| "step": 13460 |
| }, |
| { |
| "epoch": 2.694, |
| "grad_norm": 1.5375773906707764, |
| "learning_rate": 3.1399576142503606e-07, |
| "loss": 0.2163, |
| "step": 13470 |
| }, |
| { |
| "epoch": 2.6959999999999997, |
| "grad_norm": 2.2655889987945557, |
| "learning_rate": 3.0995009570886305e-07, |
| "loss": 0.1614, |
| "step": 13480 |
| }, |
| { |
| "epoch": 2.698, |
| "grad_norm": 2.173973560333252, |
| "learning_rate": 3.05929828575014e-07, |
| "loss": 0.155, |
| "step": 13490 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 1.8570374250411987, |
| "learning_rate": 3.01934981794918e-07, |
| "loss": 0.1896, |
| "step": 13500 |
| }, |
| { |
| "epoch": 2.702, |
| "grad_norm": 1.7890677452087402, |
| "learning_rate": 2.9796557700234317e-07, |
| "loss": 0.1941, |
| "step": 13510 |
| }, |
| { |
| "epoch": 2.7039999999999997, |
| "grad_norm": 1.8128156661987305, |
| "learning_rate": 2.940216356932746e-07, |
| "loss": 0.2079, |
| "step": 13520 |
| }, |
| { |
| "epoch": 2.706, |
| "grad_norm": 1.5485886335372925, |
| "learning_rate": 2.901031792258058e-07, |
| "loss": 0.1603, |
| "step": 13530 |
| }, |
| { |
| "epoch": 2.708, |
| "grad_norm": 1.4785550832748413, |
| "learning_rate": 2.862102288200186e-07, |
| "loss": 0.2021, |
| "step": 13540 |
| }, |
| { |
| "epoch": 2.71, |
| "grad_norm": 2.2677414417266846, |
| "learning_rate": 2.823428055578664e-07, |
| "loss": 0.1853, |
| "step": 13550 |
| }, |
| { |
| "epoch": 2.7119999999999997, |
| "grad_norm": 1.6280418634414673, |
| "learning_rate": 2.7850093038306493e-07, |
| "loss": 0.164, |
| "step": 13560 |
| }, |
| { |
| "epoch": 2.714, |
| "grad_norm": 1.7581329345703125, |
| "learning_rate": 2.746846241009765e-07, |
| "loss": 0.2056, |
| "step": 13570 |
| }, |
| { |
| "epoch": 2.716, |
| "grad_norm": 1.453389286994934, |
| "learning_rate": 2.7089390737849386e-07, |
| "loss": 0.1762, |
| "step": 13580 |
| }, |
| { |
| "epoch": 2.718, |
| "grad_norm": 1.141204595565796, |
| "learning_rate": 2.67128800743936e-07, |
| "loss": 0.2031, |
| "step": 13590 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 1.8374614715576172, |
| "learning_rate": 2.6338932458692847e-07, |
| "loss": 0.1961, |
| "step": 13600 |
| }, |
| { |
| "epoch": 2.722, |
| "grad_norm": 1.7040131092071533, |
| "learning_rate": 2.596754991583017e-07, |
| "loss": 0.1614, |
| "step": 13610 |
| }, |
| { |
| "epoch": 2.724, |
| "grad_norm": 1.686630368232727, |
| "learning_rate": 2.5598734456997287e-07, |
| "loss": 0.1527, |
| "step": 13620 |
| }, |
| { |
| "epoch": 2.726, |
| "grad_norm": 1.6243407726287842, |
| "learning_rate": 2.523248807948403e-07, |
| "loss": 0.164, |
| "step": 13630 |
| }, |
| { |
| "epoch": 2.7279999999999998, |
| "grad_norm": 2.1291511058807373, |
| "learning_rate": 2.486881276666808e-07, |
| "loss": 0.1484, |
| "step": 13640 |
| }, |
| { |
| "epoch": 2.73, |
| "grad_norm": 1.8856449127197266, |
| "learning_rate": 2.4507710488003155e-07, |
| "loss": 0.2218, |
| "step": 13650 |
| }, |
| { |
| "epoch": 2.732, |
| "grad_norm": 1.4211268424987793, |
| "learning_rate": 2.414918319900922e-07, |
| "loss": 0.1666, |
| "step": 13660 |
| }, |
| { |
| "epoch": 2.734, |
| "grad_norm": 1.9689491987228394, |
| "learning_rate": 2.379323284126156e-07, |
| "loss": 0.2322, |
| "step": 13670 |
| }, |
| { |
| "epoch": 2.7359999999999998, |
| "grad_norm": 1.373671054840088, |
| "learning_rate": 2.3439861342380243e-07, |
| "loss": 0.1145, |
| "step": 13680 |
| }, |
| { |
| "epoch": 2.738, |
| "grad_norm": 1.6998653411865234, |
| "learning_rate": 2.3089070616019838e-07, |
| "loss": 0.1484, |
| "step": 13690 |
| }, |
| { |
| "epoch": 2.74, |
| "grad_norm": 2.0054614543914795, |
| "learning_rate": 2.2740862561858812e-07, |
| "loss": 0.1963, |
| "step": 13700 |
| }, |
| { |
| "epoch": 2.742, |
| "grad_norm": 2.4972550868988037, |
| "learning_rate": 2.2395239065589436e-07, |
| "loss": 0.1747, |
| "step": 13710 |
| }, |
| { |
| "epoch": 2.7439999999999998, |
| "grad_norm": 1.2388681173324585, |
| "learning_rate": 2.2052201998907673e-07, |
| "loss": 0.1661, |
| "step": 13720 |
| }, |
| { |
| "epoch": 2.746, |
| "grad_norm": 1.4141393899917603, |
| "learning_rate": 2.1711753219502584e-07, |
| "loss": 0.1864, |
| "step": 13730 |
| }, |
| { |
| "epoch": 2.748, |
| "grad_norm": 1.2588516473770142, |
| "learning_rate": 2.1373894571046772e-07, |
| "loss": 0.1865, |
| "step": 13740 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 2.5091469287872314, |
| "learning_rate": 2.103862788318628e-07, |
| "loss": 0.1887, |
| "step": 13750 |
| }, |
| { |
| "epoch": 2.752, |
| "grad_norm": 1.595197081565857, |
| "learning_rate": 2.0705954971530385e-07, |
| "loss": 0.1678, |
| "step": 13760 |
| }, |
| { |
| "epoch": 2.754, |
| "grad_norm": 2.847097396850586, |
| "learning_rate": 2.0375877637642038e-07, |
| "loss": 0.1733, |
| "step": 13770 |
| }, |
| { |
| "epoch": 2.7560000000000002, |
| "grad_norm": 1.9094551801681519, |
| "learning_rate": 2.0048397669028164e-07, |
| "loss": 0.1525, |
| "step": 13780 |
| }, |
| { |
| "epoch": 2.758, |
| "grad_norm": 1.487987995147705, |
| "learning_rate": 1.9723516839129765e-07, |
| "loss": 0.1754, |
| "step": 13790 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 2.1492459774017334, |
| "learning_rate": 1.9401236907312437e-07, |
| "loss": 0.1516, |
| "step": 13800 |
| }, |
| { |
| "epoch": 2.762, |
| "grad_norm": 1.7400575876235962, |
| "learning_rate": 1.9081559618856938e-07, |
| "loss": 0.1693, |
| "step": 13810 |
| }, |
| { |
| "epoch": 2.7640000000000002, |
| "grad_norm": 1.5171923637390137, |
| "learning_rate": 1.8764486704949402e-07, |
| "loss": 0.1957, |
| "step": 13820 |
| }, |
| { |
| "epoch": 2.766, |
| "grad_norm": 2.1869702339172363, |
| "learning_rate": 1.8450019882672366e-07, |
| "loss": 0.1919, |
| "step": 13830 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 1.4110556840896606, |
| "learning_rate": 1.8138160854995145e-07, |
| "loss": 0.195, |
| "step": 13840 |
| }, |
| { |
| "epoch": 2.77, |
| "grad_norm": 1.9794977903366089, |
| "learning_rate": 1.7828911310764974e-07, |
| "loss": 0.1523, |
| "step": 13850 |
| }, |
| { |
| "epoch": 2.7720000000000002, |
| "grad_norm": 1.8616331815719604, |
| "learning_rate": 1.7522272924697438e-07, |
| "loss": 0.1648, |
| "step": 13860 |
| }, |
| { |
| "epoch": 2.774, |
| "grad_norm": 1.7763707637786865, |
| "learning_rate": 1.7218247357367656e-07, |
| "loss": 0.1844, |
| "step": 13870 |
| }, |
| { |
| "epoch": 2.776, |
| "grad_norm": 1.4819947481155396, |
| "learning_rate": 1.6916836255201297e-07, |
| "loss": 0.1744, |
| "step": 13880 |
| }, |
| { |
| "epoch": 2.778, |
| "grad_norm": 1.3867019414901733, |
| "learning_rate": 1.6618041250465623e-07, |
| "loss": 0.1908, |
| "step": 13890 |
| }, |
| { |
| "epoch": 2.7800000000000002, |
| "grad_norm": 2.0330920219421387, |
| "learning_rate": 1.6321863961260452e-07, |
| "loss": 0.1612, |
| "step": 13900 |
| }, |
| { |
| "epoch": 2.782, |
| "grad_norm": 2.5724823474884033, |
| "learning_rate": 1.602830599150984e-07, |
| "loss": 0.2228, |
| "step": 13910 |
| }, |
| { |
| "epoch": 2.784, |
| "grad_norm": 1.4262503385543823, |
| "learning_rate": 1.573736893095301e-07, |
| "loss": 0.1397, |
| "step": 13920 |
| }, |
| { |
| "epoch": 2.786, |
| "grad_norm": 1.5158369541168213, |
| "learning_rate": 1.5449054355135718e-07, |
| "loss": 0.1641, |
| "step": 13930 |
| }, |
| { |
| "epoch": 2.7880000000000003, |
| "grad_norm": 1.3711594343185425, |
| "learning_rate": 1.5163363825402121e-07, |
| "loss": 0.213, |
| "step": 13940 |
| }, |
| { |
| "epoch": 2.79, |
| "grad_norm": 1.3406068086624146, |
| "learning_rate": 1.488029888888598e-07, |
| "loss": 0.1991, |
| "step": 13950 |
| }, |
| { |
| "epoch": 2.792, |
| "grad_norm": 1.1743435859680176, |
| "learning_rate": 1.459986107850231e-07, |
| "loss": 0.1848, |
| "step": 13960 |
| }, |
| { |
| "epoch": 2.794, |
| "grad_norm": 1.5763418674468994, |
| "learning_rate": 1.4322051912939173e-07, |
| "loss": 0.166, |
| "step": 13970 |
| }, |
| { |
| "epoch": 2.7960000000000003, |
| "grad_norm": 1.492067813873291, |
| "learning_rate": 1.40468728966493e-07, |
| "loss": 0.1746, |
| "step": 13980 |
| }, |
| { |
| "epoch": 2.798, |
| "grad_norm": 1.7857205867767334, |
| "learning_rate": 1.3774325519842423e-07, |
| "loss": 0.2087, |
| "step": 13990 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 2.416992425918579, |
| "learning_rate": 1.3504411258476514e-07, |
| "loss": 0.1844, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.802, |
| "grad_norm": 2.181525945663452, |
| "learning_rate": 1.323713157425005e-07, |
| "loss": 0.1949, |
| "step": 14010 |
| }, |
| { |
| "epoch": 2.8040000000000003, |
| "grad_norm": 1.5728259086608887, |
| "learning_rate": 1.29724879145946e-07, |
| "loss": 0.1979, |
| "step": 14020 |
| }, |
| { |
| "epoch": 2.806, |
| "grad_norm": 1.9201794862747192, |
| "learning_rate": 1.2710481712666144e-07, |
| "loss": 0.1512, |
| "step": 14030 |
| }, |
| { |
| "epoch": 2.808, |
| "grad_norm": 0.9020956158638, |
| "learning_rate": 1.245111438733798e-07, |
| "loss": 0.1687, |
| "step": 14040 |
| }, |
| { |
| "epoch": 2.81, |
| "grad_norm": 1.2932790517807007, |
| "learning_rate": 1.2194387343192504e-07, |
| "loss": 0.1652, |
| "step": 14050 |
| }, |
| { |
| "epoch": 2.8120000000000003, |
| "grad_norm": 1.8155263662338257, |
| "learning_rate": 1.194030197051421e-07, |
| "loss": 0.1753, |
| "step": 14060 |
| }, |
| { |
| "epoch": 2.814, |
| "grad_norm": 2.2060601711273193, |
| "learning_rate": 1.1688859645281659e-07, |
| "loss": 0.1405, |
| "step": 14070 |
| }, |
| { |
| "epoch": 2.816, |
| "grad_norm": 1.731437087059021, |
| "learning_rate": 1.1440061729160235e-07, |
| "loss": 0.1529, |
| "step": 14080 |
| }, |
| { |
| "epoch": 2.818, |
| "grad_norm": 2.3887579441070557, |
| "learning_rate": 1.1193909569494676e-07, |
| "loss": 0.2014, |
| "step": 14090 |
| }, |
| { |
| "epoch": 2.82, |
| "grad_norm": 1.5371918678283691, |
| "learning_rate": 1.0950404499302015e-07, |
| "loss": 0.1534, |
| "step": 14100 |
| }, |
| { |
| "epoch": 2.822, |
| "grad_norm": 1.737833857536316, |
| "learning_rate": 1.0709547837263967e-07, |
| "loss": 0.1727, |
| "step": 14110 |
| }, |
| { |
| "epoch": 2.824, |
| "grad_norm": 1.5639506578445435, |
| "learning_rate": 1.0471340887720171e-07, |
| "loss": 0.1899, |
| "step": 14120 |
| }, |
| { |
| "epoch": 2.826, |
| "grad_norm": 1.4468129873275757, |
| "learning_rate": 1.0235784940660965e-07, |
| "loss": 0.1617, |
| "step": 14130 |
| }, |
| { |
| "epoch": 2.828, |
| "grad_norm": 1.7118704319000244, |
| "learning_rate": 1.0002881271720222e-07, |
| "loss": 0.1679, |
| "step": 14140 |
| }, |
| { |
| "epoch": 2.83, |
| "grad_norm": 1.9047939777374268, |
| "learning_rate": 9.772631142168864e-08, |
| "loss": 0.196, |
| "step": 14150 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 2.239691734313965, |
| "learning_rate": 9.545035798907642e-08, |
| "loss": 0.1941, |
| "step": 14160 |
| }, |
| { |
| "epoch": 2.834, |
| "grad_norm": 2.0404582023620605, |
| "learning_rate": 9.320096474460527e-08, |
| "loss": 0.1634, |
| "step": 14170 |
| }, |
| { |
| "epoch": 2.836, |
| "grad_norm": 2.4190192222595215, |
| "learning_rate": 9.097814386968052e-08, |
| "loss": 0.2195, |
| "step": 14180 |
| }, |
| { |
| "epoch": 2.838, |
| "grad_norm": 2.054330348968506, |
| "learning_rate": 8.878190740180759e-08, |
| "loss": 0.1953, |
| "step": 14190 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 2.02219557762146, |
| "learning_rate": 8.661226723452542e-08, |
| "loss": 0.1761, |
| "step": 14200 |
| }, |
| { |
| "epoch": 2.842, |
| "grad_norm": 1.775745153427124, |
| "learning_rate": 8.446923511734317e-08, |
| "loss": 0.1449, |
| "step": 14210 |
| }, |
| { |
| "epoch": 2.844, |
| "grad_norm": 1.5100916624069214, |
| "learning_rate": 8.235282265567635e-08, |
| "loss": 0.1972, |
| "step": 14220 |
| }, |
| { |
| "epoch": 2.846, |
| "grad_norm": 2.3685286045074463, |
| "learning_rate": 8.02630413107841e-08, |
| "loss": 0.1673, |
| "step": 14230 |
| }, |
| { |
| "epoch": 2.848, |
| "grad_norm": 1.453798770904541, |
| "learning_rate": 7.819990239970654e-08, |
| "loss": 0.1724, |
| "step": 14240 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 1.8765348196029663, |
| "learning_rate": 7.616341709520359e-08, |
| "loss": 0.1887, |
| "step": 14250 |
| }, |
| { |
| "epoch": 2.852, |
| "grad_norm": 1.9411275386810303, |
| "learning_rate": 7.415359642569564e-08, |
| "loss": 0.1774, |
| "step": 14260 |
| }, |
| { |
| "epoch": 2.854, |
| "grad_norm": 1.686895489692688, |
| "learning_rate": 7.21704512752025e-08, |
| "loss": 0.1713, |
| "step": 14270 |
| }, |
| { |
| "epoch": 2.856, |
| "grad_norm": 1.6647790670394897, |
| "learning_rate": 7.021399238328452e-08, |
| "loss": 0.1696, |
| "step": 14280 |
| }, |
| { |
| "epoch": 2.858, |
| "grad_norm": 2.324164390563965, |
| "learning_rate": 6.828423034498488e-08, |
| "loss": 0.1702, |
| "step": 14290 |
| }, |
| { |
| "epoch": 2.86, |
| "grad_norm": 1.610374093055725, |
| "learning_rate": 6.638117561077295e-08, |
| "loss": 0.1636, |
| "step": 14300 |
| }, |
| { |
| "epoch": 2.862, |
| "grad_norm": 1.3403874635696411, |
| "learning_rate": 6.450483848648547e-08, |
| "loss": 0.1701, |
| "step": 14310 |
| }, |
| { |
| "epoch": 2.864, |
| "grad_norm": 2.045170545578003, |
| "learning_rate": 6.265522913327326e-08, |
| "loss": 0.1838, |
| "step": 14320 |
| }, |
| { |
| "epoch": 2.866, |
| "grad_norm": 1.726438045501709, |
| "learning_rate": 6.083235756754513e-08, |
| "loss": 0.1493, |
| "step": 14330 |
| }, |
| { |
| "epoch": 2.868, |
| "grad_norm": 1.6481523513793945, |
| "learning_rate": 5.903623366091349e-08, |
| "loss": 0.1588, |
| "step": 14340 |
| }, |
| { |
| "epoch": 2.87, |
| "grad_norm": 1.7567453384399414, |
| "learning_rate": 5.726686714013996e-08, |
| "loss": 0.156, |
| "step": 14350 |
| }, |
| { |
| "epoch": 2.872, |
| "grad_norm": 1.5608904361724854, |
| "learning_rate": 5.552426758708429e-08, |
| "loss": 0.212, |
| "step": 14360 |
| }, |
| { |
| "epoch": 2.874, |
| "grad_norm": 1.747316837310791, |
| "learning_rate": 5.380844443865274e-08, |
| "loss": 0.1706, |
| "step": 14370 |
| }, |
| { |
| "epoch": 2.876, |
| "grad_norm": 2.100895881652832, |
| "learning_rate": 5.2119406986745336e-08, |
| "loss": 0.168, |
| "step": 14380 |
| }, |
| { |
| "epoch": 2.878, |
| "grad_norm": 1.1627360582351685, |
| "learning_rate": 5.0457164378205914e-08, |
| "loss": 0.1368, |
| "step": 14390 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 1.7362346649169922, |
| "learning_rate": 4.882172561477438e-08, |
| "loss": 0.1627, |
| "step": 14400 |
| }, |
| { |
| "epoch": 2.882, |
| "grad_norm": 2.2970685958862305, |
| "learning_rate": 4.7213099553035655e-08, |
| "loss": 0.1751, |
| "step": 14410 |
| }, |
| { |
| "epoch": 2.884, |
| "grad_norm": 2.3094658851623535, |
| "learning_rate": 4.563129490437246e-08, |
| "loss": 0.2271, |
| "step": 14420 |
| }, |
| { |
| "epoch": 2.886, |
| "grad_norm": 1.8514689207077026, |
| "learning_rate": 4.4076320234919254e-08, |
| "loss": 0.162, |
| "step": 14430 |
| }, |
| { |
| "epoch": 2.888, |
| "grad_norm": 1.9428884983062744, |
| "learning_rate": 4.2548183965513415e-08, |
| "loss": 0.2024, |
| "step": 14440 |
| }, |
| { |
| "epoch": 2.89, |
| "grad_norm": 1.88101327419281, |
| "learning_rate": 4.104689437165354e-08, |
| "loss": 0.162, |
| "step": 14450 |
| }, |
| { |
| "epoch": 2.892, |
| "grad_norm": 3.1377224922180176, |
| "learning_rate": 3.957245958345013e-08, |
| "loss": 0.1987, |
| "step": 14460 |
| }, |
| { |
| "epoch": 2.894, |
| "grad_norm": 1.4738689661026, |
| "learning_rate": 3.812488758558386e-08, |
| "loss": 0.1797, |
| "step": 14470 |
| }, |
| { |
| "epoch": 2.896, |
| "grad_norm": 1.6967921257019043, |
| "learning_rate": 3.6704186217263457e-08, |
| "loss": 0.1748, |
| "step": 14480 |
| }, |
| { |
| "epoch": 2.898, |
| "grad_norm": 1.5632615089416504, |
| "learning_rate": 3.531036317218128e-08, |
| "loss": 0.1959, |
| "step": 14490 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 1.3949593305587769, |
| "learning_rate": 3.394342599847111e-08, |
| "loss": 0.1612, |
| "step": 14500 |
| }, |
| { |
| "epoch": 2.902, |
| "grad_norm": 1.8362523317337036, |
| "learning_rate": 3.260338209867153e-08, |
| "loss": 0.1713, |
| "step": 14510 |
| }, |
| { |
| "epoch": 2.904, |
| "grad_norm": 1.7234903573989868, |
| "learning_rate": 3.129023872967874e-08, |
| "loss": 0.1514, |
| "step": 14520 |
| }, |
| { |
| "epoch": 2.906, |
| "grad_norm": 1.4591097831726074, |
| "learning_rate": 3.0004003002714886e-08, |
| "loss": 0.1996, |
| "step": 14530 |
| }, |
| { |
| "epoch": 2.908, |
| "grad_norm": 2.5706465244293213, |
| "learning_rate": 2.8744681883284274e-08, |
| "loss": 0.1624, |
| "step": 14540 |
| }, |
| { |
| "epoch": 2.91, |
| "grad_norm": 1.6069506406784058, |
| "learning_rate": 2.7512282191136663e-08, |
| "loss": 0.1696, |
| "step": 14550 |
| }, |
| { |
| "epoch": 2.912, |
| "grad_norm": 1.6182852983474731, |
| "learning_rate": 2.6306810600233435e-08, |
| "loss": 0.1736, |
| "step": 14560 |
| }, |
| { |
| "epoch": 2.914, |
| "grad_norm": 1.7584065198898315, |
| "learning_rate": 2.5128273638706513e-08, |
| "loss": 0.1879, |
| "step": 14570 |
| }, |
| { |
| "epoch": 2.916, |
| "grad_norm": 2.2820541858673096, |
| "learning_rate": 2.3976677688827276e-08, |
| "loss": 0.194, |
| "step": 14580 |
| }, |
| { |
| "epoch": 2.918, |
| "grad_norm": 1.8409901857376099, |
| "learning_rate": 2.285202898696881e-08, |
| "loss": 0.208, |
| "step": 14590 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 2.1918840408325195, |
| "learning_rate": 2.175433362357482e-08, |
| "loss": 0.1869, |
| "step": 14600 |
| }, |
| { |
| "epoch": 2.922, |
| "grad_norm": 1.8916000127792358, |
| "learning_rate": 2.0683597543124655e-08, |
| "loss": 0.1531, |
| "step": 14610 |
| }, |
| { |
| "epoch": 2.924, |
| "grad_norm": 1.347509741783142, |
| "learning_rate": 1.963982654410279e-08, |
| "loss": 0.1879, |
| "step": 14620 |
| }, |
| { |
| "epoch": 2.926, |
| "grad_norm": 1.712788701057434, |
| "learning_rate": 1.862302627896495e-08, |
| "loss": 0.1711, |
| "step": 14630 |
| }, |
| { |
| "epoch": 2.928, |
| "grad_norm": 2.293809175491333, |
| "learning_rate": 1.763320225411036e-08, |
| "loss": 0.2065, |
| "step": 14640 |
| }, |
| { |
| "epoch": 2.93, |
| "grad_norm": 2.585679531097412, |
| "learning_rate": 1.6670359829850657e-08, |
| "loss": 0.2012, |
| "step": 14650 |
| }, |
| { |
| "epoch": 2.932, |
| "grad_norm": 2.2906439304351807, |
| "learning_rate": 1.573450422037992e-08, |
| "loss": 0.1803, |
| "step": 14660 |
| }, |
| { |
| "epoch": 2.934, |
| "grad_norm": 1.0971269607543945, |
| "learning_rate": 1.4825640493747462e-08, |
| "loss": 0.1916, |
| "step": 14670 |
| }, |
| { |
| "epoch": 2.936, |
| "grad_norm": 1.7314507961273193, |
| "learning_rate": 1.3943773571831188e-08, |
| "loss": 0.205, |
| "step": 14680 |
| }, |
| { |
| "epoch": 2.9379999999999997, |
| "grad_norm": 1.9683465957641602, |
| "learning_rate": 1.3088908230308728e-08, |
| "loss": 0.1355, |
| "step": 14690 |
| }, |
| { |
| "epoch": 2.94, |
| "grad_norm": 2.5472261905670166, |
| "learning_rate": 1.2261049098634125e-08, |
| "loss": 0.1713, |
| "step": 14700 |
| }, |
| { |
| "epoch": 2.942, |
| "grad_norm": 1.2666493654251099, |
| "learning_rate": 1.146020066001119e-08, |
| "loss": 0.1523, |
| "step": 14710 |
| }, |
| { |
| "epoch": 2.944, |
| "grad_norm": 1.82552170753479, |
| "learning_rate": 1.0686367251368512e-08, |
| "loss": 0.1705, |
| "step": 14720 |
| }, |
| { |
| "epoch": 2.9459999999999997, |
| "grad_norm": 1.3673129081726074, |
| "learning_rate": 9.939553063337826e-09, |
| "loss": 0.1737, |
| "step": 14730 |
| }, |
| { |
| "epoch": 2.948, |
| "grad_norm": 2.0833985805511475, |
| "learning_rate": 9.219762140231237e-09, |
| "loss": 0.1701, |
| "step": 14740 |
| }, |
| { |
| "epoch": 2.95, |
| "grad_norm": 1.2477442026138306, |
| "learning_rate": 8.526998380016804e-09, |
| "loss": 0.1864, |
| "step": 14750 |
| }, |
| { |
| "epoch": 2.952, |
| "grad_norm": 2.231452226638794, |
| "learning_rate": 7.861265534300222e-09, |
| "loss": 0.1928, |
| "step": 14760 |
| }, |
| { |
| "epoch": 2.9539999999999997, |
| "grad_norm": 1.7045234441757202, |
| "learning_rate": 7.222567208303721e-09, |
| "loss": 0.1471, |
| "step": 14770 |
| }, |
| { |
| "epoch": 2.956, |
| "grad_norm": 1.1955382823944092, |
| "learning_rate": 6.610906860845534e-09, |
| "loss": 0.1582, |
| "step": 14780 |
| }, |
| { |
| "epoch": 2.958, |
| "grad_norm": 1.9184893369674683, |
| "learning_rate": 6.026287804322683e-09, |
| "loss": 0.2123, |
| "step": 14790 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 2.046898126602173, |
| "learning_rate": 5.468713204692111e-09, |
| "loss": 0.1694, |
| "step": 14800 |
| }, |
| { |
| "epoch": 2.9619999999999997, |
| "grad_norm": 1.4556955099105835, |
| "learning_rate": 4.9381860814529115e-09, |
| "loss": 0.1834, |
| "step": 14810 |
| }, |
| { |
| "epoch": 2.964, |
| "grad_norm": 1.5225237607955933, |
| "learning_rate": 4.434709307631901e-09, |
| "loss": 0.1509, |
| "step": 14820 |
| }, |
| { |
| "epoch": 2.966, |
| "grad_norm": 2.2594845294952393, |
| "learning_rate": 3.9582856097658554e-09, |
| "loss": 0.1882, |
| "step": 14830 |
| }, |
| { |
| "epoch": 2.968, |
| "grad_norm": 1.5588994026184082, |
| "learning_rate": 3.5089175678892916e-09, |
| "loss": 0.203, |
| "step": 14840 |
| }, |
| { |
| "epoch": 2.9699999999999998, |
| "grad_norm": 2.009692668914795, |
| "learning_rate": 3.0866076155172677e-09, |
| "loss": 0.1684, |
| "step": 14850 |
| }, |
| { |
| "epoch": 2.972, |
| "grad_norm": 1.583719253540039, |
| "learning_rate": 2.6913580396359384e-09, |
| "loss": 0.1863, |
| "step": 14860 |
| }, |
| { |
| "epoch": 2.974, |
| "grad_norm": 1.9162685871124268, |
| "learning_rate": 2.323170980685907e-09, |
| "loss": 0.1777, |
| "step": 14870 |
| }, |
| { |
| "epoch": 2.976, |
| "grad_norm": 1.2543405294418335, |
| "learning_rate": 1.982048432554451e-09, |
| "loss": 0.1686, |
| "step": 14880 |
| }, |
| { |
| "epoch": 2.9779999999999998, |
| "grad_norm": 2.3343191146850586, |
| "learning_rate": 1.6679922425638651e-09, |
| "loss": 0.2004, |
| "step": 14890 |
| }, |
| { |
| "epoch": 2.98, |
| "grad_norm": 2.2958972454071045, |
| "learning_rate": 1.3810041114581397e-09, |
| "loss": 0.2172, |
| "step": 14900 |
| }, |
| { |
| "epoch": 2.982, |
| "grad_norm": 1.3667978048324585, |
| "learning_rate": 1.1210855933996289e-09, |
| "loss": 0.1731, |
| "step": 14910 |
| }, |
| { |
| "epoch": 2.984, |
| "grad_norm": 2.113403558731079, |
| "learning_rate": 8.88238095955174e-10, |
| "loss": 0.1527, |
| "step": 14920 |
| }, |
| { |
| "epoch": 2.9859999999999998, |
| "grad_norm": 2.218931198120117, |
| "learning_rate": 6.824628800911059e-10, |
| "loss": 0.1829, |
| "step": 14930 |
| }, |
| { |
| "epoch": 2.988, |
| "grad_norm": 1.7202938795089722, |
| "learning_rate": 5.037610601665855e-10, |
| "loss": 0.148, |
| "step": 14940 |
| }, |
| { |
| "epoch": 2.99, |
| "grad_norm": 1.6666905879974365, |
| "learning_rate": 3.521336039263856e-10, |
| "loss": 0.1999, |
| "step": 14950 |
| }, |
| { |
| "epoch": 2.992, |
| "grad_norm": 1.587571620941162, |
| "learning_rate": 2.2758133249756175e-10, |
| "loss": 0.14, |
| "step": 14960 |
| }, |
| { |
| "epoch": 2.9939999999999998, |
| "grad_norm": 1.2970430850982666, |
| "learning_rate": 1.3010492038223465e-10, |
| "loss": 0.1418, |
| "step": 14970 |
| }, |
| { |
| "epoch": 2.996, |
| "grad_norm": 1.8305574655532837, |
| "learning_rate": 5.970489545537028e-11, |
| "loss": 0.1642, |
| "step": 14980 |
| }, |
| { |
| "epoch": 2.998, |
| "grad_norm": 1.9229685068130493, |
| "learning_rate": 1.63816389631144e-11, |
| "loss": 0.1686, |
| "step": 14990 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.2971068620681763, |
| "learning_rate": 1.3538551946190582e-13, |
| "loss": 0.1871, |
| "step": 15000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 15000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.3591424743807386e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |